def log(self, input_count, batch_count, additional_values):
    logdict = OrderedDict()
    delta_t = time.time() - self.last_time
    delta_count = input_count - self.last_input_count
    self.last_time = time.time()
    self.last_input_count = input_count

    logdict['time_spent'] = delta_t
    logdict['cumulative_time_spent'] = time.time() - self.start_time
    logdict['input_count'] = delta_count
    logdict['cumulative_input_count'] = input_count
    logdict['cumulative_batch_count'] = batch_count
    if delta_t > 0:
        logdict['inputs_per_sec'] = delta_count / delta_t
    else:
        logdict['inputs_per_sec'] = 0.0

    for k in sorted(viewkeys(additional_values)):
        logdict[k] = additional_values[k]

    # Write the headers if they are not written yet
    if self.headers is None:
        self.headers = list(viewkeys(logdict))
        self.logstr(",".join(self.headers))
    self.logstr(",".join(str(v) for v in viewvalues(logdict)))

    for logger in self.external_loggers:
        try:
            logger.log(logdict)
        except Exception as e:
            logging.warning("Failed to call ExternalLogger: {}".format(e))
def prepare_graph(graph1, graph2):
    """Fix ids on graphs, match the <main> node

    Return hashable nodes1 and nodes2 from g1 and g2, respectively
    """
    fix_caller_id(graph1)
    fix_caller_id(graph2)
    nodes1 = [HashableDict(x) for x in graph1["nodes"]]
    nodes2 = [HashableDict(x) for x in graph2["nodes"]]
    graph1["hnodes"], graph2["hnodes"] = nodes1, nodes2
    graph1["node_indexes"] = set(range(len(nodes1)))
    graph2["node_indexes"] = set(range(len(nodes2)))
    graph1["levels"] = defaultdict(set)
    graph2["levels"] = defaultdict(set)
    for node in nodes1:
        graph1["levels"][node["node"]["level"]].add(node["index"])
    for node in nodes2:
        graph2["levels"][node["node"]["level"]].add(node["index"])

    if nodes1 and nodes2:
        if nodes1[0]['name'] != nodes2[0]['name']:
            nodes1[0]["name"] = "<main>"
            nodes2[0]["name"] = "<main>"
        graph1["max_level"] = max(viewkeys(graph1["levels"]))
        graph2["max_level"] = max(viewkeys(graph2["levels"]))
    else:
        graph1["max_level"] = -1
        graph2["max_level"] = -1

    return nodes1, nodes2
def prepare_graph(graph1, graph2):
    """Fix ids on graphs, match the <main> node

    Return hashable nodes1 and nodes2 from g1 and g2, respectively
    """
    fix_caller_id(graph1)
    fix_caller_id(graph2)
    nodes1 = [HashableDict(x) for x in graph1["nodes"]]
    nodes2 = [HashableDict(x) for x in graph2["nodes"]]
    graph1["hnodes"], graph2["hnodes"] = nodes1, nodes2
    graph1["node_indexes"] = set(range(len(nodes1)))
    graph2["node_indexes"] = set(range(len(nodes2)))
    graph1["levels"] = defaultdict(set)
    graph2["levels"] = defaultdict(set)
    for node in nodes1:
        graph1["levels"][node["node"]["level"]].add(node["index"])
    for node in nodes2:
        graph2["levels"][node["node"]["level"]].add(node["index"])

    if nodes1 and nodes2:
        nodes1[0]["name"] = "<main>"
        nodes2[0]["name"] = "<main>"
        graph1["max_level"] = max(viewkeys(graph1["levels"]))
        graph2["max_level"] = max(viewkeys(graph2["levels"]))
    else:
        graph1["max_level"] = -1
        graph2["max_level"] = -1

    return nodes1, nodes2
def testFilterInclExclDirectories(self):
    "Test MSUnmerged with including and excluding directories filter"
    toDeleteDict = {
        "/store/unmerged/express/prod/2020/1/12": [
            "/store/unmerged/express/prod/2020/1/12/log8.tar",
            "/store/unmerged/express/prod/2020/1/12/log9.tar"]}
    rseData = getBasicRSEData()

    self.msUnmerged.msConfig['dirFilterIncl'] = ["/store/unmerged/data/prod/2018/",
                                                 "/store/unmerged/express"]
    self.msUnmerged.msConfig['dirFilterExcl'] = ["/store/unmerged/logs",
                                                 "/store/unmerged/data/prod",
                                                 "/store/unmerged/alan/prod"]
    self.msUnmerged.protectedLFNs = set()
    filterData = self.msUnmerged.filterUnmergedFiles(rseData)
    self.assertEqual(filterData['counters']['dirsToDeleteAll'], 1)
    self.assertItemsEqual(viewkeys(filterData['files']['toDelete']),
                          viewkeys(toDeleteDict))
    self.assertItemsEqual(list(filterData['files']['toDelete']['/store/unmerged/express/prod/2020/1/12']),
                          toDeleteDict['/store/unmerged/express/prod/2020/1/12'])
def main():
    """
    Expects a dataset name as input argument. It then queries Rucio and DBS
    and compares their blocks and number of files.
    """
    if len(sys.argv) != 2:
        print("A dataset name must be provided in the command line")
        sys.exit(1)
    datasetName = sys.argv[1]
    logger = loggerSetup(logging.INFO)

    rucioOutput = getFromRucio(datasetName, logger)
    dbsOutput, dbsFilesCounter = getFromDBS(datasetName, logger)

    logger.info("*** Dataset: %s", datasetName)
    logger.info("Rucio file count : %s", sum(viewvalues(rucioOutput)))
    logger.info("DBS file count   : %s", dbsFilesCounter['valid'] + dbsFilesCounter['invalid'])
    logger.info("  - valid files  : %s", dbsFilesCounter['valid'])
    logger.info("  - invalid files: %s", dbsFilesCounter['invalid'])
    logger.info("Blocks in Rucio but not in DBS: %s", set(viewkeys(rucioOutput)) - set(viewkeys(dbsOutput)))
    logger.info("Blocks in DBS but not in Rucio: %s", set(viewkeys(dbsOutput)) - set(viewkeys(rucioOutput)))

    for blockname in rucioOutput:
        if blockname not in dbsOutput:
            logger.error("This block does not exist in DBS: %s", blockname)
            continue
        if rucioOutput[blockname] != sum(viewvalues(dbsOutput[blockname])):
            logger.warning("Block with file mismatch: %s", blockname)
            logger.warning("\tRucio: %s\t\tDBS: %s",
                           rucioOutput[blockname], sum(viewvalues(dbsOutput[blockname])))
def oob_list(session, mode, *args, **kwargs):
    """
    Called with the `LIST <MODE>` MSDP command.

    Args:
        session (Session): The Session asking for the information
        mode (str): The available properties. One of
            "COMMANDS"               Request an array of commands supported
                                     by the server.
            "LISTS"                  Request an array of lists supported
                                     by the server.
            "CONFIGURABLE_VARIABLES" Request an array of variables the client
                                     can configure.
            "REPORTABLE_VARIABLES"   Request an array of variables the server
                                     will report.
            "REPORTED_VARIABLES"     Request an array of variables currently
                                     being reported.
            "SENDABLE_VARIABLES"     Request an array of variables the server
                                     will send.

    Examples:
        oob in: LIST COMMANDS
        oob out: (COMMANDS, (SEND, REPORT, LIST, ...))

    """
    mode = mode.upper()
    if mode == "COMMANDS":
        session.msg(oob=("COMMANDS", ("LIST",
                                      "REPORT",
                                      "UNREPORT",
                                      # "RESET",
                                      "SEND")))
    elif mode == "REPORTABLE_VARIABLES":
        session.msg(oob=("REPORTABLE_VARIABLES",
                         tuple(key for key in viewkeys(OOB_REPORTABLE))))
    elif mode == "REPORTED_VARIABLES":
        # we need to check so as to use the right return value depending on if it is
        # an Attribute (identified by tracking the db_value field) or a normal database field
        # reported is a list of tuples (obj, propname, args, kwargs)
        reported = OOB_HANDLER.get_all_monitors(session)
        reported = [rep[0].key if rep[1] == "db_value" else rep[1] for rep in reported]
        session.msg(oob=("REPORTED_VARIABLES", reported))
    elif mode == "SENDABLE_VARIABLES":
        session.msg(oob=("SENDABLE_VARIABLES",
                         tuple(key for key in viewkeys(OOB_REPORTABLE))))
    elif mode == "CONFIGURABLE_VARIABLES":
        # Not implemented (game specific)
        oob_error(session, "Not implemented (game specific)")
    else:
        # mode == "LISTS" or not given
        session.msg(oob=("LISTS", ("REPORTABLE_VARIABLES",
                                   "REPORTED_VARIABLES",
                                   # "CONFIGURABLE_VARIABLES",
                                   "SENDABLE_VARIABLES")))
def test__iter_stat_sources(self):
    st = fbstat.StatStorage("test/fess/data/test1.stats",
                            ["test/fess/data/fallback1.stats",
                             "test/fess/data/fallback2.stats"])
    source_iter = st._iter_stat_sources()
    first = next(source_iter)
    self.assertEqual(viewkeys(first["stem"]), {5})
    self.assertEqual(len(first["stem"][5]), 1)
    second = next(source_iter)
    self.assertEqual(viewkeys(second["stem"]), {6})
    self.assertEqual(len(second["stem"][6]), 1)
    third = next(source_iter)
    self.assertEqual(viewkeys(third["stem"]), {5, 10})
    self.assertEqual(len(third["stem"][5]), 2)
def calcDistMatchArr(matchArr, tKey, mKey):
    """Calculate the euclidean distance of all array positions in "matchArr".

    :param matchArr: a dictionary of ``numpy.arrays`` containing at least two
        entries that are treated as cartesian coordinates.
    :param tKey: #TODO: docstring
    :param mKey: #TODO: docstring

    :returns: #TODO: docstring
        {'eucDist': numpy.array([eucDistance, eucDistance, ...]),
         'posPairs': numpy.array([[pos1, pos2], [pos1, pos2], ...])
         }
    """
    # Calculate a sorted list of all euclidean feature distances
    matchArrSize = listvalues(matchArr)[0].size

    distInfo = {'posPairs': list(), 'eucDist': list()}
    _matrix = numpy.swapaxes(numpy.array([matchArr[tKey], matchArr[mKey]]), 0, 1)

    for pos1 in range(matchArrSize - 1):
        for pos2 in range(pos1 + 1, matchArrSize):
            distInfo['posPairs'].append((pos1, pos2))
    distInfo['posPairs'] = numpy.array(distInfo['posPairs'])
    distInfo['eucDist'] = scipy.spatial.distance.pdist(_matrix)

    distSort = numpy.argsort(distInfo['eucDist'])
    for key in list(viewkeys(distInfo)):
        distInfo[key] = distInfo[key][distSort]

    return distInfo
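# A small usage sketch (not from the original module), assuming numpy and scipy
# are importable as in calcDistMatchArr above: two coordinate arrays ('rt' and
# 'mz') for three features yield three pairwise distances, returned sorted by
# increasing euclidean distance together with the matching position pairs.
import numpy

matchArr = {'rt': numpy.array([0.0, 1.0, 5.0]),
            'mz': numpy.array([0.0, 0.0, 0.0])}
distInfo = calcDistMatchArr(matchArr, 'rt', 'mz')
assert list(distInfo['eucDist']) == [1.0, 4.0, 5.0]
assert [tuple(p) for p in distInfo['posPairs']] == [(0, 1), (1, 2), (0, 2)]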
def InitFromLSTMParams(lstm_pblobs, param_values):
    '''
    Set the parameters of LSTM based on predefined values
    '''
    weight_params = GetLSTMParamNames()['weights']
    bias_params = GetLSTMParamNames()['biases']
    for input_type in viewkeys(param_values):
        weight_values = [
            param_values[input_type][w].flatten()
            for w in weight_params
        ]
        wmat = np.array([])
        for w in weight_values:
            wmat = np.append(wmat, w)
        bias_values = [
            param_values[input_type][b].flatten()
            for b in bias_params
        ]
        bm = np.array([])
        for b in bias_values:
            bm = np.append(bm, b)

        weights_blob = lstm_pblobs[input_type]['weights']
        bias_blob = lstm_pblobs[input_type]['biases']
        cur_weight = workspace.FetchBlob(weights_blob)
        cur_biases = workspace.FetchBlob(bias_blob)

        workspace.FeedBlob(
            weights_blob,
            wmat.reshape(cur_weight.shape).astype(np.float32))
        workspace.FeedBlob(
            bias_blob,
            bm.reshape(cur_biases.shape).astype(np.float32))
def __lt__(self, other):
    if not isinstance(other, self.__class__):
        return NotImplemented

    if self.quality < other.quality:
        return True
    elif self.quality > other.quality:
        return False

    if self.type != other.type:
        return self.type is STAR

    if self.subtype != other.subtype:
        return self.subtype is STAR

    return viewkeys(self.parameters) < viewkeys(other.parameters)
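# A minimal standalone sketch (not from the original class) of the set
# semantics that the final line of __lt__ above relies on: dict key views
# compare like sets, so "<" means "is a proper subset of". It assumes the
# future package is available, as the snippets here already do.
from future.utils import viewkeys

a = {"q": 0.5}
b = {"q": 0.5, "charset": "utf-8"}
assert viewkeys(a) < viewkeys(b)        # {"q"} is a proper subset
assert not viewkeys(b) < viewkeys(a)
assert not viewkeys(a) < viewkeys(a)    # a set is not a proper subset of itself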
def __init__(self, url, dispatcher=None, agent=None, logger_name=__name__):
    super(ServiceStatusClient, self).__init__(url)

    self._state_lock = Lock()
    self._rpc_lock = Lock()
    self._rpc_dispatcher = dispatcher
    self._queued_rpcs = {}
    self._logger = logging.getLogger(logger_name)

    self.services = {}
    self._name_map = {}
    self._on_change_callback = None

    # Register callbacks for all of the status notifications
    self.add_message_type(command_formats.ServiceStatusChanged, self._on_status_change)
    self.add_message_type(command_formats.ServiceAdded, self._on_service_added)
    self.add_message_type(command_formats.HeartbeatReceived, self._on_heartbeat)
    self.add_message_type(command_formats.NewMessage, self._on_message)
    self.add_message_type(command_formats.NewHeadline, self._on_headline)
    self.add_message_type(command_formats.RPCCommand, self._on_rpc_command)
    self.add_message_type(command_formats.RPCResponse, self._on_rpc_response)
    self.start()

    with self._state_lock:
        self.services = self.sync_services()

        for i, name in enumerate(viewkeys(self.services)):
            self._name_map[i] = name

    if agent is not None:
        self.register_agent(agent)
def __eq__(self, other):
    """Approximate numerical equality."""
    if not isinstance(other, type(self)):
        return NotImplemented

    for term in viewkeys(self.terms) | viewkeys(other.terms):
        if term in self.terms and term in other.terms:
            if not numpy.isclose(self.terms[term], other.terms[term]):
                return False
        elif term in self.terms:
            if not numpy.isclose(self.terms[term], 0.0):
                return False
        else:
            if not numpy.isclose(other.terms[term], 0.0):
                return False
    return True
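# A standalone numeric sketch (not from the original class) of the comparison
# strategy above: iterate over the union of both key views and treat a term
# missing from one mapping as having coefficient 0.0.
import numpy
from future.utils import viewkeys

a = {'X0': 1.0, 'Z1': 1e-12}
b = {'X0': 1.0}
assert all(numpy.isclose(a.get(term, 0.0), b.get(term, 0.0))
           for term in viewkeys(a) | viewkeys(b))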
def update(dic1, dic2):
    '''update dic1 with dic2 recursively'''
    dickeys = {k: dic2.pop(k) for k in viewkeys(dic1)
               if isinstance(dic1[k], dict) and isinstance(dic2.get(k, None), dict)}
    dic1.update(dic2)
    for k in dickeys:
        update(dic1[k], dickeys[k])
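# A small usage sketch (not part of the original module) showing the recursive
# merge behaviour of update(): nested dicts are merged key by key instead of
# being replaced wholesale, and the second dict is consumed in the process.
cfg = {"db": {"host": "localhost", "port": 5432}, "debug": False}
override = {"db": {"port": 6543}, "debug": True}
update(cfg, override)
assert cfg == {"db": {"host": "localhost", "port": 6543}, "debug": True}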
def clean_object_caches(obj):
    """
    Clean all object caches on the given object.

    Args:
        obj (Object instance): An object whose caches to clean.

    Notes:
        This is only the contents cache these days.

    """
    global _TYPECLASSMODELS, _OBJECTMODELS
    if not _TYPECLASSMODELS:
        from evennia.typeclasses import models as _TYPECLASSMODELS

    if not obj:
        return

    # contents cache
    try:
        _SA(obj, "_contents_cache", None)
    except AttributeError:
        pass

    # on-object property cache (copy the key view to a list, since _DA deletes
    # attributes from obj.__dict__ while we iterate)
    [_DA(obj, cname) for cname in list(viewkeys(obj.__dict__))
     if cname.startswith("_cached_db_")]
    try:
        hashid = _GA(obj, "hashid")
        _TYPECLASSMODELS._ATTRIBUTE_CACHE[hashid] = {}
    except AttributeError:
        pass
def ShiftActivationDevices(model, activations, shifts):
    '''
    Function to enable simple model-parallelism for data_parallel_model
    models. 'shifts' is a dictionary from_gpu -> to_gpu, and activations is
    a list of activation blobs (without the gpu_x/ prefix -- use
    GetActivationBlobs()). Operators handling these activations are shifted
    to the gpu declared in 'shifts'. Also related operators such as gradient
    operators will be moved. Appropriate copy-ops are inserted.

    This allows shifting memory usage from one gpu to another, enabling
    bigger models to be trained.
    '''
    assert set(viewvalues(shifts)).intersection(set(viewkeys(shifts))) == set()
    for from_device, to_device in viewitems(shifts):
        log.info(
            "Shifting {} activations from {} --> {}".
            format(len(activations), from_device, to_device)
        )
        _ShiftActivationDevices(model, activations, from_device, to_device)

    param_init_net, blob_to_device = core.InjectCrossDeviceCopies(model.param_init_net)
    net, _blob_to_device = core.InjectCrossDeviceCopies(model.net, blob_to_device)
    model.param_init_net = param_init_net
    model.net = net
def calcDistMatchArr(matchArr, tKey, mKey):
    """Calculate the euclidean distance of all array positions in "matchArr".

    :param matchArr: a dictionary of ``numpy.arrays`` containing at least two
        entries that are treated as cartesian coordinates.
    :param tKey: #TODO: docstring
    :param mKey: #TODO: docstring

    :returns: #TODO: docstring
        {'eucDist': numpy.array([eucDistance, eucDistance, ...]),
         'posPairs': numpy.array([[pos1, pos2], [pos1, pos2], ...])
         }
    """
    # Calculate a sorted list of all euclidean feature distances
    matchArrSize = listvalues(matchArr)[0].size

    distInfo = {'posPairs': list(), 'eucDist': list()}
    _matrix = numpy.swapaxes(numpy.array([matchArr[tKey], matchArr[mKey]]), 0, 1)

    for pos1 in range(matchArrSize - 1):
        for pos2 in range(pos1 + 1, matchArrSize):
            distInfo['posPairs'].append((pos1, pos2))
    distInfo['posPairs'] = numpy.array(distInfo['posPairs'])
    distInfo['eucDist'] = scipy.spatial.distance.pdist(_matrix)

    distSort = numpy.argsort(distInfo['eucDist'])
    for key in list(viewkeys(distInfo)):
        distInfo[key] = distInfo[key][distSort]

    return distInfo
def loadByID(self, configID):
    """
    _loadByID_

    Load a document from the server given its couchID
    """
    try:
        self.document = self.database.document(id=configID)
        if 'owner' in self.document:
            self.connectUserGroup(groupname=self.document['owner'].get('group', None),
                                  username=self.document['owner'].get('user', None))
        if '_attachments' in self.document:
            # Then we need to load the attachments
            for key in viewkeys(self.document['_attachments']):
                self.loadAttachment(name=key)
    except CouchNotFoundError as ex:
        msg = "Document with id %s not found in couch\n" % (configID)
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise ConfigCacheException(message=msg)
    except Exception as ex:
        msg = "Error loading document from couch\n"
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise ConfigCacheException(message=msg)

    return
def ShiftActivationDevices(model, activations, shifts):
    '''
    Function to enable simple model-parallelism for data_parallel_model
    models. 'shifts' is a dictionary from_gpu -> to_gpu, and activations is
    a list of activation blobs (without the gpu_x/ prefix -- use
    GetActivationBlobs()). Operators handling these activations are shifted
    to the gpu declared in 'shifts'. Also related operators such as gradient
    operators will be moved. Appropriate copy-ops are inserted.

    This allows shifting memory usage from one gpu to another, enabling
    bigger models to be trained.
    '''
    assert set(viewvalues(shifts)).intersection(set(viewkeys(shifts))) == set()
    for from_device, to_device in viewitems(shifts):
        log.info("Shifting {} activations from {} --> {}".format(
            len(activations), from_device, to_device))
        _ShiftActivationDevices(model, activations, from_device, to_device)

    param_init_net, blob_to_device = core.InjectCrossDeviceCopies(
        model.param_init_net)
    net, _blob_to_device = core.InjectCrossDeviceCopies(
        model.net, blob_to_device)
    model.param_init_net = param_init_net
    model.net = net
def write(self, segment_id, result):
    # result is surely not None
    csvwriter, isdict, seg_id_colname = \
        self.csvwriter, self.csvwriterisdict, SEGMENT_ID_COLNAME
    if csvwriter is None:  # instantiate writer according to first input
        isdict = self.csvwriterisdict = isinstance(result, dict)
        # write first column(s):
        if isdict:
            # we need to pass a list and not an iterable cause the iterable needs
            # to be consumed twice (the doc states differently, however...):
            fieldnames = [seg_id_colname]
            fieldnames.extend(viewkeys(result))
            csvwriter = self.csvwriter = csv.DictWriter(self.outputfilehandle,
                                                        fieldnames=fieldnames,
                                                        **self.csvwriterkwargs)
            # write header if we need it (file does not exist, append is False, or
            # file exists, append=True but file has no row):
            if not self.append or self.outputfileempty:
                csvwriter.writeheader()
        else:
            csvwriter = self.csvwriter = csv.writer(self.outputfilehandle,
                                                    **self.csvwriterkwargs)

    if isdict:
        result[seg_id_colname] = segment_id
    else:
        # we might have numpy arrays, we should support variable types (numeric, strings,..)
        res = [segment_id]
        res.extend(result)
        result = res

    csvwriter.writerow(result)
def write_gul_input_files(
        exposure_fp,
        keys_fp,
        target_dir,
        exposure_profile=get_default_exposure_profile(),
        oasis_files_prefixes={
            'items': 'items',
            'complex_items': 'complex_items',
            'coverages': 'coverages',
            'gulsummaryxref': 'gulsummaryxref'
        },
        write_inputs_table_to_file=False):
    """
    Writes the standard Oasis GUL input files, namely::

        items.csv
        coverages.csv
        gulsummaryxref.csv

    with the addition of a complex items file in case of a complex/custom model
    """
    # Work on a copy so that popping 'complex_items' below does not mutate the
    # shared (mutable) default prefixes dict across calls
    oasis_files_prefixes = dict(oasis_files_prefixes)

    # Clean the target directory path
    target_dir = as_path(target_dir, 'Target IL input files directory', is_dir=True, preexists=False)

    gul_inputs_df, exposure_df = get_gul_input_items(exposure_fp, keys_fp, exposure_profile=exposure_profile)

    if write_inputs_table_to_file:
        gul_inputs_df.to_csv(path_or_buf=os.path.join(target_dir, 'gul_inputs.csv'),
                             index=False, encoding='utf-8', chunksize=1000)

    if not gul_inputs_df[['model_data']].any().any():
        gul_inputs_df.drop(['model_data'], axis=1, inplace=True)
        if oasis_files_prefixes.get('complex_items'):
            oasis_files_prefixes.pop('complex_items')

    gul_input_files = {
        k: os.path.join(target_dir, '{}.csv'.format(oasis_files_prefixes[k]))
        for k in viewkeys(oasis_files_prefixes)
    }

    concurrent_tasks = (
        Task(getattr(sys.modules[__name__], 'write_{}_file'.format(f)),
             args=(gul_inputs_df.copy(deep=True), gul_input_files[f],),
             key=f)
        for f in gul_input_files
    )
    num_ps = min(len(gul_input_files), multiprocessing.cpu_count())
    for _, _ in multithread(concurrent_tasks, pool_size=num_ps):
        pass

    return gul_input_files, gul_inputs_df, exposure_df
def cleanup_bin_directory(directory):
    """
    Clean the tar and binary files.
    """
    for file in chain([TAR_FILE], (f + '.bin' for f in viewkeys(INPUT_FILES))):
        file_path = os.path.join(directory, file)
        if os.path.exists(file_path):
            os.remove(file_path)
def __init__(self, d, sample_size):
    if len(d) <= sample_size:
        super(Sample, self).__init__(d)
    else:
        super(Sample, self).__init__(
            {k: d[k] for k in random.sample(viewkeys(d), sample_size)})
    self.original_length = len(d)
def __init__(self, d, sample_size, original_length):
    if len(d) <= sample_size:
        super(Sample, self).__init__(d)
    else:
        super(Sample, self).__init__(
            {k: d[k] for k in random.sample(viewkeys(d), sample_size)})
    self.original_length = original_length
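# A minimal standalone sketch (not from the original class) of the sampling
# idea used by the two __init__ variants above: draw sample_size keys and
# build a sub-dict from them. Materializing the keys with list() keeps this
# valid on newer Python versions, where random.sample requires a sequence.
import random

d = {i: i * i for i in range(100)}
keys = random.sample(list(d), 5)
sub = {k: d[k] for k in keys}
assert len(sub) == 5 and all(sub[k] == d[k] for k in sub)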
def add_arguments(self):
    super(NowSetDefault, self).add_arguments()
    add_arg = self.add_argument
    add_arg("--model", type=str, default="*",
            choices=["*"] + list(viewkeys(MetaModel.__classes__)),
            help="""specifies the model""")
    add_arg("defaults", nargs=argparse.REMAINDER,
            help="Default assignments. Use the format var=value")
def visit_group(self, group):
    nodes = list(viewkeys(group.nodes))
    _group = Group()
    _group.use_id = False
    _group.initialize(nodes[1].visit(self), nodes[0].visit(self))
    for element in nodes[2:]:
        _group.add_subelement(element.visit(self))
    _group.level = group.level
    return _group
def expectedLabelPosition(peptide, labelStateInfo, sequence=None,
                          modPositions=None):
    """Returns a modification description of a certain label state of a peptide.

    :param peptide: Peptide sequence used to calculate the expected label state
        modifications
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels` that
        describes a label state
    :param sequence: unmodified amino acid sequence of :var:`peptide`, if None
        it is generated by :func:`maspy.peptidemethods.removeModifications()`
    :param modPositions: dictionary describing the modification state of
        "peptide", if None it is generated by
        :func:`maspy.peptidemethods.returnModPositions()`

    :returns: {sequence position: sorted list of expected label modifications
                  on that position, ...
               }
    """
    if modPositions is None:
        modPositions = maspy.peptidemethods.returnModPositions(peptide,
                                                               indexStart=0)
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    currLabelMods = dict()
    for labelPosition, labelSymbols in viewitems(labelStateInfo['aminoAcidLabels']):
        labelSymbols = aux.toList(labelSymbols)
        if labelSymbols == ['']:
            pass
        elif labelPosition == 'nTerm':
            currLabelMods.setdefault(0, list())
            currLabelMods[0].extend(labelSymbols)
        else:
            for sequencePosition in aux.findAllSubstrings(sequence, labelPosition):
                currLabelMods.setdefault(sequencePosition, list())
                currLabelMods[sequencePosition].extend(labelSymbols)

    if labelStateInfo['excludingModifications'] is not None:
        for excludingMod, excludedLabelSymbol in viewitems(labelStateInfo['excludingModifications']):
            if excludingMod not in modPositions:
                continue
            for excludingModPos in modPositions[excludingMod]:
                if excludingModPos not in currLabelMods:
                    continue
                if excludedLabelSymbol not in currLabelMods[excludingModPos]:
                    continue

                if len(currLabelMods[excludingModPos]) == 1:
                    del currLabelMods[excludingModPos]
                else:
                    excludedModIndex = currLabelMods[excludingModPos].index(excludedLabelSymbol)
                    currLabelMods[excludingModPos].pop(excludedModIndex)

    for sequencePosition in list(viewkeys(currLabelMods)):
        currLabelMods[sequencePosition] = sorted(currLabelMods[sequencePosition])
    return currLabelMods
def diff_dict(before, after):
    """Compare dicts. Return a dict with keys shared by both dicts that
    have different values

    key -> [before[key], after[key]]
    """
    result = OrderedDict()
    for key in viewkeys(before):
        if key != "id" and before[key] != after[key]:
            result[key] = [before[key], after[key]]
    return result
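# A tiny usage sketch (not from the original project) of diff_dict(): both
# dicts are assumed to carry the same keys, and the "id" key is ignored.
from collections import OrderedDict

before = OrderedDict([("id", 1), ("name", "trial_a"), ("status", "running")])
after = OrderedDict([("id", 1), ("name", "trial_a"), ("status", "finished")])
assert diff_dict(before, after) == {"status": ["running", "finished"]}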
def generate_sample_xml(self, samples=None):
    """Generates the sample XML file

    Parameters
    ----------
    samples : list of str, optional
        The list of samples to be included in the sample xml. If not
        provided or an empty list is provided, all the samples are used

    Returns
    -------
    ET.Element
        Object with sample XML values
    """
    sample_set = ET.Element('SAMPLE_SET', {
        'xmlns:xsi': self.xmlns_xsi,
        "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"})

    if not samples:
        samples = viewkeys(self.samples)

    for sample_name in sorted(samples):
        sample_info = dict(self.samples[sample_name])
        sample = ET.SubElement(sample_set, 'SAMPLE', {
            'alias': self._get_sample_alias(sample_name),
            'center_name': qiita_config.ebi_center_name})

        sample_title = ET.SubElement(sample, 'TITLE')
        sample_title.text = escape(clean_whitespace(sample_name))

        sample_sample_name = ET.SubElement(sample, 'SAMPLE_NAME')
        taxon_id = ET.SubElement(sample_sample_name, 'TAXON_ID')
        text = sample_info.pop('taxon_id')
        taxon_id.text = escape(clean_whitespace(text))

        scientific_name = ET.SubElement(sample_sample_name, 'SCIENTIFIC_NAME')
        text = sample_info.pop('scientific_name')
        scientific_name.text = escape(clean_whitespace(text))

        description = ET.SubElement(sample, 'DESCRIPTION')
        text = sample_info.pop('description')
        description.text = escape(clean_whitespace(text))

        if sample_info:
            sample_attributes = ET.SubElement(sample, 'SAMPLE_ATTRIBUTES')
            self._add_dict_as_tags_and_values(sample_attributes,
                                              'SAMPLE_ATTRIBUTE', sample_info)

    return sample_set
def _parse_port(self, port):
    if port is None or len(port) == 0:
        return

    if '@' in port:
        raise ArgumentError("Configuration files are not yet supported as part of a port argument",
                            port=port)

    pairs = port.split(';')
    for pair in pairs:
        name, _, value = pair.partition('=')
        if len(name) == 0 or len(value) == 0:
            continue

        name = name.strip()
        value = value.strip()

        if name == 'device':
            device_name = value
            if device_name in DEVICE_ALIASES:
                device_name = DEVICE_ALIASES[device_name]
            if device_name in KNOWN_DEVICES:
                self._default_device_info = KNOWN_DEVICES.get(device_name)
            else:
                raise ArgumentError("Unknown device name or alias, please select from known_devices",
                                    device_name=device_name,
                                    known_devices=[x for x in viewkeys(DEVICE_ALIASES)])
        elif name == 'serial':
            self._jlink_serial = value
        elif name == 'mux':
            mux = value
            if mux in KNOWN_MULTIPLEX_FUNCS:
                self._mux_func = KNOWN_MULTIPLEX_FUNCS[mux]
            else:
                raise ArgumentError("Unknown multiplexer, please select from known_multiplex_funcs",
                                    mux=mux,
                                    known_multiplex_funcs=[x for x in viewkeys(KNOWN_MULTIPLEX_FUNCS)])
def test_output_and_bin_input_files_are_removed(self):
    with TemporaryDirectory() as d:
        Path(os.path.join(d, TAR_FILE)).touch()
        for f in viewvalues(INPUT_FILES):
            Path(os.path.join(d, f['name'] + '.bin')).touch()

        cleanup_bin_directory(d)

        self.assertFalse(os.path.exists(os.path.join(d, TAR_FILE)))
        for f in viewkeys(INPUT_FILES):
            self.assertFalse(os.path.exists(os.path.join(d, f + '.bin')))
def _trainClassifier(self, **kwargs):  # pragma: no cover
    labels = numpy.array(self.training_data['label'] == b'match', dtype='int8')
    examples = self.training_data['distances']

    classifier_args = backport.signature(self.classifier.fit).parameters
    classifier_args = {k: kwargs[k]
                       for k in viewkeys(kwargs) & classifier_args}

    self.classifier.fit(examples, labels, **classifier_args)
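# A standalone sketch (not from the original class) of the trick used in
# _trainClassifier above: keep only the keyword arguments the target callable
# actually accepts, by intersecting the kwargs keys with its signature. Here
# inspect.signature stands in for the project's backport.signature helper.
import inspect

def call_with_supported_kwargs(func, *args, **kwargs):
    supported = inspect.signature(func).parameters
    filtered = {k: kwargs[k] for k in kwargs.keys() & supported}
    return func(*args, **filtered)

def fit(x, y, max_iter=10):
    return (x, y, max_iter)

# 'verbose' is silently dropped because fit() does not accept it
assert call_with_supported_kwargs(fit, 1, 2, max_iter=5, verbose=True) == (1, 2, 5)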
def generate_sample_xml(self, samples=None):
    """Generates the sample XML file

    Parameters
    ----------
    samples : list of str, optional
        The list of samples to be included in the sample xml. If not
        provided or an empty list is provided, all the samples are used

    Returns
    -------
    ET.Element
        Object with sample XML values
    """
    sample_set = ET.Element('SAMPLE_SET', {
        'xmlns:xsi': self.xmlns_xsi,
        "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"})

    if not samples:
        samples = viewkeys(self.samples)

    for sample_name in sorted(samples):
        sample_info = dict(self.samples[sample_name])
        sample = ET.SubElement(sample_set, 'SAMPLE', {
            'alias': self._get_sample_alias(sample_name),
            'center_name': qiita_config.ebi_center_name}
        )

        sample_title = ET.SubElement(sample, 'TITLE')
        sample_title.text = escape(clean_whitespace(sample_name))

        sample_sample_name = ET.SubElement(sample, 'SAMPLE_NAME')
        taxon_id = ET.SubElement(sample_sample_name, 'TAXON_ID')
        text = sample_info.pop('taxon_id')
        taxon_id.text = escape(clean_whitespace(text))

        scientific_name = ET.SubElement(
            sample_sample_name, 'SCIENTIFIC_NAME')
        text = sample_info.pop('scientific_name')
        scientific_name.text = escape(clean_whitespace(text))

        description = ET.SubElement(sample, 'DESCRIPTION')
        text = sample_info.pop('description')
        description.text = escape(clean_whitespace(text))

        if sample_info:
            sample_attributes = ET.SubElement(sample, 'SAMPLE_ATTRIBUTES')
            self._add_dict_as_tags_and_values(sample_attributes,
                                              'SAMPLE_ATTRIBUTE', sample_info)

    return sample_set
def getSusceptibility(self, size=None):
    """
    Returns the susceptibility defined as:

    (Sum_{s != size(gc)} n_s * s * s) / (Sum_{s != size(gc)} n_s * s)

    Size is the number of nodes in the network. If it is given, it is
    assumed that communities of size 1 are not included in this community
    structure. If there is only 0 or 1 community, zero is returned.
    """
    sd = self.getSizeDist()

    if len(sd) < 1:
        if size is None or size == 0:
            return 0.0
        else:
            return 1.0

    sizeSum = 0
    for key in viewkeys(sd):
        sizeSum += key * sd[key]

    # If no size is given, assume that also communities of size 1 are included
    if size is None:
        sus = 0
        size = sizeSum
    else:
        sus = size - sizeSum  # s = 1
        assert sus >= 0

    # Remove largest component
    gc = max(viewkeys(sd))
    sd[gc] = 0

    # Calculate the susceptibility
    for key in viewkeys(sd):
        sus += key * key * sd[key]
    if (size - gc) == 0:
        return 0.0
    else:
        return float(sus) / float(size - gc)
def analyze_step(analyzer, step):
    proto = step.Proto()
    with analyzer.set_workspace(do_copy=proto.create_workspace):
        if proto.report_net:
            with analyzer.set_workspace(do_copy=True):
                analyzer(step.get_net(proto.report_net))
        all_new_blobs = set()
        substeps = step.Substeps() + [step.get_net(n) for n in proto.network]
        for substep in substeps:
            with analyzer.set_workspace(
                    do_copy=proto.concurrent_substeps) as ws_in:
                analyzer(substep)
                if proto.should_stop_blob:
                    analyzer.need_blob(proto.should_stop_blob)
            if proto.concurrent_substeps:
                new_blobs = set(viewkeys(ws_in)) - set(viewkeys(analyzer.workspace))
                assert len(all_new_blobs & new_blobs) == 0, (
                    'Error: Blobs created by multiple parallel steps: %s' % (
                        ', '.join(all_new_blobs & new_blobs)))
                all_new_blobs |= new_blobs
    for x in all_new_blobs:
        analyzer.define_blob(x)
def _parse_conn_string(self, conn_string):
    """Parse a connection string passed from 'debug -c' or 'connect_direct'

    Returns True if any settings changed in the debug port, which would
    require a jlink disconnection
    """
    disconnection_required = False

    # If 'device' is not given in conn_string, fall back to the default device info
    if conn_string is None or 'device' not in conn_string:
        if self._default_device_info is not None and self._device_info != self._default_device_info:
            disconnection_required = True
            self._device_info = self._default_device_info

    if conn_string is None or len(conn_string) == 0:
        return disconnection_required

    if '@' in conn_string:
        raise ArgumentError("Configuration files are not yet supported as part of a connection string argument",
                            conn_string=conn_string)

    pairs = conn_string.split(';')
    for pair in pairs:
        name, _, value = pair.partition('=')
        if len(name) == 0 or len(value) == 0:
            continue

        name = name.strip()
        value = value.strip()

        if name == 'device':
            if value in DEVICE_ALIASES:
                device_name = DEVICE_ALIASES[value]
                if device_name in KNOWN_DEVICES:
                    device_info = KNOWN_DEVICES.get(device_name)
                    if self._device_info != device_info:
                        self._device_info = device_info
                        disconnection_required = True
                else:
                    raise ArgumentError("Unknown device name or alias, please select from known_devices",
                                        device_name=value,
                                        known_devices=[x for x in viewkeys(DEVICE_ALIASES)])
        elif name == 'channel':
            if self._mux_func is not None:
                if self._channel != int(value):
                    self._channel = int(value)
                    disconnection_required = True
            else:
                print("Warning: multiplexing architecture not selected, channel will not be set")

    return disconnection_required
def _GroupByDevice(model, devices, params, non_data_params):
    '''
    Groups blobs by device, returning a map of [blobname] = {0: BlobRef, 1: ..}.
    Returns ordered dictionary, ensuring the original order.
    '''
    grouped = OrderedDict()
    # Only consider params that were created to be "data parallel"
    params = params[len(non_data_params):]

    assert len(params) % len(devices) == 0,\
        "There should be equal number of params per device"

    num_params_per_device = int(len(params) / len(devices))

    for i, p in enumerate(params):
        assert isinstance(p, core.BlobReference) or \
            isinstance(p, core.GradientSlice), \
            "Param {} is not BlobReference or GradientSlice".format(p)

        name = stripParamName(p)
        gpuid = devices[i // num_params_per_device]

        if isinstance(p, core.BlobReference):
            assert "{}_{}/".format(model._device_prefix, gpuid) in p.GetNameScope(),\
                "Param {} expected to have namescope '{}_{}'".format(str(p), model._device_prefix, gpuid)
        else:
            assert "{}_{}/".format(model._device_prefix, gpuid) in p.indices.GetNameScope(),\
                "Indices {} expected to have namescope '{}_{}'".format(str(p), model._device_prefix, gpuid)
            assert "{}_{}/".format(model._device_prefix, gpuid) in p.values.GetNameScope(),\
                "Values {} expected to have namescope '{}_{}'".format(str(p), model._device_prefix, gpuid)

        if name not in grouped:
            grouped[name] = {}
        grouped[name][gpuid] = p

    # Confirm consistency
    for j, (p, ps) in enumerate(viewitems(grouped)):
        assert \
            len(ps) == len(devices), \
            "Param {} does not have value for each device (only {}: {})".format(
                p, len(ps), ps,
            )
        # Ensure ordering
        if (ps[devices[0]] != params[j]):
            log.error("Params: {}".format(params))
            log.error("Grouped: {}".format(list(viewkeys(grouped))))
            assert ps[devices[0]] == params[j], \
                "Incorrect ordering: {}".format(ps)

    return grouped
def execute(self, func, line, cell, magic_cls):
    formatter = DollarFormatter()
    cell = formatter.vformat(cell, args=[],
                             kwargs=magic_cls.shell.user_ns.copy())
    _, args = self.arguments(func, line)
    result = relational.query(text_to_native_str(cell))
    if args.result:
        magic_cls.shell.user_ns[args.result] = result
    else:
        result = list(result)
        table = Table()
        if result:
            table.append(list(viewkeys(result[0])))
        for line in result:
            table.append(list(viewvalues(line)))
        return table
def encode_basic_properties(body_size, props):
    pieces = [''] * 14
    flags = 0
    enc = ENCODE_PROPS_BASIC

    for key in BASIC_PROPS_SET & set(futils.viewkeys(props)):
        i, f, fun = enc[key]
        flags |= f
        pieces[i] = fun(props[key])

    return (0x02,
            join_as_bytes((
                pack('!HHQH', CLASS_BASIC, 0, body_size, flags),
                join_as_bytes(pieces),
            )))
def modAminoacidsFromLabelInfo(labelDescriptor):
    """Returns a set of all amino acids and termini which can bear a label, as
    described in "labelDescriptor".

    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment

    :returns: #TODO: docstring
    """
    modAminoacids = set()
    for labelStateEntry in viewvalues(labelDescriptor.labels):
        for labelPositionEntry in viewkeys(labelStateEntry['aminoAcidLabels']):
            for modAminoacid in aux.toList(labelPositionEntry):
                if modAminoacid != '':
                    modAminoacids.add(modAminoacid)
    return modAminoacids
def __mul__(self, other):
    if len(self) <= len(other):
        smaller, larger = self._d, other._d
    else:
        smaller, larger = other._d, self._d
    # it's meaningfully faster to check in the key dictview
    # of 'larger' than in the dict directly
    larger_keys = viewkeys(larger)
    common = {k: v * larger[k] for k, v in viewitems(smaller)
              if k in larger_keys}
    return Counter(common)
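# A small standalone sketch (not from the original class) of the same idea:
# multiply two sparse mappings key-wise by iterating over the smaller one and
# keeping only the keys present in both, i.e. a sparse element-wise product.
from collections import Counter
from future.utils import viewitems, viewkeys

def sparse_mul(a, b):
    smaller, larger = (a, b) if len(a) <= len(b) else (b, a)
    larger_keys = viewkeys(larger)
    return Counter({k: v * larger[k]
                    for k, v in viewitems(smaller) if k in larger_keys})

assert sparse_mul(Counter(x=2, y=3), Counter(y=4, z=5)) == Counter(y=12)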
def print_replaced_attributes(replaced, ignore=("id",), extra=tuple(),
                              names=None):
    """Print attributes diff"""
    names = names or {}
    for (removed, added) in replaced:
        print(" Name: {}".format(removed.name))
        output = []
        for key in viewkeys(removed.to_dict(ignore=ignore, extra=extra)):
            removed_attr = getattr(removed, key)
            added_attr = getattr(added, key)
            if removed_attr != added_attr:
                output.append(" {} changed from {} to {}".format(
                    names.get(key, key.capitalize().replace("_", " ")),
                    removed_attr or "<None>", added_attr or "<None>"))
        print("\n".join(output))
        print()
def _fix_dependencies(self):
    """Propagate dependencies, removing missing nodes"""
    created = self.created
    synonyms = self.synonyms
    arriving_arrows = self.arriving_arrows
    departing_arrows = self.departing_arrows

    removed = (
        set(viewvalues(self.variables))
        - created
        - set(viewkeys(synonyms))
    )

    for variable in removed:
        variable_is_box = "box--" in variable.name
        for source, typ_sv in viewitems(arriving_arrows[variable]):
            if (variable_is_box and "box--" in source.name and
                    not self.config.show_blackbox_dependencies):
                continue
            for target, typ_vt in viewitems(departing_arrows[variable]):
                if variable_is_box and source.type == target.type == "arg":
                    continue
                typ = typ_sv or typ_vt
                if not typ and not variable_is_box:
                    typ = "dashed"
                #del arriving_arrows[target][variable]
                #del departing_arrows[variable][target]
                departing_arrows[source][target] = typ
                arriving_arrows[target][source] = typ
        del arriving_arrows[variable]

        for target, typ_vt in viewitems(departing_arrows[variable]):
            if (variable_is_box and "box--" in target.name and
                    not self.config.show_blackbox_dependencies):
                continue
            for source, typ_sv in viewitems(arriving_arrows[variable]):
                if variable_is_box and source.type == target.type == "arg":
                    continue
                typ = typ_sv or typ_vt
                if not typ and not variable_is_box:
                    typ = "dashed"
                #del arriving_arrows[variable][source]
                #del departing_arrows[source][variable]
                departing_arrows[source][target] = typ
                arriving_arrows[target][source] = typ
        del departing_arrows[variable]
def getContGroupArrays(arrays, groupPositions, arrayKeys=None):
    """Convenience function to generate a subset of arrays from specified array
    positions.

    :param arrays: a dictionary containing ``numpy.arrays``
    :param groupPositions: array positions that should be included in the
        subset of arrays
    :param arrayKeys: a list of "arrays" keys that should be included in the
        subset of arrays, if None all keys are selected

    :returns: a dictionary containing ``numpy.arrays``
    """
    if arrayKeys is None:
        arrayKeys = list(viewkeys(arrays))
    matchingArrays = dict()
    for key in arrayKeys:
        matchingArrays[key] = arrays[key][groupPositions]
    return matchingArrays
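# A brief usage sketch (not from the original module): select positions 0 and 2
# from every array in the dictionary via numpy fancy indexing.
import numpy

arrays = {'mz': numpy.array([100.0, 200.0, 300.0]),
          'rt': numpy.array([10.0, 20.0, 30.0])}
subset = getContGroupArrays(arrays, [0, 2])
assert numpy.array_equal(subset['mz'], numpy.array([100.0, 300.0]))
assert numpy.array_equal(subset['rt'], numpy.array([10.0, 30.0]))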
def set_classes_default(mcs, attr, value, instances=False, model="*"):
    """Set DEFAULT attribute for Model classes that match model filter

    Arguments:
    attr -- attribute name
    value -- new attribute value

    Keyword arguments:
    instances -- update instances too (default=False)
    model -- filter model (default="*")
    """
    if model == "*":
        for name in viewkeys(mcs.__classes__):
            mcs.set_class_default(name, attr, value, instances=instances)
    else:
        mcs.set_class_default(model, attr, value, instances=instances)
def _findSamesetProteins(protToPeps, proteins=None):
    """Find proteins that are mapped to an identical set of peptides.

    :param protToPeps: dict, for each protein (=key) contains a set of
        associated peptides (=value). For example {protein: {peptide, ...}, ...}
    :param proteins: iterable, proteins that are tested for having equal
        evidence. If not specified all proteins are tested

    :returns: a list of sorted protein tuples that share equal peptide evidence
    """
    proteins = viewkeys(protToPeps) if proteins is None else proteins

    equalEvidence = ddict(set)
    for protein in proteins:
        peptides = protToPeps[protein]
        equalEvidence[tuple(sorted(peptides))].add(protein)
    equalProteins = list()
    for proteins in viewvalues(equalEvidence):
        if len(proteins) > 1:
            equalProteins.append(tuple(sorted(proteins)))
    return equalProteins
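# A short usage sketch (not from the original module): proteins "A" and "C"
# map to exactly the same peptide set, so they are reported as one sameset
# group, while "B" with extra evidence is left out.
protToPeps = {'A': {'PEPT', 'IDEK'},
              'B': {'PEPT', 'IDEK', 'EXTRA'},
              'C': {'IDEK', 'PEPT'}}
assert _findSamesetProteins(protToPeps) == [('A', 'C')]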
def neighborhood1(graph1, graph2, mapping, cmp_node):
    """First neighborhood of VND. Add missing combinations"""
    tried = set()

    def add_to_mapping(to_add, new_mapping, swapped):
        """Add combination to mapping"""
        nodes1, nodes2 = graph1["hnodes"], graph2["hnodes"]
        added = []
        for node_id1, node_id2 in to_add:
            if swapped:
                node_id1, node_id2 = node_id2, node_id1
            node1, node2 = nodes1[node_id1], nodes2[node_id2]
            if cmp_node(node1, node2):
                added.append((node_id1, node_id2))
                new_mapping[node1] = node2
        return tuple(added)

    not_mapped1 = (graph1["node_indexes"] -
                   {n["index"] for n in viewkeys(mapping)})
    not_mapped2 = (graph2["node_indexes"] -
                   {n["index"] for n in viewvalues(mapping)})
    swapped = False
    if len(not_mapped2) > len(not_mapped1):
        swapped = True
        not_mapped1, not_mapped2 = not_mapped2, not_mapped1

    possibilities = [
        list(zip(x, not_mapped2))
        for x in itertools.permutations(not_mapped1, len(not_mapped2))]

    for full_map in possibilities:
        for i in range(1, len(not_mapped2) + 1):
            for to_add in itertools.combinations(full_map, i):
                if to_add in tried:
                    continue
                new_mapping = copy(mapping)
                to_add = add_to_mapping(to_add, new_mapping, swapped)
                if to_add in tried:
                    continue
                tried.add(to_add)
                yield new_mapping
def from_dict(cls, dictionary):
    """Create a ``TabularMSA`` from a ``dict``.

    Parameters
    ----------
    dictionary : dict
        Dictionary mapping keys to alphabet-aware scikit-bio sequence
        objects. The ``TabularMSA`` object will have its index labels set
        to the keys in the dictionary.

    Returns
    -------
    TabularMSA
        ``TabularMSA`` object constructed from the keys and sequences in
        `dictionary`.

    See Also
    --------
    to_dict
    sort

    Notes
    -----
    The order of sequences and index labels in the resulting ``TabularMSA``
    object is arbitrary. Use ``TabularMSA.sort`` to set a different order.

    Examples
    --------
    >>> from skbio import DNA, TabularMSA
    >>> seqs = {'a': DNA('ACGT'), 'b': DNA('A--T')}
    >>> msa = TabularMSA.from_dict(seqs)

    """
    # Python 2 and 3 guarantee same order of iteration as long as no
    # modifications are made to the dictionary between calls:
    #     https://docs.python.org/2/library/stdtypes.html#dict.items
    #     https://docs.python.org/3/library/stdtypes.html#
    #     dictionary-view-objects
    return cls(viewvalues(dictionary), index=viewkeys(dictionary))
def __getitem__(self, item):
    """
    item can be a tuple or list of ints or strings, or a single
    int or string. String item is a nested field name, e.g., "a", "a:b",
    "a:b:c". Int item is the index of a field at the first level of the
    Struct.
    """
    if isinstance(item, list) or isinstance(item, tuple):
        keys = list(viewkeys(self.fields))
        return Struct(
            * [
                (
                    keys[k]
                    if isinstance(k, int) else k, self[k]
                ) for k in item
            ]
        )
    elif isinstance(item, int):
        return next(islice(viewvalues(self.fields), item, None))
    else:
        field = self._get_field_by_nested_name(item)
        if field is None:
            raise KeyError('field "%s" not found' % (item))
        return field
def loadBinaryItemContainer(zippedfile, jsonHook):
    """Imports binaryItems from a zipfile generated by
    :func:`writeBinaryItemContainer`.

    :param zippedfile: can be either a path to a file (a string) or a
        file-like object
    :param jsonHook: a custom decoding function for JSON formatted strings of
        the binaryItems stored in the zipfile.

    :returns: a dictionary containing binaryItems
        ``{binaryItem.id: binaryItem, ... }``
    """
    binaryItemContainer = dict()
    with zipfile.ZipFile(zippedfile, 'r') as containerZip:
        # Convert the zipfile data into a str object, necessary since
        # containerZip.read() returns a bytes object.
        metadataText = io.TextIOWrapper(containerZip.open('metadata'),
                                        encoding='utf-8').read()
        allMetadata = json.loads(metadataText, object_hook=jsonHook)
        metadataIndex = [str(_) for _ in sorted([int(i) for i in viewkeys(allMetadata)])]
        binarydataFile = containerZip.open('binarydata')
        for index in metadataIndex:
            binaryItem = allMetadata[index][0]
            for binaryMetadata in allMetadata[index][1]:
                arrayKey = binaryMetadata['arrayKey']
                rawdata = binarydataFile.read(binaryMetadata['end'] -
                                              binaryMetadata['start'])
                array = _arrayFromBytes(rawdata, binaryMetadata)
                binaryItem.arrays[arrayKey] = array
            binaryItemContainer[binaryItem.id] = binaryItem
    return binaryItemContainer
def __eq__(self, other):
    log.debug("Testing equality")
    if type(self) != type(other):
        log.debug("Typecheck failed")
        return NotImplemented
    if viewkeys(self._elem_names) != viewkeys(other._elem_names):
        log.debug("Keys different: self only: {}, other only: {}".format(
            viewkeys(self._elem_names) - viewkeys(other._elem_names),
            viewkeys(other._elem_names) - viewkeys(self._elem_names)))
        return False
    if np.all(np.isnan(self._coordinates)) and np.all(np.isnan(other._coordinates)):
        log.debug("True: All is NAN")
        return True
    for key in self:
        if not np.allclose(self[key], other[key]):
            log.debug("Values for key {} different: {}!={}".format(
                key, self[key], other[key]))
            return False
    log.debug("Equal!")
    return True
def position_entropies(self, base=None,
                       nan_on_non_standard_chars=True):
    """Return Shannon entropy of positions in Alignment

    Parameters
    ----------
    base : float, optional
        log base for entropy calculation. If not passed, default will be e
        (i.e., natural log will be computed).
    nan_on_non_standard_chars : bool, optional
        if True, the entropy at positions containing characters outside of
        the first sequence's `iupac_standard_characters` will be `np.nan`.
        This is useful, and the default behavior, as it's not clear how a
        gap or degenerate character should contribute to a positional
        entropy. This issue was described in [1]_.

    Returns
    -------
    list
        List of floats of Shannon entropy at `Alignment` positions. Shannon
        entropy is defined in [2]_.

    See Also
    --------
    position_counters
    position_frequencies

    References
    ----------
    .. [1] Identifying DNA and protein patterns with statistically
       significant alignments of multiple sequences.
       Hertz GZ, Stormo GD.
       Bioinformatics. 1999 Jul-Aug;15(7-8):563-77.
    .. [2] A Mathematical Theory of Communication
       CE Shannon
       The Bell System Technical Journal (1948).

    Examples
    --------
    >>> from skbio.core.alignment import Alignment
    >>> from skbio.core.sequence import DNA
    >>> sequences = [DNA('AC--', id="seq1"),
    ...              DNA('AT-C', id="seq2"),
    ...              DNA('TT-C', id="seq3")]
    >>> a1 = Alignment(sequences)
    >>> print(a1.position_entropies())
    [0.63651416829481278, 0.63651416829481278, nan, nan]

    """
    result = []
    # handle empty Alignment case
    if self.is_empty():
        return result

    iupac_standard_characters = self[0].iupac_standard_characters()
    for f in self.position_frequencies():
        if (nan_on_non_standard_chars and
                len(viewkeys(f) - iupac_standard_characters) > 0):
            result.append(np.nan)
        else:
            result.append(entropy(list(f.values()), base=base))
    return result