def _recursivemod(self, obj, parent=''):
    r"""Recursively yield info dicts for functions, methods and classes
    reachable from *obj*.

    @Arguments:
    - `obj`: module or class to inspect
    - `parent`: dotted-name prefix used for generated entries

    @Return: generator of collections.defaultdict(str) with keys
             name, file, type, definition, summary, doc
    """
    if '' == parent and hasattr(obj, '__name__'):
        parent = obj.__name__
    for memname, subobj in _inspect.getmembers(obj):
        # skip dunder members (e.g. __init__, __doc__)
        if memname.startswith('_') and memname.endswith('__'):
            continue
        if _inspect.isfunction(subobj) or _inspect.ismethod(subobj):
            yield _defaultdict(str,
                               name=parent + '.' + memname,
                               file=getsafeabsfile(subobj),
                               type=type(subobj),
                               definition=getdef(subobj, parent + '.' + memname),
                               summary=getdocsummary(subobj),
                               doc=(_inspect.getdoc(subobj) or ''))
        if _inspect.isclass(subobj):
            yield _defaultdict(str,
                               name=parent + '.' + memname,
                               file=getsafeabsfile(subobj),
                               type=type(subobj),
                               definition=parent + '.' + memname + '()',
                               summary=getdocsummary(subobj),
                               doc=(_inspect.getdoc(subobj) or ''))
            # BUG FIX: _recursivemod is a generator, so calling it without
            # iterating discarded all class-member results.  Re-yield them
            # (same pattern as the caller in _itermodinfo).
            for info in self._recursivemod(subobj,
                                           parent=parent + '.' + memname + '()'):
                yield info
def __init__(self, tags):
    """Build node/way/relation tag tables from a `Tags` instance.

    Raises ValueError when *tags* is not a Tags object.
    """
    if not isinstance(tags, Tags):
        raise ValueError("Need an instance of Tags for construction.")
    self._nodes = _defaultdict(dict)
    self._ways = _defaultdict(dict)
    self._relations = _defaultdict(dict)
    # Fill each table from the matching element kind.
    pairs = ((tags.from_nodes, self._nodes),
             (tags.from_ways, self._ways),
             (tags.from_relations, self._relations))
    for source, table in pairs:
        self._populate(source, table)
def __init__(self, file):
    """Index (key, value) tag pairs to OSM ids, split by element kind."""
    self.from_nodes = _defaultdict(list)
    self.from_ways = _defaultdict(list)
    self.from_relations = _defaultdict(list)
    destinations = {
        "node": self.from_nodes,
        "way": self.from_ways,
        "relation": self.from_relations,
    }
    for element in _all_elements(file):
        target = destinations[element.name]
        for tag_pair in element.tags.items():
            target[tag_pair].append(element.osm_id)
def __init__(self, csvName=None, googDrive=None, url=None):
    """Load data from exactly one source, checked in order: a CSV file,
    a Google Drive document, then a URL.

    Raises ValueError when no source argument is supplied.
    """
    # Nested multimap: d[outer][inner] -> list of values.
    self.d = _defaultdict(lambda: _defaultdict(list))
    loaders = ((csvName, self._load_csv),
               (googDrive, self._load_googDrive),
               (url, self._load_url))
    for argument, loader in loaders:
        if argument:
            loader(argument)
            break
    else:
        raise ValueError("Ba-durt")
def walk(self, obj, parent=''): r"""Generator for module info. @Arguments: - `parent`: @Return: collections.defaultdict include name, file, type, definition, object, summary, doc """ # for definitions if '' == parent and hasattr(obj, '__name__'): parent = obj.__name__ yield _defaultdict(str, name=parent, file=getsafeabsfile(obj), type=type(obj), definition=parent, object=obj, summary=(getdocsummary(obj) or ''), doc=(_inspect.getdoc(obj) or '')) # for subobj in obj for modname, subobj in _inspect.getmembers(obj): if modname.startswith('_'): continue # check hash for unique if self._unique: try: hash_ = hash(subobj) if hash_ in self._hashes: continue self._hashes.append(hash_) except TypeError: # unhashable type continue subparent = '.'.join([parent, modname]) if _inspect.ismodule(subobj) or _inspect.isclass(subobj): # recursively call for dic_ in self.walk(subobj, parent=subparent): # if os.path.basedir(subobj.__file__) == : yield dic_ else: yield _defaultdict(str, name=modname, file=getsafeabsfile(subobj), type=type(subobj), definition=getdef(subobj, subparent), object=subobj, summary=(getdocsummary(subobj) or ''), doc=(_inspect.getdoc(subobj) or ''))
def parse_builtin_functions(key, inname=True, insummary=True, infulldoc=False):
    r"""Yield info dicts for builtins whose name, docstring summary or full
    docstring matches the regex *key*.

    @Arguments:
    - `key`: regular expression searched in the selected fields
    - `inname`: search the builtin's name
    - `insummary`: search the docstring summary
    - `infulldoc`: search the full docstring

    @Return: generator of collections.defaultdict(str) with keys
             name, type, summary, doc
    """
    for bname, builtin in getbuiltins():
        # Skip non-callables and private names.
        if not callable(builtin) or bname.startswith('_'):
            continue
        matched = ((inname and _re.search(key, bname))
                   or (insummary and _re.search(key, getdocsummary(builtin)))
                   or (infulldoc and _re.search(key, _getdoc(builtin))))
        if not matched:
            continue
        yield _defaultdict(str,
                           name=bname,
                           type=type(builtin),
                           summary=getdocsummary(builtin),
                           doc=_inspect.getdoc(builtin))
def parse_builtin_modules(key, inname=True, insummry=True, infulldoc=False):
    r"""Yield info dicts for builtin modules whose name, summary or full
    docstring matches the regex *key*.

    @Arguments:
    - `key`: regular expression searched in the selected fields
    - `inname`: search the module name
    - `insummry`: search the docstring summary
    - `infulldoc`: search the full docstring

    @Return: generator of collections.defaultdict(str) with keys
             name, type, file, definition, summary, doc
    """
    for modname in _sys.builtin_module_names:
        if modname == '__main__':
            continue
        obj = __import__(modname)
        # Match against name, docstring summary, or full docstring.
        if not ((inname and _re.search(key, modname))
                or (insummry and _re.search(key, getdocsummary(obj)))
                or (infulldoc and _re.search(key, _getdoc(obj)))):
            continue
        yield _defaultdict(str,
                           name=modname,
                           type=type(obj),
                           file='',
                           definition='',
                           summary=getdocsummary(obj),
                           doc=_inspect.getdoc(obj))
def show(self):
    """Display the pending plots, then reset the widget cache.

    The widgets only need to stay referenced while figures are alive.
    """
    _plt.show()
    self.__current_widgets = _defaultdict(list)
def _resolve_GO_terms(terms_set, query_time):
    """Resolve GO term IDs to their database `_key`s as of *query_time*.

    Terms not found directly are followed through their most recent
    "merge" edge and resolved recursively against the replacement term.
    Returns {original term id: resolved _key}; unresolvable terms are
    simply omitted.
    """
    if not terms_set:
        return {}
    terms_set_copy = set(terms_set)
    terms_set = None  # prevent accidental side effects
    resolved = {}
    # make a list to avoid modification while iterating
    for chunk in _chunkiter(list(terms_set_copy), _MAX_RE_QUERY_SIZE):
        c = list(chunk)  # iterator to list
        res = _stored_query('GO_get_terms', {'ids': c, 'ts': query_time})
        t_to_key = {t['id']: t['_key'] for t in res['results']}
        for t in c:
            if t in t_to_key:
                resolved[t] = t_to_key[t]
                # Directly resolved: no merge lookup needed below.
                terms_set_copy.remove(t)
    # Whatever is left was merged into other terms; find the newest
    # replacement for each.
    replaced_by = {}
    for chunk in _chunkiter(terms_set_copy, _MAX_RE_QUERY_SIZE):
        c = list(chunk)
        res = _stored_query('GO_get_merges_from', {'froms': c})
        from_to_time = _defaultdict(list)  # type: dict
        for e in res['results']:
            from_to_time[e['from']].append((e['to'], e['created']))
        for f in from_to_time.keys():
            to = sorted(from_to_time[f], key=lambda tt: tt[1])[-1]  # get most recent edge
            replaced_by[f] = to[0]
    terms_set_copy = None  # type: ignore
    # Replacement terms may themselves have been merged -> recurse.
    res = _resolve_GO_terms(set(replaced_by.values()), query_time)
    for old, new in replaced_by.items():
        if new in res:
            resolved[old] = res[new]
    return resolved
def main():
    """Write `ref name md5 type` lines for every object of type _TYPE in
    workspace _REF_WS to _OUT_FILE, printing progress and a tally of all
    object types seen."""
    ws = _Workspace(_URL)
    wsinfo = ws.get_workspace_info({'id': _REF_WS})
    ws_size = wsinfo[4]
    print('Processing workspace {} ({}) with {} objects'.format(
        wsinfo[1], _REF_WS, ws_size))
    print()
    types = _defaultdict(int)
    with open(_OUT_FILE, 'w') as output:
        # BUG FIX: xrange is Python-2 only; this block otherwise uses the
        # Python-3 print() function.  range() behaves the same here.
        for i in range(0, ws_size, 10000):
            print('Processing objects from {} to {}'.format(i, i + 9999))
            start = _time.time()
            objs = ws.list_objects({
                'ids': [_REF_WS],
                'minObjectID': i,
                'maxObjectID': i + 9999
            })
            end = _time.time()
            print('Got {} objects back in {} sec'.format(
                len(objs), end - start))
            for o in objs:
                # Avoid shadowing the builtin `type`.
                obj_type = o[2]
                types[obj_type] += 1
                # Object reference: wsid/objid/version.
                ref = str(o[6]) + '/' + str(o[0]) + '/' + str(o[4])
                if obj_type.split('-')[0] != _TYPE:
                    print('Skipping {}, {}'.format(ref, obj_type))
                else:
                    md5 = o[8]
                    name = o[1]
                    output.write('{} {} {} {}\n'.format(ref, name, md5, obj_type))
    print()
    print('Saw types:')
    for obj_type, count in types.items():
        print('{} {}'.format(obj_type, count))
def _name_steps(steps, default='alt'): """Generate names for estimators.""" steps = [estimators if isinstance(estimators, list) else [estimators] for estimators in steps] names = [] for estimators in steps: estimators = estimators[:] if len(estimators) > 1: while None in estimators: estimators.remove(None) step_names = {type(estimator).__name__.lower() for estimator in estimators} if len(step_names) > 1: names.append(default) else: names.append(step_names.pop()) namecount = _defaultdict(int) for name in names: namecount[name] += 1 for k, v in list(namecount.items()): if v == 1: del namecount[k] for i in reversed(range(len(names))): name = names[i] if name in namecount: names[i] += "-%d" % namecount[name] namecount[name] -= 1 named_steps = list(zip(names, [step[0] for step in steps])) grid = {k: v for k, v in zip(names, steps) if len(v) > 1} return named_steps, grid
def check_primer_list(primerlist: list):
    """Report primer statistics: counts, undefined (all-N) primers,
    numbering mismatches, and duplicated sequences."""
    primers = primerlist
    unique_seqs = {str(p.seq).lower() for p in primers}
    msg = f"{len(primers)} primers, {len(unique_seqs)} unique primer sequences\n"
    # A primer whose sequence is only N characters counts as undefined.
    defined = [p for p in primers if set(p.seq.lower()) != set("n")]
    msg += f"{len(primers) - len(defined)} primer(s) without sequence (N)"
    for position, p in enumerate(primers):
        # Primer names are expected to begin with their list position.
        if not p.name.startswith(str(position)):
            msg += f"\nWrong number: {position} {p.format('tab')}"
    names_by_seq = _defaultdict(list)
    for u in unique_seqs:
        names_by_seq[u].extend(p.name for p in defined
                               if str(p.seq).lower() == u)
    # Any sequence shared by two or more primers is reported.
    for seq, names in names_by_seq.items():
        if len(names) > 1:
            msg += " ".join(names)
            msg += f" {seq}\n"
    return _pretty_str(msg.strip())
def __setstate__(self, state_dict):
    """Restore pickled state.

    Circuit-index keys are stored compressed and expanded here; older
    pickles may lack `auxInfo` or store it as a plain dict, in which
    case it is upgraded to a defaultdict(dict).
    """
    expanded_keys = [cgs.expand() for cgs in state_dict['cirIndexKeys']]
    self.cirIndex = _OrderedDict(
        list(zip(expanded_keys, state_dict['cirIndexVals'])))
    for attr in ('olIndex', 'oliDict', 'timeDict', 'repDict',
                 'collisionActions', 'comments', 'comment'):
        setattr(self, attr, state_dict[attr])
    aux = state_dict.get('auxInfo', _defaultdict(dict))
    if isinstance(aux, dict) and not isinstance(aux, _defaultdict):
        aux = _defaultdict(dict, aux)
    self.auxInfo = aux
def __init__(self, site_url, input_dir, output_dir, home=None):
    """Initialize site-generation state: configuration paths, template
    paths, file lists, link tracking, and the markdown converter."""
    self.site_url = site_url
    self.input_dir = input_dir
    self.output_dir = output_dir
    self.home = home
    self.verbose = False
    self.quiet = False
    # Site configuration lives under <input_dir>/_transom.
    self.config_dir = _join(self.input_dir, "_transom")
    self.config_file = _join(self.config_dir, "config.py")
    self.config = None
    self.outer_template_path = _join(self.config_dir, "outer-template.html")
    self.inner_template_path = _join(self.config_dir, "inner-template.html")
    self.input_files = []
    self.output_files = []
    self.config_files = []
    # link -> set of sources referencing it.
    self.links = _defaultdict(set)
    self.link_targets = set()
    self.ignored_file_patterns = [
        "*/.git",
        "*/.svn",
    ]
    self.ignored_link_patterns = []
    self._markdown_converter = _markdown.Markdown(extras=_markdown_extras)
    self.start_time = None
def _iter_overlapping_pairs(contigsof, threshold=0.5): """This creates a generator that yields (clst1, clst2), overlap for all pairs of clusters with nonzero overlap.""" pairs = set() clustersof = _defaultdict(list) for clustername, contigs in contigsof.items(): for contig in contigs: clustersof[contig].append(clustername) for clusterlist in clustersof.values(): pairs.update(set(_itertools.combinations(clusterlist, 2))) del clustersof while pairs: cluster1, cluster2 = pairs.pop() contigs1 = contigsof[cluster1] contigs2 = contigsof[cluster2] intersection = contigs1.intersection(contigs2) overlap = len(intersection) / min(len(contigs1), len(contigs2)) if overlap >= threshold: yield (cluster1, cluster2), overlap
def _setup(self, time: str):
    """Context setup / teardown, initializes internal attributes.

    Creates temporary Emme link/node attributes for the duration of the
    context and, unless debugging, clears internal state and removes the
    per-bin shortest-path files on exit.

    Args:
        time: name of the time period
    """
    # Reset per-run state before the context body executes.
    self._mazs = None
    self._demand = _defaultdict(lambda: [])
    self._max_dist = 0
    self._network = None
    self._root_index = None
    self._leaf_index = None
    # Temporary attributes; restored/removed by the manager below.
    attributes = [
        ("LINK", "@link_cost", "total cost MAZ-MAZ"),
        ("LINK", "@link_cost_maz", "cost MAZ-MAZ, unused MAZs blocked"),
        ("NODE", "@maz_root", "Flag for MAZs which are roots"),
        ("NODE", "@maz_leaf", "Flag for MAZs which are leaves"),
    ]
    with self.controller.emme_manager.temp_attributes_and_restore(
            self._scenario, attributes):
        try:
            yield
        finally:
            # Keep intermediate state around only when debugging.
            if not self._debug:
                self._mazs = None
                self._demand = None
                self._network = None
                self._root_index = None
                self._leaf_index = None
                # delete sp path files written for each demand bin
                for bin_no in range(len(self._bin_edges)):
                    file_path = os.path.join(self._eb_dir,
                                             f"sp_{time}_{bin_no}.ebp")
                    if os.path.exists(file_path):
                        os.remove(file_path)
def init_input_files(self, input_paths):
    """Initialize a file object for each non-ignored input path, then
    link every output file to its parent index.html.

    index.html files are parented to the index of the parent directory;
    all other files are parented to the index of their own directory.
    """
    for input_path in input_paths:
        if not self._is_ignored_file(input_path):
            created = self._create_file(input_path)
            created.init()
    index_files = {}
    other_files = _defaultdict(list)
    for file_ in self.output_files.values():
        relative = file_.output_path[len(self.output_dir):]
        dir_, base = _split(relative)
        if base == "index.html":
            index_files[dir_] = file_
        else:
            other_files[dir_].append(file_)
    for dir_, index_file in index_files.items():
        parent_dir = _split(dir_)[0]
        # The root index has no parent.
        if parent_dir != "/":
            index_file.parent = index_files.get(parent_dir)
    for dir_, members in other_files.items():
        parent = index_files.get(dir_)
        for file_ in members:
            file_.parent = parent
def sections_by_group(self):
    """Return a mapping of group name -> list of section names."""
    grouped = _defaultdict(list)
    for section in _SafeConfigParser.sections(self):
        # parse_section returns (group, key); only the group matters here.
        group = self.parse_section(section)[0]
        grouped[group].append(section)
    return grouped
def find_duplicates(path: Path, filter_fun=lambda file: True):
    """Find duplicated files under *path*, recursively.

    Three passes: group files by size, then by a hash of the first 1024
    bytes, then by full-content hash.  Only the final grouping proves
    duplication.

    Args:
        path: root directory to scan.
        filter_fun: predicate selecting which files participate.

    Returns:
        dict mapping full-content hash -> list of Paths; entries with
        two or more paths are duplicates of each other.
    """
    files_by_size = _defaultdict(list)
    files_by_small_hash = _defaultdict(list)
    # BUG FIX: files_by_full_hash was never initialized (NameError on the
    # first duplicate found) and the function returned nothing.
    files_by_full_hash = _defaultdict(list)
    for file in path.rglob("*"):
        if file.is_file() and filter_fun(file):
            try:
                file_size = file.stat().st_size
            except (OSError, FileNotFoundError):
                # not accessible (permissions, etc) - pass on
                continue
            files_by_size[file_size].append(file)
    # For all files with the same file size, get their hash on the first 1024 bytes
    logger.info('Calculating small hashes...')
    for file_size, files in _tqdm(files_by_size.items(), unit='files-by-size'):
        if len(files) < 2:
            continue  # this file size is unique, no need to spend cpu cycles on it
        for file in files:
            try:
                small_hash = get_hash(file, first_chunk_only=True)
            except OSError:
                # the file access might've changed till the exec point got here
                continue
            files_by_small_hash[(file_size, small_hash)].append(file)
    # For all files with the hash on the first 1024 bytes, get their hash on the full
    # file - if more than one file is inserted on a hash here they are certinly duplicates
    logger.info('Calculating full hashes...')
    for files in _tqdm(files_by_small_hash.values(), unit='files-by-small-hash'):
        if len(files) < 2:
            continue  # the hash of the first 1k bytes is unique -> skip this file
        for file in files:
            try:
                full_hash = get_hash(file, first_chunk_only=False)
            except OSError:
                # the file access might've changed till the exec point got here
                continue
            files_by_full_hash[full_hash].append(file)
    return files_by_full_hash
def find_duplicates(path: Path, filter_function=lambda __file: True):
    # THIS IS PARTLY COPIED FROM STACKOVERFLOW
    # https://stackoverflow.com/questions/748675/finding-duplicate-files-and-removing-them
    # We now use an optimized version linked from tfeldmann
    # https://gist.github.com/tfeldmann/fc875e6630d11f2256e746f67a09c1ae
    # THANK YOU Todor Minakov (https://github.com/tminakov) and Thomas Feldmann (https://github.com/tfeldmann)
    #
    # NOTE: defaultdict(list) is a multimap, all init array handling is done internally
    # See: https://en.wikipedia.org/wiki/Multimap#Python
    """Find duplicated files under *path* via size, small-hash and
    full-hash grouping; returns {full hash: [paths]}."""
    files_by_size = _defaultdict(list)
    files_by_small_hash = _defaultdict(list)
    # BUG FIX: files_by_full_hash was never initialized (NameError on the
    # first duplicate) and the bare `return` discarded the result.
    files_by_full_hash = _defaultdict(list)
    for path_item in path.rglob("*"):
        if path_item.is_file() and filter_function(path_item):
            try:
                file_size = path_item.stat().st_size
            except (OSError, FileNotFoundError):
                # not accessible (permissions, etc) - pass on
                continue
            files_by_size[file_size].append(path_item)
    # For all files with the same file size, get their hash on the first 1024 bytes
    print(f'Checking first chunks of {len(files_by_size.items())} items...')
    for file_size, files in tqdm(files_by_size.items()):
        if len(files) < 2:
            continue  # this file size is unique, no need to spend cpu cycles on it
        for path_item in files:
            try:
                small_hash = get_hash(path_item, first_chunk_only=True)
            except OSError:
                # the file access might've changed till the exec point got here
                continue
            files_by_small_hash[(file_size, small_hash)].append(path_item)
    # For all files with the hash on the first 1024 bytes, get their hash on the full
    # file - if more than one file is inserted on a hash here they are certainly duplicates
    print(f'Deeper analysis of {len(files_by_small_hash.values())} items...')
    for files in tqdm(files_by_small_hash.values()):
        if len(files) < 2:
            continue  # the hash of the first 1k bytes is unique -> skip this file
        for path_item in files:
            try:
                full_hash = get_hash(path_item, first_chunk_only=False)
            except OSError:
                # the file access might've changed till the exec point got here
                continue
            files_by_full_hash[full_hash].append(path_item)
    return files_by_full_hash
def __init__(self, audio_data):
    """Create an analyser for *audio_data*.

    Widget references are cached on the instance so active matplotlib
    widgets are not garbage collected while figures are displayed.
    """
    self.__current_widgets = _defaultdict(list)
    self.__current_figs = []
    self.__cached_fft = None
    # Validates the type and (re)initializes the caches above.
    self.set_audio_data(audio_data)
def _getSamples(factorColumn, dependentColumn, minsize=1):
    # Group dependent values by factor level and return one numpy array per
    # level.  Warns when any group has fewer than `minsize` members.
    # NOTE: Python 2 module (print statement below).
    factors, _, _ = factorColumn._eval(None)
    dependents, _, _ = dependentColumn._eval(None)
    groups = _defaultdict(list)
    # Pair each factor level with its dependent value, positionally.
    for factor, depenent in zip(factors, dependents):
        groups[factor].append(depenent)
    samples = groups.values()
    if any(len(s) < minsize for s in samples):
        print "WARNING: sample has less than %d subjects" % minsize
    return map(_numpy.array, samples)
def set_audio_data(self, audio_data):
    """Set up the audio data.

    Replaces the audio under analysis and resets the widget, figure and
    FFT caches, which are only valid for the previous data.

    Raises:
        ValueError: if `audio_data` is not an _AudioData instance.
    """
    if not isinstance(audio_data, _AudioData):
        # BUG FIX: the message formatted `AudioData`, which is not the
        # name checked above (and is likely undefined here); use the
        # class the check actually requires.
        raise ValueError("`audio_data` must be of type {}.".format(_AudioData))
    self.__audio_data = audio_data
    self.__current_widgets = _defaultdict(lambda: [])
    self.__current_figs = []
    self.__cached_fft = None
def create_from_binary(cls, mft_config, binary_data, entry_number):
    #TODO test carefully how to find the correct index entry, specially with NTFS versions < 3
    '''Creates a MFTEntry from a binary stream.

    It correctly process the binary data extracting the MFTHeader, all the
    attributes and the slack information from the binary stream.
    The binary data WILL be changed to apply the fixup array.

    Args:
        mft_config (:obj:`MFTConfig`) - An instance of MFTConfig, as this
            tells how the library will interpret data.
        binary_data (bytearray) - A binary stream with the data to extract.
            This has to be a writeable and support the memoryview call
        entry_number (int) - The entry number for this entry

    Returns:
        MFTEntry: If the object is empty, returns None, otherwise, new object MFTEntry

    Raises:
        HeaderError: re-raised with entry context when header parsing fails.
        EntryError: when the buffer size disagrees with the header's
            allocated entry length.
    '''
    bin_view = memoryview(binary_data)
    entry = None
    # An all-zero signature marks an empty entry; leave `entry` as None.
    if bin_view[0:4] != b"\x00\x00\x00\x00":
        try:
            header = MFTHeader.create_from_binary(
                mft_config.ignore_signature_check,
                bin_view[:MFTHeader.get_representation_size()])
        except HeaderError as e:
            # Enrich the error with entry context before re-raising.
            e.update_entry_number(entry_number)
            e.update_entry_binary(binary_data)
            raise
        entry = cls(header, _defaultdict(list))
        if header.mft_record != entry_number:
            _MOD_LOGGER.warning(
                "The MFT entry number doesn't match. %d != %d",
                entry_number, header.mft_record)
        if len(binary_data) != header.entry_alloc_len:
            _MOD_LOGGER.error(
                "Expected MFT size is different than entry size.")
            raise EntryError(
                f"Expected MFT size ({len(binary_data)}) is different than entry size ({header.entry_alloc_len}).",
                binary_data, entry_number)
        if mft_config.apply_fixup_array:
            # Mutates binary_data in place (see docstring note above).
            apply_fixup_array(bin_view, header.fx_offset,
                              header.fx_count, header.entry_alloc_len)
        entry._load_attributes(mft_config,
                               bin_view[header.first_attr_offset:])
    bin_view.release()  #release the underlying buffer
    return entry
def set_audio_data(self, audio_data):
    """Set up the audio data.

    Replaces the audio under analysis and resets the widget, figure and
    FFT caches, which are only valid for the previous data.

    Raises:
        ValueError: if `audio_data` is not an _AudioData instance.
    """
    if not isinstance(audio_data, _AudioData):
        # BUG FIX: the message formatted `AudioData`, which is not the
        # name checked above (and is likely undefined here); use the
        # class the check actually requires.
        raise ValueError(
            "`audio_data` must be of type {}.".format(_AudioData))
    self.__audio_data = audio_data
    self.__current_widgets = _defaultdict(lambda: [])
    self.__current_figs = []
    self.__cached_fft = None
def __init__(self, element, parent, name=None):
    """Wrap XML *element* as a node in the model tree.

    Registers the node on its parent, collects the ancestor chain, and
    records a unique '/'-joined reference on the owning model (the root
    of the tree).  Raises Exception on a sibling name collision.
    """
    self.element = element
    self.parent = parent
    self.name = name if name is not None else element.attrib["name"]
    self.type_name = element.tag
    self.title = None
    self.text = None
    self.hidden = False
    self.internal = False
    self.proposed = False
    self.deprecated = False
    self.experimental = False
    self.ancestors = []
    self.children = []
    self.children_by_name = {}
    self.links_by_relation = _defaultdict(list)
    self.annotations = {}
    self.model = None
    self.reference = None
    if self.parent:
        if self.name in self.parent.children_by_name:
            raise Exception("Collision! {}".format(self.name))
        self.parent.children.append(self)
        self.parent.children_by_name[self.name] = self
    # Walk up to the root, recording ancestors; names of non-Group
    # ancestors form the reference path.  The root node is the model.
    node = self
    reference_items = [self.name]
    while node.parent:
        node = node.parent
        self.ancestors.append(node)
        if not isinstance(node, Group):
            reference_items.append(node.name)
    self.model = node
    self.reference = "/{}".format("/".join(reversed(reference_items)))
    self.model.nodes_by_reference[self.reference] = self
def _instantiate_spectroscopy_buffer(data_buffer, sweep_buffer):
    """
    Instantiate a spectroscopy Buffer object from a hierarchical
    object-structure.

    Parameters
    ----------
    data_buffer : dict
        Hierarchical structure holding 'attrs' and a 'data' list of chunks.
    sweep_buffer : magni.afm.types.spectroscopy.Buffer
        Sweep buffer whose grids/points the chunks are matched against.

    Returns
    -------
    buffer_ : magni.afm.types.spectroscopy.Buffer
        The instantiated spectroscopy Buffer.

    See Also
    --------
    _instantiate_spectroscopy : Function using the present function.
    """
    # Group chunks by the point they belong to (key None = unassigned).
    chunks = _defaultdict(lambda: [])
    for chunk in data_buffer['data']:
        key = chunk['attrs'].get('pointIndex')
        chunks[key].append(_SpectroscopyChunk(chunk['attrs'], chunk['data']))
    if len(sweep_buffer.data) == 0:
        # No sweep items: point-0 chunks are treated as unassigned too.
        chunks[None].extend(chunks[0])
    items = []
    for item in sweep_buffer.data:
        if isinstance(item, _SpectroscopyGrid):
            # Rebuild the grid, attaching each point's chunks by index.
            points = [[
                _SpectroscopyPoint(point.attrs, chunks[point.attrs['index']])
                for point in row
            ] for row in item.points]
            items.append(_SpectroscopyGrid(item.attrs, points))
        elif isinstance(item, _SpectroscopyPoint):
            items.append(
                _SpectroscopyPoint(item.attrs, chunks[item.attrs['index']]))
    # Unassigned chunks are appended after the structured items.
    return _SpectroscopyBuffer(data_buffer['attrs'], items + chunks[None])
def iterbuiltinsinfo(sammary=True, doc=True):
    r"""Yield an info dict (name, type, summary, doc) for each builtin.

    @Arguments:
    - `sammary`: include the docstring summary (else empty string)
    - `doc`: include the full docstring (else empty string)

    @Return: generator of collections.defaultdict(str)
    """
    for bname, builtin in getbuiltins():
        smmry = getdocsummary(builtin) if sammary else ''
        # BUG FIX: the original wrote `doc = ''` in the else branch,
        # clobbering the parameter and leaving `dc` undefined (NameError)
        # whenever doc=False.
        dc = _inspect.getdoc(builtin) if doc else ''
        yield _defaultdict(str,
                           name=bname,
                           type=type(builtin),
                           summary=smmry,
                           doc=dc)
def __init__(self, file_pointer, mft_config=MFTConfig()):
    '''See class docstring.

    Args:
        file_pointer: open binary stream over the MFT data.
        mft_config: parsing options.  NOTE(review): this default is a
            single MFTConfig instance created once at import time
            (mutable default argument) — confirm callers never mutate it.
    '''
    self.file_pointer = file_pointer
    self.mft_config = mft_config
    self.mft_entry_size = self.mft_config.entry_size
    self._entries_parent_child = _defaultdict(
        list)  #holds the relation ship between parent and child
    self._entries_child_parent = {
    }  #holds the relation between child and parent
    self._number_valid_entries = 0
    if not self.mft_entry_size:  #if entry size is zero, try to autodetect
        _MOD_LOGGER.info("Trying to detect MFT size entry")
        self.mft_entry_size = MFT._find_mft_size(file_pointer)
    # Total number of entries the stream can hold at the detected size.
    self.total_amount_entries = int(
        _get_file_size(self.file_pointer) / self.mft_entry_size)
    if self.mft_config.create_initial_information:
        self._load_relationship_info()
def _itermodinfo(self):
    r"""Yield an info dict for every loaded module, then recurse into
    each module's members via _recursivemod.

    @Return: generator of collections.defaultdict(str) with keys
             name, file, type, definition, summary, doc
    """
    # BUG FIX: dict.iteritems() is Python-2 only, and sys.modules can
    # change size mid-iteration (inspecting members may trigger imports).
    # Iterate over a snapshot instead; works on Python 2 and 3.
    for key, obj in list(sys.modules.items()):
        if obj is None:
            continue
        yield _defaultdict(str,
                           name=key,
                           file=getsafeabsfile(obj),
                           type=type(obj),
                           definition=key,
                           summary=getdocsummary(obj),
                           doc=(_inspect.getdoc(obj) or ''))
        try:
            for d in self._recursivemod(obj, parent=key):
                yield d
        except NotImplementedError:
            continue
def check_links(self, internal=True, external=False):
    """Check the site's links and print any errors with their sources.

    Internal links are verified against collected link targets; external
    links (optional) are fetched and checked for HTTP errors.  Returns
    the number of links that had at least one error.
    """
    with _Phase(self, "Finding input files"):
        input_paths = self.find_input_files()
    with _Phase(self, "Initializing input files"):
        self.init_input_files(input_paths)
    with _Phase(self, "Finding links"):
        for output_file in self.output_files.values():
            output_file.find_links()
    with _Phase(self, "Checking links"):
        errors_by_link = _defaultdict(list)
        for link in self._filter_links(self.links):
            is_internal = link.startswith(self.site_url)
            if internal and is_internal:
                if link not in self.link_targets:
                    errors_by_link[link].append("Link has no target")
            if external and not is_internal:
                code, error = self._check_external_link(link)
                if code >= 400:
                    errors_by_link[link].append(
                        "HTTP error code {}".format(code))
                if error:
                    errors_by_link[link].append(error.message)
        for link, errors in errors_by_link.items():
            print("Link: {}".format(link))
            for error in errors:
                print(" Error: {}".format(error))
            for source in self.links[link]:
                print(" Source: {}".format(source))
    return len(errors_by_link)
def _load_text_format_paths(
        self, time: str, bin_no: int) -> Dict[int, Dict[int, List[int]]]:
    """Load all paths from text file and return as nested dictionary.

    Each line lists the node IDs of one path; the first token is the
    origin and the last the destination.

    Args:
        time: time period name
        bin_no: bin number (id) for this demand segment

    Returns:
        Nested dict so that paths[origin][destination] is the list of
        node IDs after the origin, using node IDs as integers.
    """
    paths = _defaultdict(lambda: {})
    file_name = os.path.join(self._eb_dir, f"sp_{time}_{bin_no}.txt")
    with open(file_name, "r", encoding="utf8") as paths_file:
        for line in paths_file:
            nodes = [int(token) for token in line.split()]
            paths[nodes[0]][nodes[-1]] = nodes[1:]
    return paths
def clique_merge(contigsof, threshold=0.5):
    """Merges all maximal cliques of clusters.

    Inputs:
        contigsof: A {clustername: set(contignames)} dict
        threshold [0.5]: Minimum fraction of overlapping contigs to create edge

    Output: A {clustername: set(contignames)} dict
    """
    # Build the overlap graph: an edge between clusters whose contig
    # overlap reaches the threshold.
    edges = _defaultdict(set)
    for (first, second), _overlap in _iter_overlapping_pairs(contigsof,
                                                             threshold):
        edges[first].add(second)
        edges[second].add(first)
    # Maximal cliques of size >= 2 via the Bron-Kerbosch algorithm.
    cliques = []
    _bron_kerbosch(set(), set(edges), set(), cliques, edges)
    # Degree-zero clusters become singleton cliques.
    for lone_cluster in set(contigsof) - set(edges):
        cliques.append({lone_cluster})
    del edges
    # Union the contigs of each clique under a fresh cluster name.
    mergedclusters = {}
    for number, clique in enumerate(cliques, start=1):
        combined = set()
        for cluster in clique:
            combined.update(contigsof[cluster])
        mergedclusters['cluster_' + str(number)] = combined
    return mergedclusters
def _loadDBIP(fpath): ''' Load dbip data, :param fpath: location of dbip data. :return: a dict of ip -> country code. ''' with open(fpath, 'rb') as f: values = _defaultdict(lambda : None) # returns None if key not present for line in f: # line looks like : line = line.split(',') # every value has quotes around it so strip them # also strip end line characters values[line[0][1:-1]] = line[2].rstrip()[1:-1] return values def country_code(me, ip_addr): ''' Query given country code. :param ip_addr: address to query. :return: str ''' return me.values[ip_addr]
def clear(self):
    '''Removes all keys and values from the DLWSI.'''
    # Refuse to mutate while an iteration is in progress.
    self._check_iter_ok()
    self._store = _defaultdict(list)
    self._len = 0
def __init__(self, wrapped_obj, parent=None):
    """Wrap *wrapped_obj* as the per-node prototype.

    The defaultdict factory deep-copies the prototype, so every node
    gets its own independent copy and never shares mutable state.
    """
    super(ObjectWrapperProtocol, self).__init__(parent)
    self._nodes = _defaultdict(lambda: _deepcopy(wrapped_obj))
def divideSpikes(spikes, blockStartT, blockEndT, blockSeq, flag):
    '''
    From spikes, generate a dictionary where keys are elements from blockSeq
    and values are ndarrays with all spikes in between blockStartT/EndT for
    that condition.

    input:
    ------
        spikes:      ndarray like with spike times

        blockStartT: ndarray like with the start time of each block

        blockEndT:   ndarray like with the end time of each block

        blockSeq:    ndarray like with 'keys' identifying each block.
                     Blocks with the same identifier will end up together.
                     keys can be integers, strings or any other immutable
                     object

        flag:        Decides between different types of computations on the
                     spikes
                     0: Spike times are not changed at all
                     1: Spike times are changed as if all block sequences
                        for a given condition were continuous (the time for
                        the first instance of each block seq is 0, the
                        second instance starts from where the 1st left off
                        and so on)
                     2: Spike times are changed such that EVERY block seq
                        starts from 0

    output:
    -------
        spikesOut:   a dictionary in which spikesOut[blockSeq[i]] is a
                     ndarray with all the spikes associated with
                     blockSeq[i].  Depending on 'flag' spike times might be
                     modified.

    Usage:
    ------
        Possible use of Flag 0: set random seed at the beginning and have a
        random stimuli alternating between conditions; both conditions draw
        numbers from the same random stream.

        Possible use of Flag 1: set the seed for as many random streams as
        experimental conditions and alternate the conditions many times
        without reseting the seed.

        Possible use of Flag 2: reset the seed when a stimulus is repeated.
    '''
    # Accumulated time spent under each condition so far; used by the
    # flag==1 shift below.
    accumulatedTime = _Counter()

    # start an empty array where spikes will be added
    spikesOut = _defaultdict(lambda: _np.array([]))

    # Add two sentinel spikes to 'spikes': one prior to the first
    # blockStartT and one after the last blockEndT.  This guarantees the
    # sign-change searches below always find startIndex and lastIndex.
    preSpk = _np.array([blockStartT[0]-1])
    postSpk = _np.array([blockEndT[-1]+1])
    spks = _np.concatenate((preSpk, spikes, postSpk))

    #_pdb.set_trace()
    for i, startT in enumerate(blockStartT):
        # only assign spikes with meaningful blockSeq. Sometimes I want to
        # exclude spikes from the analysis for example during adapting
        # sequences.
        if blockSeq[i] is None:
            continue

        # find 1st spike in spikes that is greater than startT
        startIndex = _np.where(_np.diff(_np.sign(spks-startT)))[0][0]+1

        # find last spike in spikes that is smaller than BlockEndT[i]
        lastIndex = _np.where(_np.diff(_np.sign(spks-blockEndT[i])))[0][0]

        # grab only the spikes corresponding to this block
        blockSpks = spks[startIndex:lastIndex+1]

        # Modify spike times in this block according to flag
        if flag==0:
            pass
        elif flag==1:
            # Shift so repeats of this condition appear contiguous: remove
            # the time spent in all *other* conditions so far.
            blockSpks -= sum(accumulatedTime.values()) - accumulatedTime[blockSeq[i]]
        elif flag==2:
            # Every block restarts at time zero.
            blockSpks -= startT

        #_pdb.set_trace()
        # Add spike times to spikesOut
        spikesOut[blockSeq[i]] = _np.concatenate((spikesOut[blockSeq[i]], blockSpks))

        # Keep track of accumulatedTime
        accumulatedTime[blockSeq[i]] += blockEndT[i] - blockStartT[i]

    return spikesOut
def __init__(self):
    """Initialize an empty instance."""
    # key -> list of values (multimap storage).
    self._store = _defaultdict(list)
    # Presumably counts in-progress iterations (see _check_iter_ok users)
    # -- confirm against the rest of the class.
    self._itercount = 0
    # Total number of stored values.
    self._len = 0
8. publication.tab 9. strain.tab 10. environment.tab 11. media.tab 12. time_series.tab 13. experimentMeta.tab 14. experimentalUnit.tab 15. measurementDescription.tab The sample.tab and log2level.tab are the truly important files. The other tables are primarily to attach meta data to the sample. ''' VERSION = 2 docs = _defaultdict(dict) # SAMPLE TABLE - 1 docs['sam']['pre'] = ''' A sample is typically a single expression experiment (ex: individual microarray, or RNA-Seq run). Potentially replicates could be collapsed/averaged into one sample. A sample is some biological sample run in a particular combination of platform, environment, protocol and media.''' docs['sam']['title'] = 'The title of sample.' docs['sam']['description'] = 'The description of the sample.' docs['sam']['molecule'] = ''' Enumerated values [total RNA, polyA RNA, cytoplasmic RNA, nuclear RNA, protein, total RNA/genomic DNA, polyA RNA/genomic DNA, cytoplasmic RNA/genomic DNA, nuclear RNA/genomic DNA]''' docs['sam']['type'] = 'Enumerated values [microarray, RNA-Seq, qPCR, proteomics]' docs['sam']['externalSourceId'] = 'A way to identify the external source of the sample, typically maps to a GEO Sample (GSM#).' docs['sam']['dataSource'] = '''
Flt = float Int = int Str = str TextFile = _io.TextIOWrapper check_collections_shallow = True check_calls = True def _replace_fwd(orig, final): 'helper function for _fulfill implementations to replace Fwd types with final types.' if isinstance(orig, Fwd) and orig.name == final.name: return final return orig _fwd_dependents = _defaultdict(set) # maps fwd declared names to types that depend on them. def _update_dependencies(T, dependencies): # first register T as dependent on each dependency (struct fields or union variants). for D in dependencies: if isinstance(D, Fwd): s = _fwd_dependents[D.name] if s is None: raise TypeError('Fwd has already been fulfilled: {}'.format(D.name)) s.add(T) # T depends on final of D, which has not yet been defined. # then fulfill all types that depend on T (possibly including itself, just registered). try: name = T.name except AttributeError: return # if T does not have a name then it cannot have a Fwd, so nobody depends on it. for D in _fwd_dependents[name]:
def _convex_hull_3d(vecs, eps=1e-6):
    """Compute the convex hull of a set of 3D points.

    Incremental construction: pick extreme points to build a starting
    tetrahedron, then repeatedly fan faces out to the farthest outside
    vertex until no vertex lies outside the hull.  Degenerate inputs
    (coincident, collinear or coplanar points) are detected and handled
    with reduced-dimension results.

    :param vecs: list of 3D array
    :type vecs: list | tuple | numpy.ndarray
    :param eps: tolerance; distances not greater than this are treated as
        coincident / collinear / coplanar
    :return: for a full 3D hull, a list of triangular faces, each a list of
        three indices into ``vecs``; for degenerate inputs a flat list of
        vertex indices (or the result of the 2D hull)
    """
    n = len(vecs)
    if n == 0:
        return []
    elif n == 1:
        return [0]

    verts = [_Vert(i, v) for i, v in enumerate(vecs)]

    # Find two vertices that are as far apart as possible: farthest from the
    # centroid, then farthest from that vertex.
    medium = _np.sum(vecs, axis=0) / len(vecs)
    v1 = verts[_norm(vecs - medium, axis=1).argmax()]
    v2 = verts[_norm(vecs - v1.co, axis=1).argmax()]
    line = v2.co - v1.co
    if _norm(line) <= eps:
        # All vertices coincide.
        return [0]
    if len(verts) == 2:
        return [v1.index, v2.index]

    # Find a third vertex: the one spanning the largest area with the base line.
    v3 = verts[_norm(_cross(line, vecs - v1.co), axis=1).argmax()]
    # NOTE:
    # np.cross(vec, mat)[0] == np.cross(vec, mat[0])
    # np.cross(mat, vec)[0] == np.cross(mat[0], vec)
    if _norm(_cross_3d(_normalized(line), v3.co - v1.co)) <= eps:
        # All vertices are collinear.
        return [v1.index, v2.index]
    if len(verts) == 3:
        return [v1.index, v2.index, v3.index]

    verts.remove(v1)
    verts.remove(v2)
    verts.remove(v3)

    # Find a fourth vertex for the tetrahedron: the one farthest from the
    # plane through v1, v2, v3.
    normal = _normal_tri(v1.co, v2.co, v3.co)
    plane = _plane(v1.co, normal)

    def key_func(v):
        return abs(_distance_point_to_plane(v.co4d, plane))

    v4 = max(verts, key=key_func)
    if key_func(v4) <= eps:
        # All vertices are coplanar: rotate them into the XY plane and
        # delegate to the 2D hull.  The worker pool is only needed for this
        # branch, so it is created lazily here and always closed/joined
        # before returning (the original created it unconditionally and
        # leaked it on this early return).
        quat = _rotation_difference_v3v3(normal, _array([0., 0., 1.]))
        pool = _mp.Pool()
        try:
            result = pool.starmap_async(_mul_qt_v3, zip(_repeat(quat), vecs))
            vecs_2d = [v[:2] for v in result.get()]
        finally:
            pool.close()
            pool.join()
        return _convex_hull_2d(vecs_2d, eps)
    verts.remove(v4)

    # Build the tetrahedron with outward-facing winding:
    #            ^ normal
    #      v3    |
    #     /      |\
    # v1 /____\ v2
    #    \    /
    #     \  /
    #     v4
    if _distance_point_to_plane(v4.co, v1.co, normal) < 0.0:
        faces = [_Face(v1, v2, v3),
                 _Face(v1, v4, v2), _Face(v2, v4, v3), _Face(v3, v4, v1)]
    else:
        faces = [_Face(v1, v3, v2),
                 _Face(v1, v2, v4), _Face(v2, v3, v4), _Face(v3, v1, v4)]

    # Distribute the remaining vertices to the faces they lie outside of.
    _divide_outer_verts(faces, verts, eps)

    # Map each edge key to the faces that share the edge.
    edge_faces = _defaultdict(list)
    for face in faces:
        for ekey in face.edge_keys:
            edge_faces[ekey].append(face)

    while True:
        added = False
        for i in range(len(faces)):
            # `faces` is mutated while we iterate; stop when the index runs
            # off the end (narrowed from a bare `except:`).
            try:
                face = faces[i]
            except IndexError:
                break
            if not face.outer_verts:
                continue
            v1 = max(face.outer_verts, key=lambda v: face.distance4d(v.co4d))
            if face.distance4d(v1.co4d) > eps:
                # Fan faces out radially from v1 so the hull stays convex.
                added = True
                # Find the faces hidden by v1; they become unnecessary.
                remove_faces = set()
                _find_remove_faces_re(remove_faces, v1.co4d, face, edge_faces, eps)
                # Remove them from the polyhedron, opening a hole.
                for f in remove_faces:
                    for ekey in f.edge_keys:
                        edge_faces[ekey].remove(f)
                    faces.remove(f)
                # Patch the hole: its boundary edges are those used by
                # exactly one removed face.
                new_faces = []
                ekey_count = _defaultdict(int)
                for f in remove_faces:
                    for ekey in f.edge_keys:
                        ekey_count[ekey] += 1
                for ekey, cnt in ekey_count.items():
                    if cnt != 1:
                        continue
                    # Wind the new face opposite to its surviving neighbour.
                    linkface = edge_faces[ekey][0]
                    v2, v3 = ekey
                    if linkface.verts[linkface.verts.index(v2) - 1] != v3:
                        v2, v3 = v3, v2
                    new_face = _Face(v1, v2, v3)
                    for key in new_face.edge_keys:
                        edge_faces[key].append(new_face)
                    new_faces.append(new_face)
                faces.extend(new_faces)

                # Redistribute the orphaned outer vertices to the new faces.
                outer_verts = _reduce(lambda a, b: a + b,
                                      (f.outer_verts for f in remove_faces))
                if v1 in outer_verts:
                    outer_verts.remove(v1)
                _divide_outer_verts(new_faces, outer_verts, eps)
            else:
                face.outer_verts = []
        if not added:
            break

    return [[v.index for v in f.verts] for f in faces]
# ---------------------------------------------------------------------------
# Mutable module-level state for the fake vim instance.
# ---------------------------------------------------------------------------

# Call log and variable dictionaries (vvars carries a vim-style v:version).
_log = []
vars = {}
vvars = {'version': 703}

# Editor/UI state.
_tabpage = 0
_mode = 'n'
_buf_purge_events = set()
_last_bufnr = 0
_highlights = {}

# Option values reported to lookups.
options = {
    'paste': 0,
    'ambiwidth': 'single',
    'columns': 80,
    'encoding': 'utf-8',
}

# Fake environment: any unknown variable reads back as the empty string.
from collections import defaultdict as _defaultdict
_environ = _defaultdict(lambda: '')
del _defaultdict

_thread_id = None


def _set_thread_id():
    # Remember the ident of the thread that imported this module.
    global _thread_id
    import threading
    _thread_id = threading.current_thread().ident


# Assuming import is done from the main thread.
_set_thread_id()
import logging as _logging
import sys as _sys
import threading as _threading
import os as _os

# Module names whose logging has been configured (appended to elsewhere).
_logging_modules = list()

# Map user-facing level names to stdlib logging level constants.
_logging_levels_by_name = {
    "debug": _logging.DEBUG,
    "info": _logging.INFO,
    "warn": _logging.WARN,
    "error": _logging.ERROR,
    "critical": _logging.CRITICAL
}

# logger -> handlers attached by this module.
# NOTE(review): _defaultdict is presumably imported earlier in the file;
# the import is not visible in this chunk.
_logging_handlers_by_logger = _defaultdict(list)


class _StreamHandler(_logging.StreamHandler):
    # StreamHandler whose repr exposes level and target stream name,
    # which makes handler registries debuggable.
    def __repr__(self):
        args = self.__class__.__name__, self.level, self.stream.name
        return "%s(%s,%s)" % args


def add_logging(name, level, file):
    # NOTE(review): asserts are stripped under `python -O`; explicit raises
    # would be safer for argument validation.
    assert level, level
    assert file, file
    # Accept a level name ("debug", "info", ...) as well as a numeric level.
    if isinstance(level, str):
        level = _logging_levels_by_name[level.lower()]
    # Accept a file path as well as an already-open stream; append mode
    # preserves existing log contents.
    if isinstance(file, str):
        file = open(file, "a")
    # NOTE(review): function body truncated in this chunk -- continuation
    # not visible here.
# Python 2 module (print statement); runs entirely at import time.
print "LOAD ABUNDANCES"

from libms.Chemistry.Elements import Elements as _Elements
from collections import defaultdict as _defaultdict

# symbol -> {mass number -> isotopic abundance}
_abu=_defaultdict(dict)

_elements = _Elements()
_symbols = _elements.symbol.values
_massnumbers = _elements.massnumber.values
_abundances = _elements.abundance.values

for _symbol, _massnumber, _abundance in zip(_symbols, _massnumbers, _abundances):
    # Bind e.g. C13 = <abundance> as a module-level constant.
    # NOTE(review): exec on generated names -- acceptable only because the
    # symbols come from the bundled Elements table, never from user input.
    exec("%s=_abundance" % (_symbol+str(_massnumber)))
    _abu[_symbol][_massnumber] = _abundance

for _k in _abu.keys():
    # Bind e.g. C = {12: ..., 13: ...} for each element symbol.
    exec("%s=_abu['%s']" % (_k, _k))
def show(self):
    """Show the plots.

    Displays the figures (presumably via matplotlib.pyplot -- blocking
    behavior depends on the backend), then resets the per-plot widget
    registry so stale widget references are dropped.
    """
    _plt.show()
    # defaultdict(list) is the idiomatic (and picklable) equivalent of
    # defaultdict(lambda: []).
    self.__current_widgets = _defaultdict(list)  # clean up the widgets.