def grabKeys(self, obj, stack=[], keys={}):
    '''Recursively grabs a list of json object key strings.
    Format is 'parent.child' for all nested keys.
    '''
    childKeys = {}
    if (type(obj) == type({})):
        keys = dict(
            chain(
                [(x, True) for x in keys.iterkeys()] +
                [('.'.join(stack + [y]), True) for y in obj.iterkeys()]
            ))
        childKeys = [
            [x for x in self.grabKeys(y[1], stack + [y[0]], keys).iteritems()]
            for y in filter(lambda x: self.typecheck(x[1]), obj.iteritems())
        ]
        childKeys = dict(chain.from_iterable(childKeys))
    elif (type(obj) is type([])):
        childKeys = [
            [x for x in self.grabKeys(item, stack, keys).iteritems()]
            for item in filter(lambda x: self.typecheck(x), obj)
        ]
        childKeys = dict(chain.from_iterable(childKeys))
    return (dict(
        chain(
            childKeys.iteritems(),
            keys.iteritems()
        )))
def assemble_contigs(adjlist):
    if not adjlist:
        return []
    graph = {u: vs for u, vs in adjlist.items()}
    vertices = set(chain(graph.keys(), chain.from_iterable(graph.values())))
    ins = dict.fromkeys(vertices, 0)
    outs = dict.fromkeys(vertices, 0)
    for u, vs in graph.items():
        outs[u] += len(vs)
    for v in chain.from_iterable(graph.values()):
        ins[v] += 1
    print('ins', ins)
    print('outs', outs)
    contig_starts = [v for v, out in outs.items()
                     if not (out in (0, 1) and ins[v] == 1)]
    print('contig_starts', contig_starts)
    contigs = []
    for start in contig_starts:
        while graph[start]:  # multiple edges
            path = [start]
            u = graph[start].pop()
            while ins[u] == outs[u] == 1:
                path.append(u)
                u = graph[u].pop()
            contigs.append(''.join(v[0] for v in path) + u)
    return contigs
def _check_ordering(cls):
    if not cls._meta.ordering:
        return []
    if not isinstance(cls._meta.ordering, (list, tuple)):
        return [checks.Error("'ordering' must be a tuple or list.",
                             hint=None, obj=cls, id='models.E014')]
    fields = [f for f in cls._meta.ordering if f != '?']
    fields = [f[1:] if f.startswith('-') else f for f in fields]
    fields = set(f for f in fields if f not in ('_order', 'pk') and '__' not in f)
    valid_fields = set(chain.from_iterable(
        (f.name, f.attname) for f in cls._meta.fields
    ))
    valid_tfields = set(chain.from_iterable(
        (f.name, f.attname) for f in cls._meta.translations_model._meta.fields
        if f.name not in ('master', 'language_code')
    ))
    return [checks.Error("'ordering' refers to the non-existent field '%s' --hvad." % field,
                         hint=None, obj=cls, id='models.E015')
            for field in fields - valid_fields - valid_tfields]
def university_prerequisite_statistics(abbreviation, result_set):
    uni_courses = session.query(Course).join(Department).join(University).filter(
        University.abbreviation == abbreviation).all()
    prereq_distances = [prerequisite_distances(course, result_set) for course in uni_courses]
    prereq_distances = [p for p in prereq_distances if p]  # strip courses with no prerequisites
    mean = numpy.mean(list(chain.from_iterable(prereq_distances)))
    stdv = numpy.std(list(chain.from_iterable(prereq_distances)))
    return (mean, stdv)
def run_all_combos(self, num_tests, disabled=None):
    tests = self.generate_tests(num_tests, disabled=disabled)
    for total in range(1, num_tests + 1):
        res = []
        res_disabled = []
        for chunk in range(1, total + 1):
            f = chunk_by_slice(chunk, total)
            res.append(list(f(tests, {})))
            if disabled:
                f.disabled = True
                res_disabled.append(list(f(tests, {})))
        lengths = [len([t for t in c if 'disabled' not in t]) for c in res]
        # the chunk with the most tests should have at most one more test
        # than the chunk with the least tests
        self.assertLessEqual(max(lengths) - min(lengths), 1)
        # chaining all chunks back together should equal the original list
        # of tests
        self.assertEqual(list(chain.from_iterable(res)), list(tests))
        if disabled:
            lengths = [len(c) for c in res_disabled]
            self.assertLessEqual(max(lengths) - min(lengths), 1)
            self.assertEqual(list(chain.from_iterable(res_disabled)), list(tests))
def _get_as_path(self, path):
    asps = (p['as_paths'] for p in path['attrs']
            if p['type'] == BGP_ATTR_TYPE_AS_PATH
            and 'as_paths' in p and p['as_paths'] is not None)
    asps = chain.from_iterable(asps)
    asns = (asp['asns'] for asp in asps)
    return list(chain.from_iterable(asns))
def _simple_complete(self, path, dot, like):
    if not path and not dot:
        scope = self._parser.user_scope()
        if not scope.is_scope():  # Might be a flow (if/while/etc).
            scope = scope.get_parent_scope()
        names_dicts = global_names_dict_generator(
            self._evaluator,
            self._evaluator.wrap(scope),
            self._pos
        )
        completion_names = []
        for names_dict, pos in names_dicts:
            names = list(chain.from_iterable(names_dict.values()))
            if not names:
                continue
            completion_names += filter_definition_names(names, self._parser.user_stmt(), pos)
    elif self._get_under_cursor_stmt(path) is None:
        return []
    else:
        scopes = list(self._prepare_goto(path, True))
        completion_names = []
        debug.dbg('possible completion scopes: %s', scopes)
        for s in scopes:
            names = []
            for names_dict in s.names_dicts(search_global=False):
                names += chain.from_iterable(names_dict.values())
            completion_names += filter_definition_names(names, self._parser.user_stmt())
    return completion_names
def apply(self, backups):
    purge = {}
    last_rule = None
    grouped_backups = _group_backups_by_tag(backups)
    for rule in self.rules:
        # find backups purged by the previous rule that should be tagged with this rule's tag
        if last_rule is not None:
            retagged_backups = rule.find_retag_candidate(purge[last_rule.tag], backups)
            for backup in retagged_backups:
                purge[last_rule.tag].remove(backup)
                grouped_backups[rule.tag].add(backup)
        # purge expired backups
        purge[rule.tag] = set()
        for backup in grouped_backups[rule.tag].copy():
            if rule.is_expired(backup):
                grouped_backups[rule.tag].remove(backup)
                purge[rule.tag].add(backup)
        # check if any time bucket has multiple backups and purge the latest/oldest depending on the policy
        duplicates = rule.find_duplicates(grouped_backups[rule.tag])
        for backup in duplicates:
            grouped_backups[rule.tag].remove(backup)
            purge[rule.tag].add(backup)
        last_rule = rule
    return (list(chain.from_iterable(grouped_backups.values())),
            list(chain.from_iterable(purge.values())))
def plot_piecewise(data, xmax):
    x = list(chain.from_iterable((data[i][0], data[i][0]) for i in range(1, len(data))))
    x.insert(0, data[0][0])
    x.append(xmax)
    y = list(chain.from_iterable((data[i][1], data[i][1]) for i in range(len(data))))
    plt.plot(x, y)
    plt.axis([min(x), max(x), min(y) - 1, max(y) + 1])
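# Hypothetical usage sketch for plot_piecewise() above (assumes matplotlib.pyplot
# is imported as plt and chain comes from itertools); the sample data is made up.
# Each (x, value) pair marks where the piecewise-constant function changes value.
steps = [(0, 1), (2, 3), (5, 2)]
plot_piecewise(steps, xmax=8)  # value is 1 on [0, 2), 3 on [2, 5), 2 on [5, 8]
plt.show()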
def metric_windiff(forest1, forest2):
    masses1 = [get_untyped_masses(tree) for tree in forest1]
    masses2 = [get_untyped_masses(tree) for tree in forest2]
    segments1 = list(chain.from_iterable(masses1))
    segments2 = list(chain.from_iterable(masses2))
    score = segeval.window_diff(segments1, segments2) * 100
    return score
def powerset(A, nonTrivial=False):
    '''powerset(set) -> iterator -- yields all subsets of A as tuples;
    if nonTrivial=True, yields all subsets except the empty set and A itself.'''
    from itertools import chain, combinations
    if nonTrivial:
        return chain.from_iterable(combinations(A, i) for i in range(1, len(A)))
    else:
        return chain.from_iterable(combinations(A, i) for i in range(0, len(A) + 1))
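# Illustrative expected results for powerset() above; subsets come out in
# increasing size (element order inside the tuples may vary with set iteration):
# list(powerset({1, 2, 3}))
#   -> [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]
# list(powerset({1, 2, 3}, nonTrivial=True))
#   -> [(1,), (2,), (3,), (1, 2), (1, 3), (2, 3)]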
def pack_apply_message(f, args, kwargs, buffer_threshold=MAX_BYTES, item_threshold=MAX_ITEMS):
    """pack up a function, args, and kwargs to be sent over the wire

    Each element of args/kwargs will be canned for special treatment,
    but inspection will not go any deeper than that.

    Any object whose data is larger than `threshold` will not have their data copied
    (only numpy arrays and bytes/buffers support zero-copy)

    Message will be a list of bytes/buffers of the format:

    [ cf, pinfo, <arg_bufs>, <kwarg_bufs> ]

    With length at least two + len(args) + len(kwargs)
    """
    arg_bufs = list(chain.from_iterable(
        serialize_object(arg, buffer_threshold, item_threshold) for arg in args))
    kw_keys = sorted(kwargs.keys())
    kwarg_bufs = list(chain.from_iterable(
        serialize_object(kwargs[key], buffer_threshold, item_threshold) for key in kw_keys))
    info = dict(nargs=len(args), narg_bufs=len(arg_bufs), kw_keys=kw_keys)
    msg = [pickle.dumps(can(f), PICKLE_PROTOCOL)]
    msg.append(pickle.dumps(info, PICKLE_PROTOCOL))
    msg.extend(arg_bufs)
    msg.extend(kwarg_bufs)
    return msg
def add_formatted_flags(flags_name, format):
    """Print CMake flags using macros_printer.

    Arguments:
    flags_name - Name to search for in config_compilers.
    format - Function that takes a build type and flag match, and returns
             the string to print out.
    """
    paths = ["compiler/" + flags_name, "compiler/ADD_" + flags_name]
    # This creates an iterable over elements in config_compilers
    # that match in non-debug mode.
    normal_matches = chain.from_iterable(
        all_matches(self.compiler_xml_tree, path, normal_dict)
        for path in paths
    )
    for match in normal_matches:
        macros_printer.print(format(model, match.text))
    # Now the same for debug mode.
    debug_matches = chain.from_iterable(
        all_matches(self.compiler_xml_tree, path, debug_dict)
        for path in paths
    )
    for match in debug_matches:
        macros_printer.print(format(model + "_DEBUG", match.text))
def make_table(self, metadata):
    bin_columns = chain.from_iterable(b.postgres_columns for b in self.bins)
    stat_columns = chain.from_iterable(b.postgres_columns for b in self.statistics)
    return Table(self.statistics_table_name, metadata,
                 Column('id', Integer, primary_key=True),
                 *list(chain(bin_columns, stat_columns)),
                 keep_existing=True)
def _write_statistics_file(run_dir, genomes, shared_single_copy, shared_multi_copy, partially_shared, nr_of_seqs):
    """Write out file with some basic statistics about the genomes, orthologs and size of shared core genome."""
    # Some easy statistics about genomes and orthologs
    nr_shared_sico_orth = len(shared_single_copy)

    # Determine number of ORFans by deducting unique proteins identified as orthologs from total number of genes
    proteins = set(chain.from_iterable(prot for per_genome in shared_single_copy for prot in per_genome.values()))
    proteins.update(chain.from_iterable(prot for per_genome in shared_multi_copy for prot in per_genome.values()))
    proteins.update(chain.from_iterable(prot for per_genome in partially_shared for prot in per_genome.values()))
    nr_orfans = nr_of_seqs - len(proteins)

    # Now unused statistics
    # nr_non_sico_orth = len(shared_multi_copy) + len(partially_shared)
    # nr_sico_genes = len(proteins)
    # nr_non_sico_genes = len(proteins) - nr_sico_genes

    stats_file = os.path.join(run_dir, 'extract-stats.txt')
    with open(stats_file, mode='w') as writer:
        # Write Genome & gene count statistics to file
        writer.write('{0:7}\tGenomes\n'.format(len(genomes)))
        writer.write('{0:7}\tGenes\n'.format(nr_of_seqs))
        writer.write('{0:7}\tORFan genes (no orthologs)\n'.format(nr_orfans))
        writer.write('{0:7}\tSingle-copy orthologous genes\n'.format(nr_shared_sico_orth))
        # writer.write('{0:7}\tShared single-copy orthologous genes in {1} orthologs\n'.format(nr_sico_genes,
        #                                                                                      nr_shared_sico_orth))
        # writer.write('{0:7}\tOtherwise orthologous genes in {1} orthologs\n'.format(nr_non_sico_genes,
        #                                                                             nr_non_sico_orth))

    assert os.path.isfile(stats_file) and 0 < os.path.getsize(stats_file), stats_file + ' should exist with content.'
    return stats_file
def command(self, path=None):
    if self.css:
        self._prepend_css(self.css)
    args = [self.wkhtmltopdf]
    args += list(chain.from_iterable(list(self.options.items())))
    args = [_f for _f in args if _f]
    if self.toc:
        args.append('toc')
        args += list(chain.from_iterable(list(self.toc.items())))
    if self.cover:
        args.append('cover')
        args.append(self.cover)
    # If the source is a string then we will pipe it into wkhtmltopdf
    # If the source is file-like then we will read from it and pipe it in
    if self.source.isString() or self.source.isFileObj():
        args.append('-')
    else:
        if isinstance(self.source.source, str):
            args.append(self.source.to_s())
        else:
            args += self.source.source
    # If output_path evaluates to False append '-' to end of args
    # and wkhtmltopdf will pass generated PDF to stdout
    if path:
        args.append(path)
    else:
        args.append('-')
    return args
def bio_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIO-encoded sequences.
    It computes token-level metrics and discards "O" labels.

    Note that it requires scikit-learn 0.15+ (or a version from github master)
    to calculate averages properly!
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
    labs = [class_indices[cls] for cls in tagset]

    return ((precision_recall_fscore_support(y_true_combined, y_pred_combined,
                                             labels=labs, average=None,
                                             sample_weight=None)),
            (classification_report(
                y_true_combined, y_pred_combined,
                labels=[class_indices[cls] for cls in tagset],
                target_names=tagset,
            )),
            labs)
def bio_classification_report(y_true, y_pred):
    lb = LabelBinarizer()
    y_true_combined = 1 - lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = list(chain.from_iterable(y_pred))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    print 'True sum %d Pred sum %d Len %d' % (sum(y_true_combined), sum(y_pred_combined), len(y_pred_combined))
    print "AUC\tP-R: %.4f\tROC: %.4f" % (average_precision_score(y_true_combined, y_pred_combined, average=None),
                                         roc_auc_score(y_true_combined, y_pred_combined, average=None))
    # plt.figure()
    # fpr, tpr, thr = roc_curve(y_true_combined, y_pred_combined)
    # area = auc(fpr, tpr)
    # plt.plot(fpr, tpr, label='{area:.3f}'.format(area=area))
    # plt.legend(loc=4)
    # plt.savefig('sub3.jpg')

    return classification_report(
        1 - y_true_combined,
        [0 if v > 0.1 else 1 for v in y_pred_combined],
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
def __init__(nn_index, hs, cx_list):
    import algos
    cx2_desc = hs.feats.cx2_desc
    # Make unique id for indexed descriptors
    feat_uid = hs.prefs.feat_cfg.get_uid()
    sample_uid = helpers.hashstr_arr(cx_list, "dcxs")
    uid = "_" + sample_uid + feat_uid
    # Number of features per sample chip
    sx2_nFeat = [len(cx2_desc[cx]) for cx in iter(cx_list)]
    # Inverted index from indexed descriptor to chipx and featx
    _ax2_cx = [[cx] * nFeat for (cx, nFeat) in izip(cx_list, sx2_nFeat)]
    _ax2_fx = [range(nFeat) for nFeat in iter(sx2_nFeat)]
    ax2_cx = np.array(list(chain.from_iterable(_ax2_cx)))
    ax2_fx = np.array(list(chain.from_iterable(_ax2_fx)))
    # Aggregate indexed descriptors into continuous structure
    ax2_desc = np.vstack([cx2_desc[cx] for cx in cx_list if len(cx2_desc[cx]) > 0])
    # Build/Load the flann index
    flann_params = {"algorithm": "kdtree", "trees": 4}
    precomp_kwargs = {
        "cache_dir": hs.dirs.cache_dir,
        "uid": uid,
        "flann_params": flann_params,
        "force_recompute": hs.args.nocache_flann,
    }
    flann = algos.precompute_flann(ax2_desc, **precomp_kwargs)
    # ----
    # Agg Data
    nn_index.ax2_cx = ax2_cx
    nn_index.ax2_fx = ax2_fx
    nn_index.ax2_data = ax2_desc
    nn_index.flann = flann
def evalTR(stack, E_f, E_0, angles, n_i, n_f, t_angles, addBulkT=False):
    # option to add a bulk transmission coefficient for a thick substrate.
    # In this case the bulk coefficient added will be for the final index (n_f) to air (n=1).
    # this expression is again Hecht 4.68 for normal incidence
    bulkT = (4.0 * n_f) / ((n_f + 1.0)**2)
    if addBulkT:
        T = [[[real(bulkT * (n_f / n_i) * (cos(t_angles[len(stack)][j][k]) / cos(angles[k])) * abs(E_f[j][k][l][0])**2)
               for l in range(len(E_f[0][0]))]
              for k in range(len(E_f[0]))]
             for j in range(len(E_f))]
    else:
        T = [[[real((n_f / n_i) * (cos(t_angles[len(stack)][j][k]) / cos(angles[k])) * abs(E_f[j][k][l][0])**2)
               for l in range(len(E_f[0][0]))]
              for k in range(len(E_f[0]))]
             for j in range(len(E_f))]
    R = [[[(abs(E_0[j][k][l][1]) / abs(E_0[j][k][l][0]))**2
           for l in range(len(E_f[0][0]))]
          for k in range(len(E_f[0]))]
         for j in range(len(E_f))]
    # averaging for both of these lists
    TAvg = mean(list(chain.from_iterable(chain.from_iterable(T))))
    RAvg = mean(list(chain.from_iterable(chain.from_iterable(R))))
    return (T, R, TAvg, RAvg)
def _generate_asts(evidence_json: str, predictor, okay_check=False):
    logging.debug("entering")
    js = json.loads(evidence_json)  # parse evidence as a JSON string

    # enhance keywords evidence from others
    keywords = list(chain.from_iterable([Keywords.split_camel(c) for c in js['apicalls']])) + \
        list(chain.from_iterable([Keywords.split_camel(t) for t in js['types']])) + \
        js['keywords']
    js['keywords'] = list(set([k.lower() for k in keywords if k.lower() not in Keywords.STOP_WORDS]))

    #
    # Generate ASTs from evidence.
    #
    asts = predictor.infer(js)

    #
    # If okay_check is set, retain only those asts that pass the _okay(...) filter. Otherwise retain all asts.
    #
    if okay_check:
        okay_asts = []
        for ast in asts:
            if _okay(js, ast, predictor):
                okay_asts.append(ast)
        okay_asts = asts if okay_asts == [] else okay_asts
    else:
        okay_asts = asts

    logging.debug("exiting")
    return json.dumps({'evidences': js, 'asts': okay_asts}, indent=2)
def write_packages(self, allfeatures, allextensions):
    f = self._f_gl
    self.write_module(f, self.PACKAGE)
    self.write_imports(f, [self.FUNCS, self.EXT, self.ENUMS, self.TYPES], False)

    for api, features in allfeatures.iteritems():
        extensions = allextensions[api]
        with open(self.make_path(api), 'w') as f:
            self.write_module(f, api)

            extenums = chain.from_iterable(ext.enums for ext in extensions)
            funcenums = chain.from_iterable(ext.enums for ext in extensions)
            enums = set(enum.name for enum in extenums) | \
                set(enum.name for enum in funcenums)

            featfuncs = set(func.proto.name for func in
                            chain.from_iterable(feat.functions for feat in features))
            extfuncs = set(func.proto.name for func in
                           chain.from_iterable(ext.functions for ext in extensions))
            extfuncs = extfuncs - featfuncs

            self.write_selective_import(f, self.FUNCS, featfuncs)
            self.write_selective_import(f, self.EXT, extfuncs)
            self.write_selective_import(f, self.ENUMS, enums)
def _run(self):
    target = self.target

    def _quote(items):
        # XXX It's not clear to me how the parameters have to be quoted.
        if PY2:
            items = (i.encode('utf-8') for i in items)
        return ','.join(items)

    data = {p: _quote(getattr(self, p))
            for p in 'track follow'.split() if getattr(self, p)}
    locations = ','.join(str(f) for f in
                         chain.from_iterable(chain.from_iterable(self.locations)))
    if locations:
        data['locations'] = locations
    response = self.client.post(self.url, data=data, stream=True)
    response.raise_for_status()
    line = None
    for line in response.iter_lines():
        target.send(line.decode('utf-8'))
    else:
        # XXX Should be changed to something meaningful
        raise EndOfStreamError(line)
def typediff(pos_items, neg_items, opts):
    """pos_items and neg_items are lists of either Fragment or Reading objects"""
    # currently assuming that the Reading objects are only coming from gold
    # profiles, therefore only one per item. otherwise we'd need to be using a
    # list of Reading objects or probably could be defining a ProfileItem
    # class that emulates the relevant interface to Fragment
    tfunc = lambda x: x.types.keys() if opts.all else x.best.types.keys()
    pos_types = set(chain.from_iterable(tfunc(x) for x in pos_items))
    neg_types = set(chain.from_iterable(tfunc(x) for x in neg_items))

    if len(pos_types) + len(neg_types) > 1:
        typelist = list(compare_types(pos_types, neg_types, opts))
    else:
        typelist = list(max(pos_types, neg_types))

    if opts.raw:
        return '\n'.join(typelist)

    hierarchy = delphin.load_hierarchy(opts.grammar.types_path)

    if opts.supers:
        for group in (pos_items, neg_items):
            for item in group:
                item.load_supers(hierarchy)
        sfunc = lambda x: x.supers
        pos_supers = set(chain.from_iterable(sfunc(x) for x in pos_items))
        neg_supers = set(chain.from_iterable(sfunc(x) for x in neg_items))
        supers = compare_types(pos_supers, neg_supers, opts)
        typelist.extend('^' + t for t in supers)

    return pretty_print_types(typelist, hierarchy)
def _grid_glyphs(self, glyphs):
    x = self._x
    y = self._y
    K = self._K
    leading = self._leading
    FMX = self.font['__gridfont__'].character_index

    colored_chars = list(chain.from_iterable(
        zip_longest([], text, fillvalue=self._palatte.get(token, (0, 0, 0, 1)))
        for token, text in xml_lexer.get_tokens(''.join(self._CHARS))))
    # print(set(token for token, text in xml_lexer.get_tokens(''.join(self._CHARS))))
    lines = list(_linebreak(colored_chars, self._charlength))
    self._IJ = [0] + list(accumulate(len(l) for l, br in lines))
    self.y_bottom = y + leading * len(lines)

    y += leading
    xd = x + 30

    colored_text = {color: [] for color in self._palatte.values()}
    for l, line in enumerate(lines):
        for color, G in groupby(((FMX(character), xd + i*K, y + l*leading, color)
                                 for i, (color, character) in enumerate(line[0])
                                 if character != '\n'),
                                key=lambda k: k[3]):
            try:
                colored_text[color].extend((g, h, k) for g, h, k, c in G)
            except KeyError:
                colored_text[color] = [(g, h, k) for g, h, k, c in G]

    N = zip(accumulate(line[1] for line in lines), enumerate(lines))
    numbers = chain.from_iterable(((FMX(character), x + i*K, y + l*leading)
                                   for i, character in enumerate(str(int(N))))
                                  for N, (l, line) in N if line[1])
    colored_text[(0.7, 0.7, 0.7, 1)] = list(numbers)

    self._rows = len(lines)
    self._colored_text = colored_text
    # documentation """
def report(test_y, pred_y):
    lb = LabelBinarizer()
    test_y_combined = lb.fit_transform(list(chain.from_iterable(test_y)))
    pred_y_combined = lb.transform(list(chain.from_iterable(pred_y)))
    tagset = sorted(set(lb.classes_))
    class_indices = {cls: idx for idx, cls in enumerate(tagset)}
    print(classification_report(test_y_combined, pred_y_combined,
                                labels=[class_indices[cls] for cls in tagset],
                                target_names=tagset))
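# Hypothetical usage sketch for report() above (assumes scikit-learn's
# LabelBinarizer and classification_report are imported as the function expects);
# the toy BIO-tagged sequences below are made up for illustration.
gold = [['B-PER', 'I-PER', 'O'], ['B-LOC', 'O']]
pred = [['B-PER', 'O', 'O'], ['B-LOC', 'O']]
report(gold, pred)  # prints per-tag precision/recall/F1 over the flattened tokens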
def __init__(self, pdb_filename, pdb_id=None, surface_file=None, binders_file=None, clusters_report_file=None):
    '''
    Constructor
    '''
    assert os.path.isfile(pdb_filename)
    self.pdb_filename = os.path.abspath(pdb_filename)
    pdblines = open(self.pdb_filename).readlines()
    self.receptor_tmp_file = '/tmp/%d.pdb.tmp' % os.getpid()
    # print 'using receptor in %s' % self.receptor_tmp_file
    TMP_PDB = open(self.receptor_tmp_file, 'w')
    for line in pdblines:
        line = line.strip()
        if line.startswith('ATOM') and line[21].strip() == 'A':
            print >> TMP_PDB, line.strip()
    TMP_PDB.close()
    self.pdb_id = os.path.basename(self.pdb_filename).split('.')[0].upper()
    assert re.match(r'^[A-Za-z0-9]{4}$', self.pdb_id)
    self.polymer_obj = Polymer(self.receptor_tmp_file)
    if surface_file:
        self.surface_residues = set(chain.from_iterable(
            [self.residues_with_num(int(line.split()[1])) for line in open(surface_file).readlines()]))
    if binders_file:
        self.binding_residues = set(chain.from_iterable(
            [self.residues_with_num(int(line.split()[1])) for line in open(binders_file).readlines()]))
    if clusters_report_file:
        for line in open(clusters_report_file).readlines():
            if line.startswith('#'):
                continue
            line_split = line.split()
            cluster_num = int(line_split[1])
            cluster_residues = set(self.residues_with_num(map(int, line_split[-1].split(','))))
            self.clusters[cluster_num] = cluster_residues
            if binders_file:
                self.true_binders[cluster_num] = cluster_residues.intersection(self.binding_residues)
def join(colls):
    colls, colls_copy = tee(colls)
    it = iter(colls_copy)
    try:
        dest = next(it)
    except StopIteration:
        return None
    cls = dest.__class__

    if isinstance(dest, basestring):
        return ''.join(colls)
    elif isinstance(dest, Mapping):
        result = dest.copy()
        for d in it:
            result.update(d)
        return result
    elif isinstance(dest, Set):
        return dest.union(*it)
    elif isinstance(dest, (Iterator, xrange)):
        return chain.from_iterable(colls)
    elif isinstance(dest, Iterable):
        # NOTE: this could be reduce(concat, ...),
        #       more effective for low count
        return cls(chain.from_iterable(colls))
    else:
        raise TypeError("Don't know how to join %s" % cls.__name__)
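# Illustrative expected results for join() above (Python 2 style, matching the
# basestring/xrange checks in the function): the first collection decides the
# output type and the remaining ones are merged into it.
# join([[1, 2], [3], [4, 5]])  -> [1, 2, 3, 4, 5]
# join([{'a': 1}, {'b': 2}])   -> {'a': 1, 'b': 2}
# join(['ab', 'cd'])           -> 'abcd'
# join([{1, 2}, {2, 3}])       -> {1, 2, 3}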
def _check_ordering(cls):
    if not cls._meta.ordering:
        return []
    if not isinstance(cls._meta.ordering, (list, tuple)):
        return [checks.Error("'ordering' must be a tuple or list.", hint=None, obj=cls, id="models.E014")]
    fields = [f for f in cls._meta.ordering if f != "?"]
    fields = [f[1:] if f.startswith("-") else f for f in fields]
    fields = set(f for f in fields if f not in ("_order", "pk") and "__" not in f)
    valid_fields = set(chain.from_iterable((f.name, f.attname) for f in cls._meta.fields))
    valid_tfields = set(
        chain.from_iterable(
            (f.name, f.attname)
            for f in cls._meta.translations_model._meta.fields
            if f.name not in ("master", "language_code")
        )
    )
    return [
        checks.Error(
            "'ordering' refers to the non-existent field '%s' --hvad." % field,
            hint=None,
            obj=cls,
            id="models.E015",
        )
        for field in fields - valid_fields - valid_tfields
    ]
def spreader_generator(blockpool, spread):
    """Returns an iter over an iter of iters, where the inner elements are
    interleaved at spread intervals. The tail has no such guarantees, as
    we're an eager bin packer.

    >>> jobs = [["a1", "a2", "a3"], ["b1"], ["c1", "c2", "c3"]]
    >>> list(spreader_generator(jobs, 1))
    ['a1', 'a2', 'a3', 'b1', 'c1', 'c2', 'c3']
    >>> list(spreader_generator(jobs, 2))
    ['a1', 'b1', 'a2', 'c1', 'a3', 'c2', 'c3']
    >>> list(spreader_generator(jobs, 3))
    ['a1', 'b1', 'c1', 'a2', 'c2', 'a3', 'c3']
    >>> list(spreader_generator(jobs, 4))
    ['a1', 'b1', 'c1', 'a2', 'c2', 'a3', 'c3']
    """
    # This sentinel object is unique to this function and can't equal anything
    # a user can put into the lists, so it's a safe "nothing" value for filler.
    sentinel = object()
    # We need a real iterator for our feeders to share
    blockpool_iter = iter(blockpool)
    # The feeders pick off blocks from blockpool_iter lazily and return
    # the elements of each group in turn
    feeders = [chain.from_iterable(blockpool_iter) for _ in range(spread)]
    # Now we'll zip our lazily-distributed spread-wide groups of jobs into stripes
    stripes = izip_longest(*feeders, fillvalue=sentinel)
    # and return all the values of the stripes in order
    flattened_spread = chain.from_iterable(stripes)
    # And we won't return the non-values from feeders that get the short straw
    not_sentinel = lambda x: x is not sentinel
    return ifilter(not_sentinel, flattened_spread)
def convert_to_data(self, *args: Any, **kwargs: Any) -> ArrayLike:
    instruments = self._args + list(self._kwargs.values())
    ordered_args = args + tuple(kwargs[key] for key in self._kwargs.keys())  # Match the internal order of args
    # Process and flatten all args
    data = chain.from_iterable([instrument.process_arg(arg)
                                for instrument, arg in zip(instruments, ordered_args)])
    return data
def complete_chunked_upload(self, uuid, final_path, storage_metadata, force_client_side=False): self._initialize_cloud_conn() chunk_list = self._chunk_list_from_metadata(storage_metadata) # Here is where things get interesting: we are going to try to assemble this server side # In order to be a candidate all parts (after offsets have been computed) must be at least 5MB server_side_assembly = False if not force_client_side: server_side_assembly = True for chunk_offset, chunk in enumerate(chunk_list): # If the chunk is both too small, and not the last chunk, we rule out server side assembly if chunk.length < self.minimum_chunk_size and (chunk_offset + 1) < len(chunk_list): server_side_assembly = False break if server_side_assembly: logger.debug("Performing server side assembly of multi-part upload for: %s", final_path) try: # Awesome, we can do this completely server side, now we have to start a new multipart # upload and use copy_part_from_key to set all of the chunks. mpu = self.__initiate_multipart_upload( final_path, content_type=None, content_encoding=None ) updated_chunks = chain.from_iterable( [_CloudStorage._rechunk(c, self.maximum_chunk_size) for c in chunk_list] ) # [_PartUpload] upload_parts = [] for index, chunk in enumerate(updated_chunks): abs_chunk_path = self._init_path(chunk.path) part = mpu.Part(index + 1) part_copy = part.copy_from( CopySource={"Bucket": self.get_cloud_bucket().name, "Key": abs_chunk_path}, CopySourceRange="bytes=%s-%s" % (chunk.offset, chunk.length + chunk.offset - 1), ) part_copy = self._perform_action_with_retry( mpu.Part(index + 1).copy_from, CopySource={"Bucket": self.get_cloud_bucket().name, "Key": abs_chunk_path}, CopySourceRange="bytes=%s-%s" % (chunk.offset, chunk.length + chunk.offset - 1), ) upload_parts.append(_PartUpload(index + 1, part_copy["CopyPartResult"]["ETag"])) self._perform_action_with_retry( mpu.complete, MultipartUpload={ "Parts": [ {"ETag": p.e_tag, "PartNumber": p.part_number} for p in upload_parts ] }, ) except (botocore.exceptions.ClientError, IOError) as ioe: # Something bad happened, log it and then give up msg = "Exception when attempting server-side assembly for: %s" logger.exception(msg, final_path) mpu.abort() raise ioe else: # We are going to turn all of the server side objects into a single file-like stream, and # pass that to stream_write to chunk and upload the final object. self._client_side_chunk_join(final_path, chunk_list)
if __name__ == '__main__':
    with open(settings.data.blacklist_path) as handle:
        blacklist = set(handle.read().split('\n'))

    data_file = settings.data.pairs_path
    print('Reading {0}'.format(data_file))
    with open(data_file) as handle:
        reader = csv.reader(handle)
        pairs = ((question, answer) for question, answer in reader
                 if not any(w in question for w in blacklist)
                 and not any(w in answer for w in blacklist))

        print('Building Frequency Distribution')
        vocabulary_size = settings.model.vocabulary_size - 4  # pad, start, end, unk
        freq_dist = FreqDist(chain.from_iterable(q.split() + a.split()
                                                 for q, a in tqdm(pairs, total=3102698)))
        print('Total {0} unique words'.format(len(freq_dist)))

    word_counts = freq_dist.most_common(vocabulary_size)
    vocabulary = [word for word, count in word_counts]
    length = settings.model.sequence_length - 2  # start, end
    vocabulary_set = set(vocabulary)

    def remove_unknown(line):
        return ' '.join(word if word in vocabulary_set else UNK for word in line.split())

    unk_ratio = settings.data.unk_ratio

    def is_valid(line):
        words = line.split()
        return len(words) <= length and (words.count(UNK) / float(len(words))) < unk_ratio
def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
from itertools import chain

from colors import get_different_colors
from database import database_info, service_info
from elasticsearch_client import get_services_from_table
from string_utils import document_from_database_info
from tfidf_kmeans import tfidf_kmeans
from wordnet_network import get_wordnet_labels

foreign_key_graph = nx.Graph()

db_info = database_info()
connected_nodes = set(chain.from_iterable(
    [table['foreign_keys'] + [table['name']]
     for table in db_info if len(table['foreign_keys']) > 0]))
db_info = [info for info in db_info if info['name'] in connected_nodes]

documents = [document_from_database_info(info) for info in db_info]
k = 10
labels = tfidf_kmeans(documents, k)
# labels = get_wordnet_labels(documents)

vis_network = Network(height="100%", width="70%")
colors = get_different_colors(max(labels) + 1)

# labels from k means
color_labels = [colors[l] for l in labels]
# services use this tables
# color_labels = ["#FF0000" if len(get_services_from_table(x['name'])) > 0 else "#DDDDDD" for x in db_info]

vis_network.add_nodes([x['name'] for x in db_info], color=color_labels)
def func(s, iterator): return chain.from_iterable(imap(f, iterator))
def grandchildren(self): return list(chain.from_iterable(c.children for c in self.children))
def _core_oscillators(difmats, assignment, adj_index, rev_index, verbose): """ Given a list of diffusion matrices calculated during a flip-flop state, this function identifies core oscillators as well as their anti-correlated partners. Parameters ---------- :param difmats: Diffusion matrices during flip-flop state :param assignment: Cluster assignment :param adj_index: Dictionary for indexing :param rev_index: Dictionary for indexing :param verbose: Verbosity level of function :return: Tuple with list of oscillators and dictionary of anti-correlated oscillators """ oscillators = list() oscillators_series = list() for index in range(len(assignment)): # node amplitude is NOT correlated to position in network seq = difmats[:, index, index] ampli = np.max(seq) - np.min(seq) if ampli > 0.5: # if the amplitude is this large, # the node may be an oscillator # in that case, mean amplitude may be low oscillators.append(index) oscillators_series.append(seq) oscillators = [rev_index[x] for x in oscillators] if verbose: logger.info('Found the following strong oscillators: ' + str(oscillators)) amplis = dict() clusdict = dict.fromkeys(oscillators) for x in clusdict: clusdict[x] = assignment[adj_index[x]] # we find anti-correlated oscillator nodes # there should be at least one node represented for each cluster for pair in combinations(range(len(oscillators)), 2): total = oscillators_series[pair[0]] - oscillators_series[pair[1]] # need to be careful with this number, # the core oscillators should converge to 1 and -1 # but may stick a little below that value amplis[(oscillators[pair[0]], oscillators[pair[1]])] = (np.max(total) - np.min(total)) # need to find the largest anti-correlation per cluster clus_corrs = dict.fromkeys(set(assignment), 0) clus_nodes = dict.fromkeys(set(assignment)) for corr in amplis: cluster1 = clusdict[corr[0]] cluster2 = clusdict[corr[1]] if amplis[corr] > clus_corrs[cluster1]: clus_nodes[cluster1] = corr clus_corrs[cluster1] = amplis[corr] if amplis[corr] > clus_corrs[cluster2]: clus_nodes[cluster2] = corr clus_corrs[cluster2] = amplis[corr] clus_nodes = {k: v for k, v in clus_nodes.items() if v is not None} # it is possible for clusters to not have a strong oscillator core_oscillators = set(list(chain.from_iterable(list( clus_nodes.values())))) id_corrs = dict.fromkeys(core_oscillators, 0) anti_sizes = dict.fromkeys(core_oscillators, 0) for nodes in combinations(core_oscillators, 2): try: size = amplis[nodes] except KeyError: size = amplis[(nodes[1], nodes[0])] if size > anti_sizes[nodes[0]]: id_corrs[nodes[0]] = nodes[1] anti_sizes[nodes[0]] = size if size > anti_sizes[nodes[1]]: id_corrs[nodes[1]] = nodes[0] anti_sizes[nodes[1]] = size [ clusdict.pop(x) for x in list(clusdict.keys()) if x not in core_oscillators ] anti_corrs = dict() for core in core_oscillators: anti_corrs[clusdict[core]] = clusdict[id_corrs[core]] # oscillator is defined as strongest anti-correlation return core_oscillators, anti_corrs
list formatted graph. Looking at the table, one can see that the # of neighbors
is simply how many times the node is present. Therefore, one could simply convert
the array into a list and count the occurrences of each node.
'''

# allows use of Python3.X print functionality
from __future__ import print_function
from collections import Counter
from itertools import chain

edges = []

with open('data/rosalind_deg.txt', 'r') as f:
    # Skip first line
    f.next()  # NOTE - use next(f) in Python 3.X
    for line in f:
        edges.append(line.strip().split())
    f.close()

my_list = []
for x in chain.from_iterable(edges):  # flatten the lists
    my_list.append(x)

d = Counter(my_list)  # count how many times each number present

o = open("output/Algorithmic_003_DEG.txt", 'w')
for key in sorted(d, key=int):  # because keys are numbers, must be converted to int for sorting
    print(d[key], end=" ", file=o)
o.close()
def render(self):
    return mark_safe('\n'.join(
        chain.from_iterable(
            getattr(self, 'render_' + name)() for name in MEDIA_TYPES)))
def cache_pupil_timeline_data( self, key: str, detector_tag: str, ylim=None, fallback_detector_tag: T.Optional[str] = None, ): world_start_stop_ts = [ self.g_pool.timestamps[0], self.g_pool.timestamps[-1] ] if not self.g_pool.pupil_positions: self.cache[key] = { "left": [], "right": [], "xlim": world_start_stop_ts, "ylim": [0, 1], } else: ts_data_pairs_right_left = [], [] for eye_id in (0, 1): pupil_positions = self.g_pool.pupil_positions[eye_id, detector_tag] if not pupil_positions and fallback_detector_tag is not None: pupil_positions = self.g_pool.pupil_positions[ eye_id, fallback_detector_tag] if pupil_positions: t0, t1 = ( pupil_positions.timestamps[0], pupil_positions.timestamps[-1], ) timestamps_target = np.linspace(t0, t1, NUMBER_SAMPLES_TIMELINE, dtype=np.float32) data_indeces = pm.find_closest(pupil_positions.timestamps, timestamps_target) data_indeces = np.unique(data_indeces) for idx in data_indeces: ts_data_pair = ( pupil_positions.timestamps[idx], pupil_positions[idx][key], ) ts_data_pairs_right_left[eye_id].append(ts_data_pair) if ylim is None: # max_val must not be 0, else gl will crash all_pupil_data_chained = chain.from_iterable( ts_data_pairs_right_left) try: # Outlier removal based on: # https://en.wikipedia.org/wiki/Outlier#Tukey's_fences min_val, max_val = np.quantile( [pd[1] for pd in all_pupil_data_chained], [0.25, 0.75]) iqr = max_val - min_val min_val -= 1.5 * iqr max_val += 1.5 * iqr ylim = min_val, max_val except IndexError: # no pupil data available ylim = 0.0, 1.0 self.cache[key] = { "right": ts_data_pairs_right_left[0], "left": ts_data_pairs_right_left[1], "xlim": world_start_stop_ts, "ylim": ylim, }
    label_name = consume(tokens)
    if peek_or_terminal(tokens) == TOKENS.COLON:
        return symbol_table['__ labeled_statement __'](
            chain((label_name,), consume_all(tokens)), symbol_table)
        # return label_stmnt(label_name, statement(tokens, symbol_table))
    # it must be an expression, TODO: figure out a way without using dangerous chain!
    # tokens = chain((label_name, consume(tokens)), tokens)
    tokens = chain((label_name,), consume_all(tokens))
    expr, _ = symbol_table['__ expression __'](tokens, symbol_table), \
        error_if_not_value(tokens, TOKENS.SEMICOLON)
    return repeat(expr, 1)

    if peek_or_terminal(tokens) is not terminal:
        expr, _ = symbol_table['__ expression __'](tokens, symbol_table), \
            error_if_not_value(tokens, TOKENS.SEMICOLON)
        return repeat(expr, 1)

    raise ValueError(
        '{l} No rule could be found to create statement, got {got}'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))


statement_funcs = labeled_statement, selection_statement, iteration_statement, jump_statement
set_rules(
    statement,
    chain(
        chain.from_iterable(
            imap(izip, imap(rules, statement_funcs), imap(repeat, statement_funcs))),
        ((TOKENS.LEFT_BRACE, _comp_stmnt), (TOKENS.SEMICOLON, _empty_statement))))
def collect_frequent( it: Iterator[List[Tuple[str, List[Any]]]], threshold: float, permutations: int, decay: float, min_freq: int, decay_filter: str, wrap_filter: str, bootstrap_prefix: str = None ) -> Generator[Tuple[str, PDict], None, None]: # noqa """ Reads all the documents (as returned by :func:`minhash_group`) and collects the frequent paragraphs from them on a per-domain basis. TODO: reference to MMDS Yields (domain, `PDict`) tuples per domain. :param it: an iterator that yields documents as in :func:`minhash_group`; i.e. URL -- paragraph minhash list tuples. :param threshold: the Jaccard similarity threshold for paragraph identity. :param permutations: the number of permutations per paragraph. :param decay: the decay (multiplication) constant used for scoring paraphraphs. :param min_freq: the minimum number of occurrence from which a paragraph is deemed frequent. :param decay_filter: decay expression that is used to filter paragraphs after each step. :param wrap_filter: expression that is used to filter paragraphs after all documents have been processed. :param bootstrap_prefix: prefix of an existing .pdata/.pdi file pair to bootstrap the domain frequency counts with. """ curr_domain = None if bootstrap_prefix: bootstrap = RandomPDataReader(bootstrap_prefix) logging.debug('Bootstrap file prefix: {}'.format(bootstrap_prefix)) else: bootstrap = None try: fc = FrequentCollector(threshold, permutations, decay, min_freq, bootstrap, decay_filter, wrap_filter) # I don't want to write all the domain != curr_domain stuff twice, so # let's add a sentinel record to the end. for url, mhs in chain(chain.from_iterable(it), [('', [])]): domain = urlsplit(url).netloc # A new domain: yield results and re-initialize everything if domain != curr_domain: # Filtering and yielding results if curr_domain is not None: fc.wrap_up_domain() logging.debug('Finished collecting frequent paragraphs ' 'from {}...'.format(curr_domain)) if fc.freq_ps: logging.debug( 'Found {} frequent paragraphs (duplicates: ' '{}) in domain {} ({} documents).'.format( len(fc.freq_ps), fc.num_dup, curr_domain, fc.stats.docs)) # The domain is returned as well, so that we know what the # input was yield curr_domain, fc.freq_ps, fc.stats # Check for the sentinel if not domain: break # Re-initialization logging.debug( 'Collecting frequent paragraphs from {}...'.format(domain)) curr_domain = domain fc.reset(curr_domain) fc.collect_from_doc(url, mhs) finally: if bootstrap: bootstrap.close()
async def load_definitions(self): if "definitions" not in self.mem_map: return {} logger.info("Updating Definitions from Panel") data = defaultdict(dict) try: def_parsers = self.get_message("DefinitionsParserMap") definitions = self.mem_map["definitions"] for elem_type in definitions: if elem_type not in def_parsers: logger.warning("No parser for %s definitions", elem_type) continue start_time = time() parser = def_parsers[elem_type] if isinstance(parser, typing.Callable): parser = parser(self.settings) assert isinstance(parser, Construct) elem_def = definitions[elem_type] enabled_indexes = set() addresses = enumerate(chain.from_iterable( elem_def["addresses"]), start=1) async for index, raw_data in self._eeprom_batch_reader( addresses, parser.sizeof()): element = parser.parse(raw_data) if cfg.LOGGING_DUMP_MESSAGES: logger.debug( f"EEPROM parsed ({elem_type}/{index}): {element}") if elem_def.get("bit_encoded"): for elem_index, elem_data in element.items(): definition = elem_data.get("definition") data_index = (index - 1) * len(element) + elem_index data[elem_type][data_index] = elem_data if definition != "disabled": enabled_indexes.add(data_index) else: data[elem_type][index] = element definition = element.get("definition") if definition != "disabled": enabled_indexes.add(index) cfg.LIMITS[elem_type] = get_limits_for_type( elem_type, list(enabled_indexes)) cfg.LIMITS[elem_type] = list( set(cfg.LIMITS[elem_type]).intersection(enabled_indexes)) logger.info( f"{elem_type.title()} definitions loaded ({round(time() - start_time, 2)}s)" ) except ResourceWarning: pass return construct_free(data)
def _fetch_artifacts(self, local_override_versions): """Download jars from maven repo into the artifact cache dir, then symlink them into our workdir.""" products = self.context.products # Coordinate -> set(relative path to symlink of artifact in symlink farm) coord_to_artifact_symlinks = defaultdict(set) # Demanded by some downstream tasks safe_mkdir(self.pom_cache_dir) products.safe_create_data('ivy_cache_dir', lambda: self.pom_cache_dir) coords = set( Coordinate(*t) for t in chain.from_iterable( self.target_to_maven_coordinate_closure.values())) artifacts_to_download = set() for coord in coords: for artifact in self.maven_coordinate_to_provided_artifacts[coord]: # Sanity check. At this point, all artifacts mapped to a coord should be fully resolved, location included. if artifact.repo_url is None: raise Exception( "Something went wrong! {} was mapped to an artifact {} with no " "associated repo: ".format(coord, artifact)) cached_artifact_path = os.path.join(self.pom_cache_dir, artifact.artifact_path) if not os.path.exists(cached_artifact_path): artifacts_to_download.add(artifact) self._download_artifacts(artifacts_to_download) # TODO(mateo): Rename. I think that Foursquare still needs this product but it is a deprecated concept upstream. # There is no ivy involved in this anymore. ivy_symlink_map = self.context.products.get_data( 'ivy_resolve_symlink_map', dict) safe_mkdir(self.artifact_symlink_dir, clean=True) for coord in coords: for artifact in self.maven_coordinate_to_provided_artifacts[coord]: local_override_key = (artifact.groupId, artifact.artifactId) if local_override_key not in local_override_versions: cached_artifact_path = os.path.realpath( os.path.join(self.pom_cache_dir, artifact.artifact_path)) else: cached_artifact_path = os.path.realpath( local_override_versions[local_override_key]) if not os.path.exists(cached_artifact_path): raise Exception( 'Local override for {} at {} does not exist.'. format(artifact, cached_artifact_path)) # TODO(mateo): Use regular logging for info - print is not great and does not respect --quiet. # But atm, log levels for pom-resolve is either overwhelming or not enough info. pom-resolve should probably # be broken up into multiple tasks some day. This setting is only used for local development in any case. print("\n* Using local override for {}:\n\t{}".format( artifact, cached_artifact_path)) symlinked_artifact_path = os.path.join( self.artifact_symlink_dir, artifact.artifact_path) safe_mkdir(os.path.dirname(symlinked_artifact_path)) try: os.symlink(cached_artifact_path, symlinked_artifact_path) except OSError as e: if e.errno != errno.EEXIST: raise existing_symlink_target = os.readlink( symlinked_artifact_path) if existing_symlink_target != cached_artifact_path: raise Exception( 'A symlink already exists for artifact {}, but it points to the wrong path.\n' 'Symlink: {}\n' 'Destination of existing symlink: {}\n' 'Where this symlink should point: {}\n'.format( artifact, symlinked_artifact_path, existing_symlink_target, cached_artifact_path)) ivy_symlink_map[cached_artifact_path] = symlinked_artifact_path coord_to_artifact_symlinks[artifact] = symlinked_artifact_path return coord_to_artifact_symlinks
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
tf.flags.DEFINE_string("output_file", "single_scores.csv", "Name of output file for final bAbI accuracy scores.")
FLAGS = tf.flags.FLAGS

print("Started Task:", FLAGS.task_id)

# task data
train, test = load_task(FLAGS.data_dir, FLAGS.task_id)
data = train + test

vocab = sorted(
    reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

max_story_size = max(map(len, (s for s, _, _ in data)))
mean_story_size = int(np.mean(map(len, (s for s, _, _ in data))))
sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
query_size = max(map(len, (q for _, q, _ in data)))
memory_size = min(FLAGS.memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

# train/validation/test sets
def read_trials(self, start_re = 'MSG\t([\d\.]+)\ttrial (\d+) started at (\d+.\d)', stop_re = 'MSG\t([\d\.]+)\ttrial (\d+) stopped at (\d+.\d)', phase_re = 'MSG\t([\d\.]+)\ttrial X phase (\d+) started at (\d+.\d)', parameter_re = 'MSG\t[\d\.]+\ttrial X parameter[\t ]*(\S*?)\s+: ([-\d\.]*|[\w]*)'): """ read_trials reads in trials from the message file, constructing timings and parameters for each of the trials, their phases and their parameters. It reads the actual values to internal variables, and also creates dictionaries that will indicate the formats needed when creating the hfd5 file """ self.logger.info('reading trials from %s', os.path.split(self.message_file)[-1]) self.get_message_string() # # read the trials themselves # self.start_trial_strings = re.findall(re.compile(start_re), self.message_string) self.stop_trial_strings = re.findall(re.compile(stop_re), self.message_string) if len(self.start_trial_strings) > 0: # check whether there are any trials here. self.trial_starts = np.array([[float(s[0]), int(s[1]), float(s[2])] for s in self.start_trial_strings]) self.trial_ends = np.array([[float(s[0]), int(s[1]), float(s[2])] for s in self.stop_trial_strings]) self.nr_trials = int(self.trial_ends[-1,1])+1 # remove duplicate rows: self.trial_starts = np.vstack([self.trial_starts[self.trial_starts[:,1]==t,:][0,:] for t in range(self.nr_trials)]) self.trial_ends = np.vstack([self.trial_ends[self.trial_ends[:,1]==t,:][0,:] for t in range(self.nr_trials)]) # # sometimes we have twice as many trial starts as trial ends! # if 2 * len(self.trial_starts) == len(self.trial_ends): # self.trial_ends = self.trial_ends[::2] # # due to early task abortion we can have more trial starts than trial ends: # if abs(len(self.trial_starts) - len(self.trial_ends)) == 1: # self.trial_starts = self.trial_starts[:-2] # self.trial_ends = self.trial_ends[:len(self.trial_starts)] # self.nr_trials = len(self.trial_starts) self.trials = np.hstack((self.trial_starts, self.trial_ends)) # create a dictionary for the types of timing informations we'd like to look at self.trial_type_dictionary = [('trial_start_EL_timestamp', np.float64), ('trial_start_index',np.int32), ('trial_start_exp_timestamp',np.float64), ('trial_end_EL_timestamp',np.float64), ('trial_end_index',np.int32), ('trial_end_exp_timestamp',np.float64)] self.trials = [{'trial_start_EL_timestamp': tr[0], 'trial_start_index': tr[1], 'trial_start_exp_timestamp': tr[2], 'trial_end_EL_timestamp': tr[3], 'trial_end_index': tr[4], 'trial_end_exp_timestamp': tr[5]} for tr in self.trials] self.trial_type_dictionary = np.dtype(self.trial_type_dictionary) # # trial phases # self.trial_phases = [] for i in range(self.nr_trials): this_trial_re = phase_re.replace(' X ', ' ' + str(i) + ' ') phase_strings = re.findall(re.compile(this_trial_re), self.message_string) self.trial_phases.append([[int(i), float(s[0]), int(s[1]), float(s[2])] for s in phase_strings]) self.trial_phases = list(chain.from_iterable(self.trial_phases)) self.trial_phases = [{'trial_phase_trial': tr[0], 'trial_phase_EL_timestamp': tr[1], 'trial_phase_index': tr[2], 'trial_phase_exp_timestamp': tr[3]} for tr in self.trial_phases] self.nr_trial_phases = len(self.trial_phases) self.trial_phase_type_dictionary = [('trial_phase_trial', np.float64), ('trial_phase_EL_timestamp',np.int32), ('trial_phase_index',np.float64), ('trial_phase_exp_timestamp',np.float64)] self.trial_phase_type_dictionary = np.dtype(self.trial_phase_type_dictionary) # now adjust the trial type dictionary and convert into a numpy dtype 
# self.trial_type_dictionary.append(('trial_phase_timestamps', np.float64, (self.nr_phase_starts.max(), 3))) else: self.logger.info('no trial or phase information in edf file %s'%self.input_file_name) self.nr_trials = 0 # # parameters # self.message_string = self.message_string.replace(' [','').replace('.]','') parameters = [] for i in range(self.nr_trials): this_re = parameter_re.replace(' X ', ' ' + str(i) + ' ') parameter_strings = re.findall(re.compile(this_re), self.message_string) # check if double params: param_names = np.array([p[0] for p in parameter_strings]) try: nr_double_trials = sum(param_names == param_names[0]) # we have double trials -- custom procedure!: if nr_double_trials > 1: nr_params = int(len(param_names) / nr_double_trials) nr_param = 0 parameter_strings2 = [] for d in range(nr_double_trials): parameter_strings2.append( parameter_strings[nr_param:nr_param+nr_params] ) nr_param += nr_params for d in parameter_strings2: # assuming all these parameters are numeric this_trial_parameters = {'trial_nr': float(i)} for s in d: try: this_trial_parameters.update({s[0]: float(s[1])}) except ValueError: pass parameters.append(this_trial_parameters) # we don't have double trial -- standard procedure! else: if len(parameter_strings) > 0: # assuming all these parameters are numeric this_trial_parameters = {'trial_nr': float(i)} for s in parameter_strings: try: this_trial_parameters.update({s[0]: float(s[1])}) except ValueError: pass parameters.append(this_trial_parameters) except: pass if len(parameters) > 0: # there were parameters in the edf file self.parameters = parameters ptd = [(k, np.float64) for k in np.unique(np.concatenate([list(k.keys()) for k in self.parameters]))] self.parameter_type_dictionary = np.dtype(ptd) else: # we have to take the parameters from the output_dict pickle file of the same name as the edf file. self.logger.info('no parameter information in edf file')
def coset_enumeration_c(fp_grp, Y, max_cosets=None, draft=None, incomplete=False): """ >>> from sympy.combinatorics.free_groups import free_group >>> from sympy.combinatorics.fp_groups import FpGroup, coset_enumeration_c >>> F, x, y = free_group("x, y") >>> f = FpGroup(F, [x**3, y**3, x**-1*y**-1*x*y]) >>> C = coset_enumeration_c(f, [x]) >>> C.table [[0, 0, 1, 2], [1, 1, 2, 0], [2, 2, 0, 1]] """ # Initialize a coset table C for < X|R > X = fp_grp.generators R = fp_grp.relators C = CosetTable(fp_grp, Y, max_cosets=max_cosets) if draft: C.table = draft.table[:] C.p = draft.p[:] C.deduction_stack = draft.deduction_stack for alpha, x in product(range(len(C.table)), X): if not C.table[alpha][C.A_dict[x]] is None: C.deduction_stack.append((alpha, x)) A = C.A # replace all the elements by cyclic reductions R_cyc_red = [rel.identity_cyclic_reduction() for rel in R] R_c = list(chain.from_iterable((rel.cyclic_conjugates(), (rel**-1).cyclic_conjugates()) \ for rel in R_cyc_red)) R_set = set() for conjugate in R_c: R_set = R_set.union(conjugate) # a list of subsets of R_c whose words start with "x". R_c_list = [] for x in C.A: r = set([word for word in R_set if word[0] == x]) R_c_list.append(r) R_set.difference_update(r) for w in Y: C.scan_and_fill_c(0, w) for x in A: C.process_deductions(R_c_list[C.A_dict[x]], R_c_list[C.A_dict_inv[x]]) alpha = 0 while alpha < len(C.table): if C.p[alpha] == alpha: try: for x in C.A: if C.p[alpha] != alpha: break if C.table[alpha][C.A_dict[x]] is None: C.define_c(alpha, x) C.process_deductions(R_c_list[C.A_dict[x]], R_c_list[C.A_dict_inv[x]]) except ValueError as e: if incomplete: return C raise e alpha += 1 return C
def powerset(s):
    s = list(s)
    return set(
        chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)))
def stability_curve_time_space_splitter( train_data: pd.DataFrame, training_time_limit: DateType, space_column: str, time_column: str, freq: str = 'M', space_hold_percentage: float = 0.5, random_state: int = None, min_samples: int = 1000) -> SplitterReturnType: """ Splits the data into temporal buckets given by the specified frequency. Training set is fixed before hold out and uses a rolling window hold out set. Each fold moves the hold out further into the future. Useful to see how model performance degrades as the training data gets more outdated. Folds are made so that NONE of the IDs in the holdout appears in the training set. Parameters ---------- train_data : pandas.DataFrame A Pandas' DataFrame that will be split for stability curve estimation. training_time_limit : str The Date String for the end of the testing period. Should be of the same format as `time_column` space_column : str The name of the ID column of `train_data` time_column : str The name of the Date column of `train_data` freq : str The temporal frequency. See: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases space_hold_percentage : float The proportion of hold out IDs random_state : int A seed for the random number generator for ID sampling across train and hold out sets. min_samples : int The minimum number of samples required in the split to keep the split. """ train_data = train_data.reset_index() rng = check_random_state(random_state) train_time = train_data[ train_data[time_column] <= training_time_limit][time_column] train_index = train_time.index.values train_space = train_data.iloc[train_index][space_column].unique() held_space = rng.choice(train_space, int(len(train_space) * space_hold_percentage), replace=False) test_data = train_data[(train_data[time_column] > training_time_limit) & (~train_data[space_column].isin(held_space))] train_index = train_data[ (train_data[time_column] <= training_time_limit) & (train_data[space_column].isin(held_space))].index.values first_test_moment = test_data[time_column].min() last_test_moment = test_data[time_column].max() logs, test_indexes = _get_sc_test_fold_idx_and_logs( test_data, train_time, time_column, first_test_moment, last_test_moment, min_samples, freq) # From "list of dicts" to "dict of lists" hack: logs = [{k: [dic[k] for dic in logs] for k in logs[0]}] # Flatten test_indexes: flattened_test_indices = list(chain.from_iterable(test_indexes)) return [(train_index, flattened_test_indices)], logs
help = "File to store distributions in. Pickle format will be used. Default is 'distributions.pickle'") parser.add_argument("--non_paternity", "-np", type = float, default = 0.0, help = "Non paternity rate for the adversary to assume.") parser.add_argument("--to_json", default = None, help = "If this flag is present, will instead store the population as json for faster computation in another language") args = parser.parse_args() print("Loading population") with open(args.population_file, "rb") as pickle_file: population = PopulationUnpickler(pickle_file).load() fix_twin_parents(population) if not args.recover: potentially_labeled = list(chain.from_iterable([generation.members for generation in population.generations[-3:]])) if args.num_labeled_nodes <= 0: num_labeled_nodes = population.size // 100 else: num_labeled_nodes = args.num_labeled_nodes labeled_nodes = sample(potentially_labeled, num_labeled_nodes) else: print("Recovering run") labeled_nodes = [population.id_mapping[int(filename)] for filename in listdir(args.work_dir)] if args.to_json: num_generations = population.num_generations clear_index = max(num_generations - args.gen_back, 0) to_clear = population.generations[clear_index].members
# routes is the set of all (x, y) tuples where (x, y)
# is a space (i.e. the set of all valid points)
routes = {(x, y) for x in range(width) for y in range(height) if lines[y][x].isspace()}

# This is the clever bit of the code, you parse through the input
# string and for each instruction you add a lambda function to the
# list which when given the current state of the system returns the
# next state. The state is represented by 4 variables, x, y, dx, dy
# where x and y is the current position and dx, dy encode the direction
raw_path = input().strip()

path = list(
    chain.from_iterable({
        'l': lambda _: [lambda x, y, dx, dy: (x, y, dy, -dx)],
        'r': lambda _: [lambda x, y, dx, dy: (x, y, -dy, dx)]
    }.get(entry, lambda ct: [lambda x, y, dx, dy: (x + dx, y + dy, dx, dy)] * int(ct))(entry)
        for entry in re.findall('[rl]|[0-9]+', raw_path)))

test = list(
    chain.from_iterable({
        'l': lambda _: ['left'],
        'r': lambda _: ['right']
    }.get(entry, lambda ct: ['straight'] * int(ct))(entry)
        for entry in re.findall('[rl]|[0-9]+', raw_path)))


def get_entry(t):
    return [t]
def words(self, doc_id: Hashable = None, sort=False) -> list[str]:
    words = self._documents[doc_id] if doc_id else list(chain.from_iterable(self._documents.values()))
    if sort:
        return sorted(words)
    return words
def __init__(self, annotations, struct, json_annotations, ext): """ Describes the property list for a struct Also create a list of c_ast.Decl to append to the struct decls """ self.json_annotations = json_annotations self.annotated_properties = None self.annotations = annotations self.ext = ext self.init_list = None self.decls = None self.struct = struct self.extra_decls = None def make_extra_decl(name, t): idtype = c_ast.IdentifierType([t]) td = c_ast.TypeDecl(name, [], idtype) return c_ast.Decl( name, [], # quals [], # storage [], # funcspec td, # type None, # init None, # bitsize ) fi = getframeinfo(currentframe()) annotated_properties = [ AnnotatedProperty(self, d) for d in struct.decls ] out_ap = [] for ap in annotated_properties: inline_annotation = ap.values.get('inline', False) if inline_annotation: astruct = self.inline_struct_annotated(inline_annotation, ap.decl) out_ap += astruct.annotated_properties else: out_ap.append(ap) self.annotated_properties = out_ap init_lists = [ ap.init_list for ap in out_ap # 'private' and 'inline' have no init_list if ap.init_list is not None ] # NULL terminator init_lists.append(c_ast.InitList([c_ast.Constant('int', '0')])) self.init_list = c_ast.InitList(init_lists, Coord(fi.filename, fi.lineno)) decls = [ap.decl for ap in out_ap] extra_decls = chain.from_iterable( (ap.extra_decls.iteritems() for ap in out_ap)) extra_decls = [make_extra_decl(name, t) for name, t in extra_decls] decls += extra_decls self.decls = decls
def test_all_variables_included(self):
    for ast, var_names_locs in self.task.graphs_and_instances:
        locations = list(
            chain.from_iterable([i[1] for i in var_names_locs]))
        self.assertCountEqual(
            locations,
            [i[0] for i in ast.nodes_that_represent_variables])
def load_ndarray(fpath, celltype=None): print(" - reading", fpath) # FIXME: implement celltype a = la.read_csv(fpath, dialect='liam2') # print(a.info) return a with open(fpath, "rb") as f: reader = csv.reader(f) line_stream = skip_comment_cells(strip_rows(reader)) header = line_stream.next() str_table = [] for line in line_stream: if any(value == '' for value in line): raise Exception("empty cell found in %s" % fpath) str_table.append(line) ndim = len(header) # handle last dimension header (horizontal values) last_d_header = str_table.pop(0) # auto-detect type of values for the last d and convert them last_d_pvalues = convert_1darray(last_d_header) unique_last_d, dupe_last_d = unique_duplicate(last_d_pvalues) if dupe_last_d: print(("Duplicate column header value(s) (for '%s') in '%s': %s" % (header[-1], fpath, ", ".join(str(v) for v in dupe_last_d)))) raise Exception("bad data in '%s': found %d " "duplicate column header value(s)" % (fpath, len(dupe_last_d))) # handle other dimensions header # strip the ndim-1 first columns headers = [[line.pop(0) for line in str_table] for _ in range(ndim - 1)] headers = [convert_1darray(pvalues_str) for pvalues_str in headers] if ndim > 1: # having duplicate values is normal when there are more than 2 # dimensions but we need to test whether there are duplicates of # combinations. dupe_combos = list(duplicates(zip(*headers))) if dupe_combos: print(("Duplicate row header value(s) in '%s':" % fpath)) print((PrettyTable(dupe_combos))) raise Exception("bad alignment data in '%s': found %d " "duplicate row header value(s)" % (fpath, len(dupe_combos))) possible_values = [np.array(list(unique(pvalues))) for pvalues in headers] possible_values.append(np.array(unique_last_d)) shape = tuple(len(values) for values in possible_values) num_possible_values = prod(shape) # transform the 2d table into a 1d list str_table = list(chain.from_iterable(str_table)) if len(str_table) != num_possible_values: raise Exception("incoherent data in '%s': %d data cells " "found while it should be %d based on the number " "of possible values in headers (%s)" % (fpath, len(str_table), num_possible_values, ' * '.join(str(len(values)) for values in possible_values))) # TODO: compare time with numpy built-in conversion: # if dtype is None, numpy tries to detect the best type itself # which it does a good job of if the values are already numeric values # if dtype is provided, numpy does a good job to convert from string # values. if celltype is None: celltype = detect_column_type(str_table) data = convert_1darray(str_table, celltype) array = np.array(data, dtype=celltype) return la.LArray(array.reshape(shape), header, possible_values)
def install_flags(self):
    if self.indices_args is None:
        self.indices_args = tuple(
            chain.from_iterable(('--extra-index-url', x) for x in PIP_EXTRA_INDICES)
        )
    return self.indices_args
def test_no_duplicates_uses(self):
    for ast, var_names_locs in self.task.graphs_and_instances:
        locations = list(
            chain.from_iterable([i[1] for i in var_names_locs]))
        self.assertEqual(len(locations), len(set(locations)))
def calc_grow_delta(white_lines, white_cols):
    centers = chain.from_iterable(zip(white_lines, white_cols)[1:-1])
    return int(min(b - a for a, b in centers) / 2)
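# Hypothetical usage sketch for calc_grow_delta() above (Python 2 style, since
# zip() is sliced directly); white_lines/white_cols are (start, end) runs of
# blank rows/columns and the values below are made up for illustration.
white_lines = [(0, 10), (40, 60), (90, 100)]
white_cols = [(0, 8), (30, 50), (95, 100)]
calc_grow_delta(white_lines, white_cols)  # -> 10, half of the narrowest interior run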
def genPowerset(iterable):
    s = iterable
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))