Example #1
  def grabKeys(self, obj, stack=[], keys={}):
   '''Recursively grabs a list of json object key strings.

      Format is 'parent.child' for all nested keys.
   '''

   childKeys = {}
   if (type(obj) == type({})):
     keys   = dict(
                chain(  [(x,                     True) for x in keys.iterkeys()]
                      + [('.'.join(stack + [y]), True) for y in obj.iterkeys() ]
                       ))
     childKeys = [ [ x for x in self.grabKeys( y[1],
                                                stack + [y[0]],
                                                keys).iteritems()
                    ] for y in filter( 
                                lambda x: self.typecheck(x[1]), 
                                obj.iteritems()
                              )]
     childKeys = dict(chain.from_iterable(childKeys))

   elif (type(obj) is type([])):
     childKeys = [ [x for x in self.grabKeys(item, stack, keys).iteritems()]
                     for item in filter(lambda x: self.typecheck(x), obj) 
                 ]
     childKeys = dict(chain.from_iterable(childKeys))

   return(dict(
          chain( childKeys.iteritems(),
                 keys.iteritems()
                )))
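The snippet above is Python 2 (iterkeys/iteritems) and depends on a self.typecheck helper. A minimal Python 3 sketch of the same flattening idea, with illustrative names only, might look like this:

from itertools import chain

def grab_keys(obj, stack=()):
    # Collect dotted 'parent.child' key paths from nested dicts and lists.
    if isinstance(obj, dict):
        here = ['.'.join(stack + (k,)) for k in obj]
        below = chain.from_iterable(grab_keys(v, stack + (k,)) for k, v in obj.items())
        return here + list(below)
    if isinstance(obj, list):
        return list(chain.from_iterable(grab_keys(item, stack) for item in obj))
    return []

print(grab_keys({'a': {'b': 1}, 'c': [{'d': 2}]}))  # ['a', 'c', 'a.b', 'c.d']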
def assemble_contigs(adjlist):
    if not adjlist: return []
    graph = {u: vs for u, vs in adjlist.items()}

    vertices = set(chain(graph.keys(), chain.from_iterable(graph.values())))
    ins = dict.fromkeys(vertices, 0)
    outs = dict.fromkeys(vertices, 0)
    for u, vs in graph.items():
        outs[u] += len(vs)
    for v in chain.from_iterable(graph.values()):
        ins[v] += 1
    print('ins', ins)
    print('outs', outs)

    contig_starts = [v for v, out in outs.items() if not (out in (0, 1) and ins[v] == 1)]
    print('contig_starts', contig_starts)

    contigs = []
    for start in contig_starts:
        while graph[start]:  # multiple edges
            path = [start]
            u = graph[start].pop()
            while ins[u] == outs[u] == 1:
                path.append(u)
                u = graph[u].pop()
            contigs.append(''.join(v[0] for v in path) + u)

    return contigs
Example #3
        def _check_ordering(cls):
            if not cls._meta.ordering:
                return []

            if not isinstance(cls._meta.ordering, (list, tuple)):
                return [checks.Error("'ordering' must be a tuple or list.",
                                    hint=None, obj=cls, id='models.E014')]

            fields = [f for f in cls._meta.ordering if f != '?']
            fields = [f[1:] if f.startswith('-') else f for f in fields]
            fields = set(f for f in fields if f not in ('_order', 'pk') and '__' not in f)

            valid_fields = set(chain.from_iterable(
                (f.name, f.attname)
                for f in cls._meta.fields
            ))
            valid_tfields = set(chain.from_iterable(
                (f.name, f.attname)
                for f in cls._meta.translations_model._meta.fields
                if f.name not in ('master', 'language_code')
            ))

            return [checks.Error("'ordering' refers to the non-existent field '%s' --hvad." % field,
                                hint=None, obj=cls, id='models.E015')
                    for field in fields - valid_fields - valid_tfields]
def university_prerequisite_statistics(abbreviation, result_set):
    uni_courses = session.query(Course).join(Department).join(University).filter(University.abbreviation==abbreviation).all()
    prereq_distances = [prerequisite_distances(course, result_set) for course in uni_courses]
    prereq_distances = [p for p in prereq_distances if p] # strip courses with no prerequisites
    mean = numpy.mean(list(chain.from_iterable(prereq_distances)))
    stdv = numpy.std(list(chain.from_iterable(prereq_distances)))
    return (mean, stdv)
    def run_all_combos(self, num_tests, disabled=None):
        tests = self.generate_tests(num_tests, disabled=disabled)

        for total in range(1, num_tests + 1):
            res = []
            res_disabled = []
            for chunk in range(1, total + 1):
                f = chunk_by_slice(chunk, total)
                res.append(list(f(tests, {})))
                if disabled:
                    f.disabled = True
                    res_disabled.append(list(f(tests, {})))

            lengths = [len([t for t in c if 'disabled' not in t]) for c in res]
            # the chunk with the most tests should have at most one more test
            # than the chunk with the least tests
            self.assertLessEqual(max(lengths) - min(lengths), 1)

            # chaining all chunks back together should equal the original list
            # of tests
            self.assertEqual(list(chain.from_iterable(res)), list(tests))

            if disabled:
                lengths = [len(c) for c in res_disabled]
                self.assertLessEqual(max(lengths) - min(lengths), 1)
                self.assertEqual(list(chain.from_iterable(res_disabled)),
                                 list(tests))
Example #6
 def _get_as_path(self, path):
     asps = (p['as_paths'] for p in path['attrs'] if
             p['type'] == BGP_ATTR_TYPE_AS_PATH and 'as_paths' in p
             and p['as_paths'] != None)
     asps = chain.from_iterable(asps)
     asns = (asp['asns'] for asp in asps)
     return list(chain.from_iterable(asns))
Example #7
    def _simple_complete(self, path, dot, like):
        if not path and not dot:
            scope = self._parser.user_scope()
            if not scope.is_scope():  # Might be a flow (if/while/etc).
                scope = scope.get_parent_scope()
            names_dicts = global_names_dict_generator(
                self._evaluator,
                self._evaluator.wrap(scope),
                self._pos
            )
            completion_names = []
            for names_dict, pos in names_dicts:
                names = list(chain.from_iterable(names_dict.values()))
                if not names:
                    continue
                completion_names += filter_definition_names(names, self._parser.user_stmt(), pos)
        elif self._get_under_cursor_stmt(path) is None:
            return []
        else:
            scopes = list(self._prepare_goto(path, True))
            completion_names = []
            debug.dbg('possible completion scopes: %s', scopes)
            for s in scopes:
                names = []
                for names_dict in s.names_dicts(search_global=False):
                    names += chain.from_iterable(names_dict.values())

                completion_names += filter_definition_names(names, self._parser.user_stmt())
        return completion_names
Example #8
    def apply(self, backups):
        purge = {}
        last_rule = None
        
        grouped_backups = _group_backups_by_tag(backups)
        for rule in self.rules:
            # find backups purged by the previous rule that should be tagged with this rule's tag
            if last_rule is not None:
                retagged_backups = rule.find_retag_candidate(purge[last_rule.tag], backups)
                for backup in retagged_backups:
                    purge[last_rule.tag].remove(backup)
                    grouped_backups[rule.tag].add(backup)

            # purge expired backups
            purge[rule.tag] = set()
            for backup in grouped_backups[rule.tag].copy():
                if rule.is_expired(backup):
                    grouped_backups[rule.tag].remove(backup)
                    purge[rule.tag].add(backup)

            # check if any time bucket has multiple backups and purge the latest/oldest depending on the policy
            duplicates = rule.find_duplicates(grouped_backups[rule.tag])
            for backup in duplicates:
                grouped_backups[rule.tag].remove(backup)
                purge[rule.tag].add(backup)

            last_rule = rule

        return (list(chain.from_iterable(grouped_backups.values())), list(chain.from_iterable(purge.values())))
def plot_piecewise(data, xmax):
    x = list(chain.from_iterable((data[i][0], data[i][0]) for i in range(1, len(data))))
    x.insert(0, data[0][0])
    x.append(xmax)
    y = list(chain.from_iterable((data[i][1], data[i][1]) for i in range(len(data))))
    plt.plot(x, y)
    plt.axis([min(x), max(x), min(y) - 1, max(y) + 1])
Example #10
def metric_windiff(forest1, forest2):
    masses1 = [get_untyped_masses(tree) for tree in forest1]
    masses2 = [get_untyped_masses(tree) for tree in forest2]
    segments1 = list(chain.from_iterable(masses1))
    segments2 = list(chain.from_iterable(masses2))
    score = segeval.window_diff(segments1, segments2) * 100
    return score
def powerset(A,nonTrivial=False):
	''' powerset(set) -> iterator -- returns an iterator over all subsets of A as tuples; if nonTrivial=True, returns all subsets except the empty set and A itself '''
	from itertools import chain, combinations
	if nonTrivial:
		return chain.from_iterable( combinations(A,i) for i in range(1,len(A)) )
	else:	
		return chain.from_iterable( combinations(A,i) for i in range(0,len(A)+1) )
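A quick usage sketch (the subset order shown assumes the usual iteration order of a small integer set):

print(list(powerset({1, 2})))                   # [(), (1,), (2,), (1, 2)]
print(list(powerset({1, 2}, nonTrivial=True)))  # [(1,), (2,)]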
Example #12
def pack_apply_message(f, args, kwargs, buffer_threshold=MAX_BYTES, item_threshold=MAX_ITEMS):
    """pack up a function, args, and kwargs to be sent over the wire

    Each element of args/kwargs will be canned for special treatment,
    but inspection will not go any deeper than that.

    Any object whose data is larger than `threshold`  will not have their data copied
    (only numpy arrays and bytes/buffers support zero-copy)

    Message will be a list of bytes/buffers of the format:

    [ cf, pinfo, <arg_bufs>, <kwarg_bufs> ]

    With length at least two + len(args) + len(kwargs)
    """

    arg_bufs = list(chain.from_iterable(
        serialize_object(arg, buffer_threshold, item_threshold) for arg in args))

    kw_keys = sorted(kwargs.keys())
    kwarg_bufs = list(chain.from_iterable(
        serialize_object(kwargs[key], buffer_threshold, item_threshold) for key in kw_keys))

    info = dict(nargs=len(args), narg_bufs=len(arg_bufs), kw_keys=kw_keys)

    msg = [pickle.dumps(can(f), PICKLE_PROTOCOL)]
    msg.append(pickle.dumps(info, PICKLE_PROTOCOL))
    msg.extend(arg_bufs)
    msg.extend(kwarg_bufs)

    return msg
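A toy illustration of the flattening step above, with hypothetical per-argument buffer lists standing in for what serialize_object would return:

from itertools import chain

per_arg_bufs = [[b'arg0-info', b'arg0-data'], [b'arg1-info']]
arg_bufs = list(chain.from_iterable(per_arg_bufs))
print(arg_bufs)  # [b'arg0-info', b'arg0-data', b'arg1-info']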
        def add_formatted_flags(flags_name, format):
            """Print CMake flags using macros_printer.

            Arguments:
            flags_name - Name to search for in config_compilers.
            format - Function that takes a build type and flag match, and
            returns the string to print out.
            """

            paths = ["compiler/"+flags_name, "compiler/ADD_"+flags_name]

            # This creates an iterable over elements in config_compilers
            # that match in non-debug mode.
            normal_matches = chain.from_iterable(
                all_matches(self.compiler_xml_tree, path, normal_dict)
                for path in paths
                )
            for match in normal_matches:
                macros_printer.print(format(model, match.text))

            # Now the same for debug mode.
            debug_matches = chain.from_iterable(
                all_matches(self.compiler_xml_tree, path, debug_dict)
                for path in paths
                )
            for match in debug_matches:
                macros_printer.print(format(model+"_DEBUG", match.text))
 def make_table(self, metadata):
     bin_columns = chain.from_iterable(b.postgres_columns for b in self.bins)
     stat_columns = chain.from_iterable(b.postgres_columns for b in self.statistics)
     return Table(self.statistics_table_name, metadata,
             Column('id', Integer, primary_key = True),
             *list(chain(bin_columns, stat_columns)),
             keep_existing = True)
Example #15
def _write_statistics_file(run_dir, genomes, shared_single_copy, shared_multi_copy, partially_shared, nr_of_seqs):
    """Write out file with some basic statistics about the genomes, orthologs and size of shared core genome."""
    # Some easy statistics about genomes and orthologs
    nr_shared_sico_orth = len(shared_single_copy)

    # Determine number of ORFans by deducting unique proteins identified as orthologs from total number of genes
    proteins = set(chain.from_iterable(prot for per_genome in shared_single_copy for prot in per_genome.values()))
    proteins.update(chain.from_iterable(prot for per_genome in shared_multi_copy for prot in per_genome.values()))
    proteins.update(chain.from_iterable(prot for per_genome in partially_shared for prot in per_genome.values()))
    nr_orfans = nr_of_seqs - len(proteins)

    # Now unused statistics
    # nr_non_sico_orth = len(shared_multi_copy) + len(partially_shared)
    # nr_sico_genes = len(proteins)
    # nr_non_sico_genes = len(proteins) - nr_sico_genes

    stats_file = os.path.join(run_dir, 'extract-stats.txt')
    with open(stats_file, mode='w') as writer:
        # Write Genome & gene count statistics to file
        writer.write('{0:7}\tGenomes\n'.format(len(genomes)))
        writer.write('{0:7}\tGenes\n'.format(nr_of_seqs))
        writer.write('{0:7}\tORFan genes (no orthologs)\n'.format(nr_orfans))
        writer.write('{0:7}\tSingle-copy orthologous genes\n'.format(nr_shared_sico_orth))
        # writer.write('{0:7}\tShared single-copy orthologous genes in {1} orthologs\n'.format(nr_sico_genes,
        #                                                                                     nr_shared_sico_orth))
        # writer.write('{0:7}\tOtherwise orthologous genes in {1} orthologs\n'.format(nr_non_sico_genes,
        #                                                                            nr_non_sico_orth))

    assert os.path.isfile(stats_file) and 0 < os.path.getsize(stats_file), stats_file + ' should exist with content.'
    return stats_file
Example #16
    def command(self, path=None):
        if self.css:
            self._prepend_css(self.css)

        args = [self.wkhtmltopdf]

        args += list(chain.from_iterable(list(self.options.items())))
        args = [_f for _f in args if _f]

        if self.toc:
            args.append('toc')
            args += list(chain.from_iterable(list(self.toc.items())))
        if self.cover:
            args.append('cover')
            args.append(self.cover)

        # If the source is a string then we will pipe it into wkhtmltopdf
        # If the source is file-like then we will read from it and pipe it in
        if self.source.isString() or self.source.isFileObj():
            args.append('-')
        else:
            if isinstance(self.source.source, str):
                args.append(self.source.to_s())
            else:
                args += self.source.source

        # If output_path evaluates to False append '-' to end of args
        # and wkhtmltopdf will pass generated PDF to stdout
        if path:
            args.append(path)
        else:
            args.append('-')

        return args
Example #17
    def bio_classification_report(y_true, y_pred):
        """
        Classification report for a list of BIO-encoded sequences.
        It computes token-level metrics and discards "O" labels.

        Note that it requires scikit-learn 0.15+ (or a version from
        github master) to calculate averages properly!
        """
        lb = LabelBinarizer()
        y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
        y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

        tagset = set(lb.classes_) - {'O'}
        tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
        class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

        labs = [class_indices[cls] for cls in tagset]

        return((precision_recall_fscore_support(y_true_combined,
                                                y_pred_combined,
                                                labels=labs,
                                                average=None,
                                                sample_weight=None)),
               (classification_report(
                   y_true_combined,
                   y_pred_combined,
                   labels=[class_indices[cls] for cls in tagset],
                   target_names=tagset,
               )), labs)
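The key preprocessing move here is flattening the per-sentence tag sequences into one token-level list before binarizing; a minimal sketch with toy BIO tags:

from itertools import chain

y_true = [['B-PER', 'O'], ['B-LOC', 'I-LOC']]
flat = list(chain.from_iterable(y_true))
print(flat)  # ['B-PER', 'O', 'B-LOC', 'I-LOC']
# LabelBinarizer is then fit on this flat token list, not on the sentences.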
Example #18
def bio_classification_report(y_true, y_pred):

    lb = LabelBinarizer()
    y_true_combined = 1 - lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = list(chain.from_iterable(y_pred))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
    print 'True sum %d Pred sum %d Len %d' %(sum(y_true_combined), sum(y_pred_combined), len(y_pred_combined))
    print "AUC\tP-R: %.4f\tROC: %.4f" % (average_precision_score(y_true_combined, y_pred_combined, average=None),
        roc_auc_score(y_true_combined, y_pred_combined, average=None))
    #plt.figure()
    #fpr, tpr, thr = roc_curve(y_true_combined, y_pred_combined)
    #area = auc(fpr, tpr)
    #plt.plot(fpr, tpr, label='{area:.3f}'.format( area=area))
    #plt.legend(loc=4)
    #plt.savefig('sub3.jpg')

    return classification_report(
        1 - y_true_combined,
        [0 if v > 0.1 else 1 for v in y_pred_combined],
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
Example #19
    def __init__(nn_index, hs, cx_list):
        import algos

        cx2_desc = hs.feats.cx2_desc
        # Make unique id for indexed descriptors
        feat_uid = hs.prefs.feat_cfg.get_uid()
        sample_uid = helpers.hashstr_arr(cx_list, "dcxs")
        uid = "_" + sample_uid + feat_uid
        # Number of features per sample chip
        sx2_nFeat = [len(cx2_desc[cx]) for cx in iter(cx_list)]
        # Inverted index from indexed descriptor to chipx and featx
        _ax2_cx = [[cx] * nFeat for (cx, nFeat) in izip(cx_list, sx2_nFeat)]
        _ax2_fx = [range(nFeat) for nFeat in iter(sx2_nFeat)]
        ax2_cx = np.array(list(chain.from_iterable(_ax2_cx)))
        ax2_fx = np.array(list(chain.from_iterable(_ax2_fx)))
        # Aggregate indexed descriptors into continuous structure
        ax2_desc = np.vstack([cx2_desc[cx] for cx in cx_list if len(cx2_desc[cx]) > 0])
        # Build/Load the flann index
        flann_params = {"algorithm": "kdtree", "trees": 4}
        precomp_kwargs = {
            "cache_dir": hs.dirs.cache_dir,
            "uid": uid,
            "flann_params": flann_params,
            "force_recompute": hs.args.nocache_flann,
        }
        flann = algos.precompute_flann(ax2_desc, **precomp_kwargs)
        # ----
        # Agg Data
        nn_index.ax2_cx = ax2_cx
        nn_index.ax2_fx = ax2_fx
        nn_index.ax2_data = ax2_desc
        nn_index.flann = flann
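A small sketch of the inverted-index flattening used above, with made-up chip indices and feature counts:

from itertools import chain
import numpy as np

cx_list, sx2_nFeat = [7, 9], [2, 3]
_ax2_cx = [[cx] * nFeat for cx, nFeat in zip(cx_list, sx2_nFeat)]
_ax2_fx = [range(nFeat) for nFeat in sx2_nFeat]
print(np.array(list(chain.from_iterable(_ax2_cx))))  # [7 7 9 9 9]
print(np.array(list(chain.from_iterable(_ax2_fx))))  # [0 1 0 1 2]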
def evalTR(stack, E_f, E_0, angles, n_i, n_f, t_angles, addBulkT=False):
    # option to add a bulk transmission coefficient
    # for a thick substrate. In this case the bulk
    # coefficient added will be for the final index (n_f) to air (n=1).
    # this expression is again Hecht 4.68 for normal incidence
    bulkT = (4.0*n_f)/((n_f+1.0)**2)
    if addBulkT:
        T = [[[real(bulkT*(n_f/n_i)*(cos(t_angles[len(stack)][j][k])/cos(angles[k]))*abs(
            E_f[j][k][l][0])**2)
            for l in range(len(E_f[0][0]))]
            for k in range(len(E_f[0]))]
            for j in range(len(E_f))]
    else:
        T = [[[real((n_f/n_i)*(cos(t_angles[len(stack)][j][k])/cos(angles[k]))*abs(
            E_f[j][k][l][0])**2)
            for l in range(len(E_f[0][0]))]
            for k in range(len(E_f[0]))]
            for j in range(len(E_f))]
    R = [[[(abs(E_0[j][k][l][1])/abs(E_0[j][k][l][0]))**2
        for l in range(len(E_f[0][0]))]
        for k in range(len(E_f[0]))]
        for j in range(len(E_f))]

    #averaging for both of these lists
    TAvg = mean(list(chain.from_iterable(chain.from_iterable(T))))
    RAvg = mean(list(chain.from_iterable(chain.from_iterable(R))))
    
    return (T, R, TAvg, RAvg)
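A quick numeric check of the bulk transmission term described in the comments (Hecht 4.68 at normal incidence), for a glass-like substrate with n_f = 1.5 exiting into air:

n_f = 1.5
bulkT = (4.0 * n_f) / ((n_f + 1.0) ** 2)
print(bulkT)  # 0.96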
Example #21
def _generate_asts(evidence_json: str, predictor, okay_check=False):
    logging.debug("entering")

    js = json.loads(evidence_json)  # parse evidence as a JSON string

    # enhance keywords evidence from others
    keywords = list(chain.from_iterable([Keywords.split_camel(c) for c in js['apicalls']])) + \
        list(chain.from_iterable([Keywords.split_camel(t) for t in js['types']])) + \
        js['keywords']
    js['keywords'] = list(set([k.lower() for k in keywords if k.lower() not in Keywords.STOP_WORDS]))

    #
    # Generate ASTs from evidence.
    #
    asts = predictor.infer(js)

    #
    # If okay_check is set, retain only those asts that pass the _okay(...) filter. Otherwise retain all asts.
    #
    if okay_check:
        okay_asts = []
        for ast in asts:
            if _okay(js, ast, predictor):
                okay_asts.append(ast)
        okay_asts = asts if okay_asts == [] else okay_asts
    else:
        okay_asts = asts

    logging.debug("exiting")
    return json.dumps({'evidences': js, 'asts': okay_asts}, indent=2)
Example #22
File: d.py Project: pycerl/glad
    def write_packages(self, allfeatures, allextensions):
        f = self._f_gl

        self.write_module(f, self.PACKAGE)
        self.write_imports(f, [self.FUNCS, self.EXT, self.ENUMS, self.TYPES], False)

        for api, features in allfeatures.iteritems():
            extensions = allextensions[api]
            with open(self.make_path(api), 'w') as f:
                self.write_module(f, api)

                extenums = chain.from_iterable(ext.enums for ext in extensions)
                funcenums = chain.from_iterable(ext.enums for ext in extensions)
                enums = set(enum.name for enum in extenums) | \
                        set(enum.name for enum in funcenums)

                featfuncs = set(func.proto.name for func in
                        chain.from_iterable(feat.functions for feat in features))
                extfuncs = set(func.proto.name for func in
                        chain.from_iterable(ext.functions for ext in extensions))
                extfuncs = extfuncs - featfuncs

                self.write_selective_import(f, self.FUNCS, featfuncs)
                self.write_selective_import(f, self.EXT, extfuncs)
                self.write_selective_import(f, self.ENUMS, enums)
Example #23
    def _run(self):
        target = self.target

        def _quote(items):
            # XXX It's not clear for me how the parameters have to be quote.
            if PY2:
                items = (i.encode('utf-8') for i in items)
            return ','.join(items)

        data = {
            p: _quote(getattr(self, p)) for p in 'track follow'.split() if getattr(self, p)
        }

        locations = ','.join(str(f) for f in chain.from_iterable(chain.from_iterable(self.locations)))
        if locations:
            data['locations'] = locations
        response = self.client.post(self.url, data=data, stream=True)

        response.raise_for_status()

        line = None
        for line in response.iter_lines():
            target.send(line.decode('utf-8'))
        else:
            # XXX Should be changed to something meaningful
            raise EndOfStreamError(line)
Example #24
def typediff(pos_items, neg_items, opts):
    """pos_items and neg_items are lists of either Fragment or Reading objects"""
    # currently assuming that the Reading objects are only coming from gold
    # profiles, therefore only one per item. otherwise we'd need to be using s
    # list of Reading objects or probably could be defining an ProfileItem
    # class that emulates the relevant interface to Fragment
    tfunc = lambda x:x.types.keys() if opts.all else x.best.types.keys()
    pos_types = set(chain.from_iterable(tfunc(x) for x in pos_items))
    neg_types = set(chain.from_iterable(tfunc(x) for x in neg_items))

    if len(pos_types) + len(neg_types) > 1:
        typelist = list(compare_types(pos_types, neg_types, opts))
    else:
        typelist = list(max(pos_types, neg_types))
        
    if opts.raw:
        return '\n'.join(typelist)

    hierarchy = delphin.load_hierarchy(opts.grammar.types_path)    
        
    if opts.supers:
        for group in (pos, neg):
            for item in group:
                item.load_supers(hierarchy)     
    
        sfunc = lambda x:x.supers
        pos_supers = set(chain.from_iterable(sfunc(x) for x in pos))
        neg_supers = set(chain.from_iterable(sfunc(x) for x in neg))
        supers = compare_types(pos_supers, neg_supers, opts)
        typelist.extend('^'+t for t in supers)

    return pretty_print_types(typelist, hierarchy)
Example #25
    def _grid_glyphs(self, glyphs):
        x = self._x
        y = self._y
        
        K = self._K
        leading = self._leading
        FMX = self.font['__gridfont__'].character_index
        
        colored_chars = list(chain.from_iterable(zip_longest([], text, fillvalue=self._palatte.get(token, (0, 0, 0, 1))) for token, text in xml_lexer.get_tokens(''.join(self._CHARS))))
#        print(set(token for token, text in xml_lexer.get_tokens(''.join(self._CHARS))))
        lines = list(_linebreak(colored_chars, self._charlength))
        self._IJ = [0] + list(accumulate(len(l) for l, br in lines))
        self.y_bottom = y + leading * len(lines)
        
        y += leading
        xd = x + 30
        
        colored_text = {color: [] for color in self._palatte.values()}
        for l, line in enumerate(lines):
            for color, G in groupby(((FMX(character), xd + i*K, y + l*leading, color) for i, (color, character) in enumerate(line[0]) if character != '\n'),
                    key = lambda k: k[3]):
                try:
                    colored_text[color].extend((g, h, k) for g, h, k, c in G)
                except KeyError:
                    colored_text[color] = [(g, h, k) for g, h, k, c in G]
        
        N = zip(accumulate(line[1] for line in lines), enumerate(lines))
        numbers = chain.from_iterable(((FMX(character), x + i*K, y + l*leading) for i, character in enumerate(str(int(N)))) for N, (l, line) in N if line[1])
        colored_text[(0.7, 0.7, 0.7, 1)] = list(numbers)
        self._rows = len(lines)
        self._colored_text = colored_text
        
        #documentation
        """
Example #26
def report(test_y, pred_y):
    lb = LabelBinarizer()
    test_y_combined = lb.fit_transform(list(chain.from_iterable(test_y)))
    pred_y_combined = lb.transform(list(chain.from_iterable(pred_y)))
    tagset = sorted(set(lb.classes_))
    class_indices = {cls: idx for idx, cls in enumerate(tagset)}
    print(classification_report(test_y_combined, pred_y_combined, labels=[class_indices[cls] for cls in tagset], target_names=tagset))
    def __init__(self, pdb_filename, pdb_id=None, surface_file=None, binders_file=None, clusters_report_file=None):
        '''
        Constructor
        '''
        assert os.path.isfile(pdb_filename)
        self.pdb_filename = os.path.abspath(pdb_filename)
        pdblines = open(self.pdb_filename).readlines()
        self.receptor_tmp_file = '/tmp/%d.pdb.tmp' % os.getpid()
#        print 'using receptor in %s' % self.receptor_tmp_file
        TMP_PDB = open(self.receptor_tmp_file, 'w')
        for line in pdblines:
            line = line.strip()
            if line.startswith('ATOM') and line[21].strip() == 'A': print >> TMP_PDB, line.strip()
        TMP_PDB.close()
        
        self.pdb_id = os.path.basename(self.pdb_filename).split('.')[0].upper()
        assert re.match(r'^[A-Za-z0-9]{4}$', self.pdb_id)
        self.polymer_obj = Polymer(self.receptor_tmp_file)
        if surface_file:
            self.surface_residues = set(chain.from_iterable([self.residues_with_num(int(line.split()[1])) for line in open(surface_file).readlines()]))
        if binders_file:
            self.binding_residues = set(chain.from_iterable([self.residues_with_num(int(line.split()[1])) for line in open(binders_file).readlines()]))
        if clusters_report_file:
            for line in open(clusters_report_file).readlines():
                if line.startswith('#'): continue
                line_split = line.split()
                cluster_num = int(line_split[1])
                cluster_residues = set(self.residues_with_num(map(int, line_split[-1].split(','))))
                self.clusters[cluster_num] = cluster_residues
                if binders_file:
                    self.true_binders[cluster_num] = cluster_residues.intersection(self.binding_residues)
Example #28
def join(colls):
    colls, colls_copy = tee(colls)
    it = iter(colls_copy)
    try:
        dest = next(it)
    except StopIteration:
        return None
    cls = dest.__class__

    if isinstance(dest, basestring):
        return ''.join(colls)
    elif isinstance(dest, Mapping):
        result = dest.copy()
        for d in it:
            result.update(d)
        return result
    elif isinstance(dest, Set):
        return dest.union(*it)
    elif isinstance(dest, (Iterator, xrange)):
        return chain.from_iterable(colls)
    elif isinstance(dest, Iterable):
        # NOTE: this could be reduce(concat, ...),
        #       more effective for low count
        return cls(chain.from_iterable(colls))
    else:
        raise TypeError("Don't know how to join %s" % cls.__name__)
Example #29
        def _check_ordering(cls):
            if not cls._meta.ordering:
                return []

            if not isinstance(cls._meta.ordering, (list, tuple)):
                return [checks.Error("'ordering' must be a tuple or list.", hint=None, obj=cls, id="models.E014")]

            fields = [f for f in cls._meta.ordering if f != "?"]
            fields = [f[1:] if f.startswith("-") else f for f in fields]
            fields = set(f for f in fields if f not in ("_order", "pk") and "__" not in f)

            valid_fields = set(chain.from_iterable((f.name, f.attname) for f in cls._meta.fields))
            valid_tfields = set(
                chain.from_iterable(
                    (f.name, f.attname)
                    for f in cls._meta.translations_model._meta.fields
                    if f.name not in ("master", "language_code")
                )
            )

            return [
                checks.Error(
                    "'ordering' refers to the non-existent field '%s' --hvad." % field,
                    hint=None,
                    obj=cls,
                    id="models.E015",
                )
                for field in fields - valid_fields - valid_tfields
            ]
def spreader_generator(blockpool, spread):
    """Returns an iter over an iter of iters, where the inner elements are interleaved at spread intervals.
    
    The tail has no such guarantees, as we're an eager bin packer.
    
    >>> jobs = [["a1", "a2", "a3"], ["b1"], ["c1", "c2", "c3"]]
    
    >>> list(spreader_generator(jobs, 1))
    ['a1', 'a2', 'a3', 'b1', 'c1', 'c2', 'c3']
    
    >>> list(spreader_generator(jobs, 2))
    ['a1', 'b1', 'a2', 'c1', 'a3', 'c2', 'c3']
    
    >>> list(spreader_generator(jobs, 3))
    ['a1', 'b1', 'c1', 'a2', 'c2', 'a3', 'c3']
    
    >>> list(spreader_generator(jobs, 4))
    ['a1', 'b1', 'c1', 'a2', 'c2', 'a3', 'c3']
    """
    # This sentinel object is unique to this function and can't equal anything
    # a user can put into the lists, so it's a safe "nothing" value for filler.
    sentinel = object()
    # We need a real iterator for our feeders to share
    blockpool_iter = iter(blockpool)
    # The feeders pick off blocks from blockpool_iter lazily and return
    # the elements of each group in turn
    feeders = [chain.from_iterable(blockpool_iter) for _ in range(spread)]
    # Now we'll zip our lazily-distributed spread-wide groups of jobs into stripes
    stripes = izip_longest(*feeders, fillvalue=sentinel)
    # and return all the values of the stripes in order
    flattened_spread = chain.from_iterable(stripes)
    # And we won't return the non-values from feeders that get the short straw
    not_sentinel = lambda x: x is not sentinel
    return ifilter(not_sentinel, flattened_spread)
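The trick above is that every feeder chains over the same underlying iterator, so whole blocks are handed out lazily to whichever feeder asks next. A minimal sketch of that sharing:

from itertools import chain

blockpool_iter = iter([['a1', 'a2'], ['b1'], ['c1']])
feeders = [chain.from_iterable(blockpool_iter) for _ in range(2)]
print(next(feeders[0]))  # a1  (feeder 0 claims the first block)
print(next(feeders[1]))  # b1  (feeder 1 gets the next unclaimed block)
print(next(feeders[0]))  # a2  (feeder 0 finishes its own block)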
Example #31
 def convert_to_data(self, *args: Any, **kwargs: Any) -> ArrayLike:
     instruments = self._args + list(self._kwargs.values())
     ordered_args = args + tuple(kwargs[key] for key in self._kwargs.keys())  # Match the internal order of args
     # Process and flatten all args
     data = chain.from_iterable([instrument.process_arg(arg) for instrument, arg in zip(instruments, ordered_args)])
     return data
Example #32
    def complete_chunked_upload(self, uuid, final_path, storage_metadata, force_client_side=False):
        self._initialize_cloud_conn()
        chunk_list = self._chunk_list_from_metadata(storage_metadata)

        # Here is where things get interesting: we are going to try to assemble this server side
        # In order to be a candidate all parts (after offsets have been computed) must be at least 5MB
        server_side_assembly = False
        if not force_client_side:
            server_side_assembly = True
            for chunk_offset, chunk in enumerate(chunk_list):
                # If the chunk is both too small, and not the last chunk, we rule out server side assembly
                if chunk.length < self.minimum_chunk_size and (chunk_offset + 1) < len(chunk_list):
                    server_side_assembly = False
                    break

        if server_side_assembly:
            logger.debug("Performing server side assembly of multi-part upload for: %s", final_path)
            try:
                # Awesome, we can do this completely server side, now we have to start a new multipart
                # upload and use copy_part_from_key to set all of the chunks.
                mpu = self.__initiate_multipart_upload(
                    final_path, content_type=None, content_encoding=None
                )
                updated_chunks = chain.from_iterable(
                    [_CloudStorage._rechunk(c, self.maximum_chunk_size) for c in chunk_list]
                )

                # [_PartUpload]
                upload_parts = []
                for index, chunk in enumerate(updated_chunks):
                    abs_chunk_path = self._init_path(chunk.path)

                    part = mpu.Part(index + 1)
                    part_copy = part.copy_from(
                        CopySource={"Bucket": self.get_cloud_bucket().name, "Key": abs_chunk_path},
                        CopySourceRange="bytes=%s-%s"
                        % (chunk.offset, chunk.length + chunk.offset - 1),
                    )

                    part_copy = self._perform_action_with_retry(
                        mpu.Part(index + 1).copy_from,
                        CopySource={"Bucket": self.get_cloud_bucket().name, "Key": abs_chunk_path},
                        CopySourceRange="bytes=%s-%s"
                        % (chunk.offset, chunk.length + chunk.offset - 1),
                    )

                    upload_parts.append(_PartUpload(index + 1, part_copy["CopyPartResult"]["ETag"]))

                self._perform_action_with_retry(
                    mpu.complete,
                    MultipartUpload={
                        "Parts": [
                            {"ETag": p.e_tag, "PartNumber": p.part_number} for p in upload_parts
                        ]
                    },
                )
            except (botocore.exceptions.ClientError, IOError) as ioe:
                # Something bad happened, log it and then give up
                msg = "Exception when attempting server-side assembly for: %s"
                logger.exception(msg, final_path)
                mpu.abort()
                raise ioe

        else:
            # We are going to turn all of the server side objects into a single file-like stream, and
            # pass that to stream_write to chunk and upload the final object.
            self._client_side_chunk_join(final_path, chunk_list)
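A sketch of the candidacy rule described in the comments: every chunk except the last has to reach the minimum part size (the sizes here are hypothetical):

minimum_chunk_size = 5 * 1024 * 1024
chunk_lengths = [8 * 1024 * 1024, 6 * 1024 * 1024, 1024]
server_side_assembly = all(
    length >= minimum_chunk_size or i == len(chunk_lengths) - 1
    for i, length in enumerate(chunk_lengths))
print(server_side_assembly)  # True: only the final chunk is undersized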
Example #33
if __name__ == '__main__':
    

    with open(settings.data.blacklist_path) as handle:
        blacklist = set(handle.read().split('\n'))

    data_file = settings.data.pairs_path
    print ('Reading {0}'.format(data_file))
    with open(data_file) as handle:
        reader = csv.reader(handle)
        pairs = ((question, answer) for question, answer in reader if not any(w in question for w in blacklist) and not any(w in answer for w in blacklist))

        print ('Building Frequency Distribution')
        vocabulary_size = settings.model.vocabulary_size - 4 # pad, start, end, unk
        freq_dist = FreqDist(chain.from_iterable(q.split() + a.split() for q, a in tqdm(pairs, total=3102698) ))

        print ('Total {0} unique words'.format(len(freq_dist)))
        word_counts = freq_dist.most_common(vocabulary_size)
        vocabulary = [word for word, count in word_counts]

        length = settings.model.sequence_length - 2 # start, end
        vocabulary_set = set(vocabulary)
        def remove_unknown(line):
            return ' '.join(word if word in vocabulary_set else UNK for word in line.split())

        unk_ratio = settings.data.unk_ratio
        def is_valid(line):
            words = line.split()
            return len(words) <= length and (words.count(UNK) / float(len(words))) < unk_ratio
        
def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
from itertools import chain

from colors import get_different_colors
from database import database_info, service_info
from elasticsearch_client import get_services_from_table
from string_utils import document_from_database_info
from tfidf_kmeans import tfidf_kmeans
from wordnet_network import get_wordnet_labels

foreign_key_graph = nx.Graph()

db_info = database_info()
connected_nodes = set(chain.from_iterable(
    [table['foreign_keys'] + [table['name']] for table in db_info if len(table['foreign_keys']) > 0]))
db_info = [info for info in db_info if info['name'] in connected_nodes]

documents = [document_from_database_info(info) for info in db_info]
k = 10
labels = tfidf_kmeans(documents, k)
# labels = get_wordnet_labels(documents)
vis_network = Network(height="100%", width="70%")

colors = get_different_colors(max(labels)+1)
# labels from k means
color_labels = [colors[l] for l in labels]
# services use this tables
# color_labels = ["#FF0000" if len(get_services_from_table(x['name'])) > 0 else "#DDDDDD" for x in db_info]

vis_network.add_nodes([x['name'] for x in db_info], color=color_labels)
Example #36
 def func(s, iterator):
     return chain.from_iterable(imap(f, iterator))
Example #37
 def grandchildren(self):
     return list(chain.from_iterable(c.children for c in self.children))
Example #38
def _core_oscillators(difmats, assignment, adj_index, rev_index, verbose):
    """
    Given a list of diffusion matrices calculated during a flip-flop state,
    this function identifies core oscillators as well as their anti-correlated partners.

    Parameters
    ----------
    :param difmats: Diffusion matrices during flip-flop state
    :param assignment: Cluster assignment
    :param adj_index: Dictionary for indexing
    :param rev_index: Dictionary for indexing
    :param verbose: Verbosity level of function
    :return: Tuple with list of oscillators and dictionary of anti-correlated oscillators
    """
    oscillators = list()
    oscillators_series = list()
    for index in range(len(assignment)):
        # node amplitude is NOT correlated to position in network
        seq = difmats[:, index, index]
        ampli = np.max(seq) - np.min(seq)
        if ampli > 0.5:
            # if the amplitude is this large,
            # the node may be an oscillator
            # in that case, mean amplitude may be low
            oscillators.append(index)
            oscillators_series.append(seq)
    oscillators = [rev_index[x] for x in oscillators]
    if verbose:
        logger.info('Found the following strong oscillators: ' +
                    str(oscillators))
    amplis = dict()
    clusdict = dict.fromkeys(oscillators)
    for x in clusdict:
        clusdict[x] = assignment[adj_index[x]]
    # we find anti-correlated oscillator nodes
    # there should be at least one node represented for each cluster
    for pair in combinations(range(len(oscillators)), 2):
        total = oscillators_series[pair[0]] - oscillators_series[pair[1]]
        # need to be careful with this number,
        # the core oscillators should converge to 1 and -1
        # but may stick a little below that value
        amplis[(oscillators[pair[0]],
                oscillators[pair[1]])] = (np.max(total) - np.min(total))
    # need to find the largest anti-correlation per cluster
    clus_corrs = dict.fromkeys(set(assignment), 0)
    clus_nodes = dict.fromkeys(set(assignment))
    for corr in amplis:
        cluster1 = clusdict[corr[0]]
        cluster2 = clusdict[corr[1]]
        if amplis[corr] > clus_corrs[cluster1]:
            clus_nodes[cluster1] = corr
            clus_corrs[cluster1] = amplis[corr]
        if amplis[corr] > clus_corrs[cluster2]:
            clus_nodes[cluster2] = corr
            clus_corrs[cluster2] = amplis[corr]
    clus_nodes = {k: v for k, v in clus_nodes.items() if v is not None}
    # it is possible for clusters to not have a strong oscillator
    core_oscillators = set(list(chain.from_iterable(list(
        clus_nodes.values()))))
    id_corrs = dict.fromkeys(core_oscillators, 0)
    anti_sizes = dict.fromkeys(core_oscillators, 0)
    for nodes in combinations(core_oscillators, 2):
        try:
            size = amplis[nodes]
        except KeyError:
            size = amplis[(nodes[1], nodes[0])]
        if size > anti_sizes[nodes[0]]:
            id_corrs[nodes[0]] = nodes[1]
            anti_sizes[nodes[0]] = size
        if size > anti_sizes[nodes[1]]:
            id_corrs[nodes[1]] = nodes[0]
            anti_sizes[nodes[1]] = size
    [
        clusdict.pop(x) for x in list(clusdict.keys())
        if x not in core_oscillators
    ]
    anti_corrs = dict()
    for core in core_oscillators:
        anti_corrs[clusdict[core]] = clusdict[id_corrs[core]]
    # oscillator is defined as strongest anti-correlation
    return core_oscillators, anti_corrs
list formatted graph.  Looking at the table, one can see that the # of neighbors
is simply how many times the node is present.  Therefore, one could simply convert
the array into a list and count the occurrences of each node.
'''

# allows use of Python3.X print functionality
from __future__ import print_function
from collections import Counter
from itertools import chain

edges = []
with open('data/rosalind_deg.txt', 'r') as f:
    # Skip first line
    f.next()  # NOTE - use next(f) in Python 3.X
    for line in f:
        edges.append(line.strip().split())
f.close()

my_list = []
for x in chain.from_iterable(edges):  # flatten the lists
    my_list.append(x)

d = Counter(my_list)  # count how many times each number present

o = open("output/Algorithmic_003_DEG.txt", 'w')
for key in sorted(
        d, key=int
):  # because keys are numbers, must be converted to int for sorting
    print(d[key], end=" ", file=o)
o.close()
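The flatten-then-count step above can also be written directly; a compact equivalent sketch on a toy edge list:

from collections import Counter
from itertools import chain

edges = [['1', '2'], ['2', '3'], ['3', '1']]
degrees = Counter(chain.from_iterable(edges))
print(degrees['1'], degrees['2'], degrees['3'])  # 2 2 2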
 def render(self):
     return mark_safe('\n'.join(
         chain.from_iterable(
             getattr(self, 'render_' + name)() for name in MEDIA_TYPES)))
Example #41
    def cache_pupil_timeline_data(
        self,
        key: str,
        detector_tag: str,
        ylim=None,
        fallback_detector_tag: T.Optional[str] = None,
    ):
        world_start_stop_ts = [
            self.g_pool.timestamps[0], self.g_pool.timestamps[-1]
        ]
        if not self.g_pool.pupil_positions:
            self.cache[key] = {
                "left": [],
                "right": [],
                "xlim": world_start_stop_ts,
                "ylim": [0, 1],
            }
        else:
            ts_data_pairs_right_left = [], []
            for eye_id in (0, 1):
                pupil_positions = self.g_pool.pupil_positions[eye_id,
                                                              detector_tag]
                if not pupil_positions and fallback_detector_tag is not None:
                    pupil_positions = self.g_pool.pupil_positions[
                        eye_id, fallback_detector_tag]
                if pupil_positions:
                    t0, t1 = (
                        pupil_positions.timestamps[0],
                        pupil_positions.timestamps[-1],
                    )
                    timestamps_target = np.linspace(t0,
                                                    t1,
                                                    NUMBER_SAMPLES_TIMELINE,
                                                    dtype=np.float32)

                    data_indeces = pm.find_closest(pupil_positions.timestamps,
                                                   timestamps_target)
                    data_indeces = np.unique(data_indeces)
                    for idx in data_indeces:
                        ts_data_pair = (
                            pupil_positions.timestamps[idx],
                            pupil_positions[idx][key],
                        )
                        ts_data_pairs_right_left[eye_id].append(ts_data_pair)

            if ylim is None:
                # max_val must not be 0, else gl will crash
                all_pupil_data_chained = chain.from_iterable(
                    ts_data_pairs_right_left)
                try:
                    # Outlier removal based on:
                    # https://en.wikipedia.org/wiki/Outlier#Tukey's_fences
                    min_val, max_val = np.quantile(
                        [pd[1] for pd in all_pupil_data_chained], [0.25, 0.75])
                    iqr = max_val - min_val
                    min_val -= 1.5 * iqr
                    max_val += 1.5 * iqr
                    ylim = min_val, max_val
                except IndexError:  # no pupil data available
                    ylim = 0.0, 1.0

            self.cache[key] = {
                "right": ts_data_pairs_right_left[0],
                "left": ts_data_pairs_right_left[1],
                "xlim": world_start_stop_ts,
                "ylim": ylim,
            }
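A worked sketch of the Tukey-fence bounds computed above, on toy data:

import numpy as np

data = [1, 2, 3, 4, 5, 6, 7, 8]
q1, q3 = np.quantile(data, [0.25, 0.75])  # 2.75, 6.25 (linear interpolation)
iqr = q3 - q1                             # 3.5
lo, hi = q1 - 1.5 * iqr, q3 + 1.5 * iqr
print(float(lo), float(hi))  # -2.5 11.5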
Example #42
        label_name = consume(tokens)
        if peek_or_terminal(tokens) == TOKENS.COLON:
            return symbol_table['__ labeled_statement __'](chain(
                (label_name, ), consume_all(tokens)), symbol_table)
            # return label_stmnt(label_name, statement(tokens, symbol_table))
        # it must be an expression, TODO: figure out a way without using dangerous chain!
        # tokens = chain((label_name, consume(tokens)), tokens)
        tokens = chain((label_name, ), consume_all(tokens))
        expr, _ = symbol_table['__ expression __'](
            tokens, symbol_table), error_if_not_value(tokens, TOKENS.SEMICOLON)
        return repeat(expr, 1)

    if peek_or_terminal(tokens) is not terminal:
        expr, _ = symbol_table['__ expression __'](
            tokens, symbol_table), error_if_not_value(tokens, TOKENS.SEMICOLON)
        return repeat(expr, 1)

    raise ValueError(
        '{l} No rule could be found to create statement, got {got}'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))


statement_funcs = labeled_statement, selection_statement, iteration_statement, jump_statement
set_rules(
    statement,
    chain(
        chain.from_iterable(
            imap(izip, imap(rules, statement_funcs),
                 imap(repeat, statement_funcs))),
        ((TOKENS.LEFT_BRACE, _comp_stmnt),
         (TOKENS.SEMICOLON, _empty_statement))))
Example #43
def collect_frequent(
    it: Iterator[List[Tuple[str, List[Any]]]],
    threshold: float,
    permutations: int,
    decay: float,
    min_freq: int,
    decay_filter: str,
    wrap_filter: str,
    bootstrap_prefix: str = None
) -> Generator[Tuple[str, PDict], None, None]:  # noqa
    """
    Reads all the documents (as returned by :func:`minhash_group`) and
    collects the frequent paragraphs from them on a per-domain basis.

    TODO: reference to MMDS

    Yields (domain, `PDict`) tuples per domain.

    :param it: an iterator that yields documents as in :func:`minhash_group`;
               i.e. URL -- paragraph minhash list tuples.
    :param threshold: the Jaccard similarity threshold for paragraph identity.
    :param permutations: the number of permutations per paragraph.
    :param decay: the decay (multiplication) constant used for scoring
                  paraphraphs.
    :param min_freq: the minimum number of occurrence from which a paragraph
                     is deemed frequent.
    :param decay_filter: decay expression that is used to filter paragraphs
                         after each step.
    :param wrap_filter: expression that is used to filter paragraphs after all
                        documents have been processed.
    :param bootstrap_prefix: prefix of an existing .pdata/.pdi file pair to
                             bootstrap the domain frequency counts with.
    """
    curr_domain = None

    if bootstrap_prefix:
        bootstrap = RandomPDataReader(bootstrap_prefix)
        logging.debug('Bootstrap file prefix: {}'.format(bootstrap_prefix))
    else:
        bootstrap = None

    try:
        fc = FrequentCollector(threshold, permutations, decay, min_freq,
                               bootstrap, decay_filter, wrap_filter)
        # I don't want to write all the domain != curr_domain stuff twice, so
        # let's add a sentinel record to the end.
        for url, mhs in chain(chain.from_iterable(it), [('', [])]):
            domain = urlsplit(url).netloc

            # A new domain: yield results and re-initialize everything
            if domain != curr_domain:
                # Filtering and yielding results
                if curr_domain is not None:
                    fc.wrap_up_domain()

                    logging.debug('Finished collecting frequent paragraphs '
                                  'from {}...'.format(curr_domain))
                    if fc.freq_ps:
                        logging.debug(
                            'Found {} frequent paragraphs (duplicates: '
                            '{}) in domain {} ({} documents).'.format(
                                len(fc.freq_ps), fc.num_dup, curr_domain,
                                fc.stats.docs))

                    # The domain is returned as well, so that we know what the
                    # input was
                    yield curr_domain, fc.freq_ps, fc.stats

                # Check for the sentinel
                if not domain:
                    break

                # Re-initialization
                logging.debug(
                    'Collecting frequent paragraphs from {}...'.format(domain))
                curr_domain = domain
                fc.reset(curr_domain)

            fc.collect_from_doc(url, mhs)
    finally:
        if bootstrap:
            bootstrap.close()
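The sentinel record appended with chain is what lets the per-domain wrap-up also run for the final domain; a stripped-down sketch of that pattern:

from itertools import chain
from urllib.parse import urlsplit

records = [('http://a.example/1', [11]), ('http://a.example/2', [22]), ('http://b.example/1', [33])]
curr_domain = None
for url, mhs in chain(records, [('', [])]):  # ('', []) is the sentinel record
    domain = urlsplit(url).netloc
    if domain != curr_domain:
        if curr_domain is not None:
            print('wrapping up', curr_domain)  # fires for a.example, then b.example
        if not domain:
            break
        curr_domain = domain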
Example #44
    async def load_definitions(self):
        if "definitions" not in self.mem_map:
            return {}
        logger.info("Updating Definitions from Panel")

        data = defaultdict(dict)
        try:
            def_parsers = self.get_message("DefinitionsParserMap")

            definitions = self.mem_map["definitions"]
            for elem_type in definitions:
                if elem_type not in def_parsers:
                    logger.warning("No parser for %s definitions", elem_type)
                    continue

                start_time = time()
                parser = def_parsers[elem_type]
                if isinstance(parser, typing.Callable):
                    parser = parser(self.settings)

                assert isinstance(parser, Construct)
                elem_def = definitions[elem_type]
                enabled_indexes = set()

                addresses = enumerate(chain.from_iterable(
                    elem_def["addresses"]),
                                      start=1)

                async for index, raw_data in self._eeprom_batch_reader(
                        addresses, parser.sizeof()):
                    element = parser.parse(raw_data)

                    if cfg.LOGGING_DUMP_MESSAGES:
                        logger.debug(
                            f"EEPROM parsed ({elem_type}/{index}): {element}")
                    if elem_def.get("bit_encoded"):
                        for elem_index, elem_data in element.items():
                            definition = elem_data.get("definition")
                            data_index = (index -
                                          1) * len(element) + elem_index
                            data[elem_type][data_index] = elem_data
                            if definition != "disabled":
                                enabled_indexes.add(data_index)
                    else:
                        data[elem_type][index] = element
                        definition = element.get("definition")
                        if definition != "disabled":
                            enabled_indexes.add(index)

                cfg.LIMITS[elem_type] = get_limits_for_type(
                    elem_type, list(enabled_indexes))
                cfg.LIMITS[elem_type] = list(
                    set(cfg.LIMITS[elem_type]).intersection(enabled_indexes))

                logger.info(
                    f"{elem_type.title()} definitions loaded ({round(time() - start_time, 2)}s)"
                )

        except ResourceWarning:
            pass

        return construct_free(data)
Example #45
    def _fetch_artifacts(self, local_override_versions):
        """Download jars from maven repo into the artifact cache dir, then symlink them into our workdir."""

        products = self.context.products
        # Coordinate -> set(relative path to symlink of artifact in symlink farm)
        coord_to_artifact_symlinks = defaultdict(set)

        # Demanded by some downstream tasks
        safe_mkdir(self.pom_cache_dir)
        products.safe_create_data('ivy_cache_dir', lambda: self.pom_cache_dir)

        coords = set(
            Coordinate(*t) for t in chain.from_iterable(
                self.target_to_maven_coordinate_closure.values()))
        artifacts_to_download = set()
        for coord in coords:
            for artifact in self.maven_coordinate_to_provided_artifacts[coord]:
                # Sanity check. At this point, all artifacts mapped to a coord should be fully resolved, location included.
                if artifact.repo_url is None:
                    raise Exception(
                        "Something went wrong! {} was mapped to an artifact {} with no "
                        "associated repo: ".format(coord, artifact))
                cached_artifact_path = os.path.join(self.pom_cache_dir,
                                                    artifact.artifact_path)

                if not os.path.exists(cached_artifact_path):
                    artifacts_to_download.add(artifact)
        self._download_artifacts(artifacts_to_download)

        # TODO(mateo): Rename. I think that Foursquare still needs this product but it is a deprecated concept upstream.
        # There is no ivy involved in this anymore.
        ivy_symlink_map = self.context.products.get_data(
            'ivy_resolve_symlink_map', dict)
        safe_mkdir(self.artifact_symlink_dir, clean=True)
        for coord in coords:
            for artifact in self.maven_coordinate_to_provided_artifacts[coord]:
                local_override_key = (artifact.groupId, artifact.artifactId)
                if local_override_key not in local_override_versions:
                    cached_artifact_path = os.path.realpath(
                        os.path.join(self.pom_cache_dir,
                                     artifact.artifact_path))
                else:
                    cached_artifact_path = os.path.realpath(
                        local_override_versions[local_override_key])
                    if not os.path.exists(cached_artifact_path):
                        raise Exception(
                            'Local override for {} at {} does not exist.'.
                            format(artifact, cached_artifact_path))
                    # TODO(mateo): Use regular logging for info - print is not great and does not respect --quiet.
                    # But atm, log levels for pom-resolve are either overwhelming or not informative enough. pom-resolve should probably
                    # be broken up into multiple tasks some day. This setting is only used for local development in any case.
                    print("\n* Using local override for {}:\n\t{}".format(
                        artifact, cached_artifact_path))
                symlinked_artifact_path = os.path.join(
                    self.artifact_symlink_dir, artifact.artifact_path)
                safe_mkdir(os.path.dirname(symlinked_artifact_path))

                try:
                    os.symlink(cached_artifact_path, symlinked_artifact_path)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                    existing_symlink_target = os.readlink(
                        symlinked_artifact_path)
                    if existing_symlink_target != cached_artifact_path:
                        raise Exception(
                            'A symlink already exists for artifact {}, but it points to the wrong path.\n'
                            'Symlink: {}\n'
                            'Destination of existing symlink: {}\n'
                            'Where this symlink should point: {}\n'.format(
                                artifact, symlinked_artifact_path,
                                existing_symlink_target, cached_artifact_path))
                ivy_symlink_map[cached_artifact_path] = symlinked_artifact_path
                coord_to_artifact_symlinks[artifact] = symlinked_artifact_path
        return coord_to_artifact_symlinks
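
The EEXIST branch above is essentially an idempotent-symlink check: a fresh link is created, an identical existing link is tolerated, and anything else is an error. A standalone sketch of that pattern (hypothetical helper, not part of the task above):

import errno
import os

def symlink_idempotent(src, dst):
    # Create dst -> src, tolerating an existing link only if it already points at src.
    try:
        os.symlink(src, dst)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
        existing = os.readlink(dst)
        if existing != src:
            raise Exception('Symlink {} already exists but points to {}, not {}'.format(
                dst, existing, src))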
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")
tf.flags.DEFINE_string("output_file", "single_scores.csv",
                       "Name of output file for final bAbI accuracy scores.")

FLAGS = tf.flags.FLAGS

print("Started Task:", FLAGS.task_id)

# task data
train, test = load_task(FLAGS.data_dir, FLAGS.task_id)
data = train + test

vocab = sorted(
    reduce(lambda x, y: x | y,
           (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

max_story_size = max(map(len, (s for s, _, _ in data)))
mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
query_size = max(map(len, (q for _, q, _ in data)))
memory_size = min(FLAGS.memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

# train/validation/test sets
Beispiel #47
0
    def read_trials(self, 
        start_re = 'MSG\t([\d\.]+)\ttrial (\d+) started at (\d+.\d)', 
        stop_re = 'MSG\t([\d\.]+)\ttrial (\d+) stopped at (\d+.\d)', 
        phase_re = 'MSG\t([\d\.]+)\ttrial X phase (\d+) started at (\d+.\d)',
        parameter_re = 'MSG\t[\d\.]+\ttrial X parameter[\t ]*(\S*?)\s+: ([-\d\.]*|[\w]*)'):
        
        """
        read_trials reads in trials from the message file,
        constructing timings and parameters for each of the trials,
        their phases and their parameters.
        It reads the actual values to internal variables, and also 
        creates dictionaries that will indicate the formats needed
        when creating the hfd5 file
        """
        
        self.logger.info('reading trials from %s', os.path.split(self.message_file)[-1])
        self.get_message_string()
        
        #
        # read the trials themselves
        #
        self.start_trial_strings = re.findall(re.compile(start_re), self.message_string)
        self.stop_trial_strings = re.findall(re.compile(stop_re), self.message_string)
        
        if len(self.start_trial_strings) > 0:    # check whether there are any trials here. 
            
            self.trial_starts = np.array([[float(s[0]), int(s[1]), float(s[2])] for s in self.start_trial_strings])
            self.trial_ends = np.array([[float(s[0]), int(s[1]), float(s[2])] for s in self.stop_trial_strings])
            self.nr_trials = int(self.trial_ends[-1,1])+1
            
            # remove duplicate rows:
            self.trial_starts = np.vstack([self.trial_starts[self.trial_starts[:,1]==t,:][0,:] for t in range(self.nr_trials)])            
            self.trial_ends = np.vstack([self.trial_ends[self.trial_ends[:,1]==t,:][0,:] for t in range(self.nr_trials)])

            # # sometimes we have twice as many trial starts as trial ends!
            # if 2 * len(self.trial_starts) == len(self.trial_ends):
            #     self.trial_ends = self.trial_ends[::2]

            # # due to early task abortion we can have more trial starts than trial ends:
            # if abs(len(self.trial_starts) - len(self.trial_ends)) == 1:
            #     self.trial_starts = self.trial_starts[:-2]
            #     self.trial_ends = self.trial_ends[:len(self.trial_starts)]
            
            # self.nr_trials = len(self.trial_starts)
            self.trials = np.hstack((self.trial_starts, self.trial_ends))
            
            # create a dictionary for the types of timing information we'd like to look at
            self.trial_type_dictionary = [('trial_start_EL_timestamp', np.float64), ('trial_start_index',np.int32), ('trial_start_exp_timestamp',np.float64), ('trial_end_EL_timestamp',np.float64), ('trial_end_index',np.int32), ('trial_end_exp_timestamp',np.float64)]
            
            self.trials = [{'trial_start_EL_timestamp': tr[0], 'trial_start_index': tr[1], 'trial_start_exp_timestamp': tr[2], 'trial_end_EL_timestamp': tr[3], 'trial_end_index': tr[4], 'trial_end_exp_timestamp': tr[5]} for tr in self.trials]
            
            self.trial_type_dictionary = np.dtype(self.trial_type_dictionary)
            #
            # trial phases 
            #
            self.trial_phases = []
            for i in range(self.nr_trials):
                this_trial_re = phase_re.replace(' X ', ' ' + str(i) + ' ')
                phase_strings = re.findall(re.compile(this_trial_re), self.message_string)
                self.trial_phases.append([[int(i), float(s[0]), int(s[1]), float(s[2])] for s in phase_strings])
            self.trial_phases = list(chain.from_iterable(self.trial_phases))
            self.trial_phases = [{'trial_phase_trial': tr[0], 'trial_phase_EL_timestamp': tr[1], 'trial_phase_index': tr[2], 'trial_phase_exp_timestamp': tr[3]} for tr in self.trial_phases]
            self.nr_trial_phases = len(self.trial_phases)
            
            self.trial_phase_type_dictionary = [('trial_phase_trial', np.float64), ('trial_phase_EL_timestamp', np.float64), ('trial_phase_index', np.int32), ('trial_phase_exp_timestamp', np.float64)]
            self.trial_phase_type_dictionary = np.dtype(self.trial_phase_type_dictionary)
            
            # now adjust the trial type dictionary and convert into a numpy dtype
            # self.trial_type_dictionary.append(('trial_phase_timestamps', np.float64, (self.nr_phase_starts.max(), 3)))
        else:
            self.logger.info('no trial or phase information in edf file %s'%self.input_file_name)
            self.nr_trials = 0
        
        #
        # parameters 
        #

        self.message_string = self.message_string.replace(' [','').replace('.]','')
        
        parameters = []
        for i in range(self.nr_trials):
            this_re = parameter_re.replace(' X ', ' ' + str(i) + ' ')
            parameter_strings = re.findall(re.compile(this_re), self.message_string)
            
            # check if double params:
            param_names = np.array([p[0] for p in parameter_strings])
            try:
                nr_double_trials = sum(param_names == param_names[0])
            
                # we have double trials -- custom procedure!:
                if nr_double_trials > 1:
                    nr_params = int(len(param_names) / nr_double_trials)
                    nr_param = 0
                    parameter_strings2 = []
                    for d in range(nr_double_trials):
                        parameter_strings2.append( parameter_strings[nr_param:nr_param+nr_params] )
                        nr_param += nr_params
                    for d in parameter_strings2:
                        # assuming all these parameters are numeric
                        this_trial_parameters = {'trial_nr': float(i)}
                        for s in d:
                            try:
                                this_trial_parameters.update({s[0]: float(s[1])})
                            except ValueError:
                                pass
                        parameters.append(this_trial_parameters)
                
                # we don't have double trials -- standard procedure!
                else:
                    if len(parameter_strings) > 0:
                        # assuming all these parameters are numeric
                        this_trial_parameters = {'trial_nr': float(i)}
                        for s in parameter_strings:
                            try:
                                this_trial_parameters.update({s[0]: float(s[1])})
                            except ValueError:
                                pass
                        parameters.append(this_trial_parameters)
            except IndexError:
                # no parameters found for this trial
                pass
        
        if len(parameters) > 0:        # there were parameters in the edf file
            self.parameters = parameters
            ptd = [(k, np.float64) for k in np.unique(np.concatenate([list(k.keys()) for k in self.parameters]))]
            self.parameter_type_dictionary = np.dtype(ptd)
        else: # we have to take the parameters from the output_dict pickle file of the same name as the edf file. 
            self.logger.info('no parameter information in edf file')
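
For illustration, the default start_re matches MSG lines of the following shape (the sample line is made up, not taken from a real EDF file):

import re

start_re = r'MSG\t([\d\.]+)\ttrial (\d+) started at (\d+.\d)'
m = re.search(start_re, 'MSG\t1234567.0\ttrial 3 started at 456.7')
print(m.groups())  # ('1234567.0', '3', '456.7') -> EL timestamp, trial index, experiment timestamp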
Beispiel #48
0
def coset_enumeration_c(fp_grp,
                        Y,
                        max_cosets=None,
                        draft=None,
                        incomplete=False):
    """
    >>> from sympy.combinatorics.free_groups import free_group
    >>> from sympy.combinatorics.fp_groups import FpGroup, coset_enumeration_c
    >>> F, x, y = free_group("x, y")
    >>> f = FpGroup(F, [x**3, y**3, x**-1*y**-1*x*y])
    >>> C = coset_enumeration_c(f, [x])
    >>> C.table
    [[0, 0, 1, 2], [1, 1, 2, 0], [2, 2, 0, 1]]

    """
    # Initialize a coset table C for < X|R >
    X = fp_grp.generators
    R = fp_grp.relators
    C = CosetTable(fp_grp, Y, max_cosets=max_cosets)
    if draft:
        C.table = draft.table[:]
        C.p = draft.p[:]
        C.deduction_stack = draft.deduction_stack
        for alpha, x in product(range(len(C.table)), X):
            if C.table[alpha][C.A_dict[x]] is not None:
                C.deduction_stack.append((alpha, x))
    A = C.A
    # replace all the elements by cyclic reductions
    R_cyc_red = [rel.identity_cyclic_reduction() for rel in R]
    R_c = list(chain.from_iterable((rel.cyclic_conjugates(), (rel**-1).cyclic_conjugates()) \
            for rel in R_cyc_red))
    R_set = set()
    for conjugate in R_c:
        R_set = R_set.union(conjugate)
    # a list of subsets of R_c whose words start with "x".
    R_c_list = []
    for x in C.A:
        r = set([word for word in R_set if word[0] == x])
        R_c_list.append(r)
        R_set.difference_update(r)
    for w in Y:
        C.scan_and_fill_c(0, w)
    for x in A:
        C.process_deductions(R_c_list[C.A_dict[x]], R_c_list[C.A_dict_inv[x]])
    alpha = 0
    while alpha < len(C.table):
        if C.p[alpha] == alpha:
            try:
                for x in C.A:
                    if C.p[alpha] != alpha:
                        break
                    if C.table[alpha][C.A_dict[x]] is None:
                        C.define_c(alpha, x)
                        C.process_deductions(R_c_list[C.A_dict[x]],
                                             R_c_list[C.A_dict_inv[x]])
            except ValueError as e:
                if incomplete:
                    return C
                raise e
        alpha += 1
    return C
def powerset(s):
    s = list(s)
    return set(
        chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)))
Beispiel #50
0
def stability_curve_time_space_splitter(
        train_data: pd.DataFrame,
        training_time_limit: DateType,
        space_column: str,
        time_column: str,
        freq: str = 'M',
        space_hold_percentage: float = 0.5,
        random_state: int = None,
        min_samples: int = 1000) -> SplitterReturnType:
    """
    Splits the data into temporal buckets given by the specified frequency.
    The training set is fixed (all data up to `training_time_limit`), while the
    hold-out set is a rolling window that moves further into the future with each
    fold. Useful to see how model performance degrades as the training data gets
    more outdated. Folds are made so that NONE of the IDs in the hold-out set
    appears in the training set.

    Parameters
    ----------
    train_data : pandas.DataFrame
        A Pandas' DataFrame that will be split for stability curve estimation.

    training_time_limit : str
        The Date String for the end of the training period. Should be of the same
        format as the values in `time_column`.

    space_column : str
        The name of the ID column of `train_data`

    time_column : str
        The name of the Date column of `train_data`

    freq : str
        The temporal frequency.
        See: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases

    space_hold_percentage : float
        The proportion of hold out IDs

    random_state : int
        A seed for the random number generator for ID sampling across train and
        hold out sets.

    min_samples : int
        The minimum number of samples required in the split to keep the split.
    """
    train_data = train_data.reset_index()

    rng = check_random_state(random_state)

    train_time = train_data[
        train_data[time_column] <= training_time_limit][time_column]
    train_index = train_time.index.values
    train_space = train_data.iloc[train_index][space_column].unique()

    held_space = rng.choice(train_space,
                            int(len(train_space) * space_hold_percentage),
                            replace=False)

    test_data = train_data[(train_data[time_column] > training_time_limit)
                           & (~train_data[space_column].isin(held_space))]
    train_index = train_data[
        (train_data[time_column] <= training_time_limit)
        & (train_data[space_column].isin(held_space))].index.values

    first_test_moment = test_data[time_column].min()
    last_test_moment = test_data[time_column].max()

    logs, test_indexes = _get_sc_test_fold_idx_and_logs(
        test_data, train_time, time_column, first_test_moment,
        last_test_moment, min_samples, freq)

    # From "list of dicts" to "dict of lists" hack:
    logs = [{k: [dic[k] for dic in logs] for k in logs[0]}]

    # Flatten test_indexes:
    flattened_test_indices = list(chain.from_iterable(test_indexes))

    return [(train_index, flattened_test_indices)], logs
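
A minimal, self-contained sketch of the splitting idea described in the docstring, on made-up toy data (this is not the implementation above and uses none of its helpers):

from itertools import chain

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "id": ["a", "b", "c", "d"] * 6,
    "date": pd.to_datetime(["2020-01-15", "2020-02-15", "2020-03-15",
                            "2020-04-15", "2020-05-15", "2020-06-15"]).repeat(4),
    "y": np.arange(24),
})

rng = np.random.RandomState(42)
ids = df["id"].unique()
held_ids = rng.choice(ids, size=len(ids) // 2, replace=False)

# Fixed training set: early dates, held IDs only.
train_index = df[(df["date"] <= "2020-03-31") & (df["id"].isin(held_ids))].index.values

# Hold-out folds: later dates, non-held IDs, bucketed per month.
test_folds = [fold.index.values
              for _, fold in df[(df["date"] > "2020-03-31")
                                & (~df["id"].isin(held_ids))].groupby(pd.Grouper(key="date", freq="M"))]

flattened_test_indices = list(chain.from_iterable(test_folds))
print(len(train_index), [len(f) for f in test_folds], len(flattened_test_indices))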
                    help = "File to store distributions in. Pickle format will be used. Default is 'distributions.pickle'")
parser.add_argument("--non_paternity", "-np", type = float, default = 0.0,
                    help = "Non paternity rate for the adversary to assume.")
parser.add_argument("--to_json", default = None,
                    help = "If this flag is present, will instead store the population as json for faster computation in another language")

args = parser.parse_args()

print("Loading population")
with open(args.population_file, "rb") as pickle_file:
    population = PopulationUnpickler(pickle_file).load()
fix_twin_parents(population)

if not args.recover:
    potentially_labeled = list(chain.from_iterable([generation.members
                                                    for generation
                                                    in population.generations[-3:]]))
    if args.num_labeled_nodes <= 0:
        num_labeled_nodes = population.size // 100
    else:
        num_labeled_nodes = args.num_labeled_nodes
    labeled_nodes = sample(potentially_labeled, num_labeled_nodes)
else:
    print("Recovering run")
    labeled_nodes = [population.id_mapping[int(filename)]
                     for filename in listdir(args.work_dir)]

if args.to_json:
    num_generations = population.num_generations
    clear_index = max(num_generations - args.gen_back, 0)
    to_clear = population.generations[clear_index].members
Beispiel #52
0
# routes is the set of all (x, y) tuples where (x, y)
# is a space (i.e. the set of all valid points).
routes = {(x, y)
          for x in range(width) for y in range(height)
          if lines[y][x].isspace()}

# This is the clever bit of the code: you parse the input string and,
# for each instruction, append a lambda function to the list which,
# given the current state of the system, returns the next state. The
# state is represented by four variables, x, y, dx, dy, where x and y
# are the current position and dx, dy encode the direction. A short
# sketch of folding these step functions over a start state follows
# the two list definitions below.
raw_path = input().strip()
path = list(
    chain.from_iterable({
        'l': lambda _: [lambda x, y, dx, dy: (x, y, dy, -dx)],
        'r': lambda _: [lambda x, y, dx, dy: (x, y, -dy, dx)]
    }.get(entry, lambda ct: [lambda x, y, dx, dy:
                             (x + dx, y + dy, dx, dy)] * int(ct))(entry)
                        for entry in re.findall('[rl]|[0-9]+', raw_path)))

test = list(
    chain.from_iterable({
        'l': lambda _: ['left'],
        'r': lambda _: ['right']
    }.get(entry, lambda ct: ['straight'] * int(ct))(entry)
                        for entry in re.findall('[rl]|[0-9]+', raw_path)))
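
The generated step functions can then be folded over an initial state, for example (hypothetical start position and heading; the rest of the original program is not shown):

# Start at (0, 0) heading right (dx=1, dy=0) and apply every step in turn.
x, y, dx, dy = 0, 0, 1, 0
for step in path:
    x, y, dx, dy = step(x, y, dx, dy)
print(x, y, dx, dy)  # final position and heading after following the path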


def get_entry(t):
    return [t]

Beispiel #53
0
 def words(self, doc_id: Hashable = None, sort=False) -> list[str]:
     words = self._documents[doc_id] if doc_id is not None else list(chain.from_iterable(self._documents.values()))
     if sort:
         return sorted(words)
     return words
Beispiel #54
0
    def __init__(self, annotations, struct, json_annotations, ext):
        """
        Describes the property list for a struct
        Also create a list of c_ast.Decl to append to the struct decls
        """
        self.json_annotations = json_annotations
        self.annotated_properties = None
        self.annotations = annotations
        self.ext = ext
        self.init_list = None
        self.decls = None
        self.struct = struct
        self.extra_decls = None

        def make_extra_decl(name, t):
            idtype = c_ast.IdentifierType([t])
            td = c_ast.TypeDecl(name, [], idtype)
            return c_ast.Decl(
                name,
                [],  # quals
                [],  # storage
                [],  # funcspec
                td,  # type
                None,  # init
                None,  # bitsize
            )

        fi = getframeinfo(currentframe())
        annotated_properties = [
            AnnotatedProperty(self, d) for d in struct.decls
        ]

        out_ap = []
        for ap in annotated_properties:
            inline_annotation = ap.values.get('inline', False)
            if inline_annotation:
                astruct = self.inline_struct_annotated(inline_annotation,
                                                       ap.decl)
                out_ap += astruct.annotated_properties
            else:
                out_ap.append(ap)

        self.annotated_properties = out_ap

        init_lists = [
            ap.init_list for ap in out_ap
            # 'private' and 'inline' have no init_list
            if ap.init_list is not None
        ]

        # NULL terminator
        init_lists.append(c_ast.InitList([c_ast.Constant('int', '0')]))

        self.init_list = c_ast.InitList(init_lists,
                                        Coord(fi.filename, fi.lineno))

        decls = [ap.decl for ap in out_ap]

        extra_decls = chain.from_iterable(
            (ap.extra_decls.iteritems() for ap in out_ap))
        extra_decls = [make_extra_decl(name, t) for name, t in extra_decls]

        decls += extra_decls

        self.decls = decls
Beispiel #55
0
 def test_all_variables_included(self):
     for ast, var_names_locs in self.task.graphs_and_instances:
         locations = list(
             chain.from_iterable([i[1] for i in var_names_locs]))
         self.assertCountEqual(
             locations, [i[0] for i in ast.nodes_that_represent_variables])
Beispiel #56
0
def load_ndarray(fpath, celltype=None):
    print(" - reading", fpath)
    # FIXME: implement celltype
    a = la.read_csv(fpath, dialect='liam2')
    # print(a.info)
    return a
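    # NOTE: the early return above makes the legacy CSV-parsing code below unreachable.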
    with open(fpath, "rb") as f:
        reader = csv.reader(f)
        line_stream = skip_comment_cells(strip_rows(reader))
        header = line_stream.next()
        str_table = []
        for line in line_stream:
            if any(value == '' for value in line):
                raise Exception("empty cell found in %s" % fpath)
            str_table.append(line)
    ndim = len(header)

    # handle last dimension header (horizontal values)
    last_d_header = str_table.pop(0)
    # auto-detect type of values for the last d and convert them
    last_d_pvalues = convert_1darray(last_d_header)

    unique_last_d, dupe_last_d = unique_duplicate(last_d_pvalues)
    if dupe_last_d:
        print(("Duplicate column header value(s) (for '%s') in '%s': %s"
              % (header[-1], fpath,
                 ", ".join(str(v) for v in dupe_last_d))))
        raise Exception("bad data in '%s': found %d "
                        "duplicate column header value(s)"
                        % (fpath, len(dupe_last_d)))

    # handle other dimensions header

    # strip the ndim-1 first columns
    headers = [[line.pop(0) for line in str_table]
               for _ in range(ndim - 1)]
    headers = [convert_1darray(pvalues_str) for pvalues_str in headers]
    if ndim > 1:
        # having duplicate values is normal when there are more than 2
        # dimensions but we need to test whether there are duplicates of
        # combinations.
        dupe_combos = list(duplicates(zip(*headers)))
        if dupe_combos:
            print(("Duplicate row header value(s) in '%s':" % fpath))
            print((PrettyTable(dupe_combos)))
            raise Exception("bad alignment data in '%s': found %d "
                            "duplicate row header value(s)"
                            % (fpath, len(dupe_combos)))

    possible_values = [np.array(list(unique(pvalues))) for pvalues in headers]
    possible_values.append(np.array(unique_last_d))

    shape = tuple(len(values) for values in possible_values)
    num_possible_values = prod(shape)

    # transform the 2d table into a 1d list
    str_table = list(chain.from_iterable(str_table))
    if len(str_table) != num_possible_values:
        raise Exception("incoherent data in '%s': %d data cells "
                        "found while it should be %d based on the number "
                        "of possible values in headers (%s)"
                        % (fpath,
                           len(str_table),
                           num_possible_values,
                           ' * '.join(str(len(values))
                                      for values in possible_values)))

    # TODO: compare time with numpy built-in conversion:
    # if dtype is None, numpy tries to detect the best type itself,
    # which works well if the values are already numeric;
    # if dtype is provided, numpy does a good job of converting from string
    # values.
    if celltype is None:
        celltype = detect_column_type(str_table)
    data = convert_1darray(str_table, celltype)
    array = np.array(data, dtype=celltype)
    return la.LArray(array.reshape(shape), header, possible_values)
Beispiel #57
0
 def install_flags(self):
     if self.indices_args is None:
         self.indices_args = tuple(
             chain.from_iterable(('--extra-index-url', x) for x in PIP_EXTRA_INDICES)
         )
     return self.indices_args
Beispiel #58
0
 def test_no_duplicates_uses(self):
     for ast, var_names_locs in self.task.graphs_and_instances:
         locations = list(
             chain.from_iterable([i[1] for i in var_names_locs]))
         self.assertEqual(len(locations), len(set(locations)))
Beispiel #59
0
def calc_grow_delta(white_lines, white_cols):
    # list() is needed so the zip result can be sliced on Python 3
    centers = chain.from_iterable(list(zip(white_lines, white_cols))[1:-1])
    return int(min(b - a for a, b in centers) / 2)
def genPowerset(iterable):
    s = iterable
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))