Example #1
    def succeeded(self, event):
        command = self.started_cmds.get(event.request_id)
        if not command:
            return

        self.started_cmds.pop(event.request_id)

        duration = event.duration_micros
        if self.is_below_lwm(duration):
            return

        [cmd, q, meta] = take(3, command.items())
        self.render_cmd(cmd, duration, q)

        ents = pipe(
            traceback.extract_stack(),
            self.config.stack_preprocess,
            map(lambda rec: StackEntry(self.config.file_capture, *rec)),
            filter(lambda ent: ent.file_capture()),
            filter(lambda ent: len(
                list(
                    filter(lambda p: re.match(p, ent.file, re.M), self.config.
                           ignores))) == 0),
            groupby(lambda ent: ent.file),
        )
        self.render_stack(ents)
 def count_predictions(filtered_predictions_list, target_label):
     return pipe(
         filtered_predictions_list,
         filter(lambda pair: pair[1] == target_label),
         list,
         len
     )
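A minimal, hypothetical call to the helper above (assuming pipe and the curried filter are imported from toolz, as in the other examples on this page); it simply counts the pairs whose second element equals the target label:

from toolz import pipe
from toolz.curried import filter

# (input, predicted label) pairs -- purely illustrative data
predictions = [("spam", 1), ("ham", 0), ("eggs", 1)]
assert count_predictions(predictions, 1) == 2  # two pairs carry label 1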
Example #3
    def prune_hyperrect(self, rect: Hyperrectangle) -> Hyperrectangle:
        new_statements = set()

        training_examples_covered = pipe(self.examples,
                                         filter(lambda e: rect.covers(e)),
                                         list)

        for statement in rect.statements:
            covered_samples = pipe(training_examples_covered,
                                   filter(statement.covers_example), list)
            all_training_data_feature_values = pipe(
                covered_samples,
                map(lambda e: e.value_by_feature[statement.feature]),
                set,
            )
            if statement.feature.kind == FeatureType.CATEGORICAL:
                new_statements.add(
                    Statement(
                        feature=statement.feature,
                        categories=statement.categories.intersection(
                            all_training_data_feature_values),
                    ))

            if statement.feature.kind == FeatureType.REAL:
                lowest_boundary = min(all_training_data_feature_values)
                highest_boundary = max(all_training_data_feature_values)

                new_statements.add(
                    Statement(
                        feature=statement.feature,
                        lower_boundary=lowest_boundary,
                        upper_boundary=highest_boundary,
                    ))

        return Hyperrectangle(statements=new_statements, label=rect.label)
Example #4
def bound_if_needed(
    rule: Rule, statement: Statement, feature_min_values, feature_max_values
) -> Set[Statement]:
    statements = set(rule.get_statements_for_feature(statement.feature_idx)).difference(
        {statement}
    )
    new_statements = set()
    next_higher = pipe(
        statements, filter(lambda s: s.threshold > statement.threshold), list
    )

    next_lower = pipe(
        statements, filter(lambda s: s.threshold < statement.threshold), list
    )

    if statement.relation == Relation.LEQ and not any(next_lower):
        new_statements.add(
            Statement(
                statement.feature_idx,
                Relation.MT,
                feature_min_values[statement.feature_idx],
            )
        )
    elif statement.relation == Relation.MT and not any(next_higher):
        new_statements.add(
            Statement(
                statement.feature_idx,
                Relation.LEQ,
                feature_max_values[statement.feature_idx] - EPS,
            )
        )
    else:
        return set()

    return new_statements
Example #5
def main():
    """Main method."""
    # create a player named tuple
    Player = namedtuple('Player', ['first', 'last', 'number', 'team', 'city'])

    # create some player named tuples
    m_j = Player(first='Michael', last='Jordan', number='23', team='Bulls', city='Chicago')
    k_b = Player(first='Kobe', last='Bryant', number='24', team='Lakers', city='Los Angeles')
    l_b = Player(first='LeBron', last='James', number='23', team='Cavaliers', city='Cleveland')
    k_p = Player(first='Kristaps', last='Porzingis', number='6', team='Knicks', city='New York')
    k_d = Player(first='Kevin', last='Durant', number='35', team='Warriors', city='Oakland')

    # store the players in tuple
    players = (m_j, k_b, l_b, k_p, k_d)

    # filter
    two_three = filter(lambda x: x.number == '23', players)
    print(tuple(two_three)) # => (Player(first='Michael', last='Jordan', number='23', team='Bulls',
                            # city='Chicago'), Player(first='LeBron', last='James', number='23',
                            # team='Cavaliers', city='Cleveland'))

    # map
    result = map(lambda x: ''.join([x.first, ' ', x.last]), players)
    print(tuple(result)) # => ('Michael Jordan', 'Kobe Bryant', 'LeBron James', 'Kristaps Porzingis', 'Kevin Durant')

    # reduce
    result = reduce(lambda x, y: x + y, [1, 2, 3], 0)
    print(result) # => 6

    # compose
    nums = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]
    c_greater = curry(greater)
    greater_zero = c_greater(y_val=0)
    result = compose(filter(greater_zero), filter(iseven))(nums)
    print(tuple(result)) # => (2, 4)
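The filter/map calls in main() above use the builtins; the curried variants from toolz.curried, used throughout the other examples on this page, compose the same way inside pipe. A small sketch, assuming the players tuple defined above:

from toolz import pipe
from toolz.curried import filter, map

# curried filter/map stay partially applied until pipe feeds them the data
number_23 = pipe(players, filter(lambda p: p.number == '23'), tuple)
full_names = pipe(players, map(lambda p: p.first + ' ' + p.last), tuple)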
Example #6
def fetch(content, prefix):
    return {
        "parts":
        pipe(
            parse("$..layers").find(content),
            mapcat(lambda m: m.value),
            filter(lambda v: v["exportOptions"]["exportFormats"]),
            filter(lambda v: re.match(prefix, v["name"])),
            map(lambda v: glom(
                v,
                {
                    "key":
                    "name",
                    "layout": (
                        "frame",
                        {
                            "left": ("x", round),
                            "top": ("y", round),
                            "width": ("width", round),
                            "height": ("height", round),
                        },
                    ),
                },
            )),
            sorted(key=lambda p: p["key"]),
            list,
        )
    }
Example #7
def genome(file_pattern):
    if os.path.basename(file_pattern).split('.')[-1] == "gz":
        gzopen = tz.curry(gzip.open)
        result = tz.pipe(file_pattern, glob, sorted, c.map(gzopen(mode='rt')),
                         tz.concat, c.filter(is_sequence), tz.concat,
                         c.filter(is_nucleotide))
    else:
        result = tz.pipe(file_pattern, glob, sorted, c.map(open), tz.concat,
                         c.filter(is_sequence), tz.concat,
                         c.filter(is_nucleotide))
    return result
def get_sequence(path_to_files):
    """Stream a genome, letter by letter, from a list of FASTA filenames."""
    return tz.pipe(
        path_to_files,
        cur.map(fasta_reader),
        tz.concat,
        cur.filter(is_sequence),
        # concatenate characters from all lines
        tz.concat,
        # discard newlines and 'N'
        cur.filter(is_nucleotide))
def genome_gz(file_pattern):
    """Stream a genome, letter by letter, from a list of FASTA filenames."""
    return tz.pipe(
        file_pattern,
        glob,
        sorted,
        # filenames -> lines of each file
        cur.map(gzopen(mode='rt')),
        # concatenate lines from all files:
        tz.concat,
        # drop the header line from each sequence:
        cur.filter(is_sequence),
        # concatenate characters from all lines
        tz.concat,
        # discard newlines and 'N'
        cur.filter(is_nucleotide))
Example #10
def get_tenant_metrics(tenant_id, scaling_groups, servers, _print=False):
    """
    Produce per-group metrics for all the groups of a tenant

    :param list scaling_groups: Tenant's scaling groups as dict from CASS
    :param dict servers: Servers from Nova grouped based on scaling group ID.
                         Expects only ACTIVE or BUILD servers
    :return: ``list`` of (tenantId, groupId, desired, actual) GroupMetrics
    """
    if _print:
        print('processing tenant {} with groups {} and servers {}'.format(
              tenant_id, len(scaling_groups), len(servers)))
    metrics = []
    for group in scaling_groups:
        group_id = group['groupId']
        create_metrics = partial(GroupMetrics, tenant_id,
                                 group_id, group['desired'])
        if group_id not in servers:
            metrics.append(create_metrics(0, 0))
        else:
            active = len(list(filter(lambda s: s['status'] == 'ACTIVE',
                                     servers[group_id])))
            metrics.append(
                create_metrics(active, len(servers[group_id]) - active))
    return metrics
Example #11
def fancify_summary(expr):
    """ Separate a complex summary into two pieces

    Helps pandas compute_by on summaries

    >>> t = symbol('t', 'var * {x: int, y: int}')
    >>> one, two, three = fancify_summary(summary(a=t.x.sum(), b=t.x.sum() + t.y.count() - 1))

    A simpler summary with only raw reductions
    >>> one
    summary(x_sum=sum(t.x), y_count=count(t.y))

    A mapping of those names to new leaves to use in another computation
    >>> two  # doctest: +SKIP
    {'x_sum': x_sum, 'y_count': y_count}

    A mapping of computations to do for each column
    >>> three   # doctest: +SKIP
    {'a': x_sum, 'b': (x_sum + y_count) - 1}

    In this way, ``compute_by`` is able to do simple pandas reductions using
    groups.agg(...) and then do columnwise arithmetic afterwards.
    """
    seen_names.clear()
    name_dict.clear()
    exprs = pipe(expr.values, map(Expr._traverse), concat, filter(lambda x: isinstance(x, Reduction)), set)
    one = summary(**dict((_name(expr), expr) for expr in exprs))

    two = dict((_name(expr), symbol(_name(expr), datashape.var * expr.dshape)) for expr in exprs)

    d = dict((expr, two[_name(expr)]) for expr in exprs)
    three = dict((name, value._subs(d)) for name, value in zip(expr.names, expr.values))

    return one, two, three
Example #12
def get_char_to_lu_phones() -> Dict[str, List[str]]:
    char_to_phones = pipe(CharPhoneTable.select(),
                          map(lambda e: (e.char, e.lu)),
                          filter(lambda e: e[0] != '' and e[1] != ''),
                          groupby(lambda e: e[0]),
                          valmap(lambda phones: [e[1] for e in phones]), dict)
    return char_to_phones
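The groupby -> valmap -> dict tail of that pipeline is a common toolz idiom; a toy, self-contained illustration (hypothetical data) of what it produces:

from toolz import pipe
from toolz.curried import filter, groupby, valmap

pairs = [('a', 'x'), ('a', 'y'), ('b', 'z'), ('', 'dropped')]
result = pipe(pairs,
              filter(lambda e: e[0] != '' and e[1] != ''),
              groupby(lambda e: e[0]),                # {'a': [('a', 'x'), ('a', 'y')], 'b': [('b', 'z')]}
              valmap(lambda es: [e[1] for e in es]),  # keep only the second element of each pair
              dict)
assert result == {'a': ['x', 'y'], 'b': ['z']}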
Example #13
def assign_link(course: Course,
                course_root: str,
                path: str,
                link_text: str = None,
                *,
                force_upload: bool = False,
                dry_run: bool = False):
    path = resolve_path(course_root, path)

    meta = maybe_markdown_from_path(path).meta
    if not meta or 'name' not in meta:
        log.error(f'Cannot read assignment name from {path}')
        return link_text or path.name

    name = meta['name']
    html_url = pipe(
        canvas.assignment.assignments(course),
        filter(lambda a: a.data['name'] == name),
        maybe_first,
        lambda a: a.data['html_url'],
    )

    link_text = link_text or name
    if html_url:
        return (f'<a title="{name}" href="{html_url}">{link_text}</a>')
    log.error(f'Could not retrieve Canvas assignment with name: {name}')
    return link_text
Example #14
 def generalise(self, example):
     closest_hyperrectangle_with_distance = self.find_closest(example)
     (closest_hyperrectangle,
      distance) = closest_hyperrectangle_with_distance
     hyperrectangle_candidate = self.extend(closest_hyperrectangle, example)
     covers_conflicting = pipe(
         self.examples,
         filter(lambda e: hyperrectangle_candidate.covers(e)),
         filter(lambda e: e.label != hyperrectangle_candidate.label),
         any,
     )
     if covers_conflicting:
         self.add_as_new_hyperrectangle(example)
     else:
         self.hyperrectangles.remove(closest_hyperrectangle)
         self.hyperrectangles.add(hyperrectangle_candidate)
Example #15
def get_char_to_xhe_shapes() -> Dict[str, List[str]]:
    char_to_shape = pipe(CharHeShapeTable.select(),
                         map(lambda e: (e.char, e.shapes)),
                         filter(lambda e: e[0] != '' and e[1] != ''),
                         groupby(lambda e: e[0]),
                         valmap(lambda e: [s[1] for s in e]), dict)
    return char_to_shape
Example #16
def ccds_to_bed(ccds_stream):
  """Convert CCDS dump to Chanjo-style BED stream.

  Main entry point for default Chanjo converter (ccds). It converts
  a sorted (start, chrom) CCDS database to the Chanjo BED-format.

  Args:
    ccds_stream (file): file handle to read CCDS lines from

  Yields:
    Interval: interval with merged block and superblock ids
  """
  return pipe(
    ccds_stream,
    filter(grep('Public')),                    # filter out Public tx
    map(text_type.rstrip),                     # strip \n and spaces
    map(split(sep='\t')),                      # split into list
    map(extract_intervals),                    # convert to Interval
    concat,                                    # flatten
    map(rename_sex_interval),                  # rename sex contigs
    partial(lazy_groupby, key=attrgetter('contig')),  # group by contig
    pluck(1),                                  # extract second item
    map(groupby(attrgetter('name'))),          # non-lazy group by id
    map(valmap(merge_related_elements)),       # group intervals
    map(itervalues),                           # extract values
    map(partial(sorted, key=attrgetter('start'))),  # sort by start pos
    concat                                     # flatten
  )
Example #17
def get_scaling_group_servers(tenant_id,
                              group_id,
                              now,
                              all_as_servers=get_all_scaling_group_servers,
                              all_servers=get_all_server_details,
                              cache_class=CassScalingGroupServersCache):
    """
    Get a group's servers, taken from the cache if it exists. If the cache
    is empty, it is updated from newly fetched servers.
    # NOTE: This function takes tenant_id even though the whole effect is
    # scoped on the tenant because cache calls require tenant_id. Should
    # they also not take tenant_id and work on the scope?

    :return: Servers as list of dicts
    :rtype: Effect
    """
    cache = cache_class(tenant_id, group_id)
    cached_servers, last_update = yield cache.get_servers(False)
    if last_update is None:
        servers = (yield all_as_servers()).get(group_id, [])
    else:
        current = yield all_servers()
        servers = mark_deleted_servers(cached_servers, current)
        servers = list(filter(server_of_group(group_id), servers))
    yield do_return(servers)
Example #18
def test_pipeline_example():
    from functools import reduce
    import operator as op

    data = range(100)
    result1 = math.sqrt(
        reduce(
            op.add,
            builtins.map(lambda x: x**2.0,
                         builtins.filter(
                             lambda x: x % 2 == 0,
                             data,
                         ))))

    from toolz.curried import filter, map, reduce
    from flowly.tz import chained

    transform = chained(
        filter(lambda x: x % 2 == 0),
        map(lambda x: x**2.0),
        reduce(op.add),
        math.sqrt,
    )

    result2 = transform(data)

    assert result1 == result2
Example #19
def filter_courses(course_iter: Iterable[Course],
                   **search_kw) -> Tuple[Course]:
    return _.pipe(
        course_iter,
        _.filter(lambda c: has_metadata(c, **search_kw)),
        tuple,
    )
Example #20
    def __call__(self, *node_ids):
        if len(node_ids) == 1 and __.is_seq(node_ids[0]):
            node_ids = tuple(node_ids[0])

        bad_ids = _.pipe(
            node_ids,
            _.filter(lambda i: i not in self),
            tuple,
        )
        if bad_ids:
            bad_str = _.pipe(bad_ids, _.map(str), ', '.join)
            log.error(f'Bad node ids: {bad_str}')

        # log.info(type(node_ids[0]))
        node_ids = set(node_ids) - set(bad_ids)
        # log.info(node_ids)
        if not node_ids:
            log.error('No ids left')
            return GraphNull

        return _.pipe(
            node_ids,
            _.map(lambda n: (n, self[n])),
            self.graph_pipe(self),
        )
Example #21
def course_from_path(api: rest.Api, path: (str, Path)) -> rest.Endpoint:
    '''Given a path, recursively search for a course.yml file, get the
    Canvas ID from it and find the course endpoint with that ID

    If either more than one or no course.yml file is found, this will
    log an error and return None.

    '''
    course_path = find_one_course_path(path)
    if course_path:
        course_data = yaml.read_yaml(course_path)
        if 'id' not in course_data:
            log.error(
                'Could not find course ID in course.yml file'
            )
            return None

        course_ep = _.pipe(
            get_courses(api()),
            _.filter(lambda c: c.data['id'] == course_data['id']),
            lcommon.maybe_first,
        )

        if not course_ep:
            log.error(
                'Could not find course in Canvas for course data:\n'
                f'{pprint.pformat(_.dissoc(course_data, "students"))}'
            )
            return None

        return course_ep
Example #22
def filter_data(field, yaml_data):
    """Extract a field of data from the YAML files.

    Args:
      field: the name of the field to extract
      yaml_data: the benchmark YAML data

    Returns:
      the filtered data from the YAML data
    """
    return pipe(
        yaml_data,
        dict,
        valmap(lambda val: val["data"]),
        valmap(filter(lambda item: item["name"].lower() == field)),
        valmap(list),
        valmap(get(0, default=None)),
        valfilter(lambda x: x is not None),
        itemmap(lambda item: (item[0], update_dict(item[1], name=item[0]))),
        lambda dict_: sorted(list(dict_.values()), key=lambda item: item["name"]),
        map(
            update_in(
                keys=["transform"],
                func=lambda x: x + [dict(expr="datum.x > 0.01", type="filter")],
            )
        ),
    )
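A toy run of the first half of that pipeline (curried valmap/valfilter from toolz.curried, hypothetical benchmark data) shows how entries that do not match the requested field are dropped per key:

from toolz import pipe
from toolz.curried import filter, valfilter, valmap

yaml_data = {
    'bench1': {'data': [{'name': 'Speed', 'x': 1}, {'name': 'Memory', 'x': 2}]},
    'bench2': {'data': [{'name': 'Memory', 'x': 3}]},
}
speed_only = pipe(
    yaml_data,
    valmap(lambda val: val['data']),
    valmap(filter(lambda item: item['name'].lower() == 'speed')),
    valmap(list),
    valfilter(lambda items: len(items) > 0),  # discard benchmarks without the field
)
assert speed_only == {'bench1': [{'name': 'Speed', 'x': 1}]}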
Example #23
def rolling_fit_opt_weights(df, opt_weights_func, look_ahead_per):
    """applies opt_weights_func to rolling window on pandas df"""
    num_rows = df.shape[0]
    p = pipe(range(num_rows),
             filter(lambda x: x + look_ahead_per < num_rows),
             map(lambda x: {df.index[x]: opt_weights_func(df.iloc[x:x+look_ahead_per+1])}))
    return pd.DataFrame(merge(p)).T
Example #24
def cols_to_word_phone_table(cols: List[str], xhe_transformer, zrm_transformer) -> WordPhoneTable:
    if len(cols) == 1:
        word = cols[0]
        priority = 1
        full = get_full(word)
    elif len(cols) == 2:
        word = cols[0]
        priority = cols[1]
        full = get_full(word)
    elif len(cols) == 2 + len(cols[0]):
        word = cols[0]
        priority = cols[1]
        full = list(filter(lambda e: len(e) > 0, [e.strip() for e in cols[2:]]))
    else:
        raise RuntimeError("word item should be: 你好 [priority n i h ao]")

    return WordPhoneTable(
        word=word, 
        full=''.join(full),
        xhe=''.join([full_to_two(e, xhe_transformer) for e in full]),
        zrm=''.join([full_to_two(e, zrm_transformer) for e in full]),
        lu="",
        priority=priority, 
        updatedt=datetime.now()
    )
Example #25
def compute_down(expr, data, **kwargs):
    """ Compile a blaze expression to a sparksql expression"""
    leaves = expr._leaves()

    # make sure we only have a single leaf node
    if len(leaves) != 1:
        raise ValueError('Must compile from exactly one root database')

    leaf, = leaves

    # field expressions on the database are Field instances with a record
    # measure whose immediate child is the database leaf
    tables = pipe(expr._subterms(), filter(istable(leaf)), list)

    # raise if we don't have tables in our database
    if not tables:
        raise ValueError('Expressions not referencing a table cannot be '
                         'compiled')

    # make new symbols for each table
    new_leaves = [symbol(t._name, t.dshape) for t in tables]

    # sub them in the expression
    expr = expr._subs(dict(zip(tables, new_leaves)))

    # compute using sqlalchemy
    scope = dict(zip(new_leaves, map(make_sqlalchemy_table, tables)))
    query = compute(expr, scope)

    # interpolate params
    compiled = literalquery(query, dialect=HiveDialect())
    return data.sql(str(compiled))
Example #26
def course_files_matching_path(course: IdResourceEndpoint, path: str):
    path = Path(path).expanduser().resolve()
    return pipe(
        files(course),
        filter(lambda f: f.data['filename'] == path.name),
        tuple,
    )
Example #27
    def get_message(log):
        activity = log.get("activity")
        if activity == "Operation":
            on_load = log.get("on_load_no_of_packages")
            off_load = log.get("off_load_no_of_packages")
            msg = (
                " and ".join(
                    filter(
                        None,
                        [
                            on_load and "Loaded {} packages".format(on_load),
                            off_load and "Unloaded {} packages".format(off_load),
                        ],
                    )
                )
                or "Operation"
            )

            return "{} at {}".format(msg, log.get("station"),)

        if activity == "Stopped":
            return "Stopped at {}".format(log.get("station"))

        if activity == "Moving":
            return "Moving to {}".format(log.get("station"))

        return activity
Example #28
 def of_type(self, type):
     return _.pipe(
         self,
         _.filter(lambda n: self.nodes[n]['type'] == type),
         _.map(lambda n: (n, self[n])),
         self.graph_pipe(self),
     )
Example #29
def compute_down(expr, data, **kwargs):
    """ Compile a blaze expression to a sparksql expression"""
    leaves = expr._leaves()

    # make sure we only have a single leaf node
    if len(leaves) != 1:
        raise ValueError('Must compile from exactly one root database')

    leaf, = leaves

    # field expressions on the database are Field instances with a record
    # measure whose immediate child is the database leaf
    tables = pipe(expr._subterms(), filter(istable(leaf)), list)

    # raise if we don't have tables in our database
    if not tables:
        raise ValueError('Expressions not referencing a table cannot be '
                         'compiled')

    # make new symbols for each table
    new_leaves = [symbol(t._name, t.dshape) for t in tables]

    # sub them in the expression
    expr = expr._subs(dict(zip(tables, new_leaves)))

    # compute using sqlalchemy
    scope = dict(zip(new_leaves, map(make_sqlalchemy_table, tables)))
    query = compute(expr, scope, return_type='native')

    # interpolate params
    compiled = literalquery(query, dialect=HiveDialect())
    return data.sql(str(compiled))
Example #30
def test_pipeline_example():
    from functools import reduce
    import operator as op

    data = range(100)
    result1 = math.sqrt(
        reduce(
            op.add,
            builtins.map(
                lambda x: x ** 2.0,
                builtins.filter(
                    lambda x: x % 2 == 0,
                    data,
                )
            )
        )
    )

    from toolz.curried import filter, map, reduce
    from flowly.tz import chained

    transform = chained(
        filter(lambda x: x % 2 == 0),
        map(lambda x: x ** 2.0),
        reduce(op.add),
        math.sqrt,
    )

    result2 = transform(data)

    assert result1 == result2
Example #31
def gene_length_df(filename):
    """Grab Gene Symbol, Gene ID, and Gene Length from a GAF file.

    Parameters
    ----------
    filename : string
        Path to a Gene Annotation Format (GAF) file.

    Returns
    -------
    gene_lengths : pandas DataFrame
        A data frame with three columns: gene symbol, gene id, and gene
        length (in bases).
    """
    with open(filename) as fin:
        header = next(fin).rstrip().split('\t')
        geneid = header.index('FeatureID')
        genelen = header.index('FeatureCoordinates')
        feattype = header.index('FeatureType')
        output = tz.pipe(fin, spliteach,
                         tz.filter(lambda x: x[feattype] == 'gene'),
                         tz.pluck([geneid, genelen]), tz.map(range2len), list)
    df = pd.DataFrame(output, columns=['GeneSymbol', 'GeneID', 'GeneLength'])
    df = df.drop_duplicates('GeneSymbol').set_index('GeneSymbol')
    return df
Example #32
def process_directory(directory, output_filename='traces.csv'):
    """Extract traces and ROIs for all .da files in a directory.

    Parameters
    ----------
    directory : string
        The directory containing the .da files to be processed.
    output_filename : string
        The name of the file to write the results to.
    """
    filenames = tz.pipe(directory, os.listdir,
                        C.filter(X.call('endswith', '.da')), sorted)
    filenames = [os.path.join(directory, fn) for fn in filenames]
    images, frame_intervals, bncs, dark_frames = unzip(
        map(read_image, filenames))
    traces, rois = unzip(map(extract_trace, images))
    with open(output_filename, 'w') as fout:
        for filename, frame_interval, trace, roi in \
                            zip(filenames, frame_intervals, traces, rois):
            line = ','.join([os.path.basename(filename),
                             str(frame_interval)] + list(map(str, trace)))
            fout.write(line + '\n')
            io.imsave(filename[:-3] + '.roi.tif',
                      roi.astype(np.uint8) * 255,
                      plugin='tifffile',
                      compress=1)
Example #33
def link_next(response):
    return maybe_pipe(
        requests.utils.parse_header_links(response.headers.get('Link', '')),
        filter(lambda d: d.get('rel', '').lower() == 'next'),
        maybe_first,
        lambda d: d['url'],
    )
Example #34
 def composer(self, tokens):
     return compose(*pipe(
         tokens, reversed, filter(first), map(
             lambda arg: partial(arg[0], *arg[1], **arg[2]) 
                 if any(arg[1:]) else arg[0]
         ), list
     ))
def get_groups(parsed, store, conf):
    """
    Return groups based on argument provided

    :param Namespace parsed: arguments parsed
    :param store: Otter scaling group collection
    :param dict conf: config

    :return: Deferred fired with list of {"tenantId": .., "groupId": ..} dict
    """
    log = mock_log()
    if parsed.group:
        groups = [g.split(":") for g in parsed.group]
        return succeed(
            [{"tenantId": tid, "groupId": gid} for tid, gid in groups])
    elif parsed.all:
        d = store.get_all_valid_groups()
    elif parsed.tenant_id:
        d = get_groups_of_tenants(log, store, parsed.tenant_id)
    elif parsed.disabled_tenants:
        non_conv_tenants = conf["non-convergence-tenants"]
        d = store.get_all_valid_groups()
        d.addCallback(
            filter(lambda g: g["tenantId"] not in set(non_conv_tenants)))
        d.addCallback(list)
    elif parsed.conf_conv_tenants:
        d = get_groups_of_tenants(log, store, conf["convergence-tenants"])
    else:
        raise SystemExit("Unexpected group selection")
    return d
Example #36
def ngram_counts(tokens,
                 min_len=1,
                 max_len=None,
                 transform=" ".join,
                 in_vocabulary=lambda _: True):
    """
    Compute n-gram counts using toolz and Counter

    :param tokens: Iterable[str]
    :param min_len: int Minimum N-Gram size
    :param max_len: int Maximum N-Gram size
    :param transform: Callable[[Tuple[str, ...]], str] Function transforming an n-gram tuple into a key
    :param in_vocabulary: Callable[[str], bool] Should token be preserved
    :return: Dict[str, int]
    """
    tokens = list(tokens)
    wc = len(tokens)
    max_len = (max_len if max_len else wc) + 1
    return (
        wc,
        pipe(
            everygrams(tokens, min_len=min_len, max_len=max_len),
            map(transform),
            filter(in_vocabulary),
            frequencies,
        ),
    )
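A short, hypothetical usage of ngram_counts, assuming everygrams comes from nltk.util and that frequencies, pipe, and the curried map/filter come from toolz, as the snippet implies:

tokens = "to be or not to be".split()
wc, counts = ngram_counts(tokens, min_len=1, max_len=2)
# wc == 6; counts maps space-joined n-grams to how often they occur,
# e.g. counts["to be"] == 2 and counts["be"] == 2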
Example #37
def page_link(course: Course,
              course_root: str,
              path: str,
              link_text: str = None,
              ref: str = None,
              *,
              force_upload: bool = False,
              dry_run: bool = False):
    ref = f'#{ref}' if ref else ''
    path = resolve_path(course_root, path)

    meta = maybe_markdown_from_path(path).meta
    if not meta or 'title' not in meta:
        log.error(f'Cannot read page title from {path}')
        return link_text or path.name

    title = meta['title']
    html_url = pipe(
        canvas.page.pages(course),
        filter(lambda p: p.data['title'] == title),
        maybe_first,
        lambda p: p.data['html_url'],
    )

    link_text = link_text or title
    if html_url:
        return (f'<a title="{title}" href="{html_url}{ref}">{link_text}</a>')
    log.error(f'Could not retrieve Canvas page with title: {title}')
    return link_text
Example #38
def get_groups_to_converge(config_func):
    """
    Get all groups, across all tenants, that need convergence triggering
    """
    eff = Effect(GetAllValidGroups())
    eff = eff.on(
        filter(lambda g: tenant_is_enabled(g["tenantId"], config_func)))
    return eff.on(list)
def load_tgvH_file():
    json_file_name = 'tgvH.json'
    nodes = read_ast_json_file(json_file_name)
    variables_definitions = filter(
        lambda node: node['type'].startswith('variable_'),
        nodes,
        )
    return variables_definitions
Example #40
    def __dir__(self):
        result = dir(type(self))
        if isrecord(self.dshape.measure) or isinstance(self.dshape.measure, datashape.Map) and self.fields:
            result.extend(map(valid_identifier, self.fields))

        result.extend(toolz.merge(schema_methods(self.dshape.measure), dshape_methods(self.dshape)))

        return sorted(set(filter(isvalid_identifier, result)))
Example #41
def functional():
  return count_by(itemgetter('hour'),
                  map(json.loads,
                      filter(None,
                             mapcat(lambda output: output.strip().split('\n'),
                                    map(lambda date: logs[date.strftime('%Y/%m/%d')],
                                        map(lambda days_ago: today - timedelta(days=days_ago),
                                            range(1, days_of_logs + 1)))))))
Example #42
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB` objects
    mapped on LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node)
                for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)}
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((
        list(filter(bool, nodes)),
        pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
Example #43
def piped():
  return (_| range(1, days_of_logs + 1)
           | map(lambda days_ago: today - timedelta(days=days_ago))
           | map(lambda date: logs[date.strftime('%Y/%m/%d')])
           | mapcat(lambda output: output.strip().split('\n'))
           | filter(None)
           | map(json.loads)
           | count_by(itemgetter('hour'))
           |_)
def visit_ternary_operator(node):
    return pipe([
        visit_node(node['value_if_true']),
        visit_node(node['condition']),
        visit_node(node['value_if_false']) if 'value_if_false' in node else None,
        ],
        filter(None),
        concat,
        )
Example #45
def discover_jsonlines(j, n=10, encoding='utf-8', **kwargs):
    with json_lines(j.path, encoding=encoding) as lines:
        data = pipe(lines, filter(nonempty), map(json.loads), take(n), list)

    if len(data) < n:
        ds = discover(data)
    else:
        ds = var * discover(data).subshape[0]
    return date_to_datetime_dshape(ds)
Example #46
    def __dir__(self):
        result = dir(type(self))
        if isrecord(self.dshape.measure) and self.fields:
            result.extend(list(map(valid_identifier, self.fields)))

        d = toolz.merge(schema_methods(self.dshape.measure),
                        dshape_methods(self.dshape))
        result.extend(list(d))

        return sorted(set(filter(isvalid_identifier, result)))
 def get_label_predictions(predictions_list, all_labels, label):
     def count_predictions(filtered_predictions_list, target_label):
         return pipe(
             filtered_predictions_list,
             filter(lambda pair: pair[1] == target_label),
             list,
             len
         )
     filtered_predictions = pipe(
         predictions_list,
         filter(lambda pair: pair[0] == label)
     )
     count_predictions_partial = \
         partial(count_predictions, list(filtered_predictions))
     return pipe(
         all_labels,
         map(lambda target:
             {target: count_predictions_partial(target)}),
         map(pmap),
         merge,
         pmap
     )
def parse_violations(do_request):
    """"""
    logger.info('Parsing violations')

    return toolz.compose(
        # filter out meaningless values
        curried.filter(lambda x: x not in ('IME PREDPISA', '')),
        # extract data from each row
        curried.map(lambda tr: pq(tr).find('td').eq(1).text()),
        # get all rows in tables
        curried.mapcat(lambda page: page('table.MsoNormalTable tr')),
        # get all subpages
        curried.map(do_request),
        # let's skip empty urls/strings
        curried.filter(lambda a: a),
        # get menu links
        curried.map(lambda a: pq(a).attr('href')),
        # get menu elements
        lambda doc: doc('.moduletable_menu a'),
        # get main page
        do_request,
    )(VIOLATION_URL + '/index.php')
Example #49
def process(text):
    """ Replace failures in docstring with results """
    parts = pipe(text, parser.parse,
                       filter(None),
                       map(separate_fence),
                       concat, list)

    scope = dict()  # scope of variables in our executed environment
    state = dict()  # state of pymarkdown traversal

    out_parts = list()
    for part in parts:
        out, scope, state = step(part, scope, state)
        out_parts.extend(out)

    head = '\n'.join(sorted(state.get('headers', set())))
    body = pipe(out_parts, map(render_part),
                           filter(None),
                           '\n'.join)
    foot = '\n\n'.join(state.get('footers', []))

    return '\n\n'.join([head, body, foot]).strip()
Example #50
def get_scaling_group_servers(tenant_id, authenticator, service_name, region,
                              server_predicate=None, clock=None):
    """
    Return tenant's servers that belong to a scaling group as
    {group_id: [server1, server2]} ``dict``. No specific ordering is guaranteed

    :param server_predicate: `callable` taking a single server as argument and
                              returning True if the server should be included,
                              False otherwise
    """

    def has_group_id(s):
        return 'metadata' in s and 'rax:auto_scaling_group_id' in s['metadata']

    def group_id(s):
        return s['metadata']['rax:auto_scaling_group_id']

    server_predicate = server_predicate if server_predicate is not None else lambda s: s
    servers_apply = compose(groupby(group_id), filter(server_predicate), filter(has_group_id))

    d = get_all_server_details(tenant_id, authenticator, service_name, region, clock=clock)
    d.addCallback(servers_apply)
    return d
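The compose(groupby(...), filter(...), filter(...)) chain above is applied right to left; a simplified, hypothetical sketch with fake server records (toolz compose plus curried groupby/filter, with a single combined predicate standing in for the two filters):

from toolz import compose
from toolz.curried import filter, groupby

servers = [
    {'id': 1, 'metadata': {'rax:auto_scaling_group_id': 'g1'}},
    {'id': 2, 'metadata': {'rax:auto_scaling_group_id': 'g1'}},
    {'id': 3, 'metadata': {}},  # no scaling-group id -> excluded
]

def group_of(s):
    return s['metadata'].get('rax:auto_scaling_group_id')

servers_apply = compose(groupby(group_of),
                        filter(lambda s: group_of(s) is not None))
assert servers_apply(servers) == {'g1': [servers[0], servers[1]]}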
    def tokenize_and_filter_perc_func(allowed_parts_of_speech, given_text):
        """Return the tokens in the given text that are the allowed parts
        of speech

        This version uses the faster PerceptronTagger"""
        return tz.pipe(
            given_text,
            lambda x: TextBlob(x, pos_tagger=PerceptronTagger()),
            lambda x: x.tags,
            tz.filter(lambda x: x[1] in allowed_parts_of_speech), 
                # limit to allowed parts of speech
            tz.map(lambda x: x[0]), # return only the token
            list, 
        )
    def tokenize_and_filter_nltk_func(allowed_parts_of_speech, given_text):
        """Return the tokens in the given text that are the allowed parts
        of speech

        This version uses the recommended tagger from NLTK, it is relatively
        slow."""
        return tz.pipe(
            given_text,
            nltk.word_tokenize,
            lambda x: nltk.pos_tag(x),
            tz.filter(lambda x: x[1] in allowed_parts_of_speech), 
                # limit to allowed parts of speech
            tz.map(lambda x: x[0]), # return only the token
            list, 
            print_and_pass)
def next_page(page):
    try:
        return next(page.execute_page_transition_yield(
            lambda x: or_pipe(x,
                _.find_elements_by_class_name("CMpaginate"),
                _.find_elements_by_class_name("a-last"),
                default=[]
            ),
            _[0],
            _.find_elements_by_tag_name("a"),
            filter(lambda x: u"次" in x.text),
            list,
        )(_.click()))
    except Exception:
        return None
def get_posterior_probs_freq(num_words, all_streams_count_dict, this_stream_count_dict):
    """Return the posterior probabilities for the num_words most frequent tokens
    in this_stream_count_dict"""
    occurrence_minimum = 5 # the number of times a token must occur to be included
    return tz.pipe(
        get_top_tokens(num_words, this_stream_count_dict),
        tz.filter(lambda x: all_streams_count_dict[x[0]] >= occurrence_minimum),
        tz.map(lambda x: {
            'token': x[0], 
            'occurrences': x[1],
            'posterior': calculate_posterior(
                all_streams_count_dict,
                this_stream_count_dict,
                x[0])}),
        lambda x: sorted(x, key=lambda y: -y['posterior']))
Example #55
def get_all_scaling_group_servers(changes_since=None,
                                  server_predicate=identity):
    """
    Return tenant's servers that belong to any scaling group as
    {group_id: [server1, server2]} ``dict``. No specific ordering is guaranteed

    :param datetime changes_since: Get server since this time. Must be UTC
    :param server_predicate: function of server -> bool that determines whether
        the server should be included in the result.
    :return: dict mapping group IDs to lists of Nova servers.
    """

    def has_group_id(s):
        return 'metadata' in s and isinstance(s['metadata'], dict)

    def group_id(s):
        return group_id_from_metadata(s['metadata'])

    servers_apply = compose(keyfilter(lambda k: k is not None),
                            groupby(group_id),
                            filter(server_predicate),
                            filter(has_group_id))

    return get_all_server_details(changes_since).on(servers_apply)
Example #56
def get_all_metrics(dispatcher, tenanted_groups, log, _print=False,
                    get_all_metrics_effects=get_all_metrics_effects):
    """
    Gather server data and produce metrics for all groups across all tenants
    in a region.

    :param dispatcher: An Effect dispatcher.
    :param dict tenanted_groups: Scaling Groups grouped on tenantid
    :param bool _print: Should the function print while processing?

    :return: ``list`` of `GroupMetrics` as `Deferred`
    """
    effs = get_all_metrics_effects(tenanted_groups, log, _print=_print)
    d = _perform_limited_effects(dispatcher, effs, 10)
    d.addCallback(filter(lambda x: x is not None))
    return d.addCallback(lambda x: reduce(operator.add, x, []))
Example #57
def get_by_uuid(uuid, path='.'):
    """Get a Treant by short ID

    Args:
      uuid: a portion of the uuid
      path: the search path for Treants

    Returns:
      a Treant

    """
    return pipe(
        path,
        dtr.discover,
        list,
        filter(lambda x: uuid in x.uuid),
        list,
        get(0, default=None)
    )
Example #58
def annotate_bed_stream(bed_stream, bam_path, cutoff=10, extension=0,
                        contig_prefix='', bp_threshold=17000):
  """Annotate all intervals from a BED-file stream.

  Yields tuple data for each interval with calculated coverage and
  completeness.

  Args:
    bed_stream (sequence): usually a BED-file handle to read from
    bam_path (str): path to BAM-file
    cutoff (int, optional): threshold for completeness calculation,
      defaults to 10
    extension (int, optional): number of bases to extend each interval
      with (+/-), defaults to 0
    contig_prefix (str, optional): rename contigs by prefixing,
      defaults to empty string
    bp_threshold (int, optional): optimization threshold for reading
      BAM-file in chunks, default to 17000

  Yields:
    tuple: :class:`chanjo.BaseInterval`, coverage (float), and
      completeness (float)
  """
  # setup: connect to BAM-file
  bam = BamFile(bam_path)

  # the pipeline
  return pipe(
    bed_stream,
    filter(complement(comment_sniffer)),         # filter out comments
    map(text_type.rstrip),                       # strip invisible chars.
    map(prefix(contig_prefix)),                  # prefix to contig
    map(split(sep='\t')),                        # split lines
    map(do(validate_bed_format)),                # check correct format
    map(lambda row: bed_to_interval(*row)),      # convert to objects
    map(extend_interval(extension=extension)),   # extend intervals
    group_intervals(bp_threshold=bp_threshold),  # group by threshold
    map(process_interval_group(bam)),            # read coverage
    concat,                                      # flatten list of lists
    map(calculate_metrics(threshold=cutoff))     # calculate cov./compl.
  )
Example #59
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)
    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
                for lb_id, nodes in zip(lb_ids, all_nodes)}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node
    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))