Example #1
def test_predict_directory(datafiles, tmp_path) -> None:
    cli_inference.predict_directory(
        datafiles,
        tmp_path,
        pattern="fse",
        bs=1,
        image_formats=[".jpg"],
        model_id="flyswot/convnext-tiny-224_flyswot",
    )
    csv_file = list(tmp_path.rglob("*.csv"))
    assert csv_file
    with open(csv_file[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            assert row["path"]
            assert row["directory"]
        columns = defaultdict(list)
    with open(csv_file[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)

        for row in reader:
            for (k, v) in row.items():
                columns[k].append(v)
        assert any("prediction" in k for k in columns)
        labels = [columns[k] for k in columns if "prediction" in k]
        confidences = [columns[k] for k in columns if "confidence" in k]
        # check all labels are strings
        assert all(
            map(lambda x: isinstance(x, str), (itertoolz.concat(labels))))
        # check all confidences can be cast to float
        assert all(
            map(
                lambda x: isinstance(x, float),
                map(lambda x: float(x), (itertoolz.concat(confidences))),
            ))
Example #2
def get_groups(parsed, store, conf):
    """
    Return groups based on the arguments provided.

    :param Namespace parsed: parsed arguments
    :param store: Otter scaling group collection
    :param dict conf: config

    :return: Deferred fired with a list of {"tenantId": .., "groupId": ..} dicts
    """
    log = mock_log()
    if parsed.group:
        groups = [g.split(":") for g in parsed.group]
        return succeed([{"tenantId": tid, "groupId": gid} for tid, gid in groups])
    elif parsed.all:
        d = store.get_all_groups()
        d.addCallback(lambda tgs: concat(tgs.values()))
    elif parsed.tenant_id:
        d = get_groups_of_tenants(log, store, parsed.tenant_id)
    elif parsed.disabled_tenants:
        non_conv_tenants = conf["non-convergence-tenants"]
        d = store.get_all_groups()
        d.addCallback(keyfilter(lambda k: k not in set(non_conv_tenants)))
        d.addCallback(lambda tgs: concat(tgs.values()))
    elif parsed.conf_conv_tenants:
        d = get_groups_of_tenants(log, store, conf["convergence-tenants"])
    else:
        raise SystemExit("Unexpected group selection")
    return d
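
As a side note, the parsed.group branch expects each group argument as a "tenantId:groupId" string; a toy illustration of that parsing (values made up):

groups = [g.split(":") for g in ["tenant1:group1", "tenant1:group2"]]
print([{"tenantId": tid, "groupId": gid} for tid, gid in groups])
# [{'tenantId': 'tenant1', 'groupId': 'group1'}, {'tenantId': 'tenant1', 'groupId': 'group2'}]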
Example #3
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB` objects
    mapped on LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node)
                for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)}
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((
        list(filter(bool, nodes)),
        pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
Example #4
    def __new__(mcs, name, bases, namespace, normalizers=None):
        all_bases = set(concat(base.__mro__ for base in bases))
        for key in namespace:
            verify_key_attr = verify_attr(name, key)
            verify_key_attr(concat(base.__dict__.keys() for base in all_bases))

        if normalizers:
            processed_namespace = web3.utils.formatters.apply_formatters_to_dict(
                normalizers,
                namespace)
        else:
            processed_namespace = namespace

        return super().__new__(mcs, name, bases, processed_namespace)
Example #5
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""

    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([]))) for lb_id in lb_ids
    ]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id for (node, feed) in nodes_to_feeds.items()
        if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
Example #6
def parse_passport(entries):
    entries = concat(map(lambda s: s.split(), chain(entries)))
    passport = {
        k: v
        for (k, v) in map(lambda entry: entry.split(":"), entries)
    }
    return passport
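
For context, the entries here appear to be whitespace-separated "key:value" tokens; a minimal standalone sketch (toy input, assuming toolz.concat) of what the pipeline produces:

from itertools import chain
from toolz import concat

entries = ["ecl:gry pid:860033327", "hcl:#fffffd byr:1937"]
tokens = concat(map(lambda s: s.split(), chain(entries)))
print({k: v for k, v in map(lambda e: e.split(":"), tokens)})
# {'ecl': 'gry', 'pid': '860033327', 'hcl': '#fffffd', 'byr': '1937'}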
Example #7
def cycles(seq, n=1):
    """ Cycles through the sequence n-times.

    Basically the same as ``itertools.cycle`` except that this sets
    an upper limit on how many cycles will be done.

    Note:

        If ``n`` is `None`, this is identical to ``itertools.cycle``.

    Args:

        seq(iterable):           The sequence to grab items from.
        n(integral):             Number of times to cycle through.

    Returns:

        generator:               The cycled sequence generator.

    Examples:

        >>> list(cycles([1, 2, 3], 2))
        [1, 2, 3, 1, 2, 3]
    """

    if n is None:
        return (itertools.cycle(seq))

    assert (n >= 0), "n must be non-negative, but got n = " + repr(n)
    assert ((n % 1) == 0), "n must be an integer, but got n = " + repr(n)

    return concat(itertools.tee(seq, n))
Example #8
    def disperse_helper(b, part_seq_1):
        if b != 0:
            half_diff = float(b) / 2.0

            mid_1 = int(math.floor(half_diff))
            mid_2 = int(math.ceil(half_diff))

            if 0 < mid_1 and b > mid_2:
                part_seq_1, part_seq_2 = itertools.tee(part_seq_1)

                front_mid_1_seq, mid_1_val, _ = split(mid_1, part_seq_1)
                _, mid_2_val, back_mid_2_seq = split(mid_2, part_seq_2)
                del _

                mid_2_val = itertools.tee(mid_2_val)
                back_mid_2_seq = concat([mid_2_val[0], back_mid_2_seq])
                mid_2_val = mid_2_val[1]

                yield (first(mid_2_val))

                for _1, _2 in zip(disperse_helper(mid_1 - 0, front_mid_1_seq),
                                  disperse_helper(b - mid_2, back_mid_2_seq)):
                    yield (_2)
                    yield (_1)

                if mid_1 != mid_2:
                    yield (first(mid_1_val))
Example #9
def _log_remove_from_clb(steps):
    lbs = groupby(lambda s: s.lb_id, steps)
    effs = [
        cf_msg('convergence-remove-clb-nodes',
               lb_id=lb, nodes=sorted(concat(s.node_ids for s in lbsteps)))
        for lb, lbsteps in sorted(lbs.iteritems())]
    return parallel(effs)
Example #10
def duplicate(seq, n=1):
    """ Gets each element multiple times.

    Like ``itertools.repeat`` this will repeat each element n-times.
    However, it will do this for each element of the sequence.

    Args:

         seq(iterable):           The sequence to grab items from.
         n(integral):             Number of repeats for each element.

    Returns:

         generator:               A generator of repeated elements.

    Examples:

         >>> list(duplicate([1, 2, 3], 2))
         [1, 1, 2, 2, 3, 3]
    """

    assert (n >= 0), "n must be non-negative, but got n = " + repr(n)
    assert ((n % 1) == 0), "n must be an integer, but got n = " + repr(n)

    return concat(map(lambda _: itertools.repeat(_, n), seq))
Example #11
def _log_bulk_rcv3(event, steps):
    by_lbs = groupby(lambda s: s[0], concat(s.lb_node_pairs for s in steps))
    effs = [
        cf_msg(event, lb_id=lb_id, servers=sorted(p[1] for p in pairs))
        for lb_id, pairs in sorted(by_lbs.iteritems())
    ]
    return parallel(effs)
Example #12
    def _inherit_parent_cmd(self, change):
        """ Inherit config-related stuff from up the cmd-chain. """
        if self.parent:
            ## Collect parents, ordered like that:
            #    subapp, self, parent1, ...
            #
            cmd_chain = self.my_cmd_chain()

            ## Collect separately and merge  SPECs separately,
            #  to prepend them before SPECs at the end.
            #
            conf_classes = list(
                itz.concat(cmd.conf_classes for cmd in cmd_chain))

            ## Merge aliases/flags reversed.
            #
            cmd_aliases = dtz.merge(cmd.cmd_aliases for cmd in cmd_chain[::-1])
            cmd_flags = dtz.merge(cmd.cmd_flags for cmd in cmd_chain[::-1])
        else:
            ## We are root.

            cmd_chain = [self]
            conf_classes = list(self.conf_classes)
            cmd_aliases = self.cmd_aliases
            cmd_flags = self.cmd_flags

        cmd_classes = [type(cmd) for cmd in cmd_chain]
        self.classes = list(iset(cmd_classes + conf_classes))
        self.aliases.update(cmd_aliases)
        self.flags.update(cmd_flags)
Example #13
def formFeatureMatrix(heroIDs, match):
    currentHeroAmount = len(heroIDs) + 1
    result = match['radiant_win']  # True if radiant won
    teams = groupby('team', match['players'])
    dire = teams['D']
    radiant = teams['R']

    # Dire is first, then Radiant

    matchVector = []
    for player in dire:
        matchVector.append(player['hero_id'])
    for player in radiant:
        matchVector.append(player['hero_id'])

    matchVector.append(result)

    finalVector = list(concat([(2 * currentHeroAmount) * [0], [0]]))
    for direPick in matchVector[:5]:
        normalizeDirePick = direPick - 1
        finalVector[normalizeDirePick] = 1
    for radiantPick in matchVector[5:10]:
        normalizeRadiantPick = currentHeroAmount + (radiantPick - 1)
        finalVector[normalizeRadiantPick] = 1

    if result > 0:
        finalVector[-1] = 0  # dire lost aka radiant won
    else:
        finalVector[-1] = 1  # radiant lost aka dire won

    return finalVector
Example #14
 def optimize_steps(clb_steps):
     steps_by_lb = groupby(lambda s: s.lb_id, clb_steps)
     return [
         step_class(**{
             'lb_id': lb_id,
             attr_name: pset(concat(getattr(s, attr_name) for s in steps))})
         for lb_id, steps in steps_by_lb.iteritems()
     ]
Example #15
def sample(stream, key, limit):
    items = ijson.items(sys.stdin, 'item')

    classes = groupby(key, items)
    samples = valmap(
        lambda xs: random.sample(xs, limit
                                 if len(xs) >= limit else len(xs)), classes)
    sample = list(concat(samples.values()))
    return sample
Example #16
def _log_remove_from_clb(steps):
    lbs = groupby(lambda s: s.lb_id, steps)
    effs = [
        cf_msg('convergence-remove-clb-nodes',
               lb_id=lb,
               nodes=sorted(concat(s.node_ids for s in lbsteps)))
        for lb, lbsteps in sorted(lbs.iteritems())
    ]
    return parallel(effs)
Example #17
def _log_bulk_rcv3(event, steps):
    by_lbs = groupby(lambda s: s[0], concat(s.lb_node_pairs for s in steps))
    effs = [
        cf_msg(event,
               lb_id=lb_id,
               servers=sorted(p[1] for p in pairs))
        for lb_id, pairs in sorted(by_lbs.iteritems())
    ]
    return parallel(effs)
Example #18
 def output_csv(self):
     out = list(concat([i.prep_csv_out() for i in self.compositions]))
     df = pd.DataFrame.from_records(out, columns=self.cfg["col_names"])
     if self.filename:
         df.to_csv(str(self.filename) + ".csv", index=False)    
     else:
         # set filename in case self.save_txt == True, both files should have the same name
         self.filename = f"results_{arrow.now().format('YYYYMMDD_HH:mm:ss')}"
         df.to_csv(self.filename + ".csv", index=False)
     self.logger.debug(f"Finished saving results as .csv")
Example #19
def resort(files):
    """
    make Readme and PDF files appear first in file list
    """
    def is_text(f):
        kw = ['.pdf', '.txt', '.docx', 'README', 'readme', 'Readme', 'ReadMe']
        return any(map(lambda k: k in f['filename'], kw))

    text = filter(is_text, files)
    data = remove(lambda x: x in text, files)
    return tuple(concat([text, data]))
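
Note that under Python 3 filter() returns an iterator, so the "x in text" membership test would exhaust it on first use; a minimal Python 3 variant (toy file dicts, not from the source project) materializes text first:

from toolz import concat, remove

files = [{'filename': 'data.bin'}, {'filename': 'README.md'}, {'filename': 'img.png'}]

def is_text(f):
    kw = ['.pdf', '.txt', '.docx', 'README', 'readme', 'Readme', 'ReadMe']
    return any(k in f['filename'] for k in kw)

text = list(filter(is_text, files))              # materialize before membership tests
data = list(remove(lambda x: x in text, files))
print(tuple(concat([text, data])))
# ({'filename': 'README.md'}, {'filename': 'data.bin'}, {'filename': 'img.png'})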
Example #20
 def check_invalid_nodes(exc_info):
     code = exc_info[1].code
     body = exc_info[1].body
     if code == 400:
         message = try_json_with_keys(body, ["validationErrors", "messages", 0])
         if message is not None:
             match = _CLB_NODE_REMOVED_PATTERN.match(message)
             if match:
                 removed = concat([group.split(",") for group in match.groups()])
                 return remove_clb_nodes(lb_id, set(node_ids) - set(removed))
     six.reraise(*exc_info)
Example #21
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)
    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
                for lb_id, nodes in zip(lb_ids, all_nodes)}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node
    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
Example #22
def limit_steps_by_count(steps, step_limits):
    """
    Limits step count by type.

    :param steps: An iterable of steps.
    :param step_limits: A dict mapping step classes to their maximum allowable
        count. Classes not present in this dict have no limit.
    :return: The input steps, with each limited type truncated to its maximum count
    :rtype: pbag
    """
    return pbag(concat(typed_steps[:step_limits.get(cls)]
                       for (cls, typed_steps)
                       in groupby(type, steps).iteritems()))
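
The "no limit" behaviour relies on slicing with a None bound keeping the whole list; a minimal Python 3 sketch of the same idea without pyrsistent (toy step values):

from toolz import concat, groupby

def limit_by_type(steps, step_limits):
    # seq[:None] returns the whole sequence, so types absent from step_limits are unlimited
    return list(concat(typed[:step_limits.get(cls)]
                       for cls, typed in groupby(type, steps).items()))

print(limit_by_type([1, 2, 3, 'a', 'b'], {int: 1}))  # [1, 'a', 'b']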
Example #23
 def check_invalid_nodes(exc_info):
     code = exc_info[1].code
     body = exc_info[1].body
     if code == 400:
         message = try_json_with_keys(body,
                                      ["validationErrors", "messages", 0])
         if message is not None:
             match = _CLB_NODE_REMOVED_PATTERN.match(message)
             if match:
                 removed = concat(
                     [group.split(',') for group in match.groups()])
                 return remove_clb_nodes(lb_id,
                                         set(node_ids) - set(removed))
     six.reraise(*exc_info)
Example #24
def train_idf(tokens_stream, **kwargs):
    """train a IDF model on a list of files"""

    # we don't care about frequency, just unique tokens
    idfs = [set(tokens) for tokens in tokens_stream]
    N = len(idfs)  # n docs
    idf = Counter(concat(idfs))

    for k, v in idf.items():
        idf[k] = math.log(N / v)
        # v ~= N/(math.e ** idf[k])

    # Keep track of N to update IDFs
    idf['_n_docs'] = N
    return idf
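
As a rough worked example (toy documents, standard library plus toolz only), the same computation yields log(N / document-frequency) per token:

from collections import Counter
from math import log
from toolz import concat

docs = [["the", "cat"], ["the", "dog"]]
df = Counter(concat(set(d) for d in docs))            # document frequency per token
idf = {k: log(len(docs) / v) for k, v in df.items()}
print(idf)  # 'the' -> 0.0, 'cat' and 'dog' -> ~0.693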
Example #26
def apply_sort(data, sort_keys):
    # Data is a list to be sorted. Sort_keys is a list of tuples (key, reverse)
    # where key is a dict key in a list item, and reverse says whether to sort
    # in reverse order or not. (i.e. False for ascending, True for descending)
    if not sort_keys:
        return data
    else:
        # Parse the first sort_key
        if isinstance(sort_keys[0], string_types):
            key = sort_keys
            reverse = False
        else:
            key, reverse = sort_keys[0]

        remaining_sort_keys = sort_keys[1:]

        # Sort into groups by this key
        groups = groupby(itemgetter(key), data)

        try:
            key_sample = next((k for k in groups.keys() if k is not None))
        except StopIteration:
            key_sample = None

        if key_sample is None:
            key_fn = lambda _: True
        elif isinstance(key_sample, string_types):
            key_fn = lambda s: s.lower() if s is not None else ''
        elif isinstance(key_sample, bool):
            key_fn = bool
        elif isinstance(key_sample, numbers.Number):
            key_fn = lambda n: n if n is not None else 0
        else:
            # Unknown, so we'll just use ident
            key_fn = lambda x: x

        sorted_indices = sorted(list(groups.keys()),
                                key=key_fn,
                                reverse=reverse)

        # Sort each group by remaining keys, and concat them together in an
        # order sorted by this key.
        return list(
            concat(
                apply_sort(groups[index], remaining_sort_keys)
                for index in sorted_indices))
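
The core move here is group by key, sort the group keys, then concat the groups back in that order; a self-contained sketch with toy rows:

from operator import itemgetter
from toolz import concat, groupby

rows = [{'name': 'b', 'n': 2}, {'name': 'a', 'n': 2}, {'name': 'c', 'n': 1}]
groups = groupby(itemgetter('n'), rows)                 # {2: [...], 1: [...]}
print(list(concat(groups[k] for k in sorted(groups))))
# [{'name': 'c', 'n': 1}, {'name': 'b', 'n': 2}, {'name': 'a', 'n': 2}]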
Example #27
def spinner(inner, outer, repeats):
    "Make a path that repeats a flat part and a curve. The shape is 11 points long and repeats, with an overlap on the first point"
    all_innerpoints = inner # + inner[0:1]
    all_outerpoints = outer # + outer[0:1]
    shape = []
    start = "M %f,%f" % outer[-1]
    shape.append(start)
    for offset in range(repeats):
        offset_value = offset * 11
        innerpoints = all_innerpoints[offset * 11:(offset+1) * 11]
        outerpoints = all_outerpoints[offset * 11:(offset+1) * 11]
        cap = "L %f,%f" % outerpoints[2]
        shape.append(cap)
        curve_points = concat([innerpoints[5], innerpoints[7], outerpoints[10]]) 
        curve = "C %f,%f %f,%f %f,%f" % tuple(curve_points)
        shape.append(curve)
    return " ".join(shape)
Example #28
def sample(dataset, key, limit):
    by_id = groupby(
        lambda x:
        (x['id'], x['make'], x['model'], x['seller'], x['color'], x['year']),
        dataset)
    classes = groupby(key, by_id.items())

    def sample_images(xs):
        ads = random.sample(xs, limit if len(xs) >= limit else len(xs))
        images = mapcat(second, ads)
        return list(take(limit, images))

    samples = map(sample_images, classes.values())

    sample = concat(samples)

    return sample
Example #29
def pad(seq, before=0, after=0, fill=None):
    """ Pads a sequence by a fill value before and/or after.

    Pads the sequence before and after using the fill value provided
    by ``fill`` up to the lengths specified by ``before`` and
    ``after``. If either ``before`` or ``after`` is ``None``, pad
    the fill value infinitely on the respective end.

    Note:
        If ``before`` is ``None``, the sequence will only be the fill
        value.

    Args:

        seq(iterable):          Sequence to pad.
        before(integral):       Amount to pad before.
        after(integral):        Amount to pad after.
        fill(any):              Some value to pad with.

    Returns:

        iterable:               A sequence that has been padded.

    Examples:

        >>> list(pad(range(2, 4), before=1, after=2, fill=0))
        [0, 2, 3, 0, 0]

    """

    all_seqs = []

    if before is None:
        return itertools.repeat(fill)
    elif before > 0:
        all_seqs.append(itertools.repeat(fill, before))

    all_seqs.append(seq)

    if after is None:
        all_seqs.append(itertools.repeat(fill))
    elif after > 0:
        all_seqs.append(itertools.repeat(fill, after))

    return concat(all_seqs)
Example #30
def fetch_quote(codes, is_index=False, n=800):
    """股票代码或指数列表报价.

    Args:
        codes (list-like): 代码列表
        is_index (bool, optional): 是否为指数代码. Defaults to False.
        n (int, optional): 每批请求代码数量. Defaults to 800.

    Returns:
        list of dictionary: 报价列表字典
    """
    url_fmt = 'http://api.money.126.net/data/feed/{}'
    codes = ensure_list(codes)
    b_codes = partition_all(n, codes)
    urls = [url_fmt.format(','.join([_query_code(code, is_index)
                                     for code in batch])) for batch in b_codes]
    with ThreadPoolExecutor(MAX_WORKER) as executor:
        docs = executor.map(_fetch_quote, urls)
        return concat(docs)
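
The batching pattern here splits the codes into chunks of n with partition_all and flattens the per-batch results with concat; a toy illustration of the split/flatten pair:

from toolz import concat, partition_all

codes = ['000001', '000002', '000003', '000004', '000005']
batches = list(partition_all(2, codes))   # [('000001', '000002'), ('000003', '000004'), ('000005',)]
print(list(concat(batches)) == codes)     # True: flattening restores the original order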
Example #31
def get_all_convergence_data(
        tenant_id,
        group_id,
        now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all data relevant for convergence w.r.t given time,
    in parallel where possible.

    Returns an Effect of ([NovaServer], [LBNode]).
    """
    eff = parallel(
        [get_scaling_group_servers(tenant_id, group_id, now)
         .on(map(NovaServer.from_server_details_json)).on(list),
         get_clb_contents(),
         get_rcv3_contents()]
    ).on(lambda (servers, clb, rcv3): (servers, list(concat([clb, rcv3]))))
    return eff
Example #32
    def __dl_over_time(self):
        """
        get a sorted dictionary with month as key
        """
        # XXX can we do this more functional?
        def update_dic(obj):
            dates = annotations(obj)
            for key in dates.keys():
                count = len(dates[key])
                dl[key]['Sum'] += count
                dl[key][obj.portal_type] += count
                dl[key]['new'] += self.__was_new(dates[key], obj.created())

        annotations = lambda obj: IAnnotations(obj)['hbxt.clickdates']
        dates = map(annotations, self.get_clickdates_objects())
        keyse = concat(map(lambda date: map(lambda k: k, date.keys()), dates))
        dl = {k: {'Sum': 0, 'new': 0, 'JournalPaper': 0,
            'DiscussionPaper': 0} for k in keyse}
        map(update_dic, self.get_clickdates_objects())
        return keymap(add_leading_zero, dl)
Example #33
def optimize_steps(steps):
    """
    Optimize steps.

    Currently only optimizes per step type. See the :func:`_optimizer`
    decorator for more information on how to register an optimizer.

    :param pbag steps: Collection of steps.
    :return: a pbag of steps.
    """
    def grouping_fn(step):
        step_type = type(step)
        if step_type in _optimizers:
            return step_type
        else:
            return "unoptimizable"

    steps_by_type = groupby(grouping_fn, steps)
    unoptimizable = steps_by_type.pop("unoptimizable", [])
    omg_optimized = concat(_optimizers[step_type](steps)
                           for step_type, steps in steps_by_type.iteritems())
    return pbag(concatv(omg_optimized, unoptimizable))
Example #34
def get_all_launch_server_data(
        tenant_id,
        group_id,
        now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all launch_server data relevant for convergence w.r.t given time,
    in parallel where possible.

    Returns an Effect of {'servers': [NovaServer], 'lb_nodes': [LBNode]}.
    """
    eff = parallel([
        get_scaling_group_servers(tenant_id, group_id, now).on(
            map(NovaServer.from_server_details_json)).on(list),
        get_clb_contents(),
        get_rcv3_contents()
    ]).on(lambda (servers, clb, rcv3): {
        'servers': servers,
        'lb_nodes': list(concat([clb, rcv3]))
    })
    return eff
Example #35
def test_concat():
    assert list(concat([[], [], []])) == []
    assert (list(take(5, concat([['a', 'b'], range(1000000000)]))) ==
            ['a', 'b', 0, 1, 2])
Example #36
 def optimize_steps(rcv3_steps):
     return [
         step_class(
             lb_node_pairs=pset(
                 concat(s.lb_node_pairs for s in rcv3_steps)))
     ]
Example #37
                compose(list, partial(take, args.predictions_limit))),
            ujson.load(args.dataset_file)))

    sections = groupby(lambda x: tuple(map(x.get, ['make', 'model'])),
                       dataset).items()

    evaluation_base_url = f'https://storage.cloud.google.com/dev_visual_search/evaluations/output/by-id/{args.evaluation_id}'

    def link_to_page(key):
        if key is None:
            return None
        make, model = key
        return f'{evaluation_base_url}/prediction-{make}-{model}.html'

    for prev, current, next in sliding_window(
            3, cons(None, concat([sections, [None]]))):
        key, section = current
        make, model = key

        prev_key, _ = prev if prev is not None else (None, None)
        next_key, _ = next if next is not None else (None, None)

        page = to_page(
            section, {
                'prev': link_to_page(prev_key),
                'parent': '',
                'next': link_to_page(next_key)
            }, {
                'title': f'Prediction report for {make} / {model}',
                'evaluation_id': args.evaluation_id,
                'image_base_path': args.image_base_path,
Example #38
 def __iter__(self):
     yield from concat(self.fn(partition) for partition in self.partitions)
Example #39
    def _process(self, device_id, device_path):
        """Pipeline to format/prepare the usb device."""
        device = device_path
        log = getLogger('%s.%s' % (__name__, device)).info
        partition = '%s1' % device
        tmp_mount = os.path.join(self._data['tmp_mount'],
                                 hex(abs(hash(device))))
        sudo = self._data['sudo']  # type: Command

        # possible cleanup later
        self._tmp_mounts.append(tmp_mount)

        # yapf: disable
        # ~~
        def do_umount(max_attempts=5):
            for attempt in range(max_attempts):
                log('looking for device mount %s (%d/%d)',
                    partition,
                    attempt + 1,
                    max_attempts)
                time.sleep(0.75)
                try:
                    grep(df('-h'), partition)
                except ErrorReturnCode:
                    pass
                else:
                    try:
                        sudo.umount(partition)
                    except ErrorReturnCode:
                        time.sleep(0.5)
                    else:
                        return

        do_umount()

        log('scrubbing partition table')
        sudo.dd('if=/dev/zero', 'of=' + device, 'bs=4k', 'count=1000')
        sync()

        log('creating partition table')
        flow = [
            'g',  # GPT partition table
            'n',  # new partition
            '1',  # number "1"
            '',  # <first sector default>
            '',  # <last sector default>
            '',  # <>
            'w'  # <write>
        ]
        sudo.fdisk(device, _in=pipe(flow))
        sync()

        do_umount()
        time.sleep(1.0)

        log('creating new filesystem')
        sudo.partprobe(device)

        sudo.mkfs(
            '--type=ext4',
            'discard',
            '-b',  # block size
            '4096',
            '-L',  # label
            self._data['label'],
            partition)

        self.scan_clone_dirs()

        log('copying contents from directories')
        mkdir(tmp_mount)
        sudo.mount(partition, tmp_mount)
        for path in self._data['clone']:
            sudo.rsync(
                '--verbose',
                '--archive',
                '--copy-links',
                '--keep-dirlinks',
                '--checksum',
                '--whole-file',
                '--no-perms',
                '--no-owner',
                '--no-group',
                '--omit-dir-times',
                *list(concat([
                    ('--exclude', '*%s' % ext) for ext in self._data['exclude']])),
                path,
                tmp_mount)
        sync()

        success = self.validate_hashes(tmp_mount)
        log('validation success? %s', success)
        if not success:
            self.on_unsuccessful_copy(device_id, device_path)

        # yapf: enable
        log('cleaning up')
        sudo.umount(partition)
        sudo.rm('-rf', tmp_mount)
        sudo.eject(device)
        log('done')

        if success:
            self.on_successful_copy(device_id, device_path)
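
The --exclude handling in the rsync call above is a common flag-pair flattening trick; in isolation (toy extensions):

from toolz import concat

exclude = ['.iso', '.tmp']
args = list(concat(('--exclude', '*%s' % ext) for ext in exclude))
print(args)  # ['--exclude', '*.iso', '--exclude', '*.tmp']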
Example #40
def _reduce(*mapped):
    """ Reduce worker """
    return list(concat(mapped))