def test_predict_directory(datafiles, tmp_path) -> None:
    cli_inference.predict_directory(
        datafiles,
        tmp_path,
        pattern="fse",
        bs=1,
        image_formats=[".jpg"],
        model_id="flyswot/convnext-tiny-224_flyswot",
    )
    csv_file = list(tmp_path.rglob("*.csv"))
    assert csv_file
    with open(csv_file[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            assert row["path"]
            assert row["directory"]
    columns = defaultdict(list)
    with open(csv_file[0], newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            for (k, v) in row.items():
                columns[k].append(v)
    assert any("prediction" in k for k in columns)
    labels = [columns[k] for k in columns if "prediction" in k]
    confidences = [columns[k] for k in columns if "confidence" in k]
    # check all labels are strings
    assert all(map(lambda x: isinstance(x, str), itertoolz.concat(labels)))
    # check all confidences can be cast to float
    assert all(
        map(
            lambda x: isinstance(x, float),
            map(lambda x: float(x), itertoolz.concat(confidences)),
        )
    )
def get_groups(parsed, store, conf):
    """
    Return groups based on argument provided

    :param Namespace parsed: arguments parsed
    :param store: Otter scaling group collection
    :param dict conf: config

    :return: Deferred fired with list of {"tenantId": .., "groupId": ..} dict
    """
    log = mock_log()
    if parsed.group:
        groups = [g.split(":") for g in parsed.group]
        return succeed(
            [{"tenantId": tid, "groupId": gid} for tid, gid in groups])
    elif parsed.all:
        d = store.get_all_groups()
        d.addCallback(lambda tgs: concat(tgs.values()))
    elif parsed.tenant_id:
        d = get_groups_of_tenants(log, store, parsed.tenant_id)
    elif parsed.disabled_tenants:
        non_conv_tenants = conf["non-convergence-tenants"]
        d = store.get_all_groups()
        d.addCallback(keyfilter(lambda k: k not in set(non_conv_tenants)))
        d.addCallback(lambda tgs: concat(tgs.values()))
    elif parsed.conf_conv_tenants:
        d = get_groups_of_tenants(log, store, conf["convergence-tenants"])
    else:
        raise SystemExit("Unexpected group selection")
    return d
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB`
    objects mapped on LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node
    # in DRAINING with draining time of 0; we should just say that the node
    # is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)}
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((
        list(filter(bool, nodes)),
        pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
def __new__(mcs, name, bases, namespace, normalizers=None):
    all_bases = set(concat(base.__mro__ for base in bases))
    for key in namespace:
        verify_key_attr = verify_attr(name, key)
        verify_key_attr(concat(base.__dict__.keys() for base in all_bases))

    if normalizers:
        processed_namespace = web3.utils.formatters.apply_formatters_to_dict(
            normalizers,
            namespace)
    else:
        processed_namespace = namespace

    return super().__new__(mcs, name, bases, processed_namespace)
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node
    # in DRAINING with draining time of 0; we should just say that the node
    # is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([]))) for lb_id in lb_ids
    ]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
def parse_passport(entries):
    entries = concat(map(lambda s: s.split(), chain(entries)))
    passport = {
        k: v for (k, v) in map(lambda entry: entry.split(":"), entries)
    }
    return passport
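# Illustrative usage sketch (added; not part of the original source). It
# assumes module-level `from itertools import chain` and `from toolz import
# concat`, which the call sites above imply: whitespace-separated "key:value"
# tokens from several entry strings are flattened into a single dict.
example_entries = ["ecl:gry pid:860033327", "eyr:2020 hcl:#fffffd"]
assert parse_passport(example_entries) == {
    "ecl": "gry", "pid": "860033327", "eyr": "2020", "hcl": "#fffffd",
}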
def cycles(seq, n=1):
    """
    Cycles through the sequence n-times.

    Basically the same as ``itertools.cycle`` except that this sets an upper
    limit on how many cycles will be done.

    Note:
        If ``n`` is `None`, this is identical to ``itertools.cycle``.

    Args:
        seq(iterable): The sequence to grab items from.
        n(integral): Number of times to cycle through.

    Returns:
        generator: The cycled sequence generator.

    Examples:
        >>> list(cycles([1, 2, 3], 2))
        [1, 2, 3, 1, 2, 3]
    """
    if n is None:
        return itertools.cycle(seq)

    assert n >= 0, "n must be non-negative, but got n = " + repr(n)
    assert (n % 1) == 0, "n must be an integer, but got n = " + repr(n)

    return concat(itertools.tee(seq, n))
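# Added note (a minimal sketch, assuming `concat` is toolz.concat as above):
# with ``n=None`` the helper defers to ``itertools.cycle``, so the result is
# infinite and should be capped, e.g. with ``islice``, when sampling it.
assert list(itertools.islice(cycles([1, 2, 3], None), 5)) == [1, 2, 3, 1, 2]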
def disperse_helper(b, part_seq_1):
    if b != 0:
        half_diff = float(b) / 2.0

        mid_1 = int(math.floor(half_diff))
        mid_2 = int(math.ceil(half_diff))

        if 0 < mid_1 and b > mid_2:
            part_seq_1, part_seq_2 = itertools.tee(part_seq_1)

            front_mid_1_seq, mid_1_val, _ = split(mid_1, part_seq_1)
            _, mid_2_val, back_mid_2_seq = split(mid_2, part_seq_2)
            del _

            mid_2_val = itertools.tee(mid_2_val)
            back_mid_2_seq = concat([mid_2_val[0], back_mid_2_seq])
            mid_2_val = mid_2_val[1]

            yield first(mid_2_val)

            for _1, _2 in zip(disperse_helper(mid_1 - 0, front_mid_1_seq),
                              disperse_helper(b - mid_2, back_mid_2_seq)):
                yield _2
                yield _1

            if mid_1 != mid_2:
                yield first(mid_1_val)
def _log_remove_from_clb(steps):
    lbs = groupby(lambda s: s.lb_id, steps)
    effs = [cf_msg('convergence-remove-clb-nodes',
                   lb_id=lb,
                   nodes=sorted(concat(s.node_ids for s in lbsteps)))
            for lb, lbsteps in sorted(lbs.iteritems())]
    return parallel(effs)
def duplicate(seq, n=1):
    """
    Gets each element multiple times.

    Like ``itertools.repeat`` this will repeat each element n-times. However,
    it will do this for each element of the sequence.

    Args:
        seq(iterable): The sequence to grab items from.
        n(integral): Number of repeats for each element.

    Returns:
        generator: A generator of repeated elements.

    Examples:
        >>> list(duplicate([1, 2, 3], 2))
        [1, 1, 2, 2, 3, 3]
    """
    assert n >= 0, "n must be non-negative, but got n = " + repr(n)
    assert (n % 1) == 0, "n must be an integer, but got n = " + repr(n)

    return concat(map(lambda _: itertools.repeat(_, n), seq))
def _log_bulk_rcv3(event, steps):
    by_lbs = groupby(lambda s: s[0], concat(s.lb_node_pairs for s in steps))
    effs = [
        cf_msg(event, lb_id=lb_id, servers=sorted(p[1] for p in pairs))
        for lb_id, pairs in sorted(by_lbs.iteritems())
    ]
    return parallel(effs)
def _inherit_parent_cmd(self, change):
    """
    Inherit config-related stuff from up the cmd-chain.
    """
    if self.parent:
        ## Collect parents, ordered like this:
        #  subapp, self, parent1, ...
        #
        cmd_chain = self.my_cmd_chain()

        ## Collect SPECs separately and merge them separately,
        #  so they are prepended before the SPECs at the end.
        #
        conf_classes = list(
            itz.concat(cmd.conf_classes for cmd in cmd_chain))

        ## Merge aliases/flags reversed.
        #
        cmd_aliases = dtz.merge(cmd.cmd_aliases for cmd in cmd_chain[::-1])
        cmd_flags = dtz.merge(cmd.cmd_flags for cmd in cmd_chain[::-1])
    else:
        ## We are root.
        cmd_chain = [self]
        conf_classes = list(self.conf_classes)
        cmd_aliases = self.cmd_aliases
        cmd_flags = self.cmd_flags

    cmd_classes = [type(cmd) for cmd in cmd_chain]
    self.classes = list(iset(cmd_classes + conf_classes))
    self.aliases.update(cmd_aliases)
    self.flags.update(cmd_flags)
def formFeatureMatrix(heroIDs, match):
    currentHeroAmount = len(heroIDs) + 1
    result = match['radiant_win']  # True if radiant won
    teams = groupby('team', match['players'])
    dire = teams['D']
    radiant = teams['R']

    # Dire is first, then Radiant
    matchVector = []
    for player in dire:
        matchVector.append(player['hero_id'])
    for player in radiant:
        matchVector.append(player['hero_id'])
    matchVector.append(result)

    # feature layout: dire hero one-hot | radiant hero one-hot | dire-won label
    finalVector = list(concat([(2 * currentHeroAmount) * [0], [0]]))
    for direPick in matchVector[:5]:
        normalizeDirePick = direPick - 1
        finalVector[normalizeDirePick] = 1
    for radiantPick in matchVector[5:10]:
        normalizeRadiantPick = currentHeroAmount + (radiantPick - 1)
        finalVector[normalizeRadiantPick] = 1
    if result > 0:
        finalVector[-1] = 0  # radiant won, so dire lost
    else:
        finalVector[-1] = 1  # label is 1 when dire won
    return finalVector
def optimize_steps(clb_steps):
    steps_by_lb = groupby(lambda s: s.lb_id, clb_steps)
    return [
        step_class(**{
            'lb_id': lb_id,
            attr_name: pset(concat(getattr(s, attr_name) for s in steps))})
        for lb_id, steps in steps_by_lb.iteritems()
    ]
def sample(stream, key, limit):
    # read from the passed-in stream rather than hard-coding sys.stdin
    items = ijson.items(stream, 'item')
    classes = groupby(key, items)
    samples = valmap(
        lambda xs: random.sample(xs, limit if len(xs) >= limit else len(xs)),
        classes)
    sample = list(concat(samples.values()))
    return sample
def _log_remove_from_clb(steps):
    lbs = groupby(lambda s: s.lb_id, steps)
    effs = [
        cf_msg('convergence-remove-clb-nodes',
               lb_id=lb,
               nodes=sorted(concat(s.node_ids for s in lbsteps)))
        for lb, lbsteps in sorted(lbs.iteritems())
    ]
    return parallel(effs)
def output_csv(self):
    out = list(concat([i.prep_csv_out() for i in self.compositions]))
    df = pd.DataFrame.from_records(out, columns=self.cfg["col_names"])
    if self.filename:
        df.to_csv(str(self.filename) + ".csv", index=False)
    else:
        # set filename so that, if self.save_txt is True, both output files
        # share the same name
        self.filename = f"results_{arrow.now().format('YYYYMMDD_HH:mm:ss')}"
        df.to_csv(self.filename + ".csv", index=False)
    self.logger.debug("Finished saving results as .csv")
def resort(files):
    """Make README and PDF files appear first in the file list."""
    def is_text(f):
        kw = ['.pdf', '.txt', '.docx', 'README', 'readme', 'Readme', 'ReadMe']
        return any(map(lambda k: k in f['filename'], kw))

    # materialise the filter so the membership test below does not consume it
    text = list(filter(is_text, files))
    data = remove(lambda x: x in text, files)
    return tuple(concat([text, data]))
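# Hypothetical example (added): README and PDF entries float to the front
# while other files keep their relative order. Assumes `remove` and `concat`
# come from toolz, as the function above implies.
example_files = [{"filename": "data.csv"},
                 {"filename": "README.md"},
                 {"filename": "paper.pdf"}]
assert resort(example_files) == ({"filename": "README.md"},
                                 {"filename": "paper.pdf"},
                                 {"filename": "data.csv"})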
def check_invalid_nodes(exc_info):
    code = exc_info[1].code
    body = exc_info[1].body
    if code == 400:
        message = try_json_with_keys(
            body, ["validationErrors", "messages", 0])
        if message is not None:
            match = _CLB_NODE_REMOVED_PATTERN.match(message)
            if match:
                removed = concat(
                    [group.split(",") for group in match.groups()])
                return remove_clb_nodes(lb_id, set(node_ids) - set(removed))
    six.reraise(*exc_info)
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node
    # in DRAINING with draining time of 0; we should just say that the node
    # is gone).
    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
                for lb_id, nodes in zip(lb_ids, all_nodes)}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
def limit_steps_by_count(steps, step_limits):
    """
    Limits step count by type.

    :param steps: An iterable of steps.
    :param step_limits: A dict mapping step classes to their maximum allowable
        count. Classes not present in this dict have no limit.

    :return: The input steps, with each limited type capped at its maximum.
    :rtype: pbag
    """
    return pbag(concat(typed_steps[:step_limits.get(cls)]
                       for (cls, typed_steps)
                       in groupby(type, steps).iteritems()))
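# A minimal sketch (added, not from the source) of the slicing trick above:
# ``some_list[:None]`` returns the whole list, so step types absent from the
# limits dict are left uncapped. Names are hypothetical; assumes toolz's
# `concat`/`groupby`.
from toolz import concat, groupby

steps_example = ["a1", "a2", "a3", "b1"]
limits_example = {"a": 2}  # cap "a"-steps at two, leave "b"-steps unlimited
capped = list(concat(group[:limits_example.get(kind)]
                     for kind, group
                     in groupby(lambda s: s[0], steps_example).items()))
assert capped == ["a1", "a2", "b1"]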
def check_invalid_nodes(exc_info):
    code = exc_info[1].code
    body = exc_info[1].body
    if code == 400:
        message = try_json_with_keys(
            body, ["validationErrors", "messages", 0])
        if message is not None:
            match = _CLB_NODE_REMOVED_PATTERN.match(message)
            if match:
                removed = concat(
                    [group.split(',') for group in match.groups()])
                return remove_clb_nodes(lb_id, set(node_ids) - set(removed))
    six.reraise(*exc_info)
def train_idf(tokens_stream, **kwargs):
    """Train an IDF model on a stream of token lists."""
    # we don't care about frequency, just unique tokens
    idfs = [set(tokens) for tokens in tokens_stream]
    N = len(idfs)  # n docs
    idf = Counter(concat(idfs))
    for k, v in idf.items():
        idf[k] = math.log(N / v)  # v ~= N/(math.e ** idf[k])
    # Keep track of N to update IDFs
    idf['_n_docs'] = N
    return idf
def train_idf(tokens_stream, **kwargs):
    """Train an IDF model on a stream of token lists."""
    # we don't care about frequency, just unique tokens
    idfs = [set(tokens) for tokens in tokens_stream]
    N = len(idfs)  # n docs
    idf = Counter(concat(idfs))
    for k, v in idf.items():
        idf[k] = math.log(N/v)  # v ~= N/(math.e ** idf[k])
    # Keep track of N to update IDFs
    idf['_n_docs'] = N
    return idf
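# Illustrative call (added), assuming `Counter` from collections, `math`, and
# toolz's `concat` as used above: two documents sharing the token "b".
example_idf = train_idf([["a", "b"], ["b", "c"]])
assert example_idf["_n_docs"] == 2
assert example_idf["b"] == math.log(2 / 2)  # in every doc -> weight 0.0
assert example_idf["a"] == math.log(2 / 1)  # in a single doc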
def apply_sort(data, sort_keys):
    # Data is a list to be sorted. Sort_keys is a list of tuples (key, reverse)
    # where key is a dict key in a list item, and reverse says whether to sort
    # in reverse order or not. (i.e. False for ascending, True for descending)
    if not sort_keys:
        return data
    else:
        # Parse the first sort_key
        if isinstance(sort_keys[0], string_types):
            # a bare string means "sort ascending on this key"
            key = sort_keys[0]
            reverse = False
        else:
            key, reverse = sort_keys[0]
        remaining_sort_keys = sort_keys[1:]

        # Sort into groups by this key
        groups = groupby(itemgetter(key), data)
        try:
            key_sample = next((k for k in groups.keys() if k is not None))
        except StopIteration:
            key_sample = None

        if key_sample is None:
            key_fn = lambda _: True
        elif isinstance(key_sample, string_types):
            key_fn = lambda s: s.lower() if s is not None else ''
        elif isinstance(key_sample, bool):
            key_fn = bool
        elif isinstance(key_sample, numbers.Number):
            key_fn = lambda n: n if n is not None else 0
        else:
            # Unknown, so we'll just use ident
            key_fn = lambda x: x

        sorted_indices = sorted(list(groups.keys()), key=key_fn,
                                reverse=reverse)

        # Sort each group by remaining keys, and concat them together in an
        # order sorted by this key.
        return list(
            concat(
                apply_sort(groups[index], remaining_sort_keys)
                for index in sorted_indices))
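# Hypothetical usage (added): a two-level sort, age ascending then name
# ascending; per key_fn above, string keys compare case-insensitively.
# Assumes toolz `groupby`/`concat`, `operator.itemgetter`, and
# `six.string_types`, as the function above implies.
rows = [{"name": "Bob", "age": 40},
        {"name": "alice", "age": 25},
        {"name": "Carol", "age": 25}]
assert apply_sort(rows, [("age", False), ("name", False)]) == [
    {"name": "alice", "age": 25},
    {"name": "Carol", "age": 25},
    {"name": "Bob", "age": 40},
]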
def spinner(inner, outer, repeats):
    """Make a path that repeats a flat part and a curve.

    The shape is 11 points long and repeats, with an overlap on the first
    point.
    """
    all_innerpoints = inner  # + inner[0:1]
    all_outerpoints = outer  # + outer[0:1]
    shape = []
    start = "M %f,%f" % outer[-1]
    shape.append(start)
    for offset in range(repeats):
        offset_value = offset * 11
        innerpoints = all_innerpoints[offset_value:offset_value + 11]
        outerpoints = all_outerpoints[offset_value:offset_value + 11]
        cap = "L %f,%f" % outerpoints[2]
        shape.append(cap)
        curve_points = concat([innerpoints[5], innerpoints[7],
                               outerpoints[10]])
        curve = "C %f,%f %f,%f %f,%f" % tuple(curve_points)
        shape.append(curve)
    return " ".join(shape)
def sample(dataset, key, limit):
    by_id = groupby(
        lambda x: (x['id'], x['make'], x['model'], x['seller'], x['color'],
                   x['year']),
        dataset)
    classes = groupby(key, by_id.items())

    def sample_images(xs):
        ads = random.sample(xs, limit if len(xs) >= limit else len(xs))
        images = mapcat(second, ads)
        return list(take(limit, images))

    samples = map(sample_images, classes.values())
    sample = concat(samples)
    return sample
def pad(seq, before=0, after=0, fill=None):
    """
    Pads a sequence by a fill value before and/or after.

    Pads the sequence before and after using the fill value provided by
    ``fill`` up to the lengths specified by ``before`` and ``after``. If
    either ``before`` or ``after`` is ``None``, pad the fill value infinitely
    on the respective end.

    Note:
        If ``before`` is ``None``, the sequence will only be the fill value.

    Args:
        seq(iterable): Sequence to pad.
        before(integral): Amount to pad before.
        after(integral): Amount to pad after.
        fill(any): Some value to pad with.

    Returns:
        iterable: A sequence that has been padded.

    Examples:
        >>> list(pad(range(2, 4), before=1, after=2, fill=0))
        [0, 2, 3, 0, 0]
    """
    all_seqs = []

    if before is None:
        return itertools.repeat(fill)
    elif before > 0:
        all_seqs.append(itertools.repeat(fill, before))

    all_seqs.append(seq)

    if after is None:
        all_seqs.append(itertools.repeat(fill))
    elif after > 0:
        all_seqs.append(itertools.repeat(fill, after))

    return concat(all_seqs)
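# Added sketch: with ``after=None`` the padding is infinite, so slice the
# result before materialising it (assumes `concat` is toolz.concat).
assert list(itertools.islice(pad([7], after=None, fill=0), 4)) == [7, 0, 0, 0]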
def fetch_quote(codes, is_index=False, n=800):
    """Fetch quotes for a list of stock or index codes.

    Args:
        codes (list-like): list of codes
        is_index (bool, optional): whether the codes are index codes.
            Defaults to False.
        n (int, optional): number of codes per request batch. Defaults to 800.

    Returns:
        list of dictionary: list of quote dicts
    """
    url_fmt = 'http://api.money.126.net/data/feed/{}'
    codes = ensure_list(codes)
    b_codes = partition_all(n, codes)
    urls = [url_fmt.format(','.join([_query_code(code, is_index)
                                     for code in batch]))
            for batch in b_codes]
    with ThreadPoolExecutor(MAX_WORKER) as executor:
        docs = executor.map(_fetch_quote, urls)
        return concat(docs)
def get_all_convergence_data(
        tenant_id, group_id, now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all data relevant for convergence w.r.t given time,
    in parallel where possible.

    Returns an Effect of ([NovaServer], [LBNode]).
    """
    eff = parallel(
        [get_scaling_group_servers(tenant_id, group_id, now)
         .on(map(NovaServer.from_server_details_json)).on(list),
         get_clb_contents(),
         get_rcv3_contents()]
    ).on(lambda (servers, clb, rcv3): (servers, list(concat([clb, rcv3]))))
    return eff
def __dl_over_time(self):
    """ get a sorted dictionary with month as key """
    # XXX can we do this more functional?
    def update_dic(obj):
        dates = annotations(obj)
        for key in dates.keys():
            count = len(dates[key])
            dl[key]['Sum'] += count
            dl[key][obj.portal_type] += count
            dl[key]['new'] += self.__was_new(dates[key], obj.created())

    annotations = lambda obj: IAnnotations(obj)['hbxt.clickdates']
    dates = map(annotations, self.get_clickdates_objects())
    keyse = concat(map(lambda date: map(lambda k: k, date.keys()), dates))
    dl = {k: {'Sum': 0, 'new': 0, 'JournalPaper': 0, 'DiscussionPaper': 0}
          for k in keyse}
    map(update_dic, self.get_clickdates_objects())
    return keymap(add_leading_zero, dl)
def optimize_steps(steps):
    """
    Optimize steps.

    Currently only optimizes per step type. See the :func:`_optimizer`
    decorator for more information on how to register an optimizer.

    :param pbag steps: Collection of steps.
    :return: a pbag of steps.
    """
    def grouping_fn(step):
        step_type = type(step)
        if step_type in _optimizers:
            return step_type
        else:
            return "unoptimizable"

    steps_by_type = groupby(grouping_fn, steps)
    unoptimizable = steps_by_type.pop("unoptimizable", [])
    omg_optimized = concat(_optimizers[step_type](steps)
                           for step_type, steps in steps_by_type.iteritems())
    return pbag(concatv(omg_optimized, unoptimizable))
def get_all_launch_server_data(
        tenant_id, group_id, now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all launch_server data relevant for convergence w.r.t given time,
    in parallel where possible.

    Returns an Effect of {'servers': [NovaServer], 'lb_nodes': [LBNode]}.
    """
    eff = parallel([
        get_scaling_group_servers(tenant_id, group_id, now).on(
            map(NovaServer.from_server_details_json)).on(list),
        get_clb_contents(),
        get_rcv3_contents()
    ]).on(lambda (servers, clb, rcv3): {
        'servers': servers,
        'lb_nodes': list(concat([clb, rcv3]))
    })
    return eff
def test_concat():
    assert list(concat([[], [], []])) == []
    assert (list(take(5, concat([['a', 'b'], range(1000000000)])))
            == ['a', 'b', 0, 1, 2])
def optimize_steps(rcv3_steps):
    return [
        step_class(
            lb_node_pairs=pset(
                concat(s.lb_node_pairs for s in rcv3_steps)))
    ]
compose(list, partial(take, args.predictions_limit))), ujson.load(args.dataset_file)))

sections = groupby(lambda x: tuple(map(x.get, ['make', 'model'])),
                   dataset).items()

evaluation_base_url = f'https://storage.cloud.google.com/dev_visual_search/evaluations/output/by-id/{args.evaluation_id}'


def link_to_page(key):
    if key is None:
        return None
    make, model = key
    return f'{evaluation_base_url}/prediction-{make}-{model}.html'


for prev, current, next in sliding_window(
        3, cons(None, concat([sections, [None]]))):
    key, section = current
    make, model = key
    prev_key, _ = prev if prev is not None else (None, None)
    next_key, _ = next if next is not None else (None, None)
    page = to_page(
        section, {
            'prev': link_to_page(prev_key),
            'parent': '',
            'next': link_to_page(next_key)
        }, {
            'title': f'Prediction report for {make} / {model}',
            'evaluation_id': args.evaluation_id,
            'image_base_path': args.image_base_path,
def __iter__(self):
    yield from concat(self.fn(partition) for partition in self.partitions)
def _process(self, device_id, device_path):
    """Pipeline to format/prepare the usb device."""
    device = device_path
    log = getLogger('%s.%s' % (__name__, device)).info
    partition = '%s1' % device
    tmp_mount = os.path.join(self._data['tmp_mount'], hex(abs(hash(device))))
    sudo = self._data['sudo']  # type: Command

    # possible cleanup later
    self._tmp_mounts.append(tmp_mount)

    # yapf: disable
    # ~~
    def do_umount(max_attempts=5):
        for attempt in range(max_attempts):
            log('looking for device mount %s (%d/%d)',
                partition, attempt + 1, max_attempts)
            time.sleep(0.75)
            try:
                grep(df('-h'), partition)
            except ErrorReturnCode:
                pass
            else:
                try:
                    sudo.umount(partition)
                except ErrorReturnCode:
                    time.sleep(0.5)
                else:
                    return

    do_umount()

    log('scrubbing partition table')
    sudo.dd('if=/dev/zero', 'of=' + device, 'bs=4k', 'count=1000')
    sync()

    log('creating partition table')
    flow = [
        'g',  # GPT partition table
        'n',  # new partition
        '1',  # number "1"
        '',   # <first sector default>
        '',   # <last sector default>
        '',   # <>
        'w',  # <write>
    ]
    sudo.fdisk(device, _in=pipe(flow))
    sync()
    do_umount()
    time.sleep(1.0)

    log('creating new filesystem')
    sudo.partprobe(device)
    sudo.mkfs(
        '--type=ext4',
        'discard',
        '-b',  # block size
        '4096',
        '-L',  # label
        self._data['label'],
        partition)

    self.scan_clone_dirs()

    log('copying contents from directories')
    mkdir(tmp_mount)
    sudo.mount(partition, tmp_mount)
    for path in self._data['clone']:
        sudo.rsync(
            '--verbose',
            '--archive',
            '--copy-links',
            '--keep-dirlinks',
            '--checksum',
            '--whole-file',
            '--no-perms',
            '--no-owner',
            '--no-group',
            '--omit-dir-times',
            *list(concat([
                ('--exclude', '*%s' % ext)
                for ext in self._data['exclude']])),
            path,
            tmp_mount)
    sync()

    success = self.validate_hashes(tmp_mount)
    log('validation success? %s', success)
    if not success:
        self.on_unsuccessful_copy(device_id, device_path)
    # yapf: enable

    log('cleaning up')
    sudo.umount(partition)
    sudo.rm('-rf', tmp_mount)
    sudo.eject(device)
    log('done')

    if success:
        self.on_successful_copy(device_id, device_path)
def _reduce(*mapped):
    """ Reduce worker """
    return list(concat(mapped))
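# Trivial usage sketch (added): the reduce worker just flattens whatever the
# map stage produced (assumes `concat` is toolz.concat).
assert _reduce([1, 2], [3], []) == [1, 2, 3]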