def shash(*args: Any, hash_algorithm: Optional[Any] = None, str_encoding: str = "utf-8") -> str:
    """Stable hash of heterogeneous arguments.

    Each argument is normalized to :obj:`bytes` before being fed to the
    hash object: bytes pass through, strings are encoded, and any other
    value is converted through ``repr()`` (iterables/mappings are
    flattened first so nesting does not affect the digest).

    Args:
        *args: values to fold into the digest, in order.
        hash_algorithm: object implementing the `hash algorithm`
            interface from :mod:`hashlib` (``update()``/``hexdigest()``).
            Defaults to a fresh ``xxhash.xxh64()``.
        str_encoding: encoding use by :func:`str.encode()` to generate a
            :obj:`bytes` object for hashing.

    Returns:
        Hex digest of all arguments, as a string.
    """
    if hash_algorithm is None:
        hash_algorithm = xxhash.xxh64()
    for item in args:
        if not isinstance(item, bytes):
            if not isinstance(item, str):
                # Test Mapping *before* Iterable: every Mapping is also an
                # Iterable, so the original Iterable-first ordering made the
                # Mapping branch unreachable and hashed only the dict keys.
                if isinstance(item, collections.abc.Mapping):
                    item = flatten(item.items())
                elif isinstance(item, collections.abc.Iterable):
                    item = flatten(item)
                item = repr(item)
            item = item.encode(str_encoding)
        hash_algorithm.update(item)
    return typing.cast(str, hash_algorithm.hexdigest())
def compose_regex_from_rule(rules: Dict[int, Union[List[str], str]], skip_11: int) -> str:
    """Expand rule 0 of a numbered grammar into a single anchored regex.

    Repeatedly substitutes numeric rule references with their definitions
    until no digit tokens remain, then joins everything into ``^(...)$``.
    Rule 8 is wrapped as ``(...)+`` and rule 11's expansion is cut off
    after ``skip_11`` substitutions — presumably to bound the two
    self-referencing rules of an Advent-of-Code day 19 style puzzle;
    TODO confirm against the caller.
    """
    skip_until = 0
    rule_regex = list()
    # Seed the work list with the tokens of rule 0.
    for rule in rules[0]:
        rule_regex.append(rule)
    while True:
        for index, rule in enumerate(rule_regex):
            # Only numeric tokens are rule references; regex metacharacters
            # pass through untouched.
            if rule not in ("|", "(", ")", "+") and rule.isdigit():
                rule_value = rules[int(rule)]
                if int(rule) == 11 and skip_until > skip_11:
                    # Depth budget exhausted: drop this rule-11 reference.
                    # NOTE(review): remove("11") deletes the FIRST "11" in the
                    # list, not necessarily the one at `index`, and mutates the
                    # list while enumerate() is walking it — works only because
                    # the outer while-loop re-scans until no digits remain.
                    rule_regex.remove("11")
                    continue
                elif int(rule) == 11:
                    skip_until += 1
                if isinstance(rule_value, str):
                    # Terminal rule: substitute the literal directly.
                    rule_regex[index] = rule_value
                if isinstance(rule_value, list):
                    # Composite rule: parenthesize; rule 8 repeats (one-or-more).
                    if int(rule) == 8:
                        rule_with_parenthesis = ["(", ")+"]
                    else:
                        rule_with_parenthesis = ["(", ")"]
                    rule_with_parenthesis.insert(1, rules[int(rule)])
                    rule_regex[index] = rule_with_parenthesis
        # Collapse the nested lists produced by the substitutions above.
        rule_regex = flatten(rule_regex)
        # Keep iterating until every numeric reference has been expanded.
        if any([rule.isdigit() for rule in rule_regex]):
            continue
        else:
            break
    return r"^(" + "".join(rule_regex) + ")$"
def render_cli(cmd, cli_format='plain'):
    """ Return a formatted CLI in the requested format.

    * ``plain`` returns a simple string
    * ``fragments`` returns a list of strings
    * ``bitbar`` returns a CLI with parameters formatted into the bitbar
      dialect.
    """
    assert isinstance(cmd, list)
    assert cli_format in CLI_FORMATS
    # Flatten nested arguments and serialize Path instances to strings.
    tokens = [str(token) for token in flatten(cmd)]
    if cli_format == 'fragments':
        return tokens
    if cli_format == 'plain':
        return ' '.join(tokens)
    # Renders the CLI into BitBar dialect: first token is the executable,
    # remaining tokens become numbered param<N>= arguments. Values that
    # contain '=' are wrapped in escaped quotes.
    pieces = []
    for position, token in enumerate(tokens):
        if position == 0:
            pieces.append("bash={}".format(token))
        else:
            if '=' in token:
                token = "\\\"{}\\\"".format(token)
            pieces.append(" param{}={}".format(position, token))
    return ''.join(pieces)
def get_models():
    """Generate models.

    Builds a Markov chain model over song titles, weighting each title by
    its play count and ignoring titles played 50 times or fewer.
    """
    songs = get_songs()
    # Repeat each title `count` times so popular songs dominate the corpus.
    weighted_titles = [
        [title] * int(count) for (count, _, title) in songs if count > 50
    ]
    song_model = markovify.Text(flatten(weighted_titles), state_size=2)
    # The artist model is currently disabled; the second slot is kept as
    # None for interface compatibility with callers expecting a pair.
    return (song_model, None)
def from_list(uri_list: str) -> ComposedParseAction:
    """ read many input files in one go.

    It takes a list of comma separated uris. The uri scheme should be a
    known format similar to from* commands. fromjson /tmp/a.json
    translates to json:/tmp/a.json, similar to other formats. Globs can
    be used in the uris like
    json:/tmp/report_*.json,cppunit:/tmp/result_*.xml

    :param uri_list: comma separated string of URIs
    """
    uris = [urlparse(uri) for uri in uri_list.split(",")]
    actions = flatten([get_actions_for_uri(uri) for uri in uris])
    # Return annotation fixed: this builds a ComposedParseAction, not a
    # ParseMultipleAction (the sibling definition annotates it correctly).
    return ComposedParseAction(actions)
def from_list(uri_list: str) -> ComposedParseAction:
    """ Reads multiple input files in one go.

    It takes a list of comma separated URIs. The URI scheme should be a
    known format similar to from* commands, e.g. fromjson /tmp/a.json
    translates to json:/tmp/a.json. Globs can be used in the URIs like
    json:/tmp/report_*.json,cppunit:/tmp/result_*.xml

    :param uri_list: comma separated string of URIs
    """
    # Parse each comma-separated chunk into a URI, then gather the parse
    # actions each one maps to into a single composed action.
    parsed_uris = [urlparse(chunk) for chunk in uri_list.split(",")]
    per_uri_actions = [get_actions_for_uri(parsed) for parsed in parsed_uris]
    return ComposedParseAction(flatten(per_uri_actions))
def test_changelog():
    """Validate the changelog header and the category tag of each entry."""
    with PROJECT_ROOT.joinpath('CHANGES.rst').resolve().open() as doc:
        content = doc.read()

    assert content.startswith("Changelog\n=========\n")

    entry_pattern = re.compile(r"^\* \[(?P<category>[a-z,]+)\] (?P<entry>.+)")
    for line in content.splitlines():
        # Only bullet lines are changelog entries.
        if not line.startswith('*'):
            continue
        match = entry_pattern.match(line)
        assert match
        entry = match.groupdict()
        assert entry['category']
        # Every tag must be a known manager ID, OS ID, or special keyword.
        categories = set(entry['category'].split(','))
        assert categories.issubset(
            flatten([MANAGER_IDS, OS_DEFINITIONS.keys(), 'mpm', 'bitbar']))
def run(self, *args, dry_run=False): """ Run a shell command, return the output and keep error message. Removes ANSI escape codes, and returns ready-to-use strings. """ # Serialize Path objects to strings. args = list(map(str, flatten(args))) args_str = click.style(' '.join(args), fg='white') logger.debug(f"► {args_str}") code = 0 output = None error = None if not dry_run: code, output, error = run(*args) else: logger.warning("Dry-run: skip execution of command.") # Normalize messages. if error: error = strip_ansi(error) error = error if error else None if output: output = strip_ansi(output) output = output if output else None # Non-successful run. if code and error: # Produce an exception and eventually raise it. exception = CLIError(code, output, error) if self.raise_on_cli_error: raise exception # Accumulate errors. self.cli_errors.append(exception) # Log <stdout> and <stderr> output. if output: logger.debug(indent(output, ' ')) if error: # Non-fatal error messages are logged as warnings. log_func = logger.error if code else logger.warning log_func(indent(error, ' ')) return output
def _run(*args, color=False):
    """Invoke mdedup through the CLI runner and dump debug output."""
    # We allow for nested iterables and None values as args for
    # convenience. We just need to flatten and filter them out.
    cli_args = [arg for arg in flatten(args) if arg is not None]
    if cli_args:
        assert same(map(type, cli_args), str)

    result = runner.invoke(mdedup, cli_args, color=color)

    print_cli_output([CLI_NAME] + cli_args, result.output)
    # Print some more debug info.
    print(result)
    if result.exception:
        print(
            ExceptionInfo.from_exc_info(*result.exc_info).get_formatted())
    return result
def run(self, *args, dry_run=False): """ Run a shell command, return the output and keep error message. Removes ANSI escape codes, and returns ready-to-use strings. """ # Serialize Path objects to strings. args = list(map(str, flatten(args))) args_str = click.style(' '.join(args), fg='white') logger.debug(f"► {args_str}") code = 0 output = None error = None if not dry_run: code, output, error = run(*args) else: logger.warning("Dry-run: skip execution of command.") # Normalize messages. if error: error = strip_ansi(error) error = error if error else None if output: output = strip_ansi(output) output = output if output else None if code and error: exception = CLIError(code, output, error) if self.raise_on_cli_error: raise exception logger.error(error) self.cli_errors.append(exception) if output: logger.debug(indent(output, ' ')) return output
def _run(*args, **kwargs):
    """Invoke the mpm CLI through the runner and dump debug output."""
    color = kwargs.get('color', False)
    # We allow for nested iterables and None values as args for
    # convenience. We just need to flatten and filter them out.
    cli_args = [arg for arg in flatten(args) if arg is not None]
    if cli_args:
        assert set(map(type, cli_args)) == {str}

    result = runner.invoke(cli, cli_args, color=color)

    # Strip colors out of results.
    result.stdout_bytes = strip_ansi(result.stdout_bytes)
    result.stderr_bytes = strip_ansi(result.stderr_bytes)

    print_cli_output(['mpm'] + cli_args, result.output)
    # Print some more debug info.
    print(result)
    if result.exception:
        print(ExceptionInfo.from_exc_info(
            *result.exc_info).get_formatted())
    return result
# NOTE(review): this is the tail of a manager-groups mapping whose opening
# (presumably MANAGER_GROUPS = {...}) lies above this chunk — confirm.
'dpkg-like': {'dpkg', 'apt', 'opkg'},
    'npm-like': {'npm', 'yarn'},
}

# Define some colors.
PLATFORM_COLOR = '#bfd4f2'
MANAGER_COLOR = '#bfdadc'

# Create one label per platform.
for platform_id in ALL_OS_LABELS:
    label_id = 'platform: {}'.format(platform_id)
    LABELS.append((label_id, PLATFORM_COLOR, platform_id))
    PLATFORM_LABELS[platform_id] = label_id

# Create one label per manager. Add mpm as its own manager.
# Grouped managers are excluded here: they get a shared label below.
non_grouped_managers = set(pool()) - set(flatten(MANAGER_GROUPS.values())) | {
    'mpm'
}
for manager_id in non_grouped_managers:
    label_id = 'manager: {}'.format(manager_id)
    LABELS.append((label_id, MANAGER_COLOR, manager_id))
    # mpm is a label of its own but not a manager entry.
    if manager_id != 'mpm':
        MANAGER_LABELS[manager_id] = label_id

# Add labels for grouped managers: one shared label per group, mapped back
# to every manager ID in that group.
for group_label, manager_ids in MANAGER_GROUPS.items():
    label_id = 'manager: {}'.format(group_label)
    LABELS.append((label_id, MANAGER_COLOR, ', '.join(sorted(manager_ids))))
    for manager_id in manager_ids:
        MANAGER_LABELS[manager_id] = label_id
def test_help(self, invoke, subcmd):
    """--help on each subcommand exits cleanly and names the subcommand."""
    result = invoke(subcmd, '--help')
    assert result.exit_code == 0
    assert "Usage: " in result.stdout
    # subcmd may be nested; its first leaf is the subcommand name proper.
    first_token = flatten([subcmd])[0]
    assert first_token in result.stdout
    assert not result.stderr
def test_labeller_rules():
    """Cross-check canonical GitHub labels against the issue-labeller rules.

    Verifies that the rules embedded in the labels_issue workflow reference
    only canonically defined labels, that keyword/label usage is unique
    where required, and that manager rules carry exactly the platform
    labels their managers support.
    """
    # Extract list of canonically defined labels.
    with PROJECT_ROOT.joinpath('.github/labels.json').resolve().open() as doc:
        content = doc.read()
    defined_labels = [lbl['name'] for lbl in json.loads(content)]

    # Canonical labels are uniques.
    assert len(defined_labels) == len(set(defined_labels))
    canonical_labels = set(defined_labels)

    # Extract and categorize labels.
    canonical_managers = {
        lbl for lbl in canonical_labels
        if lbl.startswith('manager: ') and 'mpm' not in lbl
    }
    canonical_platforms = {
        lbl for lbl in canonical_labels if lbl.startswith('platform: ')
    }
    assert canonical_managers
    assert canonical_platforms

    # Extract rules from json blurb serialized into YAML.
    with PROJECT_ROOT.joinpath(
            '.github/workflows/labels_issue.yaml').resolve().open() as doc:
        content = doc.read()
    assert "Naturalclar/issue-action" in content
    json_rules = load(
        content,
        Loader=Loader)['jobs']['labeller']['steps'][0]['with']['parameters']
    rules = json.loads(json_rules)
    assert rules

    # Each keyword match one rule only.
    rules_keywords = Counter(flatten([r['keywords'] for r in rules]))
    assert rules_keywords
    assert max(rules_keywords.values()) == 1

    # Extract and categorize labels.
    rules_labels = Counter(flatten([r['labels'] for r in rules]))
    assert rules_labels

    # Check that all labels are canonically defined.
    assert canonical_labels.issuperset(rules_labels)
    rules_managers = Counter({
        label: count for label, count in rules_labels.items()
        if label.startswith('manager: ')
    })
    rules_platforms = Counter({
        label: count for label, count in rules_labels.items()
        if label.startswith('platform: ')
    })
    assert rules_managers
    # Each canonical manager labels is defined.
    assert len(canonical_managers.symmetric_difference(rules_managers)) == 0
    # Each manager has a rule and one only.
    assert max(rules_managers.values()) == 1
    assert rules_platforms
    # Each canonical platform labels is defined.
    assert len(canonical_platforms.symmetric_difference(rules_platforms)) == 0
    # Each registered OS has a rule.
    assert len(rules_platforms) == len(OS_DEFINITIONS)
    # Each platforms has at least a rule.
    assert min(rules_platforms.values()) >= 1

    # Check that all labels are canonically defined.
    assert canonical_labels.issuperset(rules_labels)

    # Check each rule definition.
    for rule in rules:
        # No duplicate labels.
        assert len(set(rule['labels'])) == len(rule['labels'])

        # Special checks for rules targetting manager labels.
        manager_label = canonical_managers.intersection(rule['labels'])
        if manager_label:
            # Extract manager label
            assert len(manager_label) == 1
            manager_label = manager_label.pop()

            # Only platforms are expected alongside manager labels.
            platforms = set(rule['labels']) - canonical_managers
            assert platforms.issubset(canonical_platforms)

            # Check managers sharing the same label shares the same platforms.
            supported_platforms = [
                pool()[mid].platforms for mid, lbl in MANAGER_LABELS.items()
                # Relying on pool() restrict our checks, as the pool exclude
                # non-locally supported managers.
                if lbl == manager_label and mid in pool()
            ]
            assert len(set(supported_platforms)) == 1

            # Check the right platforms is associated with the manager.
            supported_platform_labels = {
                PLATFORM_LABELS[os_label(p)] for p in supported_platforms[0]
            }
            assert platforms == supported_platform_labels
def annotated_image(file, boxes, service, size=12, color='r', shift='0,0',
                    display=['text'], score_threshold=0):
    """Render the image at *file* with OCR boxes/text overlaid, as a PNG buffer.

    :param file: path to the image to annotate.
    :param boxes: recognized items; each has .score, .kind ('word'/'line'/
        'para' — inferred from usage below, confirm), .bb (flat coordinate
        list) and .text.
    :param service: OCR service object; its name() is used as the plot title.
    :param size: font size for overlaid text.
    :param color: matplotlib color for title and text.
    :param shift: "x,y" offset applied to text positions.
    :param display: which elements to draw: 'text', 'bb', or 'bb-<kind>'
        combinations. NOTE(review): mutable default argument — safe here
        since it is only read, but a tuple would be more defensive.
    :param score_threshold: boxes scoring below this are dropped.
    :return: an io.BytesIO containing the rendered PNG.
    """
    service_name = service.name().title()
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 20))
    # Hide the axes: we only want the annotated picture.
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    axes.set_title(service_name, color=color, fontweight='bold', fontsize=20)
    if __debug__:
        log(f'reading image file for {service_name}: {relative(file)}')
    img = mpimg.imread(file)
    axes.imshow(img, cmap="gray")
    # Filter out low-confidence boxes up front.
    boxes = [item for item in boxes if item.score >= score_threshold]
    if __debug__:
        log(f'{len(boxes)} boxes pass threshold for {relative(file)}')
    if boxes and any(d.startswith('bb') for d in display):
        if 'bb' in display:
            # If user indicated 'bb', it means all.
            show_bb = ['word', 'line', 'para']
        else:
            # 'bb-word'-style selectors: split and drop the non-kind tokens.
            show_bb = set(flatten(d.split('-') for d in display)) - {'text', 'bb'}
        if __debug__:
            log(f'will show {", ".join(show_bb)} bb for {relative(file)}')
        box_list = []
        for bb_type in show_bb:
            box_list += list(box for box in boxes if box.kind == bb_type)
        for box in box_list:
            # .bb is a flat [x0, y0, x1, y1, ...] list; pair it into vertices.
            vertices = [(box.bb[i], box.bb[i + 1])
                        for i in range(0, len(box.bb), 2)]
            poly = Polygon(vertices, facecolor='None',
                           zorder=_Z_ORDER[box.kind],
                           edgecolor=_EDGE_COLOR[box.kind])
            axes.add_patch(poly)
    if boxes and any(d == 'text' for d in display):
        # Parse the "x,y" shift string; malformed values fall back to (0, 0).
        x_shift, y_shift = 0, 0
        shift = shift.strip('()" \\\\').split(',')
        if len(shift) == 2:
            try:
                x_shift, y_shift = int(shift[0]), int(shift[1])
            except ValueError:
                pass
        props = {
            'facecolor': 'white',
            'edgecolor': 'none',
            'alpha': 0.8,
            'pad': 1
        }
        # Only word-level boxes get their text drawn.
        for box in filter(lambda item: item.kind == 'word', boxes):
            # Clamp to the image so shifted text never goes negative.
            x = max(0, box.bb[0] + x_shift)
            y = max(0, box.bb[1] + y_shift)
            plt.text(x, y, box.text, color=color, fontsize=size, va="center",
                     bbox=props, zorder=10)
    if __debug__:
        log(f'generating png for {service_name} for {relative(file)}')
    # Render to an in-memory PNG and hand the buffer back rewound.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=300, bbox_inches='tight',
                pad_inches=0.02)
    buf.flush()
    buf.seek(0)
    plt.close(fig)
    return buf
# Generates our own box_type_id for use in CLI parameters. box_type_id = klass.__name__.lower() yield box_type_id, constructor # Mapping between supported box type IDs and their constructors. BOX_TYPES = FrozenDict(build_box_constructors()) # Categorize each box type into its structure type. BOX_STRUCTURES = FrozenDict({ "file": {"mbox", "mmdf", "babyl"}, "folder": {"maildir", "mh"}, }) # Check we did not forgot any box type. assert set(flatten(BOX_STRUCTURES.values())) == set(BOX_TYPES) # List of required sub-folders defining a properly structured maildir. MAILDIR_SUBDIRS = frozenset(("cur", "new", "tmp")) def autodetect_box_type(path): """Auto-detect the format of the mailbox located at the provided path. Returns a box type as indexed in the ``box_types`` dictionnary above. If the path is a file, then it is considered as an ``mbox``. Else, if th provided path is a folder and feature the expecteed sub-directories, it is parsed as a ``maildir``. Future finer autodetection heuristics should be implemented here. Some ideas: