Example no. 1
0
def shash(*args: Any,
          hash_algorithm: Optional[Any] = None,
          str_encoding: str = "utf-8") -> str:
    """Stable hash of the positional arguments.

    Each argument is normalized to :obj:`bytes` before being fed to the
    hash: ``bytes`` pass through unchanged, ``str`` values are encoded,
    and any other object is converted with ``repr()`` (mappings and
    iterables are flattened first so their contents drive the digest).

    Args:
        hash_algorithm: object implementing the `hash algorithm` interface
            from :mod:`hashlib`. Defaults to ``xxhash.xxh64()``.
        str_encoding: encoding use by :func:`str.encode()` to generate a
            :obj:`bytes` object for hashing.

    Returns:
        Hexadecimal digest of the accumulated hash, as a string.
    """
    if hash_algorithm is None:
        hash_algorithm = xxhash.xxh64()

    for item in args:
        if not isinstance(item, bytes):
            if not isinstance(item, str):
                # Bug fix: test Mapping *before* Iterable. Every Mapping
                # is also an Iterable, so the original ordering made the
                # ``item.items()`` branch unreachable and silently hashed
                # only the mapping's keys.
                if isinstance(item, collections.abc.Mapping):
                    item = flatten(item.items())
                elif isinstance(item, collections.abc.Iterable):
                    item = flatten(item)
                item = repr(item)

            item = item.encode(str_encoding)

        hash_algorithm.update(item)

    return typing.cast(str, hash_algorithm.hexdigest())
Example no. 2
0
def compose_regex_from_rule(rules: Dict[int, Union[List[str], str]],
                            skip_11: int) -> str:
    """Expand rule 0 of *rules* into a single anchored regex string.

    Numeric tokens are repeatedly replaced by their rule definitions until
    only literal regex fragments remain.  Rule 8 is wrapped as ``(...)+``
    (one-or-more repetition) and rule 11 is expanded at most *skip_11*
    times before leftover ``"11"`` tokens are dropped — presumably to
    bound an otherwise self-recursive rule (Advent-of-Code day 19 style);
    TODO confirm against the caller.

    Args:
        rules: mapping of rule number to either a literal regex string or
            a list of tokens (rule numbers and metacharacters).
        skip_11: maximum number of times rule 11 gets expanded.

    Returns:
        An anchored regex of the form ``^(...)$``.
    """
    skip_until = 0
    rule_regex = list()
    for rule in rules[0]:
        rule_regex.append(rule)
    while True:
        for index, rule in enumerate(rule_regex):
            # Only numeric tokens still need expansion; metacharacters
            # are already final regex fragments.
            if rule not in ("|", "(", ")", "+") and rule.isdigit():
                rule_value = rules[int(rule)]
                if int(rule) == 11 and skip_until > skip_11:
                    # NOTE(review): removing from the list while
                    # enumerating it shifts later elements into the
                    # current slot, which this pass then skips; the
                    # outer ``while`` re-scans so they are eventually
                    # handled — confirm this ordering is intended.
                    rule_regex.remove("11")
                    continue
                elif int(rule) == 11:
                    skip_until += 1
                if isinstance(rule_value, str):
                    rule_regex[index] = rule_value
                if isinstance(rule_value, list):
                    # Rule 8 repeats (one-or-more); every other list rule
                    # is simply grouped.
                    if int(rule) == 8:
                        rule_with_parenthesis = ["(", ")+"]
                    else:
                        rule_with_parenthesis = ["(", ")"]
                    # Splice the rule body between the parentheses.
                    rule_with_parenthesis.insert(1, rules[int(rule)])
                    rule_regex[index] = rule_with_parenthesis
        # Collapse the nested lists introduced above, then loop again
        # until no numeric token remains anywhere.
        rule_regex = flatten(rule_regex)
        if any([rule.isdigit() for rule in rule_regex]):
            continue
        else:
            break
    return r"^(" + "".join(rule_regex) + ")$"
Example no. 3
0
    def render_cli(cmd, cli_format='plain'):
        """ Return a formatted CLI in the requested format.

        * ``plain`` returns a simple string
        * ``fragments`` returns a list of strings
        * ``bitbar`` returns a CLI with parameters formatted into the bitbar
        dialect.
        """
        assert isinstance(cmd, list)
        assert cli_format in CLI_FORMATS
        # Flatten nested arguments and serialize Path instances.
        tokens = [str(token) for token in flatten(cmd)]

        if cli_format == 'fragments':
            return tokens

        if cli_format == 'plain':
            return ' '.join(tokens)

        # Remaining case: render the CLI into the BitBar dialect.
        pieces = []
        for position, token in enumerate(tokens):
            if position == 0:
                pieces.append("bash={}".format(token))
            else:
                if '=' in token:
                    token = "\\\"{}\\\"".format(token)
                pieces.append(" param{}={}".format(position, token))
        return ''.join(pieces)
Example no. 4
0
def get_models():
    """Generate Markov models from the scraped song list.

    Returns a 2-tuple ``(song_model, None)``; the artist model is
    currently disabled (kept commented out below).
    """
    songs = get_songs()
    # Weight each song title by its count `num`, so frequently played
    # songs dominate the Markov chain's training corpus.
    # NOTE(review): ``int(num)`` suggests `num` may be a string; if so,
    # ``num > 50`` would raise TypeError on Python 3 — confirm that
    # get_songs() yields numeric counts.
    song_model = markovify.Text(
        flatten([[song] * int(num) for (num, _, song) in songs if num > 50]),
        state_size=2,
    )
    # artist_model = markovify.Text(
    #     flatten([[artist] * int(num) for (num, artist, _) in songs if num > 50]),
    #     state_size=2,
    # )
    return (song_model, None)  # artist_model)
Example no. 5
0
def from_list(uri_list: str) -> ComposedParseAction:
    """
    Read many input files in one go.

    It takes a list of comma separated URIs. The URI scheme should be a
    known format similar to the from* commands: ``fromjson /tmp/a.json``
    translates to ``json:/tmp/a.json``, and similarly for other formats.
    Globs can be used in the URIs like
    json:/tmp/report_*.json,cppunit:/tmp/result_*.xml

    :param uri_list: comma separated string of URIs
    :return: a :class:`ComposedParseAction` aggregating the parse actions
        resolved for every URI.
    """
    # Fix: the return annotation previously said ``ParseMultipleAction``
    # while the function actually returns a ``ComposedParseAction``.
    uris = [urlparse(uri) for uri in uri_list.split(",")]
    actions = flatten([get_actions_for_uri(uri) for uri in uris])

    return ComposedParseAction(actions)
Example no. 6
0
def from_list(uri_list: str) -> ComposedParseAction:
    """
    Reads multiple input files in one go.

    It takes a list of comma separated URIs. The URI scheme should be a known
    format similar to from* commands, e.g. fromjson /tmp/a.json translates to
    json:/tmp/a.json. Globs can be used in the URIs like
    json:/tmp/report_*.json,cppunit:/tmp/result_*.xml

    :param uri_list: comma separated string of URIs
    """
    # Parse every comma-separated entry, then collect the parse actions
    # resolved for each URI into a single composed action.
    parsed_uris = [urlparse(entry) for entry in uri_list.split(",")]
    return ComposedParseAction(
        flatten([get_actions_for_uri(parsed) for parsed in parsed_uris]))
Example no. 7
0
def test_changelog():
    """Check the format and category tags of CHANGES.rst entries."""
    with PROJECT_ROOT.joinpath('CHANGES.rst').resolve().open() as doc:
        content = doc.read()

    assert content.startswith("Changelog\n=========\n")

    entry_pattern = re.compile(r"^\* \[(?P<category>[a-z,]+)\] (?P<entry>.+)")
    for line in content.splitlines():
        # Only bullet lines carry changelog entries.
        if not line.startswith('*'):
            continue
        match = entry_pattern.match(line)
        assert match
        entry = match.groupdict()
        assert entry['category']
        categories = set(entry['category'].split(','))
        assert categories.issubset(
            flatten([MANAGER_IDS,
                     OS_DEFINITIONS.keys(), 'mpm', 'bitbar']))
Example no. 8
0
    def run(self, *args, dry_run=False):
        """ Run a shell command, return the output and keep error message.

        Removes ANSI escape codes, and returns ready-to-use strings.
        """
        # Flatten nested iterables and serialize Path objects to strings.
        cli = [str(arg) for arg in flatten(args)]
        logger.debug(f"► {click.style(' '.join(cli), fg='white')}")

        code, output, error = 0, None, None
        if dry_run:
            logger.warning("Dry-run: skip execution of command.")
        else:
            code, output, error = run(*cli)

        # Strip ANSI codes and collapse now-empty messages to None.
        if error:
            error = strip_ansi(error) or None
        if output:
            output = strip_ansi(output) or None

        # A failed run carrying an error message becomes a CLIError.
        if code and error:
            exception = CLIError(code, output, error)
            if self.raise_on_cli_error:
                raise exception
            # Keep the error around instead of raising it.
            self.cli_errors.append(exception)

        # Log <stdout> and <stderr> content.
        if output:
            logger.debug(indent(output, '  '))
        if error:
            # An error produced by a successful run is only a warning.
            log_func = logger.error if code else logger.warning
            log_func(indent(error, '  '))

        return output
Example no. 9
0
    def _run(*args, color=False):
        # Nested iterables and None values are accepted for convenience:
        # flatten everything, then drop the None placeholders.
        cli_args = [arg for arg in flatten(args) if arg is not None]
        if cli_args:
            assert same(map(type, cli_args), str)

        result = runner.invoke(mdedup, cli_args, color=color)

        print_cli_output([CLI_NAME] + cli_args, result.output)

        # Dump extra debugging details.
        print(result)
        if result.exception:
            print(
                ExceptionInfo.from_exc_info(*result.exc_info).get_formatted())

        return result
Example no. 10
0
    def run(self, *args, dry_run=False):
        """ Run a shell command, return the output and keep error message.

        Removes ANSI escape codes, and returns ready-to-use strings.
        """
        # Serialize Path objects to strings while flattening nested args.
        command = list(map(str, flatten(args)))
        pretty_command = click.style(' '.join(command), fg='white')
        logger.debug(f"► {pretty_command}")

        code, output, error = 0, None, None
        if not dry_run:
            code, output, error = run(*command)
        else:
            logger.warning("Dry-run: skip execution of command.")

        # Normalize streams: strip ANSI codes, empty strings become None.
        if error:
            error = strip_ansi(error) or None
        if output:
            output = strip_ansi(output) or None

        # A failed run carrying an error message becomes a CLIError,
        # either raised immediately or accumulated for later inspection.
        if code and error:
            exception = CLIError(code, output, error)
            if self.raise_on_cli_error:
                raise exception
            logger.error(error)
            self.cli_errors.append(exception)

        if output:
            logger.debug(indent(output, '  '))

        return output
Example no. 11
0
    def _run(*args, **kwargs):
        color = kwargs.get('color', False)

        # Accept nested iterables and None values as args for
        # convenience: flatten them, then drop the None placeholders.
        cli_args = [a for a in flatten(args) if a is not None]
        if cli_args:
            assert set(map(type, cli_args)) == {str}

        result = runner.invoke(cli, cli_args, color=color)

        # Normalize the captured streams by stripping color codes.
        result.stdout_bytes = strip_ansi(result.stdout_bytes)
        result.stderr_bytes = strip_ansi(result.stderr_bytes)

        print_cli_output(['mpm'] + cli_args, result.output)

        # Dump extra debugging details.
        print(result)
        if result.exception:
            print(ExceptionInfo.from_exc_info(
                *result.exc_info).get_formatted())

        return result
Example no. 12
0
    'dpkg-like': {'dpkg', 'apt', 'opkg'},
    'npm-like': {'npm', 'yarn'},
}

# Define some colors (hex codes used for the GitHub labels below).
PLATFORM_COLOR = '#bfd4f2'
MANAGER_COLOR = '#bfdadc'

# Create one label per platform.
for platform_id in ALL_OS_LABELS:
    label_id = 'platform: {}'.format(platform_id)
    LABELS.append((label_id, PLATFORM_COLOR, platform_id))
    PLATFORM_LABELS[platform_id] = label_id

# Create one label per manager. Add mpm as its own manager.
# Managers belonging to a group (see MANAGER_GROUPS) are excluded here;
# they get a shared group label further below.
non_grouped_managers = set(pool()) - set(flatten(MANAGER_GROUPS.values())) | {
    'mpm'
}
for manager_id in non_grouped_managers:
    label_id = 'manager: {}'.format(manager_id)
    LABELS.append((label_id, MANAGER_COLOR, manager_id))
    # mpm gets its own label but deliberately no MANAGER_LABELS entry.
    if manager_id != 'mpm':
        MANAGER_LABELS[manager_id] = label_id

# Add labels for grouped managers.
for group_label, manager_ids in MANAGER_GROUPS.items():
    label_id = 'manager: {}'.format(group_label)
    # The label description lists every manager sharing it.
    LABELS.append((label_id, MANAGER_COLOR, ', '.join(sorted(manager_ids))))
    for manager_id in manager_ids:
        MANAGER_LABELS[manager_id] = label_id
Example no. 13
0
 def test_help(self, invoke, subcmd):
     """Every subcommand exposes a working --help screen."""
     res = invoke(subcmd, '--help')
     assert res.exit_code == 0
     assert "Usage: " in res.stdout
     assert flatten([subcmd])[0] in res.stdout
     assert not res.stderr
Example no. 14
0
def test_labeller_rules():
    """Cross-check the labeller workflow rules against canonical labels.

    Validates that every keyword/label rule in the GitHub issue-labeller
    workflow references only labels defined in ``.github/labels.json``,
    and that manager/platform labels are consistently covered.
    """

    # Extract list of canonically defined labels.
    with PROJECT_ROOT.joinpath('.github/labels.json').resolve().open() as doc:
        content = doc.read()
    defined_labels = [lbl['name'] for lbl in json.loads(content)]

    # Canonical labels are unique.
    assert len(defined_labels) == len(set(defined_labels))
    canonical_labels = set(defined_labels)

    # Extract and categorize labels. The 'mpm' manager label is excluded:
    # it is handled as its own special case.
    canonical_managers = {
        lbl
        for lbl in canonical_labels
        if lbl.startswith('manager: ') and 'mpm' not in lbl
    }
    canonical_platforms = {
        lbl
        for lbl in canonical_labels if lbl.startswith('platform: ')
    }
    assert canonical_managers
    assert canonical_platforms

    # Extract rules from json blurb serialized into YAML.
    with PROJECT_ROOT.joinpath(
            '.github/workflows/labels_issue.yaml').resolve().open() as doc:
        content = doc.read()
    assert "Naturalclar/issue-action" in content
    json_rules = load(
        content,
        Loader=Loader)['jobs']['labeller']['steps'][0]['with']['parameters']
    rules = json.loads(json_rules)
    assert rules

    # Each keyword matches one rule only.
    rules_keywords = Counter(flatten([r['keywords'] for r in rules]))
    assert rules_keywords
    assert max(rules_keywords.values()) == 1

    # Extract and categorize labels.
    rules_labels = Counter(flatten([r['labels'] for r in rules]))

    assert rules_labels
    # Check that all labels are canonically defined.
    assert canonical_labels.issuperset(rules_labels)

    rules_managers = Counter({
        label: count
        for label, count in rules_labels.items()
        if label.startswith('manager: ')
    })
    rules_platforms = Counter({
        label: count
        for label, count in rules_labels.items()
        if label.startswith('platform: ')
    })

    assert rules_managers
    # Each canonical manager label is defined.
    assert len(canonical_managers.symmetric_difference(rules_managers)) == 0
    # Each manager has a rule and one only.
    assert max(rules_managers.values()) == 1

    assert rules_platforms
    # Each canonical platform label is defined.
    assert len(canonical_platforms.symmetric_difference(rules_platforms)) == 0
    # Each registered OS has a rule.
    assert len(rules_platforms) == len(OS_DEFINITIONS)
    # Each platform has at least one rule.
    assert min(rules_platforms.values()) >= 1

    # Check that all labels are canonically defined.
    assert canonical_labels.issuperset(rules_labels)

    # Check each rule definition.
    for rule in rules:

        # No duplicate labels.
        assert len(set(rule['labels'])) == len(rule['labels'])

        # Special checks for rules targeting manager labels.
        manager_label = canonical_managers.intersection(rule['labels'])
        if manager_label:

            # Extract manager label
            assert len(manager_label) == 1
            manager_label = manager_label.pop()

            # Only platforms are expected alongside manager labels.
            platforms = set(rule['labels']) - canonical_managers
            assert platforms.issubset(canonical_platforms)

            # Check managers sharing the same label shares the same platforms.
            # NOTE(review): set() below requires `platforms` collections to
            # be hashable — presumably frozensets or tuples; confirm.
            supported_platforms = [
                pool()[mid].platforms for mid, lbl in MANAGER_LABELS.items()
                # Relying on pool() restrict our checks, as the pool exclude
                # non-locally supported managers.
                if lbl == manager_label and mid in pool()
            ]
            assert len(set(supported_platforms)) == 1

            # Check the right platforms is associated with the manager.
            supported_platform_labels = {
                PLATFORM_LABELS[os_label(p)]
                for p in supported_platforms[0]
            }
            assert platforms == supported_platform_labels
Example no. 15
0
def annotated_image(file,
                    boxes,
                    service,
                    size=12,
                    color='r',
                    shift='0,0',
                    display=('text',),
                    score_threshold=0):
    """Render the image with OCR boxes/text overlaid; return a PNG buffer.

    Args:
        file: path of the image file to annotate.
        boxes: iterable of box objects exposing ``score``, ``kind``,
            ``bb`` (flat list of coordinates) and ``text`` attributes.
        service: OCR service object; its ``name()`` is used as the title.
        size: font size of the overlaid text.
        color: matplotlib color used for the title and text.
        shift: ``"x,y"`` pixel offset applied to text positions.
        display: annotations to draw: ``'text'`` and/or bounding-box
            selectors (``'bb'`` for all kinds, or e.g. ``'bb-word'``).
            Fixed the original mutable-list default (``['text']``) —
            default arguments are evaluated once, so a shared list is
            the classic Python pitfall; an immutable tuple behaves the
            same for all uses below (iteration and ``in`` tests).
        score_threshold: boxes scoring below this value are dropped.

    Returns:
        An ``io.BytesIO`` buffer holding the rendered PNG (300 dpi).
    """
    service_name = service.name().title()

    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 20))
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    axes.set_title(service_name, color=color, fontweight='bold', fontsize=20)

    if __debug__:
        log(f'reading image file for {service_name}: {relative(file)}')
    img = mpimg.imread(file)
    axes.imshow(img, cmap="gray")

    # Drop the boxes that do not pass the confidence threshold.
    boxes = [item for item in boxes if item.score >= score_threshold]
    if __debug__:
        log(f'{len(boxes)} boxes pass threshold for {relative(file)}')
    if boxes and any(d.startswith('bb') for d in display):
        if 'bb' in display:  # If user indicated 'bb', it means all.
            show_bb = ['word', 'line', 'para']
        else:
            # 'bb-word' style selectors: keep the suffixes only.
            show_bb = set(flatten(d.split('-')
                                  for d in display)) - {'text', 'bb'}
        if __debug__:
            log(f'will show {", ".join(show_bb)} bb for {relative(file)}')

        box_list = []
        for bb_type in show_bb:
            box_list += list(box for box in boxes if box.kind == bb_type)
        for box in box_list:
            # bb is a flat [x0, y0, x1, y1, ...] list; pair it up.
            vertices = [(box.bb[i], box.bb[i + 1])
                        for i in range(0, len(box.bb), 2)]
            poly = Polygon(vertices,
                           facecolor='None',
                           zorder=_Z_ORDER[box.kind],
                           edgecolor=_EDGE_COLOR[box.kind])
            axes.add_patch(poly)

    if boxes and any(d == 'text' for d in display):
        # Parse the optional "x,y" shift; fall back to (0, 0) silently.
        x_shift, y_shift = 0, 0
        shift = shift.strip('()" \\\\').split(',')
        if len(shift) == 2:
            try:
                x_shift, y_shift = int(shift[0]), int(shift[1])
            except ValueError:
                pass

        props = {
            'facecolor': 'white',
            'edgecolor': 'none',
            'alpha': 0.8,
            'pad': 1
        }
        for box in filter(lambda item: item.kind == 'word', boxes):
            # Clamp shifted coordinates so text stays inside the figure.
            x = max(0, box.bb[0] + x_shift)
            y = max(0, box.bb[1] + y_shift)
            plt.text(x,
                     y,
                     box.text,
                     color=color,
                     fontsize=size,
                     va="center",
                     bbox=props,
                     zorder=10)

    if __debug__:
        log(f'generating png for {service_name} for {relative(file)}')
    buf = io.BytesIO()
    fig.savefig(buf,
                format='png',
                dpi=300,
                bbox_inches='tight',
                pad_inches=0.02)
    buf.flush()
    buf.seek(0)
    plt.close(fig)

    return buf
Example no. 16
0
            # Generates our own box_type_id for use in CLI parameters.
            box_type_id = klass.__name__.lower()

            yield box_type_id, constructor


# Mapping between supported box type IDs and their constructors.
BOX_TYPES = FrozenDict(build_box_constructors())

# Categorize each box type into its structure type: single-file formats
# versus folder-based formats.
BOX_STRUCTURES = FrozenDict({
    "file": {"mbox", "mmdf", "babyl"},
    "folder": {"maildir", "mh"},
})
# Check we did not forget any box type.
assert set(flatten(BOX_STRUCTURES.values())) == set(BOX_TYPES)

# List of required sub-folders defining a properly structured maildir.
MAILDIR_SUBDIRS = frozenset(("cur", "new", "tmp"))


def autodetect_box_type(path):
    """Auto-detect the format of the mailbox located at the provided path.

    Returns a box type as indexed in the ``box_types`` dictionnary above.

    If the path is a file, then it is considered as an ``mbox``. Else, if th
    provided path is a folder and feature the expecteed sub-directories, it is
    parsed as a ``maildir``.

    Future finer autodetection heuristics should be implemented here. Some ideas: