Example #1
    def get_metrics_from_stat(
        self,
        _: Commit,
        file_diff_stats: Tuple[FileDiffStat, ...],
    ) -> Generator[Metric, None, None]:
        total_lines = 0
        lines_by_file_type: Dict[str, int] = collections.defaultdict(int)

        for file_diff_stat in file_diff_stats:
            lines_changed = (len(file_diff_stat.lines_added) -
                             len(file_diff_stat.lines_removed))

            # Track total overall
            total_lines += lines_changed

            filename = file_diff_stat.filename.decode('UTF-8')
            tags = identify.tags_from_filename(filename) or {UNKNOWN}

            for tag in tags:
                lines_by_file_type[tag] += lines_changed

        # Yield overall metric and one per type of expected mapping types
        yield Metric('TotalLinesOfCode', total_lines)
        for tag, val in lines_by_file_type.items():
            if tag in ALL_TAGS and val:
                yield Metric(f'TotalLinesOfCode_{tag}', val)
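For context on the tags sets used throughout these examples, here is a quick sketch of what identify.tags_from_filename returns (exact tag sets can vary with the installed identify version):

    from identify import identify

    # tags_from_filename inspects only the name, never the file contents
    identify.tags_from_filename('app.py')     # {'text', 'python'}
    identify.tags_from_filename('README.md')  # {'text', 'markdown'}
    identify.tags_from_filename('mystery')    # set() -- hence the `or {UNKNOWN}` fallback above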
Example #2
    def get_metrics_from_stat(
        self,
        _: Commit,
        file_diff_stats: Tuple[FileDiffStat, ...],
    ) -> Generator[Metric, None, None]:
        total_curses = 0
        curses_by_file_type: Dict[str, int] = collections.defaultdict(int)

        for file_diff_stat in file_diff_stats:
            curses_added = count_curse_words(file_diff_stat.lines_added)
            curses_removed = count_curse_words(file_diff_stat.lines_removed)
            curses_changed = curses_added - curses_removed

            # Track total overall
            total_curses = total_curses + curses_changed

            # Track by file extension -> type mapping
            filename = file_diff_stat.filename.decode('UTF-8')
            tags = identify.tags_from_filename(filename) or {UNKNOWN}

            for tag in tags:
                curses_by_file_type[tag] += curses_changed

        # Yield overall metric and one per type of expected mapping types
        yield Metric('TotalCurseWords', total_curses)
        for tag, value in curses_by_file_type.items():
            if tag in ALL_TAGS and value:
                yield Metric(f'TotalCurseWords_{tag}', value)
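count_curse_words is defined elsewhere in this project; a hypothetical sketch of such a helper, with a deliberately tame word list and assuming the diff lines are bytes, as the filename decoding above suggests:

    CURSE_WORDS = frozenset(('darn', 'heck'))  # illustrative list

    def count_curse_words(lines):
        # count whitespace-separated words that appear in the word list
        return sum(
            word in CURSE_WORDS
            for line in lines
            for word in line.decode('UTF-8', errors='replace').lower().split()
        )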
Example #3
 def create(cls, project: Project, path_from_root: str) -> "FileInfo":
     """Clean the file name and get its tags."""
     if Deprecation.pre_commit_without_dash(path_from_root):
         clean_path = DOT + path_from_root
     elif path_from_root.startswith("-"):
         clean_path = DOT + path_from_root[1:]
     else:
         clean_path = path_from_root
     tags = set(identify.tags_from_filename(clean_path))
     return cls(project, clean_path, tags)
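Both branches above normalize toward a leading dot before tagging the file. The dash rule in isolation, with DOT assumed to be '.':

    DOT = '.'  # assumed constant

    def clean(path_from_root):
        if path_from_root.startswith('-'):
            return DOT + path_from_root[1:]
        return path_from_root

    assert clean('-pre-commit-config.yaml') == '.pre-commit-config.yaml'
    assert clean('setup.cfg') == 'setup.cfg'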
Example #4
    def _find_subclasses(self, data, handled_tags, new_files_found):
        """Map each file name in ``data`` to the handler classes for its identify tags."""
        for possible_file in data.keys():
            # collect every handler registered for one of this file's tags
            found_subclasses = []
            for file_tag in identify.tags_from_filename(possible_file):
                handler_subclass = handled_tags.get(file_tag)
                if handler_subclass:
                    found_subclasses.append(handler_subclass)

            for found_subclass in found_subclasses:
                new_files_found.update(self.file_field_pair(possible_file, found_subclass))
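With hypothetical inputs, the tag-to-handler lookup above behaves like this:

    from identify import identify

    handled_tags = {'yaml': 'YamlHandler', 'toml': 'TomlHandler'}  # hypothetical mapping

    def handlers_for(possible_file):
        return [
            handled_tags[tag]
            for tag in identify.tags_from_filename(possible_file)
            if tag in handled_tags
        ]

    assert handlers_for('pyproject.toml') == ['TomlHandler']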
Example #5
    def compiler_for_file(self, filename: str, first_line: str) -> Compiler:
        for tag in tags_from_filename(filename) - {'text'}:
            with contextlib.suppress(KeyError):
                return self.compiler_for_scope(f'source.{tag}')

        _, _, ext = os.path.basename(filename).rpartition('.')
        for extensions, first_line_match, scope_name in self._find_scope:
            if (
                    ext in extensions or
                    first_line_match.match(
                        first_line, 0, first_line=True, boundary=True,
                    )
            ):
                return self.compiler_for_scope(scope_name)
        else:
            return self.compiler_for_scope('source.unknown')
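A standalone sketch of the fast path above: try each identify tag as a textmate-style scope until one resolves, then fall back (the grammar registry here is hypothetical):

    from identify.identify import tags_from_filename

    GRAMMARS = {'source.python': '<python grammar>'}  # hypothetical registry

    def fast_path(filename):
        for tag in tags_from_filename(filename) - {'text'}:
            try:
                return GRAMMARS[f'source.{tag}']
            except KeyError:
                continue
        return None  # caller falls back to extension / first-line matching

    assert fast_path('app.py') == '<python grammar>'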
Example #6
    def run(self) -> YieldFlake8Error:
        """Run the check plugin."""
        has_errors = False
        app = NitpickApp.current()
        for err in app.init_errors:
            has_errors = True
            yield NitpickApp.as_flake8_warning(err)
        if has_errors:
            return []

        current_python_file = Path(self.filename)
        if current_python_file.absolute() != app.main_python_file.absolute():
            # Only report warnings once, for the main Python file of this project.
            LOGGER.debug("Ignoring file: %s", self.filename)
            return []
        LOGGER.debug("Nitpicking file: %s", self.filename)

        yield from itertools.chain(app.config.merge_styles(),
                                   self.check_files(True),
                                   self.check_files(False))

        has_errors = False
        for err in app.style_errors:
            has_errors = True
            yield NitpickApp.as_flake8_warning(err)
        if has_errors:
            return []

        # Get all root keys from the style TOML.
        for path, config_dict in app.config.style_dict.items():
            # All except "nitpick" are file names.
            if path == PROJECT_NAME:
                continue

            # For each file name, find the plugin that can handle the file.
            tags = identify.tags_from_filename(path)
            for base_file in app.plugin_manager.hook.handle_config_file(  # pylint: disable=no-member
                    config=config_dict,
                    file_name=path,
                    tags=tags):
                yield from base_file.check_exists()

        return []
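The loop above drives a pluggy hook; a minimal sketch of a plugin answering it, assuming a nitpick-style hookimpl that claims files by identify tag (the marker name and handler class are assumptions):

    import pluggy

    hookimpl = pluggy.HookimplMarker('nitpick')  # project name assumed

    class TomlFileHandler:  # hypothetical stand-in for a real handler
        def __init__(self, config, file_name):
            self.config, self.file_name = config, file_name

        def check_exists(self):
            return iter(())  # a real handler would yield flake8 warnings here

    @hookimpl
    def handle_config_file(config, file_name, tags):
        # claim only files identify tagged as TOML (illustrative choice)
        return [TomlFileHandler(config, file_name)] if 'toml' in tags else []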
Example #7
    def compiler_for_file(self, filename: str, first_line: str) -> Compiler:
        for tag in tags_from_filename(filename) - {'text'}:
            try:
                # TODO: this doesn't always match even if we detect it
                return self.compiler_for_scope(f'source.{tag}')
            except KeyError:
                pass

        # didn't find it in the fast path, need to read all the json
        for k in tuple(self._scope_to_files):
            self._raw_for_scope(k)

        _, _, ext = os.path.basename(filename).rpartition('.')
        for extensions, scope in self._file_types:
            if ext in extensions:
                return self.compiler_for_scope(scope)

        for reg, scope in self._first_line:
            if reg.match(first_line, 0, first_line=True, boundary=True):
                return self.compiler_for_scope(scope)

        return self.compiler_for_scope('source.unknown')
Example #8
    def get_metrics_from_stat(self, _, file_diff_stats):
        total_lines = 0
        lines_by_file_type = collections.defaultdict(int)

        for file_diff_stat in file_diff_stats:
            lines_changed = (len(file_diff_stat.lines_added) -
                             len(file_diff_stat.lines_removed))

            # Track total overall
            total_lines += lines_changed

            filename = file_diff_stat.filename.decode('UTF-8')
            tags = identify.tags_from_filename(filename) or {UNKNOWN}

            for tag in tags:
                lines_by_file_type[tag] += lines_changed

        # Yield overall metric and one per type of expected mapping types
        yield Metric('TotalLinesOfCode', total_lines)
        for tag, val in lines_by_file_type.items():
            if tag in ALL_TAGS and val:
                yield Metric('TotalLinesOfCode_{}'.format(tag), val)
Example #9
    def get_metrics_from_stat(self, _, file_diff_stats):
        total_curses = 0
        curses_by_file_type = collections.defaultdict(int)

        for file_diff_stat in file_diff_stats:
            curses_added = count_curse_words(file_diff_stat.lines_added)
            curses_removed = count_curse_words(file_diff_stat.lines_removed)
            curses_changed = curses_added - curses_removed

            # Track total overall
            total_curses = total_curses + curses_changed

            # Track by file extension -> type mapping
            filename = file_diff_stat.filename.decode('UTF-8')
            tags = identify.tags_from_filename(filename) or {UNKNOWN}

            for tag in tags:
                curses_by_file_type[tag] += curses_changed

        # Yield overall metric and one per type of expected mapping types
        yield Metric('TotalCurseWords', total_curses)
        for tag, value in curses_by_file_type.items():
            if tag in ALL_TAGS and value:
                yield Metric('TotalCurseWords_{}'.format(tag), value)
Example #10
def test_tags_from_filename(filename, expected):
    assert identify.tags_from_filename(filename) == expected
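The parametrize decorator is elided in the snippet above; a sketch of how the cases might be supplied (the expected tag sets are illustrative):

    import pytest
    from identify import identify

    @pytest.mark.parametrize(
        ('filename', 'expected'),
        (
            ('test.py', {'text', 'python'}),
            ('README.md', {'text', 'markdown'}),
            ('no_extension', set()),
        ),
    )
    def test_tags_from_filename(filename, expected):
        assert identify.tags_from_filename(filename) == expected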
Example #11
def format_file(
    filename: str,
    *,
    min_py3_version: Tuple[int, int],
    max_py_version: Tuple[int, int],
) -> bool:
    with open(filename) as f:
        contents = f.read()

    cfg = configparser.ConfigParser()
    cfg.read_string(contents)
    _clean_sections(cfg)

    # normalize names to underscores so sdist / wheel have the same prefix
    cfg['metadata']['name'] = cfg['metadata']['name'].replace('-', '_')

    # if README.md exists, set `long_description` + content type
    readme = _first_file(filename, 'readme')
    if readme is not None:
        long_description = f'file: {os.path.basename(readme)}'
        cfg['metadata']['long_description'] = long_description

        tags = identify.tags_from_filename(readme)
        if 'markdown' in tags:
            cfg['metadata']['long_description_content_type'] = 'text/markdown'
        elif 'rst' in tags:
            cfg['metadata']['long_description_content_type'] = 'text/x-rst'
        else:
            cfg['metadata']['long_description_content_type'] = 'text/plain'

    # set license fields if a license exists
    license_filename = _first_file(filename, 'licen[sc]e')
    if license_filename is not None:
        cfg['metadata']['license_file'] = os.path.basename(license_filename)

        license_id = identify.license_id(license_filename)
        if license_id is not None:
            cfg['metadata']['license'] = license_id

        if license_id in LICENSE_TO_CLASSIFIER:
            cfg['metadata']['classifiers'] = (
                cfg['metadata'].get('classifiers', '').rstrip() +
                f'\n{LICENSE_TO_CLASSIFIER[license_id]}')

    requires = _python_requires(filename, min_py3_version=min_py3_version)
    if requires is not None:
        if not cfg.has_section('options'):
            cfg.add_section('options')
        cfg['options']['python_requires'] = requires

    install_requires = _requires(cfg, 'install_requires')
    if install_requires:
        cfg['options']['install_requires'] = '\n'.join(install_requires)

    setup_requires = _requires(cfg, 'setup_requires')
    if setup_requires:
        cfg['options']['setup_requires'] = '\n'.join(setup_requires)

    if cfg.has_section('options.extras_require'):
        for key in cfg['options.extras_require']:
            group_requires = _requires(cfg, key, 'options.extras_require')
            cfg['options.extras_require'][key] = '\n'.join(group_requires)

    py_classifiers = _py_classifiers(requires, max_py_version=max_py_version)
    if py_classifiers:
        cfg['metadata']['classifiers'] = (
            cfg['metadata'].get('classifiers', '').rstrip() +
            f'\n{py_classifiers}')

    imp_classifiers = _imp_classifiers(filename)
    if imp_classifiers:
        cfg['metadata']['classifiers'] = (
            cfg['metadata'].get('classifiers', '').rstrip() +
            f'\n{imp_classifiers}')

    # sort the classifiers if present
    if 'classifiers' in cfg['metadata']:
        classifiers = sorted(set(cfg['metadata']['classifiers'].split('\n')))
        classifiers = _trim_py_classifiers(
            classifiers,
            requires,
            max_py_version=max_py_version,
        )
        cfg['metadata']['classifiers'] = '\n'.join(classifiers)

    sections: Dict[str, Dict[str, str]] = {}
    for section, key_order in KEYS_ORDER:
        if section not in cfg:
            continue

        entries = {k.replace('-', '_'): v for k, v in cfg[section].items()}

        new_section = {k: entries.pop(k) for k in key_order if k in entries}
        # sort any remaining keys
        new_section.update(sorted(entries.items()))

        sections[section] = new_section
        cfg.pop(section)

    for section in cfg.sections():
        sections[section] = dict(cfg[section])
        cfg.pop(section)

    for k, v in sections.items():
        cfg[k] = v

    sio = io.StringIO()
    cfg.write(sio)
    new_contents = sio.getvalue().strip() + '\n'
    new_contents = new_contents.replace('\t', '    ')
    new_contents = new_contents.replace(' \n', '\n')

    if new_contents != contents:
        with open(filename, 'w') as f:
            f.write(new_contents)

    return new_contents != contents
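The two identify calls this formatter relies on, in isolation (results depend on the files on disk and the identify version):

    from identify import identify

    identify.tags_from_filename('README.md')  # {'text', 'markdown'} -> text/markdown
    identify.license_id('LICENSE')            # e.g. 'MIT', or None when no close match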
Example #12
 async def process_asset(
     self,
     asset: RemoteAsset,
     sha256_digest: str,
     sender: trio.MemorySendChannel[ToDownload],
 ) -> None:
     async with sender:
         if self.last_timestamp is None or self.last_timestamp < asset.created:
             self.last_timestamp = asset.created
         dest = self.repo / asset.path
         if not self.tracker.register_asset(asset, force=self.config.force):
             log.debug(
                 "%s: metadata unchanged; not taking any further action",
                 asset.path,
             )
             self.tracker.finish_asset(asset.path)
             return
         if not self.config.match_asset(asset.path):
             log.debug("%s: Skipping asset", asset.path)
             self.tracker.finish_asset(asset.path)
             return
         log.info("%s: Syncing", asset.path)
         dest.parent.mkdir(parents=True, exist_ok=True)
         to_update = False
         if not (dest.exists() or dest.is_symlink()):
             log.info("%s: Not in dataset; will add", asset.path)
             to_update = True
             self.report.added += 1
         else:
             log.debug("%s: About to fetch hash from annex", asset.path)
             if sha256_digest == await self.get_annex_hash(dest):
                 log.info(
                     "%s: Asset in dataset, and hash shows no modification;"
                     " will not update",
                     asset.path,
                 )
                 self.tracker.finish_asset(asset.path)
             else:
                 log.info(
                     "%s: Asset in dataset, and hash shows modification;"
                     " will update",
                     asset.path,
                 )
                 to_update = True
                 self.report.updated += 1
         if to_update:
             bucket_url = await self.get_file_bucket_url(asset)
             dest.unlink(missing_ok=True)
             key = await self.annex.mkkey(
                 PurePosixPath(asset.path).name, asset.size, sha256_digest
             )
             remotes = await self.annex.get_key_remotes(key)
             if "text" not in tags_from_filename(asset.path):
                 log.info(
                     "%s: File is binary; registering key with git-annex", asset.path
                 )
                 await self.annex.from_key(key, asset.path)
                 await self.register_url(asset.path, key, bucket_url)
                 await self.register_url(asset.path, key, asset.base_download_url)
                 if (
                     remotes is not None
                     and self.config.backup_remote is not None
                     and self.config.backup_remote not in remotes
                 ):
                     log.info(
                         "%s: Not in backup remote %s",
                         asset.path,
                         self.config.backup_remote,
                     )
                 self.tracker.finish_asset(asset.path)
                 self.report.registered += 1
             elif asset.size > (10 << 20):
                 raise RuntimeError(
                     f"{asset.path} identified as text but is {asset.size} bytes!"
                 )
             else:
                 log.info(
                     "%s: File is text; sending off for download from %s",
                     asset.path,
                     bucket_url,
                 )
                 await sender.send(
                     ToDownload(
                         path=asset.path,
                         url=bucket_url,
                         extra_urls=[asset.base_download_url],
                         sha256_digest=sha256_digest,
                     )
                 )
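The fork on "text" not in tags_from_filename(asset.path) is the entire text/binary decision above; a standalone sketch (the helper name is ours):

    from identify.identify import tags_from_filename

    def is_probably_text(path):
        # identify judges by filename alone; assets with no 'text' tag take the
        # binary branch and are registered with git-annex instead of downloaded
        return 'text' in tags_from_filename(path)

    assert is_probably_text('dataset_description.json')
    assert not is_probably_text('blob.bin')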