def get_metrics_from_stat(
        self,
        _: Commit,
        file_diff_stats: Tuple[FileDiffStat, ...],
) -> Generator[Metric, None, None]:
    total_lines = 0
    lines_by_file_type: Dict[str, int] = collections.defaultdict(int)

    for file_diff_stat in file_diff_stats:
        lines_changed = (
            len(file_diff_stat.lines_added) -
            len(file_diff_stat.lines_removed)
        )
        # Track total overall
        total_lines += lines_changed

        filename = file_diff_stat.filename.decode('UTF-8')
        tags = identify.tags_from_filename(filename) or {UNKNOWN}
        for tag in tags:
            lines_by_file_type[tag] += lines_changed

    # Yield overall metric and one per type of expected mapping types
    yield Metric('TotalLinesOfCode', total_lines)

    for tag, val in lines_by_file_type.items():
        if tag in ALL_TAGS and val:
            yield Metric(f'TotalLinesOfCode_{tag}', val)
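The example above leans on names from its host module (Metric, UNKNOWN, ALL_TAGS). To try it standalone, here is a minimal sketch of stand-in definitions, assuming Metric is a simple (name, value) record; the tag set is illustrative, not the project's real list:

import collections

from identify import identify

# Stand-in definitions (assumptions, not the original module's code):
Metric = collections.namedtuple('Metric', ('name', 'value'))
UNKNOWN = 'unknown'  # assumed fallback tag for files identify cannot classify
ALL_TAGS = frozenset(('python', 'yaml', 'markdown', UNKNOWN))  # illustrative subset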
def get_metrics_from_stat(
        self,
        _: Commit,
        file_diff_stats: Tuple[FileDiffStat, ...],
) -> Generator[Metric, None, None]:
    total_curses = 0
    curses_by_file_type: Dict[str, int] = collections.defaultdict(int)

    for file_diff_stat in file_diff_stats:
        curses_added = count_curse_words(file_diff_stat.lines_added)
        curses_removed = count_curse_words(file_diff_stat.lines_removed)
        curses_changed = curses_added - curses_removed

        # Track total overall
        total_curses = total_curses + curses_changed

        # Track by file extension -> type mapping
        filename = file_diff_stat.filename.decode('UTF-8')
        tags = identify.tags_from_filename(filename) or {UNKNOWN}
        for tag in tags:
            curses_by_file_type[tag] += curses_changed

    # Yield overall metric and one per type of expected mapping types
    yield Metric('TotalCurseWords', total_curses)

    for tag, value in curses_by_file_type.items():
        if tag in ALL_TAGS and value:
            yield Metric(f'TotalCurseWords_{tag}', value)
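count_curse_words is defined elsewhere in that project; a minimal sketch of what such a helper might look like (the word list and the bytes assumption are mine, not the original code):

CURSE_WORDS = frozenset((b'damn', b'crap'))  # illustrative word list

def count_curse_words(lines):
    # Count words in the diff lines that appear in the curse list;
    # lines are assumed to be bytes, matching filename.decode() above.
    return sum(
        word in CURSE_WORDS
        for line in lines
        for word in line.lower().split()
    )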
@classmethod
def create(cls, project: Project, path_from_root: str) -> "FileInfo":
    """Clean the file name and get its tags."""
    if Deprecation.pre_commit_without_dash(path_from_root):
        clean_path = DOT + path_from_root
    else:
        clean_path = DOT + path_from_root[1:] if path_from_root.startswith("-") else path_from_root
    tags = set(identify.tags_from_filename(clean_path))
    return cls(project, clean_path, tags)
def _find_subclasses(self, data, handled_tags, new_files_found):
    for possible_file in data.keys():
        # Collect every handler class whose tag matches one of the file's tags
        found_subclasses = []
        for file_tag in identify.tags_from_filename(possible_file):
            handler_subclass = handled_tags.get(file_tag)
            if handler_subclass:
                found_subclasses.append(handler_subclass)

        # Register a (file, field) pair for each matching handler
        for found_subclass in found_subclasses:
            new_files_found.update(self.file_field_pair(possible_file, found_subclass))
def compiler_for_file(self, filename: str, first_line: str) -> Compiler:
    for tag in tags_from_filename(filename) - {'text'}:
        with contextlib.suppress(KeyError):
            return self.compiler_for_scope(f'source.{tag}')

    _, _, ext = os.path.basename(filename).rpartition('.')
    for extensions, first_line_match, scope_name in self._find_scope:
        if (
                ext in extensions or
                first_line_match.match(
                    first_line, 0, first_line=True, boundary=True,
                )
        ):
            return self.compiler_for_scope(scope_name)
    else:
        return self.compiler_for_scope('source.unknown')
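The fast path above subtracts the generic 'text' tag so only language-ish tags are tried as source.<tag> scopes. A quick illustration of that set arithmetic (identify maps .py to {'text', 'python'}):

from identify.identify import tags_from_filename

print(tags_from_filename('app.py') - {'text'})  # {'python'} -> tries 'source.python'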
def run(self) -> YieldFlake8Error:
    """Run the check plugin."""
    has_errors = False
    app = NitpickApp.current()
    for err in app.init_errors:
        has_errors = True
        yield NitpickApp.as_flake8_warning(err)
    if has_errors:
        return []

    current_python_file = Path(self.filename)
    if current_python_file.absolute() != app.main_python_file.absolute():
        # Only report warnings once, for the main Python file of this project.
        LOGGER.debug("Ignoring file: %s", self.filename)
        return []
    LOGGER.debug("Nitpicking file: %s", self.filename)

    yield from itertools.chain(
        app.config.merge_styles(),
        self.check_files(True),
        self.check_files(False),
    )

    has_errors = False
    for err in app.style_errors:
        has_errors = True
        yield NitpickApp.as_flake8_warning(err)
    if has_errors:
        return []

    # Get all root keys from the style TOML.
    for path, config_dict in app.config.style_dict.items():
        # All except "nitpick" are file names.
        if path == PROJECT_NAME:
            continue

        # For each file name, find the plugin that can handle the file.
        tags = identify.tags_from_filename(path)
        for base_file in app.plugin_manager.hook.handle_config_file(  # pylint: disable=no-member
            config=config_dict, file_name=path, tags=tags
        ):
            yield from base_file.check_exists()

    return []
def compiler_for_file(self, filename: str, first_line: str) -> Compiler:
    for tag in tags_from_filename(filename) - {'text'}:
        try:
            # TODO: this doesn't always match even if we detect it
            return self.compiler_for_scope(f'source.{tag}')
        except KeyError:
            pass

    # didn't find it in the fast path, need to read all the json
    for k in tuple(self._scope_to_files):
        self._raw_for_scope(k)

    _, _, ext = os.path.basename(filename).rpartition('.')
    for extensions, scope in self._file_types:
        if ext in extensions:
            return self.compiler_for_scope(scope)

    for reg, scope in self._first_line:
        if reg.match(first_line, 0, first_line=True, boundary=True):
            return self.compiler_for_scope(scope)

    return self.compiler_for_scope('source.unknown')
def get_metrics_from_stat(self, _, file_diff_stats):
    total_lines = 0
    lines_by_file_type = collections.defaultdict(int)

    for file_diff_stat in file_diff_stats:
        lines_changed = (
            len(file_diff_stat.lines_added) -
            len(file_diff_stat.lines_removed)
        )
        # Track total overall
        total_lines += lines_changed

        filename = file_diff_stat.filename.decode('UTF-8')
        tags = identify.tags_from_filename(filename) or {UNKNOWN}
        for tag in tags:
            lines_by_file_type[tag] += lines_changed

    # Yield overall metric and one per type of expected mapping types
    yield Metric('TotalLinesOfCode', total_lines)

    for tag, val in lines_by_file_type.items():
        if tag in ALL_TAGS and val:
            yield Metric('TotalLinesOfCode_{}'.format(tag), val)
def get_metrics_from_stat(self, _, file_diff_stats):
    total_curses = 0
    curses_by_file_type = collections.defaultdict(int)

    for file_diff_stat in file_diff_stats:
        curses_added = count_curse_words(file_diff_stat.lines_added)
        curses_removed = count_curse_words(file_diff_stat.lines_removed)
        curses_changed = curses_added - curses_removed

        # Track total overall
        total_curses = total_curses + curses_changed

        # Track by file extension -> type mapping
        filename = file_diff_stat.filename.decode('UTF-8')
        tags = identify.tags_from_filename(filename) or {UNKNOWN}
        for tag in tags:
            curses_by_file_type[tag] += curses_changed

    # Yield overall metric and one per type of expected mapping types
    yield Metric('TotalCurseWords', total_curses)

    for tag, value in curses_by_file_type.items():
        if tag in ALL_TAGS and value:
            yield Metric('TotalCurseWords_{}'.format(tag), value)
def test_tags_from_filename(filename, expected):
    assert identify.tags_from_filename(filename) == expected
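The bare test above takes (filename, expected) pairs, so it is presumably wired up with a parametrize decorator in the original suite; a hypothetical version with illustrative cases:

import pytest
from identify import identify

@pytest.mark.parametrize(
    ('filename', 'expected'),
    (
        ('foo.py', {'text', 'python'}),  # per identify's extension mapping
        ('no-extension-here', set()),    # assumed: unrecognized names yield an empty set
    ),
)
def test_tags_from_filename(filename, expected):
    assert identify.tags_from_filename(filename) == expected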
def format_file(
        filename: str, *,
        min_py3_version: Tuple[int, int],
        max_py_version: Tuple[int, int],
) -> bool:
    with open(filename) as f:
        contents = f.read()

    cfg = configparser.ConfigParser()
    cfg.read_string(contents)
    _clean_sections(cfg)

    # normalize names to underscores so sdist / wheel have the same prefix
    cfg['metadata']['name'] = cfg['metadata']['name'].replace('-', '_')

    # if README.md exists, set `long_description` + content type
    readme = _first_file(filename, 'readme')
    if readme is not None:
        long_description = f'file: {os.path.basename(readme)}'
        cfg['metadata']['long_description'] = long_description

        tags = identify.tags_from_filename(readme)
        if 'markdown' in tags:
            cfg['metadata']['long_description_content_type'] = 'text/markdown'
        elif 'rst' in tags:
            cfg['metadata']['long_description_content_type'] = 'text/x-rst'
        else:
            cfg['metadata']['long_description_content_type'] = 'text/plain'

    # set license fields if a license exists
    license_filename = _first_file(filename, 'licen[sc]e')
    if license_filename is not None:
        cfg['metadata']['license_file'] = os.path.basename(license_filename)

        license_id = identify.license_id(license_filename)
        if license_id is not None:
            cfg['metadata']['license'] = license_id

        if license_id in LICENSE_TO_CLASSIFIER:
            cfg['metadata']['classifiers'] = (
                cfg['metadata'].get('classifiers', '').rstrip() +
                f'\n{LICENSE_TO_CLASSIFIER[license_id]}'
            )

    requires = _python_requires(filename, min_py3_version=min_py3_version)
    if requires is not None:
        if not cfg.has_section('options'):
            cfg.add_section('options')
        cfg['options']['python_requires'] = requires

    install_requires = _requires(cfg, 'install_requires')
    if install_requires:
        cfg['options']['install_requires'] = '\n'.join(install_requires)

    setup_requires = _requires(cfg, 'setup_requires')
    if setup_requires:
        cfg['options']['setup_requires'] = '\n'.join(setup_requires)

    if cfg.has_section('options.extras_require'):
        for key in cfg['options.extras_require']:
            group_requires = _requires(cfg, key, 'options.extras_require')
            cfg['options.extras_require'][key] = '\n'.join(group_requires)

    py_classifiers = _py_classifiers(requires, max_py_version=max_py_version)
    if py_classifiers:
        cfg['metadata']['classifiers'] = (
            cfg['metadata'].get('classifiers', '').rstrip() +
            f'\n{py_classifiers}'
        )

    imp_classifiers = _imp_classifiers(filename)
    if imp_classifiers:
        cfg['metadata']['classifiers'] = (
            cfg['metadata'].get('classifiers', '').rstrip() +
            f'\n{imp_classifiers}'
        )

    # sort the classifiers if present
    if 'classifiers' in cfg['metadata']:
        classifiers = sorted(set(cfg['metadata']['classifiers'].split('\n')))
        classifiers = _trim_py_classifiers(
            classifiers, requires, max_py_version=max_py_version,
        )
        cfg['metadata']['classifiers'] = '\n'.join(classifiers)

    sections: Dict[str, Dict[str, str]] = {}
    for section, key_order in KEYS_ORDER:
        if section not in cfg:
            continue
        entries = {k.replace('-', '_'): v for k, v in cfg[section].items()}

        new_section = {k: entries.pop(k) for k in key_order if k in entries}
        # sort any remaining keys
        new_section.update(sorted(entries.items()))

        sections[section] = new_section
        cfg.pop(section)

    for section in cfg.sections():
        sections[section] = dict(cfg[section])
        cfg.pop(section)

    for k, v in sections.items():
        cfg[k] = v

    sio = io.StringIO()
    cfg.write(sio)
    new_contents = sio.getvalue().strip() + '\n'
    new_contents = new_contents.replace('\t', '    ')
    new_contents = new_contents.replace(' \n', '\n')

    if new_contents != contents:
        with open(filename, 'w') as f:
            f.write(new_contents)

    return new_contents != contents
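A hypothetical invocation of format_file; the path and version bounds are illustrative:

changed = format_file(
    'setup.cfg',
    min_py3_version=(3, 7),    # lowest python3 to require
    max_py_version=(3, 12),    # highest version to emit classifiers for
)
print('rewrote setup.cfg' if changed else 'already formatted')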
async def process_asset(
    self,
    asset: RemoteAsset,
    sha256_digest: str,
    sender: trio.MemorySendChannel[ToDownload],
) -> None:
    async with sender:
        if self.last_timestamp is None or self.last_timestamp < asset.created:
            self.last_timestamp = asset.created
        dest = self.repo / asset.path
        if not self.tracker.register_asset(asset, force=self.config.force):
            log.debug(
                "%s: metadata unchanged; not taking any further action",
                asset.path,
            )
            self.tracker.finish_asset(asset.path)
            return
        if not self.config.match_asset(asset.path):
            log.debug("%s: Skipping asset", asset.path)
            self.tracker.finish_asset(asset.path)
            return
        log.info("%s: Syncing", asset.path)
        dest.parent.mkdir(parents=True, exist_ok=True)
        to_update = False
        if not (dest.exists() or dest.is_symlink()):
            log.info("%s: Not in dataset; will add", asset.path)
            to_update = True
            self.report.added += 1
        else:
            log.debug("%s: About to fetch hash from annex", asset.path)
            if sha256_digest == await self.get_annex_hash(dest):
                log.info(
                    "%s: Asset in dataset, and hash shows no modification;"
                    " will not update",
                    asset.path,
                )
                self.tracker.finish_asset(asset.path)
            else:
                log.info(
                    "%s: Asset in dataset, and hash shows modification;"
                    " will update",
                    asset.path,
                )
                to_update = True
                self.report.updated += 1
        if to_update:
            bucket_url = await self.get_file_bucket_url(asset)
            dest.unlink(missing_ok=True)
            key = await self.annex.mkkey(
                PurePosixPath(asset.path).name, asset.size, sha256_digest
            )
            remotes = await self.annex.get_key_remotes(key)
            if "text" not in tags_from_filename(asset.path):
                log.info(
                    "%s: File is binary; registering key with git-annex", asset.path
                )
                await self.annex.from_key(key, asset.path)
                await self.register_url(asset.path, key, bucket_url)
                await self.register_url(asset.path, key, asset.base_download_url)
                if (
                    remotes is not None
                    and self.config.backup_remote is not None
                    and self.config.backup_remote not in remotes
                ):
                    log.info(
                        "%s: Not in backup remote %s",
                        asset.path,
                        self.config.backup_remote,
                    )
                self.tracker.finish_asset(asset.path)
                self.report.registered += 1
            elif asset.size > (10 << 20):
                raise RuntimeError(
                    f"{asset.path} identified as text but is {asset.size} bytes!"
                )
            else:
                log.info(
                    "%s: File is text; sending off for download from %s",
                    asset.path,
                    bucket_url,
                )
                await sender.send(
                    ToDownload(
                        path=asset.path,
                        url=bucket_url,
                        extra_urls=[asset.base_download_url],
                        sha256_digest=sha256_digest,
                    )
                )