def test_init() -> None:
    """A map built from an id-keyed dict equals one built from the items."""
    by_id = {}
    for item in DATA:
        by_id[item.id] = item

    from_mapping: ResultMap[Data] = ResultMap(by_id)
    from_items: ResultMap[Data] = ResultMap(DATA)

    assert from_mapping == from_items
def run(self) -> ResultMap[GalaxyRole]:
    """Run the stage.

    Searches Galaxy for at most `config.count` roles and collects them into
    a result map, wrapping the search in a progress bar when
    `config.progress` is set.
    """
    api = GalaxyAPI()
    found_roles = api.search_roles(limit=self.config.count)
    if self.config.progress:
        found_roles = tqdm(
            found_roles, desc='Searching roles', total=self.config.count)
    return ResultMap(found_roles)
def test_unstructure() -> None:
    """Unstructuring yields a plain dict of dicts keyed by item id."""
    expected = {
        'test': {'id': 'test', 'val': 1},
        'tset': {'id': 'tset', 'val': -1},
    }
    unstructured = ResultMap(DATA).unstructure()
    assert unstructured == expected
def run(
        self,
        extract_versions: ResultMap[RepoVersions],
        clone: ResultMap[GitRepo]
) -> ResultMap[RepoDiffMetrics]:
    """Run the stage.

    Groups the consecutive version pairs ("bumps") of every entry in
    `extract_versions` by the cloned repository it belongs to, hands each
    repository that has at least one pair to `diff_repo`, exports the
    collected results to CSV and returns them as a result map.

    :param extract_versions: Versions per repository, keyed by repo id.
    :param clone: Cloned repositories, looked up by the same repo ids.
    :returns: The results produced by `diff_repo`, one per repository.
    """
    repos_to_bumps: Dict[GitRepo, List[Tuple[Version, Version]]]
    repos_to_bumps = defaultdict(list)
    for repo_id, repo_versions in extract_versions.items():
        versions = repo_versions.versions
        # Pair each version with its successor.
        repos_to_bumps[clone[repo_id]].extend(zip(versions, versions[1:]))

    repo_list: Iterable[Tuple[GitRepo, List[Tuple[Version, Version]]]]
    # Repositories without any version pair have nothing to diff.
    repo_list = [(r, b) for r, b in repos_to_bumps.items() if b]

    bump_pbar: Optional[tqdm] = None
    if self.config.progress:
        repo_list = tqdm(
            repo_list, desc='Extract structural diffs', unit=' repos')
        bump_pbar = tqdm(
            desc='Extract structural diffs', unit=' version pairs',
            total=sum(len(bumps) for bumps in repos_to_bumps.values()))

    try:
        results = [
            self.diff_repo(repo, bumps, bump_pbar)
            for repo, bumps in repo_list
        ]
    finally:
        # Close the bar even when a diff fails, so it is not left open.
        if bump_pbar is not None:
            bump_pbar.close()

    self.export_to_csv(results)
    return ResultMap(results)
def test_stage_run_empty_input(config: CloneConfig, mock_clone: None) -> None:
    """Running the clone stage on an empty input gives a falsy result."""
    clone_stage = Clone(config)
    no_roles: ResultMap[GalaxyRole] = ResultMap([])

    cloned = clone_stage.run(no_roles)

    assert not cloned
def run(
        self,
        analyze_versions: ResultMap[AnalyzedRepoVersions],
        clone: ResultMap[GitRepoPath],
        discover: ResultMap[GalaxyRole]
) -> ResultMap[RepoVersionDiffs]:
    """Run the stage.

    Creates a `RepoVersionDiffs` for every analyzed repository, using the
    git repository found under the output directory at the cloned path,
    exports the diffs to CSV and returns them as a result map.
    """
    version_sets: Iterable[AnalyzedRepoVersions] = analyze_versions.values()
    if self.config.progress:
        version_sets = tqdm(
            version_sets, desc='Extract commits between versions',
            unit=' repos')

    # TODO(ROpdebee): Would be nice if we could speed this up a bit,
    #                 currently takes 1-2s per repository. Likely due to
    #                 the diffing, haven't profiled yet though. Caching
    #                 should already massively help on consecutive runs
    #                 though.
    diffs = [
        RepoVersionDiffs.create(
            versions,
            git.Repo(self.config.output_directory / clone[versions.id].path))
        for versions in version_sets
    ]

    self.export_to_csv(diffs, clone, discover)
    return ResultMap(diffs)
def test_iter(data: List[Data]) -> None:
    """Iterating a result map yields exactly the ids of its items."""
    result_map: ResultMap[Data] = ResultMap(data)
    seen_keys = {key for key in result_map}
    expected_keys = {item.id for item in data}
    assert seen_keys == expected_keys
def run(
        self,
        first_stage: ResultMap[MyModel],
        second_stage: ResultMap[NewData]
) -> ResultMap[NewData]:
    """Combine every first-stage model with every second-stage item.

    Each pair becomes a `NewData` built from the model's `string`, a
    space, and the item's `id`.
    """
    combined = [
        NewData(model.string + ' ' + other.id)
        for model in first_stage.values()
        for other in second_stage.values()
    ]
    return ResultMap(combined)
def run(
        self,
        extract_role_metadata: ResultMap[GalaxyMetadata],
        extract_git_metadata: ResultMap[GitRepoMetadata],
        clone: ResultMap[GitRepo]
) -> ResultMap[MultiStructuralRoleModel]:
    """Run the stage."""
    role_repos = self.get_role_repositories(
        extract_role_metadata, clone, extract_git_metadata)

    # Total number of extractions, used only for the progress bar.
    total_revs = 0
    for _, _, repo_revs in role_repos:
        total_revs += len(repo_revs)
    if not self.config.commits:
        # One additional HEAD extraction per repository.
        total_revs += len(role_repos)

    tasks: Iterable[Tuple[GitRepo, str, List[Tuple[str, str]]]] = role_repos
    rev_bar: Optional[tqdm] = None
    if self.config.progress:
        tasks = tqdm(
            role_repos, desc='Extract structural models', unit=' repos')
        rev_bar = tqdm(
            desc='Extract structural models', unit=' revs',
            total=total_revs)

    collected: List[MultiStructuralRoleModel] = []
    failed = 0
    for repo, role_name, revs in tasks:
        repo_dir = self.config.output_directory / 'Repositories' / repo.path
        git_repo = git.Repo(repo_dir)
        original_branch = git_repo.active_branch

        models = []
        try:
            for sha1, rev in revs:
                extracted = self.extract(
                    git_repo, role_name, sha1, rev, rev_bar)
                if extracted is not None:
                    models.append(extracted)
                else:
                    failed += 1

            # When extracting tags, the latest commit is extracted as well.
            if not self.config.commits:
                original_branch.checkout(force=True)
                head_model = self.extract(
                    git_repo, role_name, 'HEAD', 'HEAD', rev_bar)
                if head_model is not None:
                    models.append(head_model)
                else:
                    failed += 1
        finally:
            # Always put the repo back on the branch it started on.
            original_branch.checkout(force=True)

        collected.append(MultiStructuralRoleModel(role_name, models))

    if rev_bar is not None:
        rev_bar.close()

    print(f'{failed} roles failed to load')
    return ResultMap(collected)
def test_report_empty(
        config: CloneConfig,
        capsys: _pytest.capture.CaptureFixture
) -> None:
    """Reporting an empty result prints a zero count and the output dir."""
    clone_stage = Clone(config)
    nothing_cloned: ResultMap[GitRepoPath] = ResultMap([])

    clone_stage.report_results(nothing_cloned)

    printed = capsys.readouterr().out
    assert 'Cloned 0 ' in printed
    assert str(config.output_directory) in printed
def run(
        self,
        galaxy_scrape: ResultMap[GalaxyAPIPage]
) -> ResultMap[GalaxyMetadata]:
    """Run the stage.

    Builds a `MetadataMap` from all scraped API pages, verifies its schema
    and converts it into a single `GalaxyMetadata` result.

    :param galaxy_scrape: The scraped Galaxy API pages.
    :returns: A result map containing exactly one `GalaxyMetadata`.
    """
    # Use the public mapping interface rather than the private storage.
    metadata_map = MetadataMap(list(galaxy_scrape.values()))
    metadata_map.verify_schema()
    return ResultMap([GalaxyMetadata.from_metamap(metadata_map)])
def run(
        self,
        extract_structural_models: ResultMap[MultiStructuralRoleModel]
) -> ResultMap[StructuralRoleEvolution]:
    """Run the stage."""
    role_models: Iterable[MultiStructuralRoleModel]
    role_models = extract_structural_models.values()
    if self.config.progress:
        role_models = tqdm(role_models, desc='Extract structural diffs')

    # One evolution object is created per multi-revision role model.
    return ResultMap(
        StructuralRoleEvolution.create(model) for model in role_models)
def test_report(
        config: CloneConfig,
        resource: TextIO,
        capsys: _pytest.capture.CaptureFixture
) -> None:
    """Reporting the loaded results prints the count and the output dir."""
    clone_stage = Clone(config)
    raw = json.load(resource)
    cloned: ResultMap[GitRepoPath] = ResultMap.structure(
        raw, ResultMap[GitRepoPath])

    clone_stage.report_results(cloned)

    printed = capsys.readouterr().out
    assert 'Cloned 100 ' in printed
    assert str(config.output_directory) in printed
def load_from_dataset(self) -> ResultMap[GalaxyMetadata]:
    """Load the results of a previous run from the dataset.

    Raises `CacheMiss` when not found in the dataset. The original error
    is attached as the cause of the `CacheMiss`.
    """
    dataset_dir_path = self.config.output_directory / self.dataset_dir_name
    try:
        return ResultMap(
            [GalaxyMetadata.lazy_load('dummy', dataset_dir_path)])
    except Exception as exc:
        # Catch `Exception` rather than everything, so KeyboardInterrupt
        # and SystemExit are not turned into a cache miss.
        raise CacheMiss() from exc
def run(self) -> ResultMap[GalaxyAPIPage]:
    """Run the stage."""
    # Load the pages of every API endpoint, in endpoint order.
    loaded_pages: List[GalaxyAPIPage] = [
        page
        for name, url in API_URLS.items()
        for page in self.load_pages(name, url)
    ]

    # Some roles in the role pages may fail to load because of a 500
    # Internal Server Error on the Galaxy side, which we cannot fix. The
    # role pages carry more information, and we have both the role search
    # and the roles themselves, so any role present in the search pages
    # but missing from the role pages has to be loaded separately. Those
    # get incremental page numbers.
    loaded_pages = self.import_missing_roles(loaded_pages)

    return ResultMap(loaded_pages)
def test_stage_run(
        config: CloneConfig,
        progress: bool,
        resource: TextIO,
        mock_clone: None
) -> None:
    """Cloned paths are relative, exist, and match their source roles."""
    config.progress = progress
    clone_stage = Clone(config)
    raw = json.load(resource)
    roles: ResultMap[GalaxyRole] = ResultMap.structure(
        raw, ResultMap[GalaxyRole])

    cloned = clone_stage.run(roles)

    assert len(cloned) == 2
    for role_id, repo_path in cloned.items():
        location = repo_path.path
        assert not location.is_absolute()
        assert (config.output_directory / location).exists()

        source_role = roles[role_id]
        assert (repo_path.owner
                == location.parent.name
                == source_role.github_user)
        assert (repo_path.name
                == location.name
                == source_role.github_repo)
        assert repo_path.id == role_id
def run(self, discover: ResultMap[GalaxyRole]) -> ResultMap[GitRepoPath]:
    """Run the stage: Clone the repositories."""
    pending: Iterable[GalaxyRole] = discover.values()
    if self.config.progress:
        pending = tqdm(pending, desc='Cloning repos')

    cloned = set()
    for role in pending:
        user = role.github_user
        repo = role.github_repo
        try:
            path = self.clone(user, repo)
        except CloneException as exc:  # pragma: no cover
            # A failed clone is reported and the role is skipped.
            tqdm.write(f'Failed to clone repository {user}/{repo}: {exc}')
        else:
            cloned.add(GitRepoPath(
                role.github_user, role.github_repo, role.id, path))

    return ResultMap(cloned)
def run(
        self,
        extract_role_metadata: ResultMap[GalaxyMetadata]
) -> ResultMap[GitRepo]:
    """Run the stage: Clone the repositories."""
    galaxy_metadata = extract_role_metadata['dummy']
    pending: Iterable[Repository] = galaxy_metadata.repositories.values()
    if self.config.progress:
        pending = tqdm(pending, desc='Cloning repos')

    cloned = set()
    for repo in pending:
        try:
            user, repo_name = self._parse_info(repo)
            path = self.clone(user, repo_name, repo)
        except CloneException as exc:  # pragma: no cover
            # A failed clone is reported and the repository is skipped.
            tqdm.write(f'Failed to clone repository {repo.github_url}: {exc}')
        else:
            repo_ref = XrefID(Repository, repo.entity_id)
            cloned.add(GitRepo(user, repo_name, repo_ref, path))

    return ResultMap(cloned)
def run(self) -> ResultMap[NewData]:
    """Return a single `NewData` built from the configured option."""
    option_value = self.config.option
    only_item = NewData(option_value)
    return ResultMap([only_item])
def run(self) -> ResultMap[NewData]:
    """Return two fixed `NewData` items: 'append' and 'test'."""
    items = [NewData(text) for text in ('append', 'test')]
    return ResultMap(items)
def run(self) -> ResultMap[MyModel]:
    """Return a result map built from the module-level `TEST_DATA_RUN`."""
    run_data = TEST_DATA_RUN
    return ResultMap(run_data)
@attr.s(auto_attribs=True)
class MyModel(Model):
    """Test model for stage result type.

    Carries one field each of type int, str and bool; the `string` field
    doubles as the model's id.
    """

    number: int
    string: str
    boolean: bool

    @property
    def id(self) -> str:
        """Return the identifier of this model, which is its `string`."""
        return self.string


CACHE_NAME = 'cache.json'
# Two sets of models with disjoint ids. NOTE(review): going by the names,
# one set presumably stands in for cached results and the other for results
# of an actual run -- confirm against the fixtures that use them.
TEST_DATA_CACHE = ResultMap(
    [MyModel(1, 'test', False), MyModel(2, 'test2', True)])
TEST_DATA_RUN = ResultMap(
    [MyModel(3, 'Ran', False), MyModel(4, 'Ran 2', True)])
CONFIG = MainConfig()

# Forward declarations of classes created in fixtures.
# Only evaluated by type checkers; nothing is defined here at runtime.
if TYPE_CHECKING:
    class MyStage(Stage[MyModel, MainConfig]):
        has_run: bool
        has_reported: bool
        reported_data: ResultMap[MyModel]
def run(self, clone: ResultMap[GitRepo]) -> ResultMap[GitRepoMetadata]:
    """Run the stage: apply `extract_meta` to every cloned repository."""
    cloned_repos: Iterable[GitRepo] = clone.values()
    if self.config.progress:
        cloned_repos = tqdm(cloned_repos, total=len(clone))
    return ResultMap(self.extract_meta(repo) for repo in cloned_repos)
def test_len(data: List[Data]) -> None:
    """A result map has as many entries as the list it was built from."""
    result_map: ResultMap[Data] = ResultMap(data)
    actual_size = len(result_map)
    assert actual_size == len(data)
def test_get_item() -> None:
    """Items can be looked up in a result map by their id."""
    result_map: ResultMap[Data] = ResultMap(DATA)
    first, second = DATA[0], DATA[1]
    assert result_map['test'] == first
    assert result_map['tset'] == second
def test_get_empty() -> None:
    """Looking up any key in an empty result map raises `KeyError`."""
    empty_map: ResultMap[Data] = ResultMap([])
    with pytest.raises(KeyError):
        empty_map['test']
def run(self, first_stage: ResultMap[MyModel]) -> ResultMap[NewData]:
    """Turn each first-stage model into a `NewData`.

    The new item is built from the model's `string` multiplied by its
    `number`.
    """
    transformed = []
    for model in first_stage.values():
        transformed.append(NewData(model.string * model.number))
    return ResultMap(transformed)
def test_structure() -> None:
    """Structuring a dict of dicts gives the map built from `DATA`."""
    raw = {
        'test': {'id': 'test', 'val': 1},
        'tset': {'id': 'tset', 'val': -1},
    }
    structured: ResultMap[Data] = ResultMap.structure(raw, ResultMap[Data])
    expected = ResultMap(DATA)
    assert structured == expected
def test_get_nonexistant() -> None:
    """Looking up an id that is not in the map raises `KeyError`."""
    populated_map: ResultMap[Data] = ResultMap(DATA)
    with pytest.raises(KeyError):
        populated_map['doesnt_exist']
def run(
        self,
        extract_versions: ResultMap[RepoVersions]
) -> ResultMap[AnalyzedRepoVersions]:
    """Wrap every extracted `RepoVersions` in an `AnalyzedRepoVersions`."""
    analyzed = map(AnalyzedRepoVersions, extract_versions.values())
    return ResultMap(analyzed)