Example #1
0
def test_init() -> None:
    """A ResultMap built from an id-keyed dict equals one built from items."""
    by_id = {}
    for item in DATA:
        by_id[item.id] = item

    from_mapping: ResultMap[Data] = ResultMap(by_id)
    from_items: ResultMap[Data] = ResultMap(DATA)

    assert from_mapping == from_items
Example #2
0
    def run(self) -> ResultMap[GalaxyRole]:
        """Search Galaxy for roles and collect the hits in a ResultMap.

        ``self.config.count`` is passed to the API as the search limit.
        When ``self.config.progress`` is set, the search iterator is wrapped
        in a tqdm progress bar before it is consumed.
        """
        api = GalaxyAPI()
        found_roles = api.search_roles(limit=self.config.count)

        if self.config.progress:
            found_roles = tqdm(
                    found_roles, desc='Searching roles',
                    total=self.config.count)
        return ResultMap(found_roles)
Example #3
0
def test_unstructure() -> None:
    """Unstructuring a ResultMap yields plain dicts keyed by item id."""
    expected = {
        'test': {'id': 'test', 'val': 1},
        'tset': {'id': 'tset', 'val': -1},
    }

    unstructured = ResultMap(DATA).unstructure()

    assert unstructured == expected
Example #4
0
    def run(
            self, extract_versions: ResultMap[RepoVersions],
            clone: ResultMap[GitRepo]
    ) -> ResultMap[RepoDiffMetrics]:
        """Compute diff metrics between consecutive versions of each repo.

        :param extract_versions: Extracted versions, keyed by repository id.
        :param clone: Cloned repositories, looked up with the same ids.
        :returns: A ResultMap of the per-repository ``diff_repo`` results.

        The results are also passed to ``export_to_csv`` before returning.
        """
        # Pair every version with its successor: (v0, v1), (v1, v2), ...
        repos_to_bumps: Dict[GitRepo, List[Tuple[Version, Version]]]
        repos_to_bumps = defaultdict(list)
        for repo_id, repo_versions in extract_versions.items():
            repos_to_bumps[clone[repo_id]].extend(
                    zip(repo_versions.versions, repo_versions.versions[1:]))

        # Repositories without any version pair have nothing to diff.
        repo_list: Iterable[Tuple[GitRepo, List[Tuple[Version, Version]]]]
        bump_pbar: Optional[tqdm]
        repo_list = [(r, b) for r, b in repos_to_bumps.items() if b]
        if self.config.progress:
            repo_list = tqdm(
                    repo_list, desc='Extract structural diffs',
                    unit=' repos')
            bump_pbar = tqdm(
                    desc='Extract structural diffs', unit=' version pairs',
                    total=sum(
                        len(bumps) for bumps in repos_to_bumps.values()))
        else:
            bump_pbar = None

        try:
            results = [
                self.diff_repo(repo, bumps, bump_pbar)
                for repo, bumps in repo_list]
        finally:
            # The version-pair bar is managed manually, so make sure it is
            # closed even when diffing a repository raises.
            if bump_pbar is not None:
                bump_pbar.close()

        self.export_to_csv(results)
        return ResultMap(results)
Example #5
0
def test_stage_run_empty_input(config: CloneConfig, mock_clone: None) -> None:
    """Running Clone on an empty input map gives an empty (falsy) result."""
    stage = Clone(config)
    no_roles: ResultMap[GalaxyRole] = ResultMap([])

    cloned = stage.run(no_roles)

    assert not cloned
Example #6
0
    def run(self, analyze_versions: ResultMap[AnalyzedRepoVersions],
            clone: ResultMap[GitRepoPath],
            discover: ResultMap[GalaxyRole]) -> ResultMap[RepoVersionDiffs]:
        """Build a ``RepoVersionDiffs`` for every analyzed repository.

        Each repository is opened from ``self.config.output_directory``
        joined with the path recorded in ``clone`` under the same id. The
        collected diffs are passed to ``export_to_csv`` together with
        ``clone`` and ``discover``, then returned as a ResultMap.
        """
        analyzed: Iterable[AnalyzedRepoVersions] = analyze_versions.values()
        if self.config.progress:
            analyzed = tqdm(
                analyzed, desc='Extract commits between versions',
                unit=' repos')

        # TODO(ROpdebee): Would be nice if we could speed this up a bit,
        #                 currently takes 1-2s per repository. Likely due to
        #                 the diffing, haven't profiled yet though. Caching
        #                 should already massively help on consecutive runs
        #                 though.
        repo_root = self.config.output_directory
        diffs = [
            RepoVersionDiffs.create(
                versions, git.Repo(repo_root / clone[versions.id].path))
            for versions in analyzed]

        self.export_to_csv(diffs, clone, discover)
        return ResultMap(diffs)
Example #7
0
def test_iter(data: List[Data]) -> None:
    """Iterating a ResultMap yields exactly the ids of its items."""
    result_map: ResultMap[Data] = ResultMap(data)

    seen = {key for key in result_map}
    expected = {item.id for item in data}

    assert seen == expected
Example #8
0
        def run(self, first_stage: ResultMap[MyModel],
                second_stage: ResultMap[NewData]) -> ResultMap[NewData]:
            """Combine every first-stage model with every second-stage item.

            Each pair produces a ``NewData`` whose text is the model's
            ``string`` and the item's ``id`` joined by a single space.
            """
            combined = [
                NewData(model.string + ' ' + item.id)
                for model in first_stage.values()
                for item in second_stage.values()
            ]
            return ResultMap(combined)
Example #9
0
    def run(
            self,
            extract_role_metadata: ResultMap[GalaxyMetadata],
            extract_git_metadata: ResultMap[GitRepoMetadata],
            clone: ResultMap[GitRepo]
    ) -> ResultMap[MultiStructuralRoleModel]:
        """Run the stage.

        For every (repo, role name, revisions) entry returned by
        ``get_role_repositories``, extract one model per revision and,
        unless ``self.config.commits`` is set, one more for ``HEAD`` of the
        branch that was active when the repository was opened. Extractions
        that return ``None`` are counted as failures and skipped.

        :returns: One ``MultiStructuralRoleModel`` per entry, holding the
            models that were extracted successfully.
        """
        role_repos = self.get_role_repositories(extract_role_metadata, clone, extract_git_metadata)
        num_revs = sum(len(revs) for (_, _, revs) in role_repos)
        if not self.config.commits:
            # One additional extraction per repository, for HEAD.
            num_revs += len(role_repos)

        task_list: Iterable[Tuple[GitRepo, str, List[Tuple[str, str]]]]
        rev_pbar: Optional[tqdm]
        if self.config.progress:
            task_list = tqdm(
                    role_repos, desc='Extract structural models',
                    unit=' repos')
            rev_pbar = tqdm(
                    desc='Extract structural models', unit=' revs',
                    total=num_revs)
        else:
            task_list = role_repos
            rev_pbar = None

        results: List[MultiStructuralRoleModel] = []
        failures = 0
        try:
            for repo, role_name, revs in task_list:
                git_repo_obj = git.Repo(self.config.output_directory / 'Repositories' / repo.path)
                save_branch = git_repo_obj.active_branch
                role_models = []
                try:
                    for sha1, rev in revs:
                        model = self.extract(git_repo_obj, role_name, sha1, rev, rev_pbar)
                        if model is None:
                            failures += 1
                        else:
                            role_models.append(model)

                    # Also extract for the latest commit if we're extracting tags.
                    if not self.config.commits:
                        save_branch.checkout(force=True)
                        model = self.extract(git_repo_obj, role_name, 'HEAD', 'HEAD', rev_pbar)
                        if model is None:
                            failures += 1
                        else:
                            role_models.append(model)
                finally:
                    # Make sure to reset the repo to the HEAD from before
                    save_branch.checkout(force=True)
                results.append(MultiStructuralRoleModel(role_name, role_models))
        finally:
            # The revision bar is managed manually, so close it even when
            # opening, extracting from or checking out a repository raises.
            if rev_pbar is not None:
                rev_pbar.close()

        print(f'{failures} roles failed to load')

        return ResultMap(results)
Example #10
0
def test_report_empty(config: CloneConfig,
                      capsys: _pytest.capture.CaptureFixture) -> None:
    """Reporting an empty map prints a zero count and the output dir."""
    stage = Clone(config)
    no_results: ResultMap[GitRepoPath] = ResultMap([])

    stage.report_results(no_results)

    printed = capsys.readouterr().out
    assert 'Cloned 0 ' in printed
    assert str(config.output_directory) in printed
Example #11
0
    def run(self, galaxy_scrape: ResultMap[GalaxyAPIPage]
            ) -> ResultMap[GalaxyMetadata]:
        """Run the stage.

        Builds a ``MetadataMap`` from the scraped pages, verifies its schema
        and converts it into a single ``GalaxyMetadata`` result.
        """
        scraped_pages = list(galaxy_scrape._storage.values())
        page_map = MetadataMap(scraped_pages)
        page_map.verify_schema()

        # NOTE(review): the count is not used below. The lookup is kept
        # as-is so that the point at which this method can fail on a missing
        # 'roles/1' page is unchanged.
        role_count = cast(int, galaxy_scrape['roles/1'].response['count'])

        return ResultMap([GalaxyMetadata.from_metamap(page_map)])
    def run(
        self, extract_structural_models: ResultMap[MultiStructuralRoleModel]
    ) -> ResultMap[StructuralRoleEvolution]:
        """Run the stage.

        Creates one ``StructuralRoleEvolution`` per extracted model. The
        models are consumed lazily, optionally behind a tqdm progress bar
        when ``self.config.progress`` is set.
        """
        pending: Iterable[MultiStructuralRoleModel]
        pending = extract_structural_models.values()
        if self.config.progress:
            pending = tqdm(pending, desc='Extract structural diffs')

        evolutions = (
            StructuralRoleEvolution.create(model) for model in pending)
        return ResultMap(evolutions)
Example #13
0
def test_report(config: CloneConfig, resource: TextIO,
                capsys: _pytest.capture.CaptureFixture) -> None:
    """Reporting the fixture results prints the count and the output dir."""
    stage = Clone(config)
    raw = json.load(resource)
    loaded: ResultMap[GitRepoPath] = ResultMap.structure(
        raw, ResultMap[GitRepoPath])

    stage.report_results(loaded)

    printed = capsys.readouterr().out
    assert 'Cloned 100 ' in printed
    assert str(config.output_directory) in printed
Example #14
0
    def load_from_dataset(self) -> ResultMap[GalaxyMetadata]:
        """Load the results of a previous run from the dataset.

        The metadata is lazily loaded from ``dataset_dir_name`` inside the
        configured output directory and wrapped in a single-entry ResultMap.

        Raises `CacheMiss` when not found in the dataset.
        """
        dataset_dir_path = self.config.output_directory / self.dataset_dir_name

        try:
            return ResultMap(
                [GalaxyMetadata.lazy_load('dummy', dataset_dir_path)])
        except Exception as exc:
            # Any load failure counts as a miss. Catching ``Exception``
            # rather than everything lets KeyboardInterrupt and SystemExit
            # propagate, and chaining keeps the underlying cause visible.
            raise CacheMiss() from exc
Example #15
0
    def run(self) -> ResultMap[GalaxyAPIPage]:
        """Run the stage.

        Loads the pages for every entry of ``API_URLS``, then lets
        ``import_missing_roles`` complete the collection.
        """
        scraped: List[GalaxyAPIPage] = [
            page
            for name, url in API_URLS.items()
            for page in self.load_pages(name, url)
        ]

        # Might happen that some roles in the role page fail to load because
        # of 500 Internal Server Error at Galaxy side. Can't fix this. The
        # role page includes more information though, and we've got both the
        # role search and the roles themselves. Any roles in the search page
        # that aren't present in the role pages need to be loaded separately
        # too. We'll give these incremental page numbers.
        completed = self.import_missing_roles(scraped)
        return ResultMap(completed)
Example #16
0
def test_stage_run(config: CloneConfig, progress: bool, resource: TextIO,
                   mock_clone: None) -> None:
    """Cloning the fixture roles yields two relative, existing repo paths."""
    config.progress = progress
    stage = Clone(config)
    raw = json.load(resource)
    roles: ResultMap[GalaxyRole] = ResultMap.structure(
        raw, ResultMap[GalaxyRole])

    cloned = stage.run(roles)

    assert len(cloned) == 2
    for repo_id, cloned_repo in cloned.items():
        rel_path = cloned_repo.path
        assert not rel_path.is_absolute()
        assert (config.output_directory / rel_path).exists()
        role = roles[repo_id]
        assert cloned_repo.owner == rel_path.parent.name == role.github_user
        assert cloned_repo.name == rel_path.name == role.github_repo
        assert cloned_repo.id == repo_id
Example #17
0
    def run(self, discover: ResultMap[GalaxyRole]) -> ResultMap[GitRepoPath]:
        """Run the stage: Clone the repositories.

        Roles whose repository fails to clone are reported through
        ``tqdm.write`` and left out of the result.
        """
        pending: Iterable[GalaxyRole] = discover.values()
        if self.config.progress:
            pending = tqdm(pending, desc='Cloning repos')

        cloned = set()
        for role in pending:
            user, repo = role.github_user, role.github_repo
            try:
                path = self.clone(user, repo)
            except CloneException as exc:  # pragma: no cover
                tqdm.write(f'Failed to clone repository {user}/{repo}: {exc}')
            else:
                cloned.add(GitRepoPath(
                    role.github_user, role.github_repo, role.id, path))
        return ResultMap(cloned)
Example #18
0
    def run(self, extract_role_metadata: ResultMap[GalaxyMetadata]) -> ResultMap[GitRepo]:
        """Run the stage: Clone the repositories.

        The repositories come from the ``'dummy'`` metadata entry. Those
        that raise ``CloneException`` while being parsed or cloned are
        reported through ``tqdm.write`` and left out of the result.
        """
        pending: Iterable[Repository]
        pending = extract_role_metadata['dummy'].repositories.values()
        if self.config.progress:
            pending = tqdm(pending, desc='Cloning repos')

        cloned = set()
        for repo in pending:
            try:
                user, repo_name = self._parse_info(repo)
                path = self.clone(user, repo_name, repo)
            except CloneException as exc:  # pragma: no cover
                tqdm.write(f'Failed to clone repository {repo.github_url}: {exc}')
            else:
                xref = XrefID(Repository, repo.entity_id)
                cloned.add(GitRepo(user, repo_name, xref, path))
        return ResultMap(cloned)
Example #19
0
 def run(self) -> ResultMap[NewData]:
     """Return a single ``NewData`` built from ``self.config.option``."""
     produced = NewData(self.config.option)
     return ResultMap([produced])
Example #20
0
 def run(self) -> ResultMap[NewData]:
     """Return two fixed ``NewData`` items: 'append', then 'test'."""
     items = [NewData(text) for text in ('append', 'test')]
     return ResultMap(items)
Example #21
0
 def run(self) -> ResultMap[MyModel]:
     """Return a ResultMap constructed from ``TEST_DATA_RUN``."""
     produced = ResultMap(TEST_DATA_RUN)
     return produced
Example #22
0
@attr.s(auto_attribs=True)
class MyModel(Model):
    """Test model for stage result type.

    The ``string`` field doubles as the model's id.
    """
    # Fields are picked up from the annotations (``auto_attribs=True``).
    number: int
    string: str
    boolean: bool

    @property
    def id(self) -> str:
        """Return the ``string`` field, which serves as this model's id."""
        return self.string


# NOTE(review): presumably the file name the stage cache is stored under;
# confirm against the fixtures that use it.
CACHE_NAME = 'cache.json'
# Two sample result sets with disjoint ids. Judging by the names, the first
# stands in for previously cached results and the second for the results of
# an actual run -- TODO confirm.
TEST_DATA_CACHE = ResultMap(
    [MyModel(1, 'test', False),
     MyModel(2, 'test2', True)])
TEST_DATA_RUN = ResultMap(
    [MyModel(3, 'Ran', False),
     MyModel(4, 'Ran 2', True)])
# A MainConfig created without arguments.
CONFIG = MainConfig()

# Forward declarations of classes created in fixtures.
# The declaration sits behind TYPE_CHECKING, so it only exists for static
# type checkers (assuming this is ``typing.TYPE_CHECKING``).
if TYPE_CHECKING:

    class MyStage(Stage[MyModel, MainConfig]):
        # Attributes declared for the type checker; presumably set by the
        # fixture-created stage to record what happened -- TODO confirm.
        has_run: bool
        has_reported: bool
        reported_data: ResultMap[MyModel]

Example #23
0
    def run(self, clone: ResultMap[GitRepo]) -> ResultMap[GitRepoMetadata]:
        """Apply ``extract_meta`` to every cloned repository.

        The repositories are consumed lazily, behind a tqdm progress bar
        sized to ``len(clone)`` when ``self.config.progress`` is set.
        """
        cloned_repos: Iterable[GitRepo] = clone.values()
        if self.config.progress:
            cloned_repos = tqdm(cloned_repos, total=len(clone))

        extracted = (self.extract_meta(repo) for repo in cloned_repos)
        return ResultMap(extracted)
Example #24
0
def test_len(data: List[Data]) -> None:
    """A ResultMap has as many entries as the list it was built from."""
    result_map: ResultMap[Data] = ResultMap(data)
    expected_size = len(data)

    assert len(result_map) == expected_size
Example #25
0
def test_get_item() -> None:
    """Items can be looked up in a ResultMap by their id."""
    result_map: ResultMap[Data] = ResultMap(DATA)

    first, second = DATA[0], DATA[1]
    assert result_map['test'] == first
    assert result_map['tset'] == second
Example #26
0
def test_get_empty() -> None:
    """Indexing an empty ResultMap raises KeyError."""
    empty: ResultMap[Data] = ResultMap([])

    with pytest.raises(KeyError):
        empty['test']
Example #27
0
 def run(self, first_stage: ResultMap[MyModel]) -> ResultMap[NewData]:
     """Map every first-stage model to a ``NewData``.

     Each ``NewData`` wraps the model's ``string`` multiplied by its
     ``number``.
     """
     transformed = []
     for model in first_stage.values():
         repeated = model.string * model.number
         transformed.append(NewData(repeated))
     return ResultMap(transformed)
Example #28
0
def test_structure() -> None:
    """Structuring plain dicts gives the same map as building from DATA."""
    raw = {
        'test': {'id': 'test', 'val': 1},
        'tset': {'id': 'tset', 'val': -1},
    }

    structured: ResultMap[Data] = ResultMap.structure(raw, ResultMap[Data])

    expected = ResultMap(DATA)
    assert structured == expected
Example #29
0
def test_get_nonexistant() -> None:
    """Looking up an id that is not in the map raises KeyError."""
    populated: ResultMap[Data] = ResultMap(DATA)

    with pytest.raises(KeyError):
        populated['doesnt_exist']
Example #30
0
 def run(
     self, extract_versions: ResultMap[RepoVersions]
 ) -> ResultMap[AnalyzedRepoVersions]:
     """Wrap every extracted ``RepoVersions`` in ``AnalyzedRepoVersions``."""
     analyzed = map(AnalyzedRepoVersions, extract_versions.values())
     return ResultMap(analyzed)