Example #1
def load_data(
    database_file,
    repo,
    package_names,
    skip_defaults,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)

        repo_parser = RepoParser(repo)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_name to a running value
            metric_values = collections.defaultdict(int)

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits[0]
                metric_values.update(
                    get_metric_values(
                        db,
                        compare_commit.sha,
                    ))
                commits = commits[1:]

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
            )
            pool = multiprocessing.pool.Pool(15)
            for commit, metrics in six.moves.zip(
                    commits,
                    pool.imap(_get_metrics_inner, mp_args),
            ):
                increment_metric_values(metric_values, metrics)
                insert_metric_values(
                    db,
                    metric_values,
                    metric_mapping,
                    commit,
                )
                insert_metric_changes(db, metrics, metric_mapping, commit)
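
Note: pool.imap hands each zipped tuple to the worker as a single argument, so _get_metrics_inner must unpack it itself. A minimal sketch of such a worker, patterned on the sequential versions below (Examples #5 and #7); illustrative only, not the project's actual implementation:

def _get_metrics_inner(mp_args):
    # Unpack the tuple built by zip(...) in load_data above.
    compare_commit, commit, repo_parser, metric_parsers = mp_args
    # The first commit has no predecessor to diff against, so parse it in full.
    if compare_commit is None:
        diff = repo_parser.get_original_commit(commit.sha)
    else:
        diff = repo_parser.get_commit_diff(compare_commit.sha, commit.sha)
    return get_metrics(diff, metric_parsers)
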
Example #2
def load_data(
    database_file: str,
    repo: str,
    repo_type: str,
    package_names: List[str],
    skip_defaults: bool,
    exclude: Pattern[bytes],
    jobs: int,
) -> None:
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)
        has_data = get_metric_has_data(db)

        if repo_type == 'git':
            repo_parser = GitRepoParser(repo)
        elif repo_type == 'hg':
            repo_parser = HgRepoParser(repo)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_id to a running value
            metric_values: Counter[int] = collections.Counter()

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits.pop(0)
                metric_values.update(get_metric_values(db, compare_commit.sha))

            mp_args = zip(
                [compare_commit, *commits],
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
                itertools.repeat(exclude),
            )
            with mapper(jobs) as do_map:
                for commit, metrics in zip(
                        commits,
                        do_map(_get_metrics_inner, mp_args),
                ):
                    update_has_data(db, metrics, metric_mapping, has_data)
                    increment_metrics(metric_values, metric_mapping, metrics)
                    insert_metric_values(db, metric_values, has_data, commit)
                    insert_metric_changes(db, metrics, metric_mapping, commit)
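
Here mapper(jobs) is used as a context manager that yields a map-like callable. A minimal sketch of what such a helper could look like, assuming it falls back to the built-in map when only one job is requested; this is an assumption, not the project's code:

import contextlib
import multiprocessing

@contextlib.contextmanager
def mapper(jobs):
    # Serial fallback: the built-in map needs no pool and no cleanup.
    if jobs == 1:
        yield map
    else:
        # The pool is terminated automatically when the with-block exits.
        with multiprocessing.Pool(jobs) as pool:
            yield pool.imap
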
Example #3
def load_data(
    database_file,
    repo,
    package_names,
    skip_defaults,
    exclude,
    jobs,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)  # type: Dict[str, int]
        has_data = get_metric_has_data(db)  # type: Dict[int, bool]

        repo_parser = RepoParser(repo)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_id to a running value
            metric_values = collections.Counter()  # type: Counter[int]

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits.pop(0)
                metric_values.update(get_metric_values(db, compare_commit.sha))

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
                itertools.repeat(exclude),
            )
            do_map = mapper(jobs)
            for commit, metrics in six.moves.zip(
                    commits,
                    do_map(_get_metrics_inner, mp_args),
            ):
                update_has_data(db, metrics, metric_mapping, has_data)
                increment_metric_values(metric_values, metric_mapping, metrics)
                insert_metric_values(db, metric_values, has_data, commit)
                insert_metric_changes(db, metrics, metric_mapping, commit)
Example #4
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
        exclude,
        jobs,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)
        has_data = get_metric_has_data(db)

        repo_parser = RepoParser(repo)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_id to a running value
            metric_values = collections.Counter()

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits.pop(0)
                metric_values.update(get_metric_values(db, compare_commit.sha))

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
                itertools.repeat(exclude),
            )
            with mapper(jobs) as do_map:
                for commit, metrics in six.moves.zip(
                        commits, do_map(_get_metrics_inner, mp_args),
                ):
                    update_has_data(db, metrics, metric_mapping, has_data)
                    increment_metrics(metric_values, metric_mapping, metrics)
                    insert_metric_values(db, metric_values, has_data, commit)
                    insert_metric_changes(db, metrics, metric_mapping, commit)
Example #5
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
        tempdir_location,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)

        repo_parser = RepoParser(repo, tempdir_location=tempdir_location)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_name to a running value
            metric_values = collections.defaultdict(int)

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits[0]
                metric_values.update(get_metric_values(
                    db, compare_commit.sha,
                ))
                commits = commits[1:]

            for commit in commits:
                if compare_commit is None:
                    diff = repo_parser.get_original_commit(commit.sha)
                else:
                    diff = repo_parser.get_commit_diff(
                        compare_commit.sha, commit.sha,
                    )

                metrics = get_metrics(diff, metric_parsers)
                increment_metric_values(metric_values, metrics)
                insert_metric_values(db, metric_values, metric_mapping, commit)
                insert_metric_changes(db, metrics, metric_mapping, commit)

                compare_commit = commit
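
This sequential version makes the data flow explicit: each commit is diffed against its predecessor, and the per-commit metrics are folded into the running totals. One plausible shape for increment_metric_values, assuming each parsed metric carries a name and a numeric delta (both attribute names are illustrative, not the project's API):

def increment_metric_values(metric_values, metrics):
    # metric_values maps metric_name -> running total; each parsed
    # metric contributes its delta for the current commit.
    for metric in metrics:
        metric_values[metric.name] += metric.value
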
Example #6
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)

        repo_parser = RepoParser(repo)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_name to a running value
            metric_values = collections.defaultdict(int)

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits[0]
                metric_values.update(get_metric_values(
                    db, compare_commit.sha,
                ))
                commits = commits[1:]

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
            )
            pool = multiprocessing.pool.Pool(15)
            for commit, metrics in six.moves.zip(
                    commits, pool.imap(_get_metrics_inner, mp_args),
            ):
                increment_metric_values(metric_values, metrics)
                insert_metric_values(
                    db, metric_values, metric_mapping, commit,
                )
                insert_metric_changes(db, metrics, metric_mapping, commit)
Example #7
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
        tempdir_location,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)

        repo_parser = RepoParser(repo, tempdir_location=tempdir_location)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check, return early
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_name to a running value
            metric_values = collections.defaultdict(int)

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits[0]
                metric_values.update(get_metric_values(
                    db, compare_commit.sha,
                ))
                commits = commits[1:]

            for commit in commits:
                if compare_commit is None:
                    diff = repo_parser.get_original_commit(commit.sha)
                else:
                    diff = repo_parser.get_commit_diff(
                        compare_commit.sha, commit.sha,
                    )

                metrics = get_metrics(diff, metric_parsers)
                increment_metric_values(metric_values, metrics)
                insert_metric_values(db, metric_values, metric_mapping, commit)
                insert_metric_changes(db, metrics, metric_mapping, commit)

                compare_commit = commit
Example #8
def test_insert_and_get_metric_values(sandbox):
    with sandbox.db() as db:
        fake_metrics = get_fake_metrics(get_metric_mapping(db))
        fake_commit = get_fake_commit()
        insert_fake_metrics(db)
        assert fake_metrics == get_metric_values(db, fake_commit)
Example #9
def test_insert_and_get_metric_values(sandbox):
    with sandbox.db() as db:
        fake_metrics = get_fake_metrics(get_metric_mapping(db))
        insert_fake_metrics(db)
        assert fake_metrics == get_metric_values(db, 'a' * 40)
Example #10
def test_insert_and_get_metric_values(sandbox):
    with sandbox.db() as db:
        fake_metrics = dict.fromkeys(get_metric_mapping(db).values(), 2)
        insert_fake_metrics(db)
        assert fake_metrics == get_metric_values(db, 'b' * 40)
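
In Example #10, dict.fromkeys(get_metric_mapping(db).values(), 2) builds the expected state directly: every metric id mapped to the constant 2. A quick illustration of that call:

>>> dict.fromkeys([1, 2, 3], 2)
{1: 2, 2: 2, 3: 2}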