def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)

        repo_parser = RepoParser(repo)
        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check gtfo
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_name to a running value
            metric_values = collections.defaultdict(int)

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits[0]
                metric_values.update(get_metric_values(db, compare_commit.sha))
                commits = commits[1:]

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
            )
            pool = multiprocessing.pool.Pool(15)
            for commit, metrics in six.moves.zip(
                    commits, pool.imap(_get_metrics_inner, mp_args),
            ):
                increment_metric_values(metric_values, metrics)
                insert_metric_values(db, metric_values, metric_mapping, commit)
                insert_metric_changes(db, metrics, metric_mapping, commit)
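# A minimal sketch of the `_get_metrics_inner` worker the pool maps over; the
# body is an assumption reconstructed from the sequential variant of this loop
# further below, not a confirmed implementation. It unpacks one of the
# 4-tuples built by `mp_args` above (later variants add an `exclude` element):
# the previous commit (None for the very first commit), the commit to measure,
# and the shared parser objects.
def _get_metrics_inner(mp_args):
    compare_commit, commit, repo_parser, metric_parsers = mp_args
    if compare_commit is None:
        # No parent to diff against: measure the first commit as one big diff
        diff = repo_parser.get_original_commit(commit.sha)
    else:
        diff = repo_parser.get_commit_diff(compare_commit.sha, commit.sha)
    return get_metrics(diff, metric_parsers)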
def load_data(
        database_file: str,
        repo: str,
        repo_type: str,
        package_names: List[str],
        skip_defaults: bool,
        exclude: Pattern[bytes],
        jobs: int,
) -> None:
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)
        has_data = get_metric_has_data(db)

        if repo_type == 'git':
            repo_parser = GitRepoParser(repo)
        elif repo_type == 'hg':
            repo_parser = HgRepoParser(repo)

        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check gtfo
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_id to a running value
            metric_values: Counter[int] = collections.Counter()

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits.pop(0)
                metric_values.update(get_metric_values(db, compare_commit.sha))

            mp_args = zip(
                [compare_commit, *commits],
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
                itertools.repeat(exclude),
            )
            with mapper(jobs) as do_map:
                for commit, metrics in zip(
                        commits, do_map(_get_metrics_inner, mp_args),
                ):
                    update_has_data(db, metrics, metric_mapping, has_data)
                    increment_metrics(metric_values, metric_mapping, metrics)
                    insert_metric_values(db, metric_values, has_data, commit)
                    insert_metric_changes(db, metrics, metric_mapping, commit)
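# A minimal sketch of the `mapper(jobs)` context manager used above, assuming
# it yields `multiprocessing.Pool.imap` for parallel runs and falls back to
# the builtin `map` when jobs == 1; the fallback and pool handling here are
# assumptions, not the project's confirmed code.
import contextlib
import multiprocessing


@contextlib.contextmanager
def mapper(jobs):
    if jobs == 1:
        # Serial mode: no pool to manage, and tracebacks stay readable
        yield map
    else:
        with multiprocessing.Pool(jobs) as pool:
            yield pool.imap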
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
        exclude,
        jobs,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)  # type: Dict[str, int]
        has_data = get_metric_has_data(db)  # type: Dict[int, bool]

        repo_parser = RepoParser(repo)
        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check gtfo
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_id to a running value
            metric_values = collections.Counter()  # type: Counter[int]

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits.pop(0)
                metric_values.update(get_metric_values(db, compare_commit.sha))

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
                itertools.repeat(exclude),
            )
            do_map = mapper(jobs)
            for commit, metrics in six.moves.zip(
                    commits, do_map(_get_metrics_inner, mp_args),
            ):
                update_has_data(db, metrics, metric_mapping, has_data)
                increment_metric_values(metric_values, metric_mapping, metrics)
                insert_metric_values(db, metric_values, has_data, commit)
                insert_metric_changes(db, metrics, metric_mapping, commit)
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
        exclude,
        jobs,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)
        has_data = get_metric_has_data(db)

        repo_parser = RepoParser(repo)
        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check gtfo
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_id to a running value
            metric_values = collections.Counter()

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits.pop(0)
                metric_values.update(get_metric_values(db, compare_commit.sha))

            mp_args = six.moves.zip(
                [compare_commit] + commits,
                commits,
                itertools.repeat(repo_parser),
                itertools.repeat(metric_parsers),
                itertools.repeat(exclude),
            )
            with mapper(jobs) as do_map:
                for commit, metrics in six.moves.zip(
                        commits, do_map(_get_metrics_inner, mp_args),
                ):
                    update_has_data(db, metrics, metric_mapping, has_data)
                    increment_metrics(metric_values, metric_mapping, metrics)
                    insert_metric_values(db, metric_values, has_data, commit)
                    insert_metric_changes(db, metrics, metric_mapping, commit)
def load_data(
        database_file,
        repo,
        package_names,
        skip_defaults,
        tempdir_location,
):
    metric_parsers = get_metric_parsers_from_args(package_names, skip_defaults)

    with sqlite3.connect(database_file) as db:
        metric_mapping = get_metric_mapping(db)

        repo_parser = RepoParser(repo, tempdir_location=tempdir_location)
        with repo_parser.repo_checked_out():
            previous_sha = get_previous_sha(db)
            commits = repo_parser.get_commits(since_sha=previous_sha)

            # If there is nothing to check gtfo
            if len(commits) == 1 and previous_sha is not None:
                return

            # Maps metric_name to a running value
            metric_values = collections.defaultdict(int)

            # Grab the state of our metrics at the last place
            compare_commit = None
            if previous_sha is not None:
                compare_commit = commits[0]
                metric_values.update(get_metric_values(db, compare_commit.sha))
                commits = commits[1:]

            for commit in commits:
                if compare_commit is None:
                    diff = repo_parser.get_original_commit(commit.sha)
                else:
                    diff = repo_parser.get_commit_diff(
                        compare_commit.sha, commit.sha,
                    )

                metrics = get_metrics(diff, metric_parsers)
                increment_metric_values(metric_values, metrics)
                insert_metric_values(db, metric_values, metric_mapping, commit)
                insert_metric_changes(db, metrics, metric_mapping, commit)
                compare_commit = commit
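# Example invocation of the sequential variant above; the database filename,
# repository path, and tempdir value are placeholders, not values the project
# prescribes.
load_data(
    'metrics.db',
    './my-repo',
    package_names=[],
    skip_defaults=False,
    tempdir_location=None,
)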
def test_insert_and_get_metric_values(sandbox):
    with sandbox.db() as db:
        fake_metrics = get_fake_metrics(get_metric_mapping(db))
        fake_commit = get_fake_commit()
        insert_fake_metrics(db)
        assert fake_metrics == get_metric_values(db, fake_commit.sha)
def test_insert_and_get_metric_values(sandbox):
    with sandbox.db() as db:
        fake_metrics = get_fake_metrics(get_metric_mapping(db))
        insert_fake_metrics(db)
        assert fake_metrics == get_metric_values(db, 'a' * 40)
def test_insert_and_get_metric_values(sandbox):
    with sandbox.db() as db:
        fake_metrics = dict.fromkeys(get_metric_mapping(db).values(), 2)
        insert_fake_metrics(db)
        assert fake_metrics == get_metric_values(db, 'b' * 40)
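# A minimal sketch of the fixture helpers the tests above rely on, assuming
# `insert_fake_metrics` writes one value per known metric for two fake commits
# ('a' * 40 and 'b' * 40, matching the shas the tests read back) and that
# `get_fake_metrics` mirrors the first commit's values; the table name, schema,
# and values are assumptions, not the project's confirmed fixtures.
def get_fake_metrics(metric_mapping):
    # One entry per metric id, all set to the value stored for 'a' * 40
    return dict.fromkeys(metric_mapping.values(), 1)


def insert_fake_metrics(db):
    metric_ids = get_metric_mapping(db).values()
    for sha, value in (('a' * 40, 1), ('b' * 40, 2)):
        db.executemany(
            'INSERT INTO metric_data (sha, metric_id, value) VALUES (?, ?, ?)',
            [(sha, metric_id, value) for metric_id in metric_ids],
        )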