Exemple #1
0
def test_fixes():
    """Run the fix checks against every repository.

    For each project returned by ``get_repos()``, loads
    ``data/<project>/test/test_fixes.json`` and hands the loaded data to
    ``test_antikeywords`` and ``test_small_bugs``, in that order.
    """
    for project in get_repos():
        fixes_path = path.join('data', project, 'test', 'test_fixes.json')
        fixes = load_data(fixes_path)
        test_antikeywords(project, fixes)
        test_small_bugs(project, fixes)
Exemple #2
0
def combine():
    """Merge the per-repository author statistics into a single dictionary.

    Loads ``data/<repo>/authors.json`` for every repository returned by
    ``get_repos()``. The first time an author (or a date of an author) is
    seen, its entry is taken over as-is. When the same author/date shows up
    again, the ``'commits'`` lists are concatenated and the ``'changes'``
    counters are summed: index 0 = buggy, 1 = good, 2 = inserted; index 3 is
    the ratio, recomputed as ``buggy / good``.

    :return: dict keyed by author, then by date, holding the merged entries
    """
    started = time()
    combines = dict()
    for repo in get_repos():
        authors = load_data(os.path.join('data', repo, 'authors.json'))
        for author, dates in authors.items():
            merged_dates = combines.get(author)
            if merged_dates is None:
                # First occurrence of this author: adopt the loaded entry.
                combines[author] = dates
                continue
            for date, day in dates.items():
                merged_day = merged_dates.get(date)
                if merged_day is None:
                    # First occurrence of this date for a known author.
                    merged_dates[date] = day
                    continue
                merged_day['commits'].extend(day['commits'])
                totals = merged_day['changes']
                extra = day['changes']
                for index in range(3):  # buggy, good, inserted
                    totals[index] += extra[index]
                # Recompute the ratio. The divisor is the "good" counter, so
                # that is the value that has to be guarded; the previous code
                # only checked "inserted" and raised ZeroDivisionError for a
                # day that had buggy lines but no good ones.
                if totals[2] == 0:
                    totals[3] = 0
                elif totals[1] == 0:
                    # NOTE(review): 1 mirrors the ``good == 0 -> 1`` rule used
                    # for commit ratios in overview()/devs_table() -- confirm
                    # this is the wanted value for line ratios as well.
                    totals[3] = 1
                else:
                    totals[3] = totals[0] / totals[1]
    log_debug(None, 'Combined authors in {0[0]}s. There are {0[1]} authors.', (time()-started, len(combines)))
    return combines
Exemple #3
0
def get_fixes_stats():
    """Log how many fixed files and fix commits contain no deleted lines.

    Reads ``data/<repo>/test/test_buggy_fix_diffs.json`` for every repository
    returned by ``get_repos()``. A file counts as "no deletes" when its
    ``'deleted'`` list is empty; a commit counts as "no deletes" when none of
    its files has a deleted line.

    The four counters are initialised once, before the repository loop, so
    the figures logged for each repository are running totals over all
    repositories processed so far.
    """
    files_without_deletes = 0
    files_seen = 0
    commits_seen = 0
    commits_without_deletes = 0
    for repo in get_repos():
        diffs_path = path.join('data', repo, 'test', 'test_buggy_fix_diffs.json')
        differences = load_data(diffs_path)
        for commit in differences.values():
            commits_seen += 1
            commit_has_deletes = False
            for changed_files in commit.values():
                for changed in changed_files:
                    if len(changed['deleted']) == 0:
                        files_without_deletes += 1
                        files_seen += 1
                    else:
                        files_seen += 1
                        commit_has_deletes = True
            if not commit_has_deletes:
                commits_without_deletes += 1
        log_info(
            repo,
            'Number of fixed files with no deleted lines is {0[0]} out of {0[1]}',
            (files_without_deletes, files_seen))
        log_info(
            repo,
            'Number of fixes with no deleted lines is {0[0]} out of {0[1]}',
            (commits_without_deletes, commits_seen))
def test_dates():
    """Check that every commit is filed under the right date of its author.

    For each project, every commit from ``load_commits`` is looked up in the
    data from ``load_authors`` by ``author_email`` and by the UTC date of its
    timestamp (formatted ``%Y/%m/%d``). Commits whose author is unknown are
    skipped. A missing date is only reported for years 2014 up to and
    including 2017; a commit whose sha is not the first key of any entry in
    the date's ``'commits'`` list is always reported. If nothing was reported
    for a project, an OK message is logged.
    """
    for project in get_repos():
        commits = load_commits(path.join('data', project))
        authors = load_authors(project)
        errors = []
        for commit in commits:
            days = authors.get(commit['author_email'])
            if days is None:
                continue
            when = datetime.utcfromtimestamp(int(commit['time']))
            day = days.get(when.strftime('%Y/%m/%d'))
            if day is not None:
                # Each entry of 'commits' is a dict; its first key is compared
                # against the commit's sha.
                recorded = [
                    list(entry.keys())[0] for entry in day.get('commits')
                ]
                if commit['commit'] not in recorded:
                    errors.append(commit['commit'])
                    log_info(project, 'No commit{0[0]} for {0[1]}',
                             (commit['commit'], commit['time']))
            elif 2014 <= when.year < 2018:
                log_info(
                    project,
                    'No such date: {0[0]} for {0[1]}. The commit timestamp: {0[2]}',
                    (when, commit['author_email'], commit['time']))
                errors.append(commit['commit'])
        if not errors:
            log_info(project, 'OK -- All dates correct', None)
Exemple #5
0
def overview():
    """
    Creates overview of all repos with data: repo, min line num, max line ratio, max daily line ratio,
    commit min, commit ratio max, devs num, devs with ratio of exactly 0.

    The per-repo list is laid out as
    ``[min sum, max ratio, max daily ratio, min commits, max commit ratio, devs, devs with ratio 0]``.
    Each list is logged with ``log_debug`` and collected in the returned dict
    (previously the dict was built and then thrown away).

    NOTE: the two minimums start at 1000 (index 0) and 10 (index 3), so the
    reported minimum can never exceed those starting values.

    :return: dict mapping repo name to its list of seven values
    :raises IndexError: if an author has no entry in authors_commits_ratio.json
    """
    summary = dict()
    for repo in get_repos():
        ratios_dir = os.path.join('data', repo, 'authors_line_ratio.json')
        commit_dir = os.path.join('data', repo, 'authors_commits_ratio.json')
        authors = load_data(ratios_dir)
        commits = load_data(commit_dir)
        data = [1000, 0, 0, 10, 0, 0, 0]
        for author in authors:
            stats = author['commits']
            data[0] = min(data[0], stats['sum'])
            data[1] = max(data[1], stats['ratio'])
            data[2] = max(data[2], stats['daily_ratio'])
            # (buggy, good) commit counts of this developer; the first
            # matching record is the one that is used.
            matches = [(x['commits']['buggy'], x['commits']['good'])
                       for x in commits if x['dev'] == author['dev']]
            buggy, good = matches[0]
            data[3] = min(data[3], buggy + good)
            # A developer without any good commit gets a commit ratio of 1.
            commit_ratio = 1 if good == 0 else buggy / good
            data[4] = max(data[4], commit_ratio)
            data[5] += 1
            if stats['ratio'] == 0:
                data[6] += 1
        log_debug(repo, data, None)
        summary[repo] = data
    return summary
Exemple #6
0
def test_buggy():
    """Run every buggy-line check on each repository from ``get_repos()``.

    The checks run per project in this fixed order: annotated, annotated
    sums, annotated negatives, duplicates, lines.
    """
    for project in get_repos():
        test_buggy_annotated(project)
        test_annotated_sums(project)
        test_annotated_negatives(project)
        test_buggy_duplicates(project)
        test_buggy_lines(project)
def test_changes_sum():
    """Check that ``changes[0] + changes[1] == changes[2]`` for every day.

    For each project, every (author, date) pair whose ``'changes'`` list
    violates that equality is collected and the collection is handed to
    ``report_results`` under the name ``'test_changes_sum'``.
    """
    for project in get_repos():
        errors = []
        for author, dates in load_authors(project).items():
            for date, day in dates.items():
                first_two = day.get('changes')[0] + day.get('changes')[1]
                if first_two != day.get('changes')[2]:
                    errors.append((author, date))
        report_results(errors, project, 'test_changes_sum')
def test_changes_inserted():
    """Check that ``changes[2]`` equals the number of inserted lines per day.

    For each project, author and date, the lengths of the ``'inserted'``
    lists of all files of all commits of that day are summed and compared
    with ``changes[2]``. Mismatches are collected as ``(author, identifier)``
    and passed to ``report_results`` under ``'test_changes_inserted'``.

    The identifier is the first key of the day's last commit entry. When the
    day has no commits at all, the date is used instead; previously that case
    read a loop variable that was either unbound or left over from an earlier
    date, so it crashed or reported an unrelated commit.
    """
    for project in get_repos():
        authors = load_authors(project)
        errors = list()
        for author, dates in authors.items():
            for date, d in dates.items():
                inserted = 0
                last_commit = None
                for commit in d.get('commits'):
                    last_commit = commit
                    for files in commit.values():
                        for changed in files:
                            inserted += len(changed.get('inserted'))
                if inserted != d.get('changes')[2]:
                    if last_commit is None:
                        identifier = date
                    else:
                        identifier = list(last_commit.keys())[0]
                    errors.append((author, identifier))
        report_results(errors, project, 'test_changes_inserted')
Exemple #9
0
def devs_table():
    """
    Create a csv file summing up the data of each dev for each repo separately.

    Writes ``data/<repo>/authors.csv`` with one header row and one row per
    developer. Columns, in order: Author, Line Ratio, Daily Line Ratio, Lines,
    Commit Ratio, Commits. The commit ratio is ``buggy / good`` commits, or 1
    when the developer has no good commit; Commits is ``buggy + good``.

    :return: None
    :raises IndexError: if an author has no entry in authors_commits_ratio.json
    """
    repos = get_repos()
    # todo: should we do it one for all repos?
    for repo in repos:
        rows = [[
            'Author', 'Line Ratio', 'Daily Line Ratio', 'Lines',
            'Commit Ratio', 'Commits'
        ]]
        ratios_dir = os.path.join('data', repo, 'authors_line_ratio.json')
        commit_dir = os.path.join('data', repo, 'authors_commits_ratio.json')
        authors = load_data(ratios_dir)
        commits = load_data(commit_dir)
        for author in authors:
            stats = author['commits']
            # (buggy, good) commit counts of this developer; the first
            # matching record is the one that is used.
            matches = [(x['commits']['buggy'], x['commits']['good'])
                       for x in commits if x['dev'] == author['dev']]
            buggy, good = matches[0]
            commit_ratio = 1 if good == 0 else buggy / good
            rows.append([
                author['dev'], stats['ratio'], stats['daily_ratio'],
                stats['sum'], commit_ratio, buggy + good
            ])
        out_dir = os.path.join('data', repo)
        save_in_file([], 'authors.csv', out_dir)
        # newline='' lets the csv module control line endings itself; without
        # it every row is followed by a blank line on Windows.
        with open(os.path.join(out_dir, 'authors.csv'), 'w', newline='') as csv_file:
            csv.writer(csv_file).writerows(rows)
        log_debug(repo, 'done', None)
Exemple #10
0
def get_authors():
    """Call ``prep_data`` once for every repository from ``get_repos()``."""
    repos = get_repos()
    for repo in repos:
        prep_data(repo)
Exemple #11
0
def run_fixes():
    """Call ``extract_fixes`` for every repository from ``get_repos()``.

    Each call receives the repository's ``data/<repo>`` directory and the
    repository name.
    """
    for repo in get_repos():
        extract_fixes(os.path.join('data', repo), repo)
Exemple #12
0
def get_data():
    """Save commits and bugs for every repository from ``get_repos()``.

    ``save_commits`` receives the repository name, its ``repos/<repo>`` path
    and its ``data/<repo>`` path; ``save_bugs`` is then called with the
    repository name.
    """
    for repo in get_repos():
        source_dir = os.path.join('repos', repo)
        data_dir = os.path.join('data', repo)
        save_commits(repo, source_dir, data_dir)
        save_bugs(repo)
Exemple #13
0
def load_all():
    """Append every author of every repository to the shared collections.

    For each (author, data) pair from ``load_authors(project)``, the author
    key is appended to ``all_authors`` and a one-item ``{author: data}`` dict
    is appended to ``all``.

    NOTE(review): ``all_authors`` and ``all`` are not defined in this
    function; ``all`` must be a list defined elsewhere that shadows the
    builtin of the same name -- confirm.
    """
    for project in get_repos():
        for author, data in load_authors(project).items():
            all_authors.append(author)
            all.append({author: data})