Ejemplo n.º 1
0
def static_analysis_by_folder(folders: list, date: str) -> dict:
    """Return the static-analysis summary of the target folders for one day.

    Reads ``files_detail_<date>.csv``, ``methods_detail_<date>.csv`` and
    ``config.json`` from the current working directory. A CSV row is counted
    when its 'File Name' starts with one of the folders and
    ``is_keep('Source/media/media_driver/' + file_name, config)`` is truthy.

    Arguments:
        folders {list} -- folder prefixes to analyse; '/' is converted to
            '\\' before matching. The caller's list is left untouched.
        date {str} -- date suffix used in the two CSV file names

    Returns:
        dict -- keys 'LOC' (sum of 'Lines'), 'Average Complexity' (mean of
            the per-file 'Average Complexity*' values, rounded to 2 digits,
            0.0 when no file is selected), 'Max Complexity' (largest
            'Maximum Complexity*'), and the method counts per complexity
            range '1~10', '11~20', '21~50', '>50'.
    """
    files_csv = 'files_detail_' + date + '.csv'
    methods_csv = 'methods_detail_' + date + '.csv'

    # Build a normalised copy instead of rewriting the caller's list in
    # place; a tuple can be handed straight to str.startswith.
    prefixes = tuple(folder.replace('/', '\\') for folder in folders)

    total_complex_ave = 0
    total_complex_max = 0
    total_lines = 0
    total_files = 0
    total_range = {'1~10': 0, '11~20': 0, '21~50': 0, '>50': 0}

    # newline='' is what the csv module expects for files it parses.
    with open(files_csv, 'r', newline='') as fin, \
            open(methods_csv, 'r', newline='') as fin2, \
            open('config.json', 'r') as config_file:
        config = json.load(config_file)

        def _selected(row):
            # Shared filter for both CSVs: folder prefix first, then the
            # config check (only evaluated when the prefix matched).
            file_name = row['File Name']
            return file_name.startswith(prefixes) and is_keep(
                'Source/media/media_driver/' + file_name, config)

        for row in csv.DictReader(fin):
            if not _selected(row):
                continue
            total_files += 1
            total_lines += int(row['Lines'])
            total_complex_ave += float(row['Average Complexity*'])
            total_complex_max = max(total_complex_max,
                                    int(row['Maximum Complexity*']))

        for row in csv.DictReader(fin2):
            if not _selected(row):
                continue
            complexity = int(row['Complexity'])
            if complexity > 50:
                total_range['>50'] += 1
            elif complexity > 20:
                total_range['21~50'] += 1
            elif complexity > 10:
                total_range['11~20'] += 1
            else:
                total_range['1~10'] += 1

    # Guard the mean: with no selected file the division would raise
    # ZeroDivisionError.
    if total_files:
        average_complexity = round(total_complex_ave / total_files, 2)
    else:
        average_complexity = 0.0

    return {
        'LOC': total_lines,
        'Average Complexity': average_complexity,
        'Max Complexity': total_complex_max,
        '1~10': total_range['1~10'],
        '11~20': total_range['11~20'],
        '21~50': total_range['21~50'],
        '>50': total_range['>50']
    }
Ejemplo n.º 2
0
    def analyze_code_churn_by_period(self, period: str, date_range: list,
                                     category_list: list, classify_func,
                                     tmp_folders: list) -> dict:
        """Aggregate code churn per component and per period bucket.

        Commits with ``commitDate`` on or after the range start are fetched,
        sorted by ``commitDate`` with ``DESCENDING``. Commits later than the
        range end only have their file names passed to ``update_fileset``;
        the remaining commits are counted.

        Args:
            period (str): 'daily','weekly','monthly'
            date_range (list): [start,end] yyyy-MM-DD, yyyy-WW, yyyy-MM for
                daily,weekly,monthly respectively
            category_list (list): all the components in the category; an
                empty list puts everything under the single component 'All'
                and classify_func is not called. Unused in 'subfolder' mode.
            classify_func ([function]): maps a filename to its component, or
                the string 'subfolder' to use ``get_folder(filename)`` as the
                component.
            tmp_folders (list): passed to ``self.get_tmp_config`` to build
                the config that ``is_keep`` filters filenames with.

        Returns:
            dict:  {component1:{date:{loc,files,commit,regression},...},
                    component2:{date:{loc,files,commit,regression},...},...}
        """
        after, before = get_start_end(period, date_range)
        latest_names = FileSet()  # handed to update_fileset for every file
        files_by_period = {}  # {date_param: {filename, ...}}
        churn = {}  # result being built
        config = self.get_tmp_config(tmp_folders)
        counter_keys = ('loc', 'files', 'commit', 'regression')

        by_subfolder = classify_func == 'subfolder'
        use_classifier = True
        if not by_subfolder:
            if len(category_list) == 0:
                category_list = ['All']
                use_classifier = False
            for name in category_list:
                churn[name] = {}

        query = {"commitDate": {"$gte": after}}
        projection = {
            '_id': False,
            'files': True,
            'regressions': True,
            'commitDate': True
        }
        cursor = self.commit.find(query, projection).sort(
            'commitDate', DESCENDING)

        for commit in cursor:
            commit_date = commit['commitDate']
            changed = commit['files']
            if commit_date > before:
                # Past the end of the range: only register the file names.
                for entry in changed:
                    self.update_fileset(latest_names, entry['filename'])
                continue

            date_param = get_date_param(period, commit_date)
            period_files = files_by_period.setdefault(date_param, set())
            comps_in_commit = set()  # components already credited this commit
            has_regression = 'regressions' in commit

            if not by_subfolder:
                # Every known category gets a bucket for this period, even
                # when the commit does not touch it.
                for name in category_list:
                    if date_param not in churn[name]:
                        churn[name][date_param] = dict.fromkeys(
                            counter_keys, 0)

            for entry in changed:
                raw_name = entry['filename']
                add = entry['add']
                delete = entry['delete']
                filename = self.update_fileset(latest_names, raw_name)
                untouched = add == 0 and delete == 0
                # Skip files rejected by the config or without any change.
                if not is_keep(filename, config) or untouched:
                    continue

                if by_subfolder:
                    # Components are discovered on the fly in this mode.
                    comp = get_folder(filename)
                    churn.setdefault(comp, {}).setdefault(
                        date_param, dict.fromkeys(counter_keys, 0))
                elif use_classifier:
                    comp = classify_func(filename)
                else:
                    comp = 'All'

                # A file counts once per period, a component once per commit.
                first_in_period = filename not in period_files
                period_files.add(filename)
                first_in_commit = comp not in comps_in_commit
                comps_in_commit.add(comp)

                bucket = churn[comp][date_param]
                bucket['loc'] += add + delete
                bucket['files'] += 1 if first_in_period else 0
                bucket['commit'] += 1 if first_in_commit else 0
                bucket['regression'] += (
                    1 if first_in_commit and has_regression else 0)

        return churn
Ejemplo n.º 3
0
    def analyze_group_total_code_churn(self, date_range: list,
                                       groups: list) -> dict:
        """Aggregate code churn over date_range for each of the given groups.

        Each group is turned into a config via ``self.get_tmp_config``; a
        changed file is credited to every group whose config ``is_keep``
        accepts, so one file may count for several groups.

        Args:
            date_range (list): [dayStart, dayEnd] yyyy-MM-DD inclusive
            groups (list): nested arrays indicating groups
                [[folder1,folder2],[folder3],[folder4,folder5]...]

        Returns:
            dict: {'Group1': {loc,files,commit,regression},
                   'Group2': {loc,files,commit,regression}, ...}
                  numbered in the order of `groups`.
        """
        after, before = date_range
        latest_names = FileSet()  # handed to update_fileset for every file
        configs = []
        seen_files = []  # one set of already-counted filenames per group
        for group in groups:
            configs.append(self.get_tmp_config(group))
            seen_files.append(set())

        labels = ['Group' + str(number)
                  for number in range(1, len(groups) + 1)]
        churn = {}
        for label in labels:
            churn[label] = dict.fromkeys(
                ('loc', 'files', 'commit', 'regression'), 0)

        query = {"commitDate": {"$gte": after}}
        projection = {
            '_id': False,
            'files': True,
            'regressions': True,
            'commitDate': True
        }
        cursor = self.commit.find(query, projection).sort(
            'commitDate', DESCENDING)

        for commit in cursor:
            commit_date = commit['commitDate']
            changed = commit['files']
            if commit_date > before:
                # Past the end of the range: only register the file names.
                for entry in changed:
                    self.update_fileset(latest_names, entry['filename'])
                continue

            groups_in_commit = set()  # groups already credited this commit
            has_regression = 'regressions' in commit

            for entry in changed:
                raw_name = entry['filename']
                add = entry['add']
                delete = entry['delete']
                filename = self.update_fileset(latest_names, raw_name)
                if add == 0 and delete == 0:  # nothing changed in this file
                    continue

                for label, config, group_files in zip(labels, configs,
                                                      seen_files):
                    if not is_keep(filename, config):
                        continue
                    # A file counts once per group over the whole range, a
                    # group once per commit.
                    first_for_group = filename not in group_files
                    group_files.add(filename)
                    first_in_commit = label not in groups_in_commit
                    groups_in_commit.add(label)

                    totals = churn[label]
                    totals['loc'] += add + delete
                    totals['files'] += 1 if first_for_group else 0
                    totals['commit'] += 1 if first_in_commit else 0
                    totals['regression'] += (
                        1 if first_in_commit and has_regression else 0)

        return churn
Ejemplo n.º 4
0
    def analyze_total_code_churn(self, date_range: list, category_list: list,
                                 classify_func, tmp_folders: list) -> dict:
        """Aggregate code churn over date_range, classified by component.

        Args:
            date_range (list): [dayStart, dayEnd] yyyy-MM-DD inclusive
            category_list (list): all the components in the category; only
                these names get an entry in the result, so classify_func
                must return one of them (any other value raises KeyError).
            classify_func ([type]): the function to classify each component,
                called with the filename
            tmp_folders (list): passed to ``self.get_tmp_config`` to build
                the config that ``is_keep`` filters filenames with.

        Returns:
            dict: {component1: {loc,files,commit,regression},
                   component2: {loc,files,commit,regression}, ...}
        """
        after, before = date_range
        latest_names = FileSet()  # handed to update_fileset for every file
        counted_files = set()  # filenames already counted in 'files'
        churn = {}
        config = self.get_tmp_config(tmp_folders)
        for name in category_list:
            churn[name] = dict.fromkeys(
                ('loc', 'files', 'commit', 'regression'), 0)

        query = {"commitDate": {"$gte": after}}
        projection = {
            '_id': False,
            'files': True,
            'commitDate': True,
            'regressions': True
        }
        cursor = self.commit.find(query, projection).sort(
            'commitDate', DESCENDING)

        for commit in cursor:
            commit_date = commit['commitDate']
            changed = commit['files']
            if commit_date > before:
                # Past the end of the range: only register the file names.
                for entry in changed:
                    self.update_fileset(latest_names, entry['filename'])
                continue

            comps_in_commit = set()  # components already credited this commit
            has_regression = 'regressions' in commit

            for entry in changed:
                raw_name = entry['filename']
                add = entry['add']
                delete = entry['delete']
                filename = self.update_fileset(latest_names, raw_name)
                untouched = add == 0 and delete == 0
                # Skip files rejected by the config or without any change.
                if not is_keep(filename, config) or untouched:
                    continue

                comp = classify_func(filename)
                # A file counts once over the whole range, a component once
                # per commit.
                first_time_file = filename not in counted_files
                counted_files.add(filename)
                first_in_commit = comp not in comps_in_commit
                comps_in_commit.add(comp)

                totals = churn[comp]
                totals['loc'] += add + delete
                totals['files'] += 1 if first_time_file else 0
                totals['commit'] += 1 if first_in_commit else 0
                totals['regression'] += (
                    1 if first_in_commit and has_regression else 0)

        return churn