import csv
import json

from pymongo import DESCENDING

# NOTE: FileSet, is_keep, get_start_end, get_date_param and get_folder are
# project-local helpers assumed to be imported elsewhere in this module.


def static_analysis_by_folder(folders: list, date: str) -> dict:
    """Return the static analysis result for the target folders on one day.

    Arguments:
        folders {list} -- the target folders to be analysed
        date {str} -- the day to analyse; must match the dated CSV reports

    Returns:
        dict -- the analysis result
    """
    dataFile1 = 'files_detail_' + date + '.csv'
    dataFile2 = 'methods_detail_' + date + '.csv'
    total_complex_ave = 0
    total_complex_max = 0
    total_lines = 0
    total_files = 0
    total_range = {'1~10': 0, '11~20': 0, '21~50': 0, '>50': 0}
    # The CSV reports use Windows-style path separators.
    for idx in range(len(folders)):
        folders[idx] = folders[idx].replace('/', '\\')
    with open(dataFile1, 'r') as fin:
        with open(dataFile2, 'r') as fin2:
            with open('config.json', 'r') as config_file:
                config = json.loads(config_file.read())
            data = csv.DictReader(fin)
            data2 = csv.DictReader(fin2)
            # Per-file metrics: lines of code and complexity.
            for i in data:
                flag = False
                for folder in folders:
                    if i['File Name'].startswith(folder):
                        flag = True
                        break
                flag = flag and is_keep(
                    'Source/media/media_driver/' + i['File Name'], config)
                if flag:
                    total_files += 1
                    lines_i = int(i['Lines'])
                    total_lines += lines_i
                    complex_ave_i = float(i['Average Complexity*'])
                    complex_max_i = int(i['Maximum Complexity*'])
                    total_complex_ave += complex_ave_i
                    if complex_max_i > total_complex_max:
                        total_complex_max = complex_max_i
            # Per-method metrics: bucket methods by complexity.
            for i in data2:
                flag = False
                for folder in folders:
                    if i['File Name'].startswith(folder):
                        flag = True
                        break
                flag = flag and is_keep(
                    'Source/media/media_driver/' + i['File Name'], config)
                if flag:
                    complex_i = int(i['Complexity'])
                    if complex_i > 50:
                        total_range['>50'] += 1
                    elif complex_i > 20:
                        total_range['21~50'] += 1
                    elif complex_i > 10:
                        total_range['11~20'] += 1
                    else:
                        total_range['1~10'] += 1
    result = {
        'LOC': total_lines,
        # Guard against division by zero when no file matches the filters.
        'Average Complexity':
        round(total_complex_ave / total_files, 2) if total_files else 0,
        'Max Complexity': total_complex_max,
        '1~10': total_range['1~10'],
        '11~20': total_range['11~20'],
        '21~50': total_range['21~50'],
        '>50': total_range['>50']
    }
    return result
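
# Usage sketch (hypothetical values): assuming the dated reports
# 'files_detail_2021-01-15.csv' and 'methods_detail_2021-01-15.csv' plus
# 'config.json' exist in the working directory, a call could look like:
#
#   stats = static_analysis_by_folder(['agnostic', 'linux'], '2021-01-15')
#   print(stats['LOC'], stats['Average Complexity'], stats['>50'])
#
# The folder names and date above are illustrative, not taken from real data.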

def analyze_code_churn_by_period(self, period: str, date_range: list,
                                 category_list: list, classify_func,
                                 tmp_folders: list) -> dict:
    """Return code churn analysis by period, in date_range, classified by category_list.

    Args:
        period (str): 'daily', 'weekly', 'monthly'
        date_range (list): [start, end]; yyyy-MM-DD, yyyy-WW, yyyy-MM for
            daily, weekly, monthly respectively
        category_list (list): all the components in the category
        classify_func (function): the function to classify each file into a
            component, or the string 'subfolder' to group by top-level folder
        tmp_folders (list): folders used to build the temporary whitelist config

    Returns:
        dict: {component1: {date: {loc, files, commit, regression}, ...},
               component2: {date: {loc, files, commit, regression}, ...}, ...}
    """
    after, before = get_start_end(period, date_range)
    total_file_set = FileSet()  # record the latest name of every file
    dict_fileset = {}  # {date: set(file1, file2), ...}
    d = {}  # return result
    config = self.get_tmp_config(tmp_folders)
    is_classify = True
    is_subfolder = False
    if classify_func == 'subfolder':
        is_subfolder = True
    else:
        if len(category_list) == 0:
            category_list = ['All']
            is_classify = False
        for category in category_list:
            d[category] = {}
    commits = self.commit.find({
        "commitDate": {
            "$gte": after
        },
    }, {
        '_id': False,
        'files': True,
        'regressions': True,
        'commitDate': True
    }).sort('commitDate', DESCENDING)
    for commit in commits:
        commit_date = commit['commitDate']
        files = commit['files']
        if commit_date > before:
            # Commit is newer than the window: only track renames so that
            # older commits resolve to each file's latest name.
            for f in files:
                filename = f['filename']
                self.update_fileset(total_file_set, filename)
            continue
        commit_comps = set()
        date_param = get_date_param(period, commit_date)
        if date_param not in dict_fileset:
            dict_fileset[date_param] = set()
        if not is_subfolder:
            for category in category_list:
                if date_param not in d[category]:
                    d[category][date_param] = {
                        'loc': 0,
                        'files': 0,
                        'commit': 0,
                        'regression': 0
                    }
        for f in files:
            filename = f['filename']
            add = f['add']
            delete = f['delete']
            filename = self.update_fileset(total_file_set, filename)
            # Skip files outside the config whitelist or with no changes.
            if not is_keep(filename, config) or (add == 0 and delete == 0):
                continue
            is_add_regression = 0
            if is_subfolder:
                comp = get_folder(filename)
                if comp not in d:
                    d[comp] = {}
                if date_param not in d[comp]:
                    d[comp][date_param] = {
                        'loc': 0,
                        'files': 0,
                        'commit': 0,
                        'regression': 0
                    }
            elif is_classify:
                comp = classify_func(filename)
            else:
                comp = 'All'
            if filename in dict_fileset[date_param]:
                is_add_file = 0
            else:
                dict_fileset[date_param].add(filename)
                is_add_file = 1
            if comp in commit_comps:
                is_add_comp = 0
            else:
                commit_comps.add(comp)
                is_add_comp = 1
            if 'regressions' in commit:
                is_add_regression = 1
            d[comp][date_param]['loc'] += add + delete
            d[comp][date_param]['files'] += is_add_file
            d[comp][date_param]['commit'] += is_add_comp
            d[comp][date_param]['regression'] += is_add_regression
    return d
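
# Usage sketch (hypothetical): assuming `analyzer` is an instance of the class
# that owns this method, backed by a populated `commit` collection, a weekly
# per-subfolder breakdown could be requested as:
#
#   churn = analyzer.analyze_code_churn_by_period(
#       'weekly', ['2021-01', '2021-10'], [], 'subfolder', ['Source/media'])
#   # e.g. churn['agnostic']['2021-05'] -> {'loc': ..., 'files': ...,
#   #                                       'commit': ..., 'regression': ...}
#
# All names and dates above are illustrative placeholders.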

def analyze_group_total_code_churn(self, date_range: list,
                                   groups: list) -> dict:
    """Return group code churn analysis in date_range, classified by the given groups.

    Args:
        date_range (list): [dayStart, dayEnd], yyyy-MM-DD inclusive
        groups (list): nested arrays indicating groups,
            [[folder1, folder2], [folder3], [folder4, folder5], ...]

    Returns:
        dict: {'Group1': {loc, files, commit, regression},
               'Group2': {loc, files, commit, regression}, ...}
    """
    after, before = date_range
    total_file_set = FileSet()  # record the latest name of every file
    file_sets = []
    d = {}
    configs = []
    for group in groups:
        configs.append(self.get_tmp_config(group))
        file_sets.append(set())
    for index in range(len(groups)):
        d['Group' + str(index + 1)] = {
            'loc': 0,
            'files': 0,
            'commit': 0,
            'regression': 0
        }
    commits = self.commit.find({
        "commitDate": {
            "$gte": after
        },
    }, {
        '_id': False,
        'files': True,
        'regressions': True,
        'commitDate': True
    }).sort('commitDate', DESCENDING)
    for commit in commits:
        commit_date = commit['commitDate']
        files = commit['files']
        if commit_date > before:
            for f in files:
                filename = f['filename']
                self.update_fileset(total_file_set, filename)
            continue
        commit_comps = set()  # all the components touched by this commit
        for f in files:
            filename = f['filename']
            add = f['add']
            delete = f['delete']
            filename = self.update_fileset(total_file_set, filename)
            if add == 0 and delete == 0:  # this file was not changed at all
                continue
            # A file may match several groups; count it in each one.
            for (index, config) in enumerate(configs):
                file_set = file_sets[index]
                if is_keep(filename, config):
                    is_add_regression = 0
                    comp = 'Group' + str(index + 1)
                    if filename in file_set:
                        is_add_file = 0
                    else:
                        file_set.add(filename)
                        is_add_file = 1
                    if comp in commit_comps:
                        is_add_comp = 0
                    else:
                        commit_comps.add(comp)
                        is_add_comp = 1
                    if 'regressions' in commit:
                        is_add_regression = 1
                    d[comp]['loc'] += add + delete
                    d[comp]['files'] += is_add_file
                    d[comp]['commit'] += is_add_comp
                    d[comp]['regression'] += is_add_regression
    return d
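
# Usage sketch (hypothetical): each inner list below defines one group whose
# churn is reported under the key 'Group<n>' in call order:
#
#   groups = [['agnostic'], ['linux', 'cmrtlib']]
#   totals = analyzer.analyze_group_total_code_churn(
#       ['2021-01-01', '2021-03-31'], groups)
#   # totals['Group1'] and totals['Group2'] each hold
#   # {'loc': ..., 'files': ..., 'commit': ..., 'regression': ...}
#
# The folder names are placeholders; a file whose path matches several group
# configs is counted once in every group it matches.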

def analyze_total_code_churn(self, date_range: list, category_list: list,
                             classify_func, tmp_folders: list) -> dict:
    """Return code churn analysis in date_range, classified by category_list.

    Args:
        date_range (list): [dayStart, dayEnd], yyyy-MM-DD inclusive
        category_list (list): all the components in the category
        classify_func (function): the function to classify each component
        tmp_folders (list): folders used to build the temporary whitelist config

    Returns:
        dict: {component1: {loc, files, commit, regression},
               component2: {loc, files, commit, regression}, ...}
    """
    after, before = date_range
    total_file_set = FileSet()  # record the latest name of every file
    file_set = set()  # record files that satisfy the filters
    d = {}
    config = self.get_tmp_config(tmp_folders)
    for category in category_list:
        d[category] = {'loc': 0, 'files': 0, 'commit': 0, 'regression': 0}
    commits = self.commit.find({
        "commitDate": {
            "$gte": after
        },
    }, {
        '_id': False,
        'files': True,
        'commitDate': True,
        'regressions': True
    }).sort('commitDate', DESCENDING)
    for commit in commits:
        commit_date = commit['commitDate']
        files = commit['files']
        if commit_date > before:
            for f in files:
                filename = f['filename']
                self.update_fileset(total_file_set, filename)
            continue
        commit_comps = set()  # all the components touched by this commit
        for f in files:
            filename = f['filename']
            add = f['add']
            delete = f['delete']
            filename = self.update_fileset(total_file_set, filename)
            # Skip files outside the config whitelist or with no changes.
            if not is_keep(filename, config) or (add == 0 and delete == 0):
                continue
            is_add_regression = 0
            comp = classify_func(filename)
            if filename in file_set:
                is_add_file = 0
            else:
                file_set.add(filename)
                is_add_file = 1
            if comp in commit_comps:
                is_add_comp = 0
            else:
                commit_comps.add(comp)
                is_add_comp = 1
            if 'regressions' in commit:
                is_add_regression = 1
            d[comp]['loc'] += add + delete
            d[comp]['files'] += is_add_file
            d[comp]['commit'] += is_add_comp
            d[comp]['regression'] += is_add_regression
    return d
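
# Usage sketch (hypothetical): `classify_func` maps a filename to one of the
# categories in `category_list`; a minimal classifier might be:
#
#   def classify_by_extension(filename):
#       return 'Headers' if filename.endswith('.h') else 'Sources'
#
#   totals = analyzer.analyze_total_code_churn(
#       ['2021-01-01', '2021-03-31'], ['Headers', 'Sources'],
#       classify_by_extension, ['Source/media'])
#
# Note that a KeyError is raised if classify_func returns a category missing
# from category_list, since only the listed categories are pre-initialized.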