def entropy(self):
    """Return the normalised entropy of this commit's per-file churn.

    Every entry of ``self.file_stat`` whose ``modified_path`` is accepted
    by ``in_our_extensions`` (or, for a rename, whose ``current_path`` is)
    contributes ``added + deleted``.  Each non-zero contribution is turned
    into its share of the total, and the base-2 entropy of those shares is
    divided by ``abs(log2(smallest share))`` whenever that divisor is
    non-zero.

    The value is computed once and stored in ``self.__cache_entropy``;
    later calls return the stored value.
    """
    if self.__cache_entropy is not None:
        return self.__cache_entropy

    churn_per_file = [
        st['added'] + st['deleted']
        for st in self.file_stat
        if in_our_extensions(st['modified_path'])
        or (st['is_rename'] and in_our_extensions(st['current_path']))
    ]
    churn_total = sum(churn_per_file)

    result = 0
    # Sentinel: still 100 after the loop when no file had any churn.
    smallest_share = 100
    for churn in churn_per_file:
        if churn == 0:
            # A zero share would make log() fail and adds nothing anyway.
            continue
        share = 1.0 * churn / churn_total
        if share < smallest_share:
            smallest_share = share
        result -= share * math.log(share, 2)

    # Skip normalisation when nothing was counted or when the smallest
    # share is 1 (log2 == 0), which would divide by zero.
    if smallest_share != 100 and math.log(smallest_share, 2) != 0:
        result /= abs(math.log(smallest_share, 2))

    self.__cache_entropy = result
    return self.__cache_entropy
def deleted_number(self):
    """Return the summed ``deleted`` value over the relevant files.

    An entry of ``self.file_stat`` is relevant when its ``modified_path``
    is accepted by ``in_our_extensions``, or when it is a rename whose
    ``current_path`` is accepted.  The sum is computed once and stored in
    ``self.__cache_deleted_number``; later calls return the stored value.
    """
    if self.__cache_deleted_number is not None:
        return self.__cache_deleted_number

    self.__cache_deleted_number = sum(
        st['deleted']
        for st in self.file_stat
        if in_our_extensions(st['modified_path'])
        or (st['is_rename'] and in_our_extensions(st['current_path']))
    )
    return self.__cache_deleted_number
def in_required_extensions(self, file_path):
    """Tell whether ``file_path`` counts as a tracked file for this commit.

    Returns ``False`` for any commit with more than one parent.  Otherwise
    returns ``True`` when ``file_path`` itself is accepted by
    ``in_our_extensions``, or when the file is recorded as a ``'rename'``
    and the path it was renamed to is accepted; ``False`` in every other
    case.
    """
    if len(self.parents) > 1:
        return False
    if in_our_extensions(file_path):
        return True

    # modified_files unpacks to a pair; only the rename mapping is used.
    _, renamed_to = self.stats.modified_files
    if self.namestat.file_modify_type[file_path] != 'rename':
        return False
    return bool(in_our_extensions(renamed_to[file_path]))
def evolve_from_prior_commit(self):
    """Carry the parent's per-file table forward and return ``(lt, la, ld)``.

    The class-level table
    ``GitOneCommitFeatures.parent_file_stats[commit]['files']`` maps a file
    path to a number (presumably the file's current line count -- confirm
    against the code that seeds the table).  This method installs the
    table for ``self.commit_id`` from the chosen parent and then applies
    every ``(file, added, deleted)`` entry of the commit to it.

    Returns:
        tuple: ``(lt, la, ld)``.  For a commit with more than one parent
        all three are ``0``.  Otherwise ``la`` and ``ld`` are the sums of
        ``added`` and ``deleted`` over the files accepted by
        ``in_our_extensions``, and ``lt`` is the sum of the pre-change
        table values of those files divided by ``len(files)`` (left at
        ``0`` when there are no files).
    """
    la = 0
    ld = 0
    lt = 0
    nf = 0
    gcf = GitOneCommitFeatures
    stats = self.stats
    namestats = self.namestat
    # Choose the commit ``p`` whose table this commit evolves from.
    if len(self.parents) == 0:
        p = None
    elif len(self.parents) == 1:
        p = self.parents[0]
    else:
        # More than one parent: use the stats recorded against base_commit
        # when one is available.
        if gcf.project_merge_numstat[
                self.commit_id].base_commit is not None:
            p = gcf.project_merge_numstat[self.commit_id].base_commit
            stats = gcf.project_merge_numstat[self.commit_id]
            namestats = gcf.project_merge_namestat[self.commit_id]
        else:
            p = self.parents[0]
            stats = None
    if stats is not None:
        files, rename_files = stats.modified_files
    else:
        # After the merge there is no change relative to either branch.
        files = []
        rename_files = []
    if p is not None:
        file_stats = gcf.parent_file_stats[p]['files']
        if gcf.parent_file_stats[p]['son_num'] == 1:
            # Only one child: reuse the parent's table object without
            # copying, so the updates below also change ``file_stats``.
            gcf.parent_file_stats[self.commit_id]['files'] = file_stats
        else:
            # A new branch starts here: work on a deep copy of file_stats.
            gcf.parent_file_stats[self.commit_id]['files'] = deepcopy(
                file_stats)
    # NOTE(review): ``file_stats`` is only bound when ``p`` is not None,
    # yet every branch below except 'add' reads it -- presumably a commit
    # without parents only contains 'add' entries; confirm.
    for f, added, deleted in files:
        if namestats.file_modify_type[f] == 'add':
            assert (deleted == 0)
            # Register the new file in this commit's table.
            gcf.parent_file_stats[self.commit_id]['files'][f] = added
            if in_our_extensions(f):
                nf += 1
                la += added
        elif namestats.file_modify_type[f] == 'delete':
            assert (added == 0)
            # Disabled check: assert(deleted == file_stats[f])
            if in_our_extensions(f):
                lt += file_stats[f]
                nf += 1
                ld += deleted
            # Drop the deleted file from this commit's table.
            del gcf.parent_file_stats[self.commit_id]['files'][f]
        elif namestats.file_modify_type[f] == 'rename':
            cur_file = rename_files[f]
            # Read the old value before the table is modified below.
            tmp = file_stats[f]
            assert (tmp + added - deleted >= 0)
            # Store the updated value under the new path ...
            gcf.parent_file_stats[
                self.commit_id]['files'][cur_file] = tmp + added - deleted
            if in_our_extensions(f) or in_our_extensions(cur_file):
                lt += tmp
                nf += 1
                la += added
                ld += deleted
            # ... and remove the entry for the old path.
            del gcf.parent_file_stats[self.commit_id]['files'][f]
        else:
            assert (namestats.file_modify_type[f] == 'modify')
            tmp = file_stats[f]
            assert (tmp + added - deleted >= 0)
            # Update the file's value in place.
            gcf.parent_file_stats[
                self.commit_id]['files'][f] = tmp + added - deleted
            if in_our_extensions(f):
                lt += tmp
                nf += 1
                la += added
                ld += deleted
    if len(self.parents) > 1:
        # Commits with more than one parent report no metrics; the table
        # updates above are still applied.
        lt = 0
        la = 0
        ld = 0
    else:
        # NOTE(review): this overwrites the ``nf`` counted in the loop, so
        # ``lt`` is averaged over all modified files rather than only the
        # ones accepted by in_our_extensions -- confirm this is intended.
        nf = len(files)
        if nf > 0:
            lt = 1.0 * lt / nf
    return lt, la, ld