Beispiel #1
0
 def entropy(self):
     """Return the normalised entropy of this change's spread over files.

     Only entries of ``self.file_stat`` whose ``modified_path`` is accepted
     by ``in_our_extensions`` (or which are renames whose ``current_path``
     is accepted) take part. Each such file contributes its share of the
     total ``added + deleted`` count; the Shannon entropy (base 2) of those
     shares is divided by ``|log2(smallest share)|`` when that is non-zero.

     The value is computed once and then served from
     ``self.__cache_entropy``.
     """
     if self.__cache_entropy is not None:
         return self.__cache_entropy

     # Changed-line count (added + deleted) for every relevant file.
     churn_per_file = []
     for stat in self.file_stat:
         old_path = stat['modified_path']
         relevant = in_our_extensions(old_path) or (
             stat['is_rename'] and in_our_extensions(stat['current_path']))
         if relevant:
             churn_per_file.append(stat['added'] + stat['deleted'])
     total_churn = sum(churn_per_file)

     # Files without any changed line are left out of the distribution.
     shares = [1.0 * churn / total_churn
               for churn in churn_per_file if churn != 0]

     result = 0
     for share in shares:
         result -= share * math.log(share, 2)

     # Normalise by the log of the smallest share; a single file has
     # share 1.0 whose log is 0, in which case the value is left as is.
     if shares:
         scale = math.log(min(shares), 2)
         if scale != 0:
             result /= abs(scale)

     self.__cache_entropy = result
     return self.__cache_entropy
Beispiel #2
0
 def deleted_number(self):
     """Return the total ``deleted`` count over the relevant files.

     An entry of ``self.file_stat`` is relevant when its ``modified_path``
     is accepted by ``in_our_extensions``, or when it is a rename whose
     ``current_path`` is accepted. The sum is computed once and then
     served from ``self.__cache_deleted_number``.
     """
     if self.__cache_deleted_number is None:
         self.__cache_deleted_number = sum(
             stat['deleted']
             for stat in self.file_stat
             if in_our_extensions(stat['modified_path'])
             or (stat['is_rename']
                 and in_our_extensions(stat['current_path']))
         )
     return self.__cache_deleted_number
Beispiel #3
0
 def in_required_extensions(self, file_path):
     """Tell whether ``file_path`` counts as a file of interest here.

     Always ``False`` when this commit has more than one parent.
     Otherwise ``True`` if ``in_our_extensions`` accepts ``file_path``
     itself, or if the file is recorded as a ``'rename'`` and the path it
     was renamed to is accepted.
     """
     if len(self.parents) > 1:
         return False
     if in_our_extensions(file_path):
         return True

     # Second element maps an old path to the path it was renamed to.
     _, renamed_to = self.stats.modified_files
     was_renamed = self.namestat.file_modify_type[file_path] == 'rename'
     return bool(was_renamed and in_our_extensions(renamed_to[file_path]))
Beispiel #4
0
    def evolve_from_prior_commit(self):
        """Advance the per-file table to this commit and return size metrics.

        Chooses a reference parent commit, makes this commit's
        ``GitOneCommitFeatures.parent_file_stats[commit_id]['files']`` start
        from that parent's ``'files'`` mapping (the same object when the
        parent's ``'son_num'`` is 1, a deep copy otherwise), then applies
        every modified file's added/deleted counts to that mapping.

        For a commit with more than one parent, the reference is the
        ``base_commit`` of its entry in ``project_merge_numstat`` (using the
        merge-specific stats) when that is set; otherwise the first parent
        is used and no files are processed.

        Returns:
            A ``(lt, la, ld)`` tuple. For commits with more than one parent
            all three are 0. Otherwise ``la`` and ``ld`` are the summed
            added and deleted counts of files accepted by
            ``in_our_extensions``, and ``lt`` is the summed parent-table
            value of those files (presumably their line count before this
            commit) divided by ``len(files)`` when that is non-zero.
        """
        la = 0
        ld = 0
        lt = 0
        nf = 0
        gcf = GitOneCommitFeatures
        stats = self.stats
        namestats = self.namestat
        # Pick the commit whose file table this commit evolves from.
        if len(self.parents) == 0:
            p = None
        elif len(self.parents) == 1:
            p = self.parents[0]
        else:
            if gcf.project_merge_numstat[
                    self.commit_id].base_commit is not None:
                p = gcf.project_merge_numstat[self.commit_id].base_commit
                stats = gcf.project_merge_numstat[self.commit_id]
                namestats = gcf.project_merge_namestat[self.commit_id]
            else:
                p = self.parents[0]
                stats = None
        if stats is not None:
            files, rename_files = stats.modified_files
        else:
            # After the merge there is no change relative to either branch.
            files = []
            rename_files = []
        # NOTE(review): file_stats is only bound when p is not None; the
        # delete/rename/modify branches below rely on that.
        if p is not None:
            file_stats = gcf.parent_file_stats[p]['files']
            if gcf.parent_file_stats[p]['son_num'] == 1:
                # Single child: reuse the parent's mapping object directly,
                # so the updates below also mutate the parent's entry.
                gcf.parent_file_stats[self.commit_id]['files'] = file_stats
            else:
                # A new branch starts here, so take a deep copy of file_stats.
                gcf.parent_file_stats[self.commit_id]['files'] = deepcopy(
                    file_stats)

        for f, added, deleted in files:
            if namestats.file_modify_type[f] == 'add':
                assert (deleted == 0)
                # Record the new file with its added count as stored value.
                # NOTE(review): with no parent this assumes the commit's
                # 'files' entry already exists — confirm against caller.
                gcf.parent_file_stats[self.commit_id]['files'][f] = added
                if in_our_extensions(f):
                    nf += 1
                    la += added
            elif namestats.file_modify_type[f] == 'delete':
                assert (added == 0)
                # Disabled check: assert(deleted == file_stats[f])

                if in_our_extensions(f):
                    # Read the parent's value before the entry is removed.
                    lt += file_stats[f]
                    nf += 1
                    ld += deleted
                # Drop the deleted file from this commit's table.
                del gcf.parent_file_stats[self.commit_id]['files'][f]
            elif namestats.file_modify_type[f] == 'rename':
                cur_file = rename_files[f]
                tmp = file_stats[f]
                assert (tmp + added - deleted >= 0)
                # Store the updated value under the new path ...
                gcf.parent_file_stats[
                    self.commit_id]['files'][cur_file] = tmp + added - deleted
                if in_our_extensions(f) or in_our_extensions(cur_file):
                    lt += tmp
                    nf += 1
                    la += added
                    ld += deleted
                # ... and remove the entry for the old path.
                del gcf.parent_file_stats[self.commit_id]['files'][f]
            else:
                assert (namestats.file_modify_type[f] == 'modify')
                tmp = file_stats[f]
                assert (tmp + added - deleted >= 0)
                # Store the updated value for the modified file.
                gcf.parent_file_stats[
                    self.commit_id]['files'][f] = tmp + added - deleted
                if in_our_extensions(f):
                    lt += tmp
                    nf += 1
                    la += added
                    ld += deleted
        if len(self.parents) > 1:
            # Commits with several parents report no size metrics.
            lt = 0
            la = 0
            ld = 0
        else:
            # NOTE(review): the nf counted in the loop is discarded here, so
            # lt is averaged over every modified file rather than only those
            # accepted by in_our_extensions — confirm this is intended.
            nf = len(files)
            if nf > 0:
                lt = 1.0 * lt / nf
        return lt, la, ld