def parse_risk_file(risk_file, bus_risks):
    risk_f = open(risk_file, 'r')
    for line in risk_f:
        line = line.strip()
        if not line:
            continue
        dev, risk = line.split('=')
        dev = safe_author_name(dev)
        bus_risks[dev] = float(risk)
    risk_f.close()
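
# A minimal usage sketch for parse_risk_file (not part of the original tool):
# it assumes a plain-text risk file with one "dev=risk" pair per line, e.g.
#
#     alice=0.9
#     bob=0.25
#
# The file name and helper below are hypothetical, added only to illustrate
# the expected format; safe_author_name may normalize the dev names.
def _example_load_risks(risk_file='bus_risks.txt'):
    bus_risks = {}
    parse_risk_file(risk_file, bus_risks)
    return bus_risks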
def parse_experience(log):
    """
    Parse the dev experience from the git log.
    """
    # list of tuples of shape [(dev, lines_added, lines_removed), ...]
    exp = []
    # entry lines were zero separated with -z
    entry_lines = log.split('\0')
    current_entry = []
    for entry_line in entry_lines:
        if not entry_line.strip():
            # blank entry line marks the end of an entry, we're ready to process
            local_entry = current_entry
            current_entry = []
            if len(local_entry) < 2:
                print >> sys.stderr, "Weird entry, cannot parse: %s\n-----" % '\n'.join(local_entry)
                continue
            author, changes = local_entry[:2]
            author = safe_author_name(author)
            try:
                changes_split = re.split(r'\s+', changes)
                # this can be two fields if there were file renames
                # detected, in which case the file names are on the
                # following entry lines, or three fields (third being
                # the filename) if there were no file renames
                lines_added, lines_removed = changes_split[:2]
                lines_added = int(lines_added)
                lines_removed = int(lines_removed)
                # don't record revisions that don't have any added or
                # removed lines...they mean nothing to our algorithm
                if lines_added or lines_removed:
                    exp.append((author, lines_added, lines_removed))
            except ValueError:
                print >> sys.stderr, "Weird entry, cannot parse: %s\n-----" % '\n'.join(local_entry)
                continue
        else:
            # continue to aggregate the entry
            lines = entry_line.split('\n')
            current_entry.extend([line.strip() for line in lines])
    # we need the oldest log entries first.
    exp.reverse()
    return exp
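
# A hypothetical sketch of the input parse_experience expects: a NUL-separated
# git log in which each entry's first line is the author and its second line
# is a --numstat style "added<TAB>removed<TAB>file" summary.  The sample
# string below is illustrative only; the exact git invocation that produces
# the real log lives elsewhere in the tool, and safe_author_name may
# normalize the author strings.
def _example_parse_experience():
    sample_log = (
        "Alice <alice@example.com>\n10\t2\tfoo.py\0\0"  # newest commit
        "Bob <bob@example.com>\n3\t1\tfoo.py\0"         # oldest commit
    )
    # git log is newest-first, so the result is reversed to oldest-first,
    # e.g. [(<Bob>, 3, 1), (<Alice>, 10, 2)]
    return parse_experience(sample_log)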
def parse_dev_experience(f, client, repo_root):
    """
    f: a path relative to repo_root for a file from whose log we want
    to parse dev experience.

    client: the pysvn client

    repo_root: the root of the svn repository
    """
    # a list of tuples of form (dev, added_lines, deleted_lines), each
    # representing one commit
    dev_experience = []

    # a list of tuples with the paths / revisions we want to run diffs
    # on to reconstruct dev experience
    comps_to_make = []

    # since the name of the file can change through its history due to
    # moves, we need to keep the most recent one we're looking for
    fname_to_follow = f

    added_line_re = re.compile(r'^\+')

    # strict_node_history=False: follow copies
    #
    # discover_changed_paths: make the data about copying available in
    # the changed_paths field
    for log in client.log("%s%s" % (repo_root, f),
                          strict_node_history=False,
                          discover_changed_paths=True):
        cp = log.changed_paths
        # even though we are only asking for the log of a single file,
        # svn gives us back all changed paths for that revision, so we
        # have to look for the right one
        for c in cp:
            if fname_to_follow == c.path:
                # since we're going back in time with the log process,
                # a copyfrom_path means we need to follow the old file
                # from now on.
                if c.copyfrom_path:
                    fname_to_follow = c.copyfrom_path
                comps_to_make.append((c.path, log.revision, log.author))
                break

    # our logic needs oldest logs first
    comps_to_make.reverse()

    # for the first revision, every line is attributed to the first
    # author as an added line
    txt = client.cat("%s%s" % (repo_root, comps_to_make[0][0]),
                     comps_to_make[0][1])
    exp = txt.count('\n')
    if not txt.endswith('\n'):
        exp += 1
    dev_experience.append((comps_to_make[0][2], exp, 0))

    # for all the other entries, we must diff between revisions to
    # find the number and kind of changes
    for i in range(len(comps_to_make) - 1):
        old_path = "%s%s" % (repo_root, comps_to_make[i][0])
        old_rev = comps_to_make[i][1]
        new_path = "%s%s" % (repo_root, comps_to_make[i + 1][0])
        new_rev = comps_to_make[i + 1][1]
        author = comps_to_make[i + 1][2]
        try:
            diff = client.diff('.', old_path, revision1=old_rev,
                               url_or_path2=new_path, revision2=new_rev,
                               diff_options=['-w'])
            diff = diff.split('\n')
            ind_dbl_ats = 0
            for j, line in enumerate(diff):
                if line.startswith('@@'):
                    ind_dbl_ats = j
                    break
            added = 0
            removed = 0
            for line in diff[ind_dbl_ats:]:
                if line.startswith('+'):
                    added += 1
                if line.startswith('-'):
                    removed += 1
            dev_experience.append((safe_author_name(author), added, removed))
        except:
            # on one occasion I saw a non-binary item that existed in
            # the filesystem with svn ls but errored out with a diff
            # against that revision.  Note the error and proceed.
            print >> sys.stderr, "Error diffing %s %s and %s %s: " % \
                (old_path, str(old_rev), new_path, str(new_rev)), sys.exc_info()[0]

    return dev_experience
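
# A minimal usage sketch for parse_dev_experience, assuming pysvn is
# installed.  The repository URL and file path below are hypothetical
# placeholders; the path is given relative to the repository root, matching
# how the function builds its "%s%s" URLs.
def _example_svn_experience():
    import pysvn
    client = pysvn.Client()
    repo_root = 'http://svn.example.com/repo'   # hypothetical repository URL
    return parse_dev_experience('/trunk/foo.py', client, repo_root)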