import re
import sys


def parse_risk_file(risk_file, bus_risks):
    """
    Parse a risk file of "developer=risk" lines, filling the bus_risks
    dict in place with a float risk per developer.
    """
    with open(risk_file, 'r') as risk_f:
        for line in risk_f:
            line = line.strip()
            if not line:
                continue
            dev, risk = line.split('=')
            dev = safe_author_name(dev)
            bus_risks[dev] = float(risk)
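# Usage sketch (not part of the original source): the risk file is expected to
# hold one "developer=risk" entry per line.  The sample data, temporary file,
# and expected result below are illustrative only, and assume safe_author_name()
# leaves these names essentially unchanged.
def _demo_parse_risk_file():
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
        tmp.write("alice@example.com=0.9\n")
        tmp.write("bob@example.com=0.25\n")
    bus_risks = {}
    parse_risk_file(tmp.name, bus_risks)
    # bus_risks -> {'alice@example.com': 0.9, 'bob@example.com': 0.25}
    return bus_risks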
def parse_experience(log):
    """
    Parse the dev experience from the git log.
    """
    # list of tuple of shape [(dev, lines_add, lines_removed), ...]
    exp = []

    # entry lines were zero separated with -z
    entry_lines = log.split('\0')

    current_entry = []

    for entry_line in entry_lines:
        if not entry_line.strip():
            # blank entry line marks the end of an entry, we're ready to process
            local_entry = current_entry
            current_entry = []
            if not local_entry:
                # consecutive separators (e.g. at the very end of the log)
                # leave nothing to process
                continue
            if len(local_entry) < 2:
                print >> sys.stderr, "Weird entry, cannot parse: %s\n-----" % '\n'.join(
                    local_entry)
                continue
            author, changes = local_entry[:2]
            author = safe_author_name(author)
            try:
                changes_split = re.split(r'\s+', changes)
                # this can be two fields if there were file renames
                # detected, in which case the file names are on the
                # following entry lines, or three fields (third being
                # the filename) if there were no file renames
                lines_added, lines_removed = changes_split[:2]
                lines_added = int(lines_added)
                lines_removed = int(lines_removed)

                # don't record revisions that don't have any removed or
                # added lines...they mean nothing to our algorithm
                if lines_added or lines_removed:
                    exp.append((author, lines_added, lines_removed))
            except ValueError:
                print >> sys.stderr, "Weird entry, cannot parse: %s\n-----" % '\n'.join(
                    local_entry)
                continue
        else:
            # continue to aggregate the entry
            lines = entry_line.split('\n')
            current_entry.extend([line.strip() for line in lines])

    # we need the oldest log entries first.
    exp.reverse()
    return exp
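# Usage sketch (illustrative only): parse_experience() consumes NUL-separated
# fields where each commit contributes an author line plus a numstat-style
# "added removed filename" line, and an empty field closes the commit (the
# shape described in the comments above).  Real input would come from a
# `git log -z --numstat`-style command (the exact invocation is not part of
# this excerpt); the log below is hand-built and the names are made up.
def _demo_parse_experience():
    fake_log = (
        "alice\n3\t1\tfoo.py\0\0"   # newest commit first, as git log emits them
        "bob\n0\t2\tfoo.py\0\0"
    )
    # Expected (assuming safe_author_name() leaves these names unchanged),
    # oldest commit first after the final reverse():
    #     [('bob', 0, 2), ('alice', 3, 1)]
    return parse_experience(fake_log)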
def parse_dev_experience(f, client, repo_root):
    """
    f: a path relative to repo_root for a file from whose log we want
    to parse dev experience.

    client: the pysvn client

    repo_root: the root of the svn repository
    """
    # a list of tuples of form (dev, added_lines, deleted_lines), each
    # representing one commit
    dev_experience = []

    # a list of tuples with the paths / revisions we want to run diffs
    # on to reconstruct dev experience
    comps_to_make = []

    # since the name of the file can change through its history due to
    # moves, we need to keep the most recent one we're looking for
    fname_to_follow = f

    # strict_node_history=False: follow copies
    #
    # discover_changed_paths: make the data about copying available in the changed_paths field
    for log in client.log("%s%s" % (repo_root, f),
                          strict_node_history=False,
                          discover_changed_paths=True):
        cp = log.changed_paths

        # even though we are only asking for the log of a single file,
        # svn gives us back all changed paths for that revision, so we
        # have to look for the right one
        for c in cp:
            if fname_to_follow == c.path:
                # since we're going back in time with the log process,
                # a copyfrom_path means we need to follow the old file
                # from now on.
                if c.copyfrom_path:
                    fname_to_follow = c.copyfrom_path
                comps_to_make.append((c.path, log.revision, log.author))
                break

    # our logic needs oldest logs first
    comps_to_make.reverse()

    # for the first revision, every line is attributed to the first
    # author as an added line
    txt = client.cat("%s%s" % (repo_root, comps_to_make[0][0]),
                     comps_to_make[0][1])

    exp = txt.count('\n')
    if not txt.endswith('\n'):
        exp += 1
    dev_experience.append((comps_to_make[0][2], exp, 0))

    # for all the other entries, we must diff between revisions to
    # find the number and kind of changes
    for i in range(len(comps_to_make) - 1):
        old_path = "%s%s" % (repo_root, comps_to_make[i][0])
        old_rev = comps_to_make[i][1]

        new_path = "%s%s" % (repo_root, comps_to_make[i + 1][0])
        new_rev = comps_to_make[i + 1][1]

        author = comps_to_make[i + 1][2]

        try:
            diff = client.diff('.',
                               old_path,
                               revision1=old_rev,
                               url_or_path2=new_path,
                               revision2=new_rev,
                               diff_options=['-w'])
            diff = diff.split('\n')
            # skip the diff preamble; start counting at the first '@@' hunk header
            ind_dbl_ats = 0
            for hunk_ind, line in enumerate(diff):
                if line.startswith('@@'):
                    ind_dbl_ats = hunk_ind
                    break
            added = 0
            removed = 0
            for line in diff[ind_dbl_ats:]:
                if line.startswith('+'):
                    added += 1
                if line.startswith('-'):
                    removed += 1
            dev_experience.append((safe_author_name(author), added, removed))
        except Exception:
            # on one occasion I saw a non-binary item that existed in
            # the filesystem with svn ls but errored out with a diff
            # against that revision.  Note the error and proceed.
            print >> sys.stderr, "Error diffing %s %s and %s %s: %s" % \
                  (old_path, str(old_rev), new_path, str(new_rev), sys.exc_info()[0])

    return dev_experience
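# Usage sketch (not part of the original source): pysvn.Client() is the real
# pysvn entry point, but the repository URL and file path below are
# hypothetical.  repo_root is the root URL of the repository and f is the path
# within it, so that repo_root + f is a valid svn URL.  Like parse_experience()
# above, the result is a list of (dev, added_lines, removed_lines) tuples,
# oldest commit first, so downstream code can treat git and svn histories the
# same way.
def _demo_parse_dev_experience():
    import pysvn
    client = pysvn.Client()
    repo_root = 'http://svn.example.com/myrepo'   # hypothetical URL
    f = '/trunk/src/module.py'                    # hypothetical file path
    return parse_dev_experience(f, client, repo_root)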