Example #1
0
def search_files(patt, file_patt, repo_dir):
    """Run ``RG_CMD`` for *patt* inside *repo_dir* and return its stdout.

    Args:
        patt: Pattern handed to the search command as its final argument.
        file_patt: Optional value for the command's ``-g`` option; omitted
            from the command line when falsy.
        repo_dir: Directory used as the working directory for the command.

    Returns:
        The ``stdout`` attribute of the result returned by ``run_cap``.
    """
    glob_args = ['-g', file_patt] if file_patt else []
    argv = [RG_CMD] + glob_args + [patt]
    return run_cap(argv, cwd=repo_dir).stdout
Example #2
0
def _get_py_version(plist, project, repo_dir):
    """Report the minimum Python 2 / Python 3 versions detected for *repo_dir*.

    Runs ``VERSION_CMD -v`` on the checkout and parses the
    ``Minimum required versions:`` summary line from the tail of its output.

    Args:
        plist: Not used by this function.
        project: Not used by this function.
        repo_dir: Path handed to the command and used as its working directory.

    Returns:
        A dict with keys ``'min_py2'`` and ``'min_py3'``. Each value is the
        version token found for that major version (trailing commas removed),
        or ``None`` when no token starting with that major version digit was
        found, which indicates no compatibility with that major version.
    """
    # -v flag required because it improves the correctness of detection
    # manual tests on the calibre package showed better results with -v
    cmd = [VERSION_CMD, '-v', repo_dir]
    proc_res = run_cap(cmd, cwd=repo_dir)

    # hardcoded prefix of the summary line; its length is also the offset
    # at which the version list starts, so derive it instead of hardcoding
    min_version_prefix = 'Minimum required versions:'

    # with -v flag a lot of output is generated, the line containing
    # version information will probably be at the bottom, so only the last
    # five lines are inspected (the last matching line wins)
    min_version_line = None
    for line in proc_res.stdout.splitlines()[-5:]:
        if line.startswith(min_version_prefix):
            min_version_line = line

    # None indicates not compatible with that major version of python
    py_2_version = None
    py_3_version = None
    if min_version_line is not None:
        supp_versions = min_version_line[len(min_version_prefix):].split()
        for version in supp_versions:
            # tokens that do not start with a major version digit
            # (anything other than '2' or '3') are ignored
            if version[0] == '2':
                py_2_version = version.strip(',')
            elif version[0] == '3':
                py_3_version = version.strip(',')

    return {'min_py2': py_2_version, 'min_py3': py_3_version}
def collect(plist, project, repo_dir):
    """Detect the license of the checkout at *repo_dir*.

    Runs ``DETECT_CMD <repo_dir> --format json``, takes the ``matches`` list
    of the first result, and reduces it to a single normalized license name.

    Args:
        plist: Not used by this function.
        project: Not used by this function.
        repo_dir: Path handed to the detection command.

    Returns:
        A dict with:
          * ``'license'``: normalized license name from ``LICENSE_MAP``, or
            ``'Other'`` when no match is both confident (>= 0.9) and known.
          * ``'license_group'``: first word-character run of the license name.
          * ``'hereditary'``: ``GROUP_HEREDITARY_MAP`` entry for that group,
            or ``None`` when the group is not in the map.
    """
    ret = {}
    proc_res = run_cap([DETECT_CMD, repo_dir, '--format', 'json'])
    output_json = json.loads(proc_res.stdout)
    possible_licenses = glom.glom(output_json, '0.matches', default=[])
    # keep only the three highest-confidence matches
    possible_licenses = sorted(possible_licenses, key=lambda x: x['confidence'], reverse=True)[:3]

    # drop low-confidence matches and licenses we have no normalized name for
    norm_licenses = [(LICENSE_MAP[pl['license']], round(pl['confidence'], 3))
                     for pl in possible_licenses
                     if pl['confidence'] >= 0.9 and pl['license'] in LICENSE_MAP]

    if not norm_licenses:
        ret['license'] = 'Other'  # not enough consensus on a known license
    else:
        # highest (rounded) confidence first; the sort is stable, so ties
        # keep the order established above
        norm_licenses.sort(key=lambda x: x[1], reverse=True)
        if len(norm_licenses) < 3:
            ret['license'] = norm_licenses[0][0]
        else:
            # three confident candidates: pick the name they agree on most,
            # falling back to the highest-confidence one on a tie
            ret['license'] = Counter(x[0] for x in norm_licenses).most_common(1)[0][0]

    group = re.split(r'\W+', ret['license'])[0]
    ret['license_group'] = group
    ret['hereditary'] = GROUP_HEREDITARY_MAP.get(group)

    return ret
Example #4
0
def get_git_info(repo_dir):
    """Collect commit-history statistics for the git checkout at *repo_dir*.

    Args:
        repo_dir: Working directory in which the ``git`` commands are run.

    Returns:
        A dict with:
          * ``'first_commit'`` / ``'latest_commit'``: ISO-format timestamps of
            the earliest root commit and of ``HEAD``.
          * ``'commit_count'``: total commits summed over all committers.
          * ``'committer_count'``: number of distinct committers as reported
            by ``CommitterRegistry``.
          * ``'committer_percent_dist'``: maps an integer percentage to the
            smallest number of top committers whose commits together reach
            that percentage of all commits.
          * ``'committer_top_5'``: share of all commits (rounded to 4 places)
            held by each of the five most active committers.
          * ``'minor_committer_counts'``: for x in 1..5, how many committers
            have at most x commits.
    """
    ret = {}

    # a history can have more than one parentless (root) commit
    proc_res = run_cap(['git', 'rev-list', '--max-parents=0', 'HEAD'], cwd=repo_dir)
    first_commit_hashes = proc_res.stdout.strip().split()

    # the earliest of the root commits counts as the first commit
    first_commit_dt = sorted([_get_commit_dt(repo_dir, fch) for fch in first_commit_hashes])[0]

    proc_res = run_cap(['git', 'rev-parse', 'HEAD'], cwd=repo_dir)
    latest_commit_hash = proc_res.stdout.strip()

    latest_commit_dt = _get_commit_dt(repo_dir, latest_commit_hash)

    ret['first_commit'] = first_commit_dt.isoformat()
    ret['latest_commit'] = latest_commit_dt.isoformat()

    proc_res = run_cap(['git', 'shortlog', '--summary', '--numbered', '--email'], cwd=repo_dir)

    committer_registry = CommitterRegistry()
    for match in _git_committer_re.finditer(proc_res.stdout):
        gdict = match.groupdict()
        committer_registry.register(gdict['name'], gdict['email'], int(gdict['commit_count']))

    committers = committer_registry.get_committers()
    ret['commit_count'] = commit_count = sum(c.commit_count for c in committers)
    ret['committer_count'] = len(committers)  # redundant with committer_percent_dist.100

    # these will be stored as percentages, so keep it to two-digit precision max;
    # integer percents keep the threshold comparison below exact (no float error)
    percents = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 100]

    # per-committer commit counts, most active first
    commit_counts = sorted((c.commit_count for c in committers), reverse=True)

    def _get_proportion_count(percent):
        # smallest number of top committers whose combined commits reach
        # `percent` of all commits; reaching the threshold exactly counts
        running_total = 0
        for needed, count in enumerate(commit_counts, start=1):
            running_total += count
            if running_total * 100 >= commit_count * percent:
                return needed
        return len(commit_counts)

    # how many developers' commits does it take to comprise XX% of the commits?
    ret['committer_percent_dist'] = {percent: _get_proportion_count(percent)
                                     for percent in percents}
    ret['committer_top_5'] = [round(c / commit_count, 4) for c in commit_counts[:5]]
    ret['minor_committer_counts'] = {x: sum(1 for c in commit_counts if c <= x)
                                     for x in range(1, 6)}

    return ret
Example #5
0
def _get_commit_dt(repo_dir, commit_hash, **kw):
    """Return the committer date of *commit_hash* as parsed by ``isoparse``.

    The date is printed by ``git show`` using ``--date=format-local`` with
    ``TZ`` set to ``UTC`` in the environment passed to ``run_cap``, in the
    form ``YYYY-MM-DDTHH:MM:SS`` (no UTC offset in the text).

    Args:
        repo_dir: Working directory for the git command; overrides any
            ``cwd`` given in *kw*.
        commit_hash: Commit to inspect.
        **kw: Extra keyword arguments forwarded to ``run_cap``. An ``env``
            mapping, if given, is copied and extended with ``TZ=UTC``.

    Returns:
        Whatever ``isoparse`` returns for the printed date text.
    """
    # copy so that a caller-supplied env mapping is never modified in place
    # NOTE(review): how run_cap combines `env` with the inherited environment
    # is not visible here -- confirm git still sees PATH/HOME as needed
    env = dict(kw.get('env') or {})
    env['TZ'] = 'UTC'
    kw['env'] = env
    kw['cwd'] = repo_dir
    cmd = ['git', 'show', '-s', '--format=%cd',
           '--date=format-local:%Y-%m-%dT%H:%M:%S', commit_hash]
    proc_res = run_cap(cmd, **kw)
    return isoparse(proc_res.stdout.strip())