Esempio n. 1
0
def get_repo_frequencies():
    """
    Return a map of repo id to (frequency, relative_freq) tuples.

    ``frequency`` is the number of times the repo id occurs across all
    the per-user watch collections returned by ``get_user_watches()``;
    ``relative_freq`` is that count divided by the total number of
    watches, always as a float.

    The result is kept in the module-level ``_repo_freqs`` and is also
    stored as a pickle under ``config.CALC_DATA_PATH``. A truthy value
    from either of those is returned as-is without recomputing.

    NOTE(review): a pickle written by an earlier version of this function
    may still hold integer-truncated relative frequencies; delete the
    pickle file to force a recompute -- confirm whether that is needed.
    """
    global _repo_freqs
    path = os.path.join(config.CALC_DATA_PATH, 'repo_frequencies1.pickle')

    # Prefer the in-memory copy; only touch the pickle when it is empty.
    repo_frequencies = _repo_freqs or util.load_pickle(path)
    if repo_frequencies:
        _repo_freqs = repo_frequencies
        return repo_frequencies

    user_watches = get_user_watches()

    total_watches = sum(len(w) for w in user_watches.values())
    logger.debug("Total watches is {0}".format(total_watches))

    # First pass: plain occurrence counts per repo id.
    counts = {}
    for repos in user_watches.values():
        for watch in repos:
            counts[watch] = counts.get(watch, 0) + 1

    # Force true division: with two ints, ``/`` floors under Python 2 and
    # every relative frequency would come out as 0.  When there are no
    # watches ``counts`` is empty, so no division by zero can occur.
    total = float(total_watches)
    repo_frequencies = dict(
        (repo, (freq, freq / total)) for repo, freq in counts.items()
    )

    util.store_pickle(repo_frequencies, path, debug=True)
    _repo_freqs = repo_frequencies

    return repo_frequencies
Esempio n. 2
0
def get_user_watches():
    """
    Return a dict of user id keys mapped to a set of the repo ids
    being watched by that user.

    The mapping is built from ``data.txt`` under ``config.SRC_DATA_PATH``,
    where each non-blank line has the form ``<user id>:<repo id>`` with
    both ids parsed as integers.

    The result is kept in the module-level ``_user_watches`` and is also
    stored as a pickle under ``config.CALC_DATA_PATH``. A truthy value
    from either of those is returned as-is without re-reading the source
    file.

    Raises ValueError if a non-blank line does not split into exactly two
    integer fields around ``:``.
    """
    global _user_watches
    path = os.path.join(config.CALC_DATA_PATH, 'user_watches.pickle')

    # Prefer the in-memory copy; only touch the pickle when it is empty.
    user_watches = _user_watches or util.load_pickle(path)
    if user_watches:
        _user_watches = user_watches
        return user_watches

    user_watches = collections.defaultdict(set)

    # Use a context manager so the file handle is closed deterministically,
    # even if a malformed line raises part-way through.
    with open(os.path.join(config.SRC_DATA_PATH, 'data.txt')) as src:
        for line in src:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            k, v = line.split(':')
            user_watches[int(k)].add(int(v))

    util.store_pickle(user_watches, path, debug=True)
    _user_watches = user_watches

    return user_watches