def compute_conditional_probabilities():
    """Build pairwise co-watch statistics and persist them.

    Every watchlist from ``users.get_user_watches()`` that holds more than
    one entry is scanned; for each ordered pair ``(i, j)`` of unequal
    entries the co-occurrence count of ``j`` given ``i`` is incremented.

    The structure handed to ``persist_conditional_probabilities`` maps
    ``i -> j -> (cofreq, cofreq / repo_frequencies[i])``.

    Returns:
        None. The result is passed to ``persist_conditional_probabilities``.

    Raises:
        KeyError: if a watched entry is missing from the mapping returned
            by ``users.get_repo_frequencies()``.
        ZeroDivisionError: if a watched entry has a frequency of zero.
    """
    logger.debug("Computing conditional probabilities.")
    user_watches = users.get_user_watches()
    repo_frequencies = users.get_repo_frequencies()

    # Prune watchlists to only those w/ greater than 1 watch
    watches_list = [w for w in user_watches.values() if len(w) > 1]
    watches_size = len(watches_list)
    logger.debug("Watches size {0}".format(watches_size))

    cprob = collections.defaultdict(dict)
    for count, watches in enumerate(watches_list, 1):
        logger.debug("Processing watch {0} of {1}".format(count, watches_size))
        for i in watches:
            for j in watches:
                if i == j:
                    continue
                # Looked up only once a distinct partner exists, so entries
                # without any partner never get an (empty) row in cprob.
                partners = cprob[i]
                cofreq = partners[j][0] + 1 if j in partners else 1
                # Float numerator forces true division: with two integer
                # counts a bare "/" truncates to 0 under Python 2 semantics.
                # NOTE(review): assumes frequencies are plain ints/floats.
                partners[j] = cofreq, float(cofreq) / repo_frequencies[i]

    persist_conditional_probabilities(cprob)
def __init__(self):
    """Load the user and repository data this instance works with.

    Each attribute is filled straight from the corresponding loader in the
    ``users`` / ``repos`` modules; nothing is transformed here.
    """
    # Data obtained from the users module.
    self.user_watches = users.get_user_watches()
    self.repo_freqs = users.get_repo_frequencies()

    # Data obtained from the repos module.
    self.repos = repos.get_repos()

    # Identifiers returned by users.get_test_user_ids().
    self.test_users = users.get_test_user_ids()