def main(args):
    """Write repository suggestions for every user listed in the test file.

    ``args`` is handed unchanged to ``load_data``; in addition ``args[3]`` is
    the path of the test file (one integer user id per line) and ``args[4]``
    is the path of the results file to create.

    Each output line has the form ``<user id>:<repo id>,<repo id>,...`` with
    the suggestions sorted by the repositories' own ordering.
    """
    start_time = time.time()
    users, repos, popular_repos, superprojects = load_data(args)
    print("Processing test users")
    # Context managers guarantee both files are closed even if parsing or
    # suggest_repos() raises part-way through the run.
    with open(args[3], 'r') as test:
        with open(args[4], 'w') as results:
            for line in test:
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. a trailing newline at EOF)
                    # instead of crashing in int('').
                    continue
                user_id = int(stripped)
                # Ids absent from the loaded data get a fresh User object.
                user = users[user_id] if user_id in users else User(user_id)
                suggested_repos = suggest_repos(
                    repos, popular_repos, users, user, superprojects)
                suggested_repos.sort()
                repo_ids = ','.join([str(x.id) for x in suggested_repos])
                results.write('%d:%s\n' % (user.id, repo_ids))
                # Flush after every user, as the original did; presumably so
                # partial results survive an interrupted run.
                results.flush()
    print("Done in %d seconds" % (time.time() - start_time))
def main(args):
    """Score a results file against the target file and print the hit rate.

    ``args[0]`` is passed to ``load_data`` to obtain the target entries and
    ``args[1]`` to obtain the produced results. The target collection must
    support ``len()`` and ``.difference()`` (presumably a set -- confirm
    against ``load_data``).

    Prints ``<guessed>/<total>, <percent>%`` followed by the elapsed time.
    """
    start_time = time.time()
    print("Loading results")
    target = load_data(args[0])
    results = load_data(args[1])
    total = len(target)
    # Target entries that do not appear in the results are misses.
    missed = target.difference(results)
    guessed = total - len(missed)
    # An empty target used to raise ZeroDivisionError; report 0% instead.
    if total:
        percent = (guessed * 100.0) / total
    else:
        percent = 0.0
    print("%d/%d, %d%%" % (guessed, total, percent))
    print("Done in %d seconds" % (time.time() - start_time))
def main(args):
    """Load the data set and print the number of users and repos it holds.

    ``args`` is forwarded unchanged to ``load_data``, which must return a
    three-element result; the third element (popular repos) is not used
    here. The elapsed wall-clock time is printed at the end.
    """
    began = time.time()
    loaded = load_data(args)
    users, repos, popular_repos = loaded

    user_count = len(users)
    print("Unique users: %d" % user_count)

    repo_count = len(repos)
    print("Unique repos: %d" % repo_count)

    elapsed = time.time() - began
    print("Done in %d seconds" % elapsed)
def main(args):
    """Split the loaded data into sample, test and target files.

    ``args`` is forwarded unchanged to ``load_data``; ``args[3]`` is the
    output directory. Three files are written inside it:

    * ``data.txt``   -- written by ``output_sample`` from ``users``
    * ``test.txt``   -- written by ``output_test`` from the removed target data
    * ``target.txt`` -- written by ``output_target`` from the removed target data

    The output directory is created if it does not exist yet.
    """
    import os  # local import: only needed for the directory creation below

    start_time = time.time()
    users, repos, popular_repos = load_data(args)
    # NOTE(review): presumably remove_target_data() strips the held-out
    # entries from ``users`` in place, so it must run before output_sample()
    # -- confirm against its definition.
    target_data = remove_target_data(users)
    print("Writing test data")
    out_dir = args[3]
    # Resolves the former "XXX create directory" TODO: make sure the output
    # directory exists before any file is written into it.
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    output_sample("%s/data.txt" % out_dir, users)
    output_test("%s/test.txt" % out_dir, target_data)
    output_target("%s/target.txt" % out_dir, target_data)
    print("Done in %d seconds" % (time.time() - start_time))