Example #1
0
def main(args):
    """Write repository suggestions for every user id listed in a test file.

    ``args`` is handed whole to ``load_data``. ``args[3]`` is the path of the
    test file (one integer user id per line) and ``args[4]`` is the path of
    the results file. One line is written per user, in the form
    ``<user_id>:<repo_id>,<repo_id>,...``.

    Raises ValueError if a non-blank line of the test file is not an integer.
    """
    start_time = time.time()
    users, repos, popular_repos, superprojects = load_data(args)

    # Single-argument print(...) behaves the same as the Python 2 statement.
    print("Processing test users")

    # The with-blocks close both files even when parsing a line or
    # suggest_repos raises; the original leaked the handles in that case.
    with open(args[3], 'r') as test:
        with open(args[4], 'w') as results:
            # Iterate the file lazily instead of loading it with readlines().
            for line in test:
                stripped = line.strip()
                if not stripped:
                    # A stray blank line would otherwise crash int('').
                    continue
                user_id = int(stripped)

                # Ids missing from the loaded data get a fresh User object.
                if user_id not in users:
                    user = User(user_id)
                else:
                    user = users[user_id]

                suggested_repos = suggest_repos(repos, popular_repos, users,
                                                user, superprojects)
                # In-place sort; the ordering is whatever the repo objects
                # define for comparison.
                suggested_repos.sort()

                repo_ids = ','.join([str(x.id) for x in suggested_repos])
                results.write('%d:%s\n' % (user.id, repo_ids))
                # Flush after every user, as before, so finished lines reach
                # the file without waiting for the whole run to end.
                results.flush()

    print("Done in %d seconds" % (time.time() - start_time))
Example #2
0
def main(args):
    """Print how many target entries are present in the results.

    ``args[0]`` and ``args[1]`` are each passed to ``load_data``; the first
    yields the target collection and the second the results. The target must
    support ``len()`` and ``.difference()`` (set-like). The report line has
    the form ``<guessed>/<total>, <percent>%``.

    An empty target is reported as ``0/0, 0%`` rather than raising
    ZeroDivisionError.
    """
    start_time = time.time()

    # Single-argument print(...) behaves the same as the Python 2 statement.
    print("Loading results")
    target = load_data(args[0])
    results = load_data(args[1])

    total = len(target)
    # Entries of target that do not occur in results.
    missed = target.difference(results)
    guessed = total - len(missed)

    # Guard the division: with no targets there is nothing to score.
    if total:
        percent = (guessed * 100.0) / total
    else:
        percent = 0.0
    # %d truncates the float percentage towards zero, as the original did.
    print("%d/%d, %d%%" % (guessed, total, percent))

    print("Done in %d seconds" % (time.time() - start_time))
Example #3
0
def main(args):
    """Load the data set and print the number of unique users and repos.

    ``args`` is passed unchanged to ``load_data``, which must return exactly
    three values; only the first two (users, repos) are used, via ``len()``.
    """
    began = time.time()
    users, repos, _popular_repos = load_data(args)

    # Report table: (format string, collection to count), printed in order.
    # Single-argument print(...) behaves the same as the Python 2 statement.
    report = (
        ("Unique users: %d", users),
        ("Unique repos: %d", repos),
    )
    for template, collection in report:
        print(template % len(collection))

    elapsed = time.time() - began
    print("Done in %d seconds" % elapsed)
Example #4
0
def main(args):
    """Split the loaded data into sample, test and target files.

    ``args`` is passed whole to ``load_data``. ``args[3]`` is the output
    directory; it is created (including parents) when it does not exist yet.
    Three files are written inside it: ``data.txt`` (from ``users``), plus
    ``test.txt`` and ``target.txt`` (both from the value returned by
    ``remove_target_data``).

    Raises OSError if the output directory cannot be created.
    """
    # Local import keeps this block self-contained; the rest of the file is
    # not visible from here.
    import os

    start_time = time.time()
    users, repos, popular_repos = load_data(args)

    # NOTE(review): presumably remove_target_data strips the held-out entries
    # from `users` in place, so it must run before output_sample -- confirm.
    target_data = remove_target_data(users)

    out_dir = args[3]
    if out_dir:
        # Resolves the old "XXX create directory" marker. try/except instead
        # of a pre-check avoids a race and works on Python 2, which has no
        # exist_ok argument.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

    # Single-argument print(...) behaves the same as the Python 2 statement.
    print("Writing test data")
    output_sample("%s/data.txt" % out_dir, users)
    output_test("%s/test.txt" % out_dir, target_data)
    output_target("%s/target.txt" % out_dir, target_data)

    print("Done in %d seconds" % (time.time() - start_time))