def build_sub_file():
    # Blended R predictions: random forest + GBM (1200 trees) + logistic regression.
    R_predictions_file_name = '~/gom/predictions/rf_AND_gbm_1200_trees_AND_lr_sub_009.csv'
    print('R_predictions_file_name =', R_predictions_file_name)

    # Map from user to (post_id, predicted like probability) tuples, loaded from the predictions file.
    user_post_like_probability_tuple_map = load_user_post_like_probability_tuple_map(
        R_predictions_file_name)
    count = 0
    no_recommendation_count = 0

    output_file = '/011_gbm_rf_lr.csv'
    print('output_file =', output_file)
    with open(sub_loc + output_file, 'w') as f:
        f.write('"posts"\n')
        users = load_test_users()
        for uid in users:
            # Posts recommended for this user, based on the predicted like probabilities.
            posts = find_like_posts(uid, user_post_like_probability_tuple_map)
            if not posts:
                no_recommendation_count += 1
                print('    no recommended posts: no_recommendation_count =', no_recommendation_count)

            # One space-separated line of post ids per test user.
            f.write(' '.join(posts) + '\n')

            count += 1

    print('Total no_recommendation_count =', no_recommendation_count)
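The helpers used above (`load_user_post_like_probability_tuple_map`, `find_like_posts`, `load_test_users`) and the `sub_loc` path are defined elsewhere in the repository. The sketch below only illustrates the shape of data they are expected to pass around; the CSV layout (`uid,post_id,probability` with a header row), the 0.5 probability cut-off, and the sorting are assumptions for illustration, not the project's actual implementation.

```python
import csv
import os
from collections import defaultdict


def load_user_post_like_probability_tuple_map(file_name):
    # Assumed CSV layout: uid,post_id,probability with a header row.
    user_map = defaultdict(list)
    with open(os.path.expanduser(file_name)) as f:
        reader = csv.reader(f)
        next(reader)  # skip header
        for uid, post_id, probability in reader:
            user_map[uid].append((post_id, float(probability)))
    return user_map


def find_like_posts(uid, user_post_like_probability_tuple_map, threshold=0.5):
    # Keep posts whose predicted like probability clears the (assumed) threshold,
    # most confident first.
    candidates = user_post_like_probability_tuple_map.get(uid, [])
    liked = [(post_id, p) for post_id, p in candidates if p >= threshold]
    liked.sort(key=lambda pair: pair[1], reverse=True)
    return [post_id for post_id, _ in liked]
```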
Example #3
populate_for_first_four_weeks = False
print('populate_for_first_four_weeks =', populate_for_first_four_weeks)
tfu = train_feature_util(populate_for_first_four_weeks)

output_file = '/test__more_features1__max-posts-' + str(max_post_to_compare) + '.csv'
print('output_file =', output_file)
print('lines_to_read =', lines_to_read)

with open(generated_loc + output_file, 'w') as f_test:
    # CSV header: one row per (user, candidate post) with the engineered features.
    f_test.write(
        'uid,post_id,blog_post_day,blog_like_fraction,blog_author_like_fraction,'
        'max_cosine_similarity,avg_cosine_similarity,'
        'max_tag_like_fraction,avg_tag_like_fraction,'
        'max_category_like_fraction,avg_category_like_fraction\n')

    users = load_test_users()

    count = 0
    for uid in users:
        # Blogs this user liked during the training period.
        liked_blogs = tfu.liked_blogs(uid)

        for blog in liked_blogs:
            # Every test-period post from a liked blog is a candidate for this user.
            for post_id, author, tags, categories, date_struct in test_blog_post_tuples_map[blog]:
                topic_distribution = tfu.find_topic_distribution(blog, post_id)

                blog_post_day = get_day_since_test_start_date(date_struct)
                blog_like_fraction = tfu.get_blog_like_fraction(uid, blog)
                blog_author_like_fraction = tfu.get_blog_author_like_fraction(uid, blog, author)
                max_cosine_similarity, avg_cosine_similarity = tfu.cosine_similarity(
                    uid, max_post_to_compare, topic_distribution)
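The max/avg cosine similarity features compare the candidate post's topic distribution against the topic distributions of posts the user has already liked. The real computation lives in the project's `train_feature_util` and is not shown here; the snippet below is a minimal sketch under assumed inputs (plain float vectors, with the comparison capped at `max_post_to_compare` liked posts).

```python
import math


def cosine_similarity_features(liked_topic_vectors, candidate_topic_vector, max_post_to_compare):
    # Compare the candidate post against at most `max_post_to_compare` previously liked posts
    # and return the max and average cosine similarity. Vectors are assumed to be lists of floats.
    def cosine(u, v):
        dot = sum(a * b for a, b in zip(u, v))
        norm_u = math.sqrt(sum(a * a for a in u))
        norm_v = math.sqrt(sum(b * b for b in v))
        if norm_u == 0 or norm_v == 0:
            return 0.0
        return dot / (norm_u * norm_v)

    similarities = [cosine(vec, candidate_topic_vector)
                    for vec in liked_topic_vectors[:max_post_to_compare]]
    if not similarities:
        return 0.0, 0.0
    return max(similarities), sum(similarities) / len(similarities)
```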