def build_sub_file(): R_predictions_file_name = '~/gom/predictions/rf_AND_gbm_1200_trees_AND_lr_sub_009.csv' print 'R_predictions_file_name = ', R_predictions_file_name user_post_like_probability_tuple_map = load_user_post_like_probability_tuple_map( R_predictions_file_name) count = 0 no_recommendation_count = 0 output_file = '/011_gbm_rf_lr.csv' print 'output_file = ', output_file with open(sub_loc + output_file, 'w') as f: f.write('"posts"\n') users = load_test_users() for uid in users: posts = find_like_posts(uid, user_post_like_probability_tuple_map) if len(posts) == 0: no_recommendation_count += 1 print ' no recommended posts: no_recommendation_count = ', no_recommendation_count f.write(' '.join(posts) + '\n') count += 1 print 'Total no_recommendation_count = ', no_recommendation_count
def build_sub_file(): R_predictions_file_name = "~/gom/predictions/rf_AND_gbm_1200_trees_AND_lr_sub_009.csv" print "R_predictions_file_name = ", R_predictions_file_name user_post_like_probability_tuple_map = load_user_post_like_probability_tuple_map(R_predictions_file_name) count = 0 no_recommendation_count = 0 output_file = "/011_gbm_rf_lr.csv" print "output_file = ", output_file with open(sub_loc + output_file, "w") as f: f.write('"posts"\n') users = load_test_users() for uid in users: posts = find_like_posts(uid, user_post_like_probability_tuple_map) if len(posts) == 0: no_recommendation_count += 1 print " no recommended posts: no_recommendation_count = ", no_recommendation_count f.write(" ".join(posts) + "\n") count += 1 print "Total no_recommendation_count = ", no_recommendation_count
print 'populate_for_first_four_weeks = ', populate_for_first_four_weeks tfu = train_feature_util(populate_for_first_four_weeks) output_file = '/test__more_features1__max-posts-' + str( max_post_to_compare) + '.csv' print 'output_file = ', output_file print 'lines_to_read = ', lines_to_read with open(generated_loc + output_file, 'w') as f_test: f_test.write( 'uid,post_id,blog_post_day,blog_like_fraction,blog_author_like_fraction,max_cosine_similarity,avg_cosine_similarity' + ',max_tag_like_fraction,avg_tag_like_fraction,max_category_like_fraction,avg_category_like_fraction\n' ) users = load_test_users() count = 0 for uid in users: liked_blogs = tfu.liked_blogs(uid) for blog in liked_blogs: for post_id, author, tags, categories, date_struct in test_blog_post_tuples_map[ blog]: topic_distribution = tfu.find_topic_distribution(blog, post_id) blog_post_day = get_day_since_test_start_date(date_struct) blog_like_fraction = tfu.get_blog_like_fraction(uid, blog) blog_author_like_fraction = tfu.get_blog_author_like_fraction( uid, blog, author) max_cosine_similarity, avg_cosine_similarity = tfu.cosine_similarity(
populate_for_first_four_weeks = False print "populate_for_first_four_weeks = ", populate_for_first_four_weeks tfu = train_feature_util(populate_for_first_four_weeks) output_file = "/test__more_features1__max-posts-" + str(max_post_to_compare) + ".csv" print "output_file = ", output_file print "lines_to_read = ", lines_to_read with open(generated_loc + output_file, "w") as f_test: f_test.write( "uid,post_id,blog_post_day,blog_like_fraction,blog_author_like_fraction,max_cosine_similarity,avg_cosine_similarity" + ",max_tag_like_fraction,avg_tag_like_fraction,max_category_like_fraction,avg_category_like_fraction\n" ) users = load_test_users() count = 0 for uid in users: liked_blogs = tfu.liked_blogs(uid) for blog in liked_blogs: for post_id, author, tags, categories, date_struct in test_blog_post_tuples_map[blog]: topic_distribution = tfu.find_topic_distribution(blog, post_id) blog_post_day = get_day_since_test_start_date(date_struct) blog_like_fraction = tfu.get_blog_like_fraction(uid, blog) blog_author_like_fraction = tfu.get_blog_author_like_fraction(uid, blog, author) max_cosine_similarity, avg_cosine_similarity = tfu.cosine_similarity( uid, max_post_to_compare, topic_distribution )