def load_subreddit_cascades(subreddit, domain, cascades, comments):
    """Load the filtered cascades/comments for one subreddit, building them if needed.

    Parameters:
        subreddit: subreddit name to filter cascades to
        domain: data domain identifier used for loading, building, and caching
        cascades: previously loaded raw cascades, or None to load on demand
        comments: previously loaded raw comments, or None to load on demand

    Returns:
        (filtered_cascades, filtered_comments) for this subreddit.
    """
    # load filtered cascades for this subreddit, if they were cached earlier
    filtered_cascades, filtered_comments = cascade_manip.load_filtered_cascades(domain, subreddit)

    # don't exist, filter them now
    if filtered_cascades == False:
        # have we loaded the raw cascades/comments yet? if not, do it now
        # (waiting until now in case we have all the filtered versions and don't need these at all)
        if cascades == None or comments == None:
            # build/load cascades (auto-load as a result, either raw data or cached cascades)
            cascades, comments, missing_posts, missing_comments = cascade_analysis.build_cascades(domain)
            # optional: filter out cascades with any missing elements (posts or comments)
            cascades, comments = cascade_manip.remove_missing(domain, cascades, comments)

        # filter cascades by a particular subreddit
        filtered_cascades = cascade_manip.filter_cascades_by_subreddit(cascades, subreddit)
        # and filter comments to match those posts
        filtered_cascades, filtered_comments = cascade_manip.filter_comments_by_posts(filtered_cascades, comments)
        # save these filtered posts/comments for easier loading later
        # BUG FIX: the original passed the global `code` here (undefined in this
        # function's scope); use the `domain` parameter, consistent with the
        # build_cascades/remove_missing calls above
        cascade_manip.save_cascades(domain, filtered_cascades, subreddit)
        cascade_manip.save_comments(domain, filtered_comments, subreddit)

    return filtered_cascades, filtered_comments
# have we loaded the raw cascades/comments yet? if not, do it now
# (waiting until now in case we have all the filtered versions and don't need these at all)
if cascades == None or comments == None:
    # build/load cascades (auto-load as a result, either raw data or cached cascades)
    cascades, comments, missing_posts, missing_comments = cascade_analysis.build_cascades(code)
    # optional: filter out cascades with any missing elements (posts or comments)
    cascades, comments = cascade_manip.remove_missing(code, cascades, comments)

# filter cascades by a particular subreddit
filtered_cascades = cascade_manip.filter_cascades_by_subreddit(cascades, subreddit)
# and filter comments to match those posts
filtered_cascades, filtered_comments = cascade_manip.filter_comments_by_posts(filtered_cascades, comments)
# save these filtered posts/comments for easier loading later
cascade_manip.save_cascades(code, filtered_cascades, subreddit)
cascade_manip.save_comments(code, filtered_comments, subreddit)

# fit params to all of the filtered cascades, loading checkpoints if they exist
all_params = cascade_analysis.fit_all_cascades(code, filtered_cascades, filtered_comments, pickle_save, subreddit)

# if not saving to pickle, save to text file now
if pickle_save == False:
    with open("data_cache/txt_params/%s_params.txt" % subreddit, "w") as f:
        # one line per post: post id followed by its fitted parameter values
        for post_id, params in all_params.items():
            f.write(post_id + " ")
            for i in range(len(params)):
                # NOTE(review): source was truncated mid-loop here; writing the
                # space-separated param values (newline after the last) is the
                # apparent intent — confirm against the original script
                f.write(str(params[i]) + (" " if i < len(params) - 1 else "\n"))
# running totals for simulation bookkeeping (presumably accumulated later in
# this script — the accumulation code is not visible here)
total_update_time = 0
total_insert_count = 0
total_insert_time = 0
total_remove_count = 0
total_remove_time = 0
total_match_count = 0

# process all posts (or just one, if doing that)
print("Processing", len(sim_post_id_list), "post", "s" if len(sim_post_id_list) > 1 else "")
for sim_post_id in sim_post_id_list:
    # pull out just the post (and associated comments) we care about
    sim_post = raw_posts[sim_post_id]
    # and filter to comments
    junk, post_comments = cascade_manip.filter_comments_by_posts({sim_post_id: sim_post}, raw_comments, False)
    if batch == False:
        print("Simulation post has", len(post_comments), "comments\n")

    # GRAPH INFER
    inferred_params = functions_paper_model.graph_infer(sim_post, sim_post_id, group, max_nodes, min_node_quality, estimate_initial_params)
    # inferred_params = [1.73166, 0.651482, 1.08986, 0.762604, 2.49934, 0.19828]  # placeholder if skipping the infer
    if batch == False:
        print("Inferred params:", inferred_params, "\n")

    # REFINE PARAMS - for partial observed trees
    partial_fit_params = fit_partial_cascade.fit_partial_cascade(sim_post, post_comments, time_observed, inferred_params, display=False)
    if batch == False:
        # NOTE(review): source was truncated at this point — the original most
        # likely printed partial_fit_params (mirroring the prints above);
        # placeholder keeps the block syntactically valid. Confirm and restore.
        pass
# load cascades and fit params to all of them, loading checkpoints if they exist
'''
cascades, comments = cascade_manip.load_filtered_cascades(code, subreddit)  #load posts + comments
cascade_analysis.fit_all_cascades(code, cascades, comments, subreddit)      #load saved fits, and fit the rest
'''

# or, load specific saved cascade params from file
cascades, comments = cascade_manip.load_filtered_cascades(code, subreddit)  # load posts + comments
cascade_params = cascade_manip.load_cascade_params(code, subreddit + "100")

# filter cascades/comments to fitted posts (for testing)
cascades = {post_id: post for post_id, post in cascades.items() if post_id in cascade_params}
print("Filtered to", len(cascades), "posts with fitted parameters")
# NOTE(review): return value discarded here — comments are not actually
# reduced unless the helper mutates its arguments; verify that is intended
cascade_manip.filter_comments_by_posts(cascades, comments)

# pull out one random cascade from those loaded for testing, remove from all cascades
test_post_id = "WYNap8ZYQ6kc0lKZRAX3tA"  # "BitkI6YOhOueIKiphn5okA" #"kRl5UtFpGFEaAQ374AREfw" #random.choice(list(cascades.keys()))
test_post = cascades.pop(test_post_id)
test_post_params = cascade_params.pop(test_post_id)
print("Random post:", test_post_id, "\n " + test_post['title_m'], "\n  ", test_post_params)

# build a ParamGraph for set of posts
pgraph = ParamGraph()
pgraph.build_graph(cascades, cascade_params)
# file_utils.save_pickle(pgraph, "class_pickle_test.pkl")  # save class instance for later
# posts whose parameter fit failed — these get dropped below
fit_fail = set(file_utils.load_json("model_files/params/hackernews_failed_param_fit.txt"))

# load hackernews cascades
posts = file_utils.load_pickle("data_cache/hackernews_cascades/hackernews_cascade_posts.pkl")
comments = file_utils.load_pickle("data_cache/hackernews_cascades/hackernews_cascade_comments.pkl")
print("Loaded", len(posts), "posts and", len(comments), "comments")

# remove missing
posts, comments = cascade_manip.remove_missing(code, posts, comments)

# remove posts for which the fit failed, then drop their comments too
posts = {key: value for (key, value) in posts.items() if key not in fit_fail}
posts, comments = cascade_manip.filter_comments_by_posts(posts, comments)
print("Down to", len(posts), "posts and", len(comments), "comments")

# filenames of filtered cascades and comments
cascades_filepath = "data_cache/filtered_cascades/%s_%s_cascades.pkl"  # domain and subreddit cascades
comments_filepath = "data_cache/filtered_cascades/%s_%s_comments.pkl"  # domain and subreddit comments

# save to same place as other filtered cascades - use hackernews as domain and subreddit
file_utils.save_pickle(posts, cascades_filepath % (code, code))
file_utils.save_pickle(comments, comments_filepath % (code, code))

# add hackernews to subreddit -> domain mapping
subs = file_utils.load_pickle("model_files/subreddits.pkl")
subs['hackernews_sub'] = 'cve'  # redirect existing cve sub for this file - shouldn't be a problem
subs[code] = code