# Example #1
0
# Example: collect comments for a single stored post through the collector,
#    then pull the same post's comments directly with reddiwrap.

# create the collector used throughout this example.
reddit_collector = RedditCollector()

# the only connection parameter set here is the user agent.
reddit_collector.user_agent = "reddit comment collector v0.1 by /u/jonathan_morgan"

import reddit_collect.models

# reddit ID of the post to work with - named once, used for both lookups below.
example_reddit_id = '1cp0i3'

# look the post up with filter() so we have a QuerySet to hand to the collector.
post_qs = reddit_collect.models.Post.objects.filter( reddit_id = example_reddit_id )

# output the comment count stored on the first match (115, at time of collection).
django_post = post_qs[ 0 ]
print( django_post.num_comments )

# collect comments for every post in the QuerySet.
reddit_collector.collect_comments( post_qs )

# load the post again now that collect_comments() has run.
django_post = reddit_collect.models.Post.objects.get( reddit_id = example_reddit_id )

# make the reddiwrap version of the post, which will hold comments pulled from reddit.
comment_rw_post = django_post.create_reddiwrap_post()

# ask the collector for a reddiwrap instance and use it to fetch the comments.
reddiwrap = reddit_collector.create_reddiwrap_instance()
reddiwrap.fetch_comments( comment_rw_post )

# after the fetch, the comments are read from the reddiwrap post (top-level comments).
top_comments = comment_rw_post.comments
# Example: collect comments for two posts while recording the expected comment
#    count in a summary helper, then set up a direct reddiwrap fetch of one of
#    the posts for comparison.

# initialize summary helper
my_summary_helper = SummaryHelper()

# reddit ID of the post used for the reddiwrap comparison below.  This has to be
#    assigned before the Post lookup further down - that lookup reads
#    reddit_post_id, and raised NameError while the assignment was commented out.
reddit_post_id = '1cp0i3'

# get post QuerySet
#post_qs = reddit_collect.models.Post.objects.filter( reddit_id = reddit_post_id )
post_qs = reddit_collect.models.Post.objects.filter( reddit_id__in = [ reddit_post_id, '1d67nv' ] )

# num_comments?
# NOTE(review): the QuerySet has no explicit ordering, so which of the two posts
#    ends up at index 0 is not guaranteed - confirm this is the post you expect.
django_post = post_qs[ 0 ]
print( "==> num_comments: " + str( django_post.num_comments ) ) # 115, at time of collection

# record the post's stored comment count, with a label, in the summary helper.
my_summary_helper.set_prop_value( "num_comments", django_post.num_comments )
my_summary_helper.set_prop_desc( "num_comments", "num_comments (post)" )

# pass the QuerySet to the collect_comments() method.
reddit_collector.collect_comments( post_qs )

#================================================================================
# Now, compare collect_comments() output to just grabbing comments with reddiwrap
#================================================================================

# refresh post.
django_post = reddit_collect.models.Post.objects.get( reddit_id = reddit_post_id )

# get reddiwrap post, so we can pull comments from reddit.
comment_rw_post = django_post.create_reddiwrap_post()

# get reddiwrap instance
reddiwrap = reddit_collector.create_reddiwrap_instance()
# Example: pick the five most-commented posts from a set of subreddits and
#    collect their comments, updating any comments that are already stored.
#
# NOTE: matching_subreddit_qs is not defined in this excerpt - it must already
#    exist when this runs.

# build the post QuerySet in one chained expression.
post_qs = (
    reddit_collect.models.Post.objects
    # posts whose subreddit is in matching_subreddit_qs.
    .filter( subreddit_id__in = matching_subreddit_qs )
    # at least 10 comments...
    .filter( num_comments__gte = 10 )
    # ...and no more than 1500.
    .filter( num_comments__lte = 1500 )
    # optionally, only posts we haven't collected comments on yet:
    #.filter( comment_collection_status = "new" )
    # most comments first.
    .order_by( "-num_comments" )
)

# OR less fancy - filter on an explicit list of subreddit reddit IDs instead:
#subreddit_id_list = [ 't5_2qh1o', 't5_2t22d', 't5_2qh2p', 't5_2qh13', 't5_2r9vp', 't5_2tqat', 't5_2r84s', 't5_2s7tt', 't5_2qh1e', 't5_2rfxx', 't5_2qpp6', 't5_2s3qj', 't5_2qh03', 't5_2qh0u', 't5_2qh61', 't5_2qh1i' ]
#post_qs = reddit_collect.models.Post.objects.filter( subreddit_reddit_id__in = subreddit_id_list )

# keep only the first 5 posts (remove this line to process every match).
post_qs = post_qs[ :5 ]

# hand the QuerySet to collect_comments(), asking it to update existing
#    records (still defaults to using bulk create for any new records).
update_existing = True
reddit_collector.collect_comments( posts_qs_IN = post_qs, do_update_existing_IN = update_existing )

# to leave existing records alone instead:
#reddit_collector.collect_comments( posts_qs_IN = post_qs, do_update_existing_IN = False )