# Create the collector and set the bare-minimum connection parameters.
reddit_collector = RedditCollector()
reddit_collector.user_agent = "reddit comment collector v0.1 by /u/jonathan_morgan"

import reddit_collect.models

# Look up the post whose comments will be collected.
post_qs = reddit_collect.models.Post.objects.filter(reddit_id='1cp0i3')

# Print the post's num_comments (115 at time of collection).
django_post = post_qs[0]
print(django_post.num_comments)

# Hand the QuerySet to collect_comments().
reddit_collector.collect_comments(post_qs)

# Re-read the post now that collection has run.
django_post = reddit_collect.models.Post.objects.get(reddit_id='1cp0i3')

# Build the reddiwrap post and a reddiwrap instance, then pull the comments
# from reddit.
comment_rw_post = django_post.create_reddiwrap_post()
reddiwrap = reddit_collector.create_reddiwrap_instance()
reddiwrap.fetch_comments(comment_rw_post)

# Top-level comments of the fetched post.
top_comments = comment_rw_post.comments
# Create the summary helper that records values from this run.
my_summary_helper = SummaryHelper()

# Select the posts to collect comments for.
# Single-post alternative:
#post_qs = reddit_collect.models.Post.objects.filter( reddit_id = reddit_post_id )
post_qs = reddit_collect.models.Post.objects.filter(reddit_id__in=['1cp0i3', '1d67nv'])

# Print and record num_comments of the first post in the QuerySet
# (115 at time of collection).
django_post = post_qs[0]
print("==> num_comments: " + str(django_post.num_comments))
my_summary_helper.set_prop_value("num_comments", django_post.num_comments)
my_summary_helper.set_prop_desc("num_comments", "num_comments (post)")

# Hand the QuerySet to collect_comments().
reddit_collector.collect_comments(post_qs)

#================================================================================
# Now, compare collect_comments() output to just grabbing comments with reddiwrap
#================================================================================

# Re-read the post now that collection has run.
# NOTE(review): reddit_post_id is not assigned anywhere in this snippet; its only
# visible assignment is the commented-out line below. Confirm it is defined
# before running.
# reddit_post_id = '1cp0i3'
django_post = reddit_collect.models.Post.objects.get(reddit_id=reddit_post_id)

# Build the reddiwrap post, so comments can be pulled from reddit.
comment_rw_post = django_post.create_reddiwrap_post()

# Create the reddiwrap instance.
reddiwrap = reddit_collector.create_reddiwrap_instance()
# Select posts whose subreddit is in matching_subreddit_qs.
post_qs = reddit_collect.models.Post.objects.filter(subreddit_id__in=matching_subreddit_qs)

# OR less fancy: filter on an explicit list of subreddit reddit IDs.
#subreddit_id_list = [ 't5_2qh1o', 't5_2t22d', 't5_2qh2p', 't5_2qh13', 't5_2r9vp', 't5_2tqat', 't5_2r84s', 't5_2s7tt', 't5_2qh1e', 't5_2rfxx', 't5_2qpp6', 't5_2s3qj', 't5_2qh03', 't5_2qh0u', 't5_2qh61', 't5_2qh1i' ]
#post_qs = reddit_collect.models.Post.objects.filter( subreddit_reddit_id__in = subreddit_id_list )

# Optionally limit to just those we haven't collected comments on yet.
#post_qs = post_qs.filter( comment_collection_status = "new" )

# Keep posts with num_comments >= 10 and <= 1500, ordered by number of
# comments, DESC.
post_qs = (
    post_qs
    .filter(num_comments__gte=10)
    .filter(num_comments__lte=1500)
    .order_by("-num_comments")
)

# Limit to the first 5...
post_qs = post_qs[:5]

# ...OR don't
#post_qs = ordered_post_qs

# Hand the QuerySet to collect_comments(), updating existing records
# (still defaults to using bulk create for any new records).
reddit_collector.collect_comments(posts_qs_IN=post_qs, do_update_existing_IN=True)

# Alternative: don't update existing.
#reddit_collector.collect_comments( posts_qs_IN = post_qs, do_update_existing_IN = False )