class TestStorage(unittest.TestCase):
    """Integration tests for the Storage wrapper, run against test collections.

    NOTE(review): these are live integration tests — they talk to a real
    database and the Vision API, so they need network access and credentials.
    """

    def __init__(self, *args, **kwargs):
        super(TestStorage, self).__init__(*args, **kwargs)
        # Will throw an exception if storage_creds.ini does not exist,
        # rendering all tests useless. Must have the config file available.
        self.storage = Storage(config_file='storage_creds.ini',
                               config_header='database_info',
                               new_collection_name='test_new_jsons',
                               user_collection_name='test_user_jsons',
                               vision_collection_name='test_vision_info')
        self.vision = VisionApi()
        # Keep direct handles to the underlying client/db for raw commands.
        self.client = self.storage.client
        self.db = self.storage.db

    def test_ping(self):
        """The database should answer a raw 'ping' command."""
        ping_result = self.db.command('ping')
        self.assertEqual(ping_result['ok'], 1.0)

    def test_testdb(self):
        """A known fixture document must be readable from the test collection."""
        fixture_collection = self.db.test
        fixture_doc = fixture_collection.find_one()
        self.assertEqual(fixture_doc['test'], "abc123")

    def test_user_exists(self):
        """A known reddit user should be reported as existing."""
        self.assertTrue(self.storage.reddit_user_exists("terpin"))

    def test_scrape_storage(self):
        """End-to-end: scrape one post, store it, and store its vision info."""
        scrape_iter = reddit.scrape_reddit(subreddit="AdviceAnimals",
                                           post_count=1,
                                           limit=1)
        for scraped_info in scrape_iter:
            self.storage.add_reddit_scraped_info(scraped_info)
            vision_res = self.vision.detect_images_info(
                scraped_info.image_urls)
            triples = zip(scraped_info.posts,
                          scraped_info.image_urls,
                          vision_res['responses'])
            for post, image_url, image_info in triples:
                self.storage.add_vision_info(reddit.get_post_id(post),
                                             image_url=image_url,
                                             vision_json=image_info)
def main(subreddit: str, post_count: int, limit: int = None, after: str = None,
         log_level: str = None):
    """Scrape a subreddit, analyze each post's image, and persist the results.

    For each batch yielded by ``reddit.scrape_reddit``: clean the batch, run
    the Vision API over the image URLs, run sentiment analysis over any OCR'd
    image text, and store reddit/vision/language records via ``Storage``.

    Args:
        subreddit: subreddit name to scrape (required).
        post_count: total number of posts to fetch (required).
        limit: page size per scrape request; may be None.
        after: reddit pagination token to resume from; may be None.
        log_level: "INFO" for info logging; anything else means DEBUG.

    Exits the process when required arguments are missing (usage message) or
    when a scrape yields None (exit code 2).
    """
    if subreddit is None or post_count is None:
        print(
            'main.py -s <subreddit> -p <post_count> '
            '(optional: -l <limit>, -a <after>, -g <loglevel: {DEBUG (default), INFO}>)'
        )
        sys.exit()

    # set up logger
    if log_level != "INFO":
        log_level = "DEBUG"
        level = logging.DEBUG
    else:
        # BUG FIX: was the string "INFO"; use the numeric constant so both
        # branches pass the same type to basicConfig.
        level = logging.INFO
    log_file_name = datetime.now().strftime(
        '../logs/popularitybot_%H_%M_%d_%m_%Y_{}.log'.format(log_level))
    logging.basicConfig(filename=log_file_name,
                        level=level,
                        format='%(asctime)s %(message)s')

    # init vision, language, and storage clients
    vision = VisionApi()
    language = LanguageApi()
    storage = Storage()

    # scrape reddit
    for scraped_info in reddit.scrape_reddit(subreddit=subreddit,
                                             post_count=post_count,
                                             after=after,
                                             limit=limit):
        if scraped_info is None:
            logging.critical("ScrapedInfo is None. Exiting.")
            sys.exit(2)

        # clean any scraped info that has None in their posts, image_urls, or user_info
        ScrapedRedditPost.clean(scraped_info)
        # BUG FIX: limit defaults to None; guard before comparing/subtracting,
        # which previously raised TypeError when -l was not supplied.
        if limit is not None and len(scraped_info.posts) < limit:
            removed = limit - len(scraped_info.posts)
            print("Removed {} posts from this scrape.".format(removed))
            logging.info("Removed {} posts from this scrape.".format(removed))

        # BUG FIX: cleaning can empty the batch; randint(0, -1) would raise.
        if not scraped_info.posts:
            logging.warning("No posts left in this scrape after cleaning.")
            continue

        # Spot-check a random post from the batch: it must not be stored yet.
        rand_post_id = scraped_info.posts[random.randint(
            0, len(scraped_info.posts) - 1)]['data']['id']
        if storage.reddit_post_exists(rand_post_id):
            raise RuntimeError("Post already seen. Something is weird.")
        else:
            # BUG FIX: this string literal was broken across a physical line
            # break in the source (a SyntaxError).
            print("Post {} not yet seen. Good!".format(rand_post_id))

        # now pass in the image urls into the vision api
        vision_res = vision.detect_images_info(scraped_info.image_urls)
        storage.add_reddit_scraped_info(scraped_info)
        for post, image_url, image_info in zip(scraped_info.posts,
                                               scraped_info.image_urls,
                                               vision_res['responses']):
            reddit_post_id = reddit.get_post_id(post)

            # sentiment analysis over any OCR'd text the Vision API found
            post_title = post['data']['title']
            post_text = None
            if 'fullTextAnnotation' in image_info and 'text' in image_info[
                    'fullTextAnnotation']:
                post_text = language.detect_sentiment(
                    image_info['fullTextAnnotation']['text'])
            language_result = language.get_result_for_storage(
                reddit_post_id, post_title, post_text)

            # store results
            storage.add_vision_info(reddit_post_id,
                                    image_url=image_url,
                                    vision_json=image_info)
            storage.add_language_info(language_result)