Example #1
0
class TestStorage(unittest.TestCase):
    """Integration tests for Storage against the dedicated test collections."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Storage() raises if storage_creds.ini is absent, which would make
        # every test in this class useless — the config file must exist.
        self.storage = Storage(config_file='storage_creds.ini',
                               config_header='database_info',
                               new_collection_name='test_new_jsons',
                               user_collection_name='test_user_jsons',
                               vision_collection_name='test_vision_info')
        self.vision = VisionApi()
        self.client = self.storage.client
        self.db = self.storage.db

    def test_ping(self):
        # A reachable server answers the ping command with {'ok': 1.0}.
        self.assertEqual(self.db.command('ping')['ok'], 1.0)

    def test_testdb(self):
        # The test collection is expected to hold a sentinel document.
        sentinel = self.db.test.find_one()
        self.assertEqual(sentinel['test'], "abc123")

    def test_user_exists(self):
        self.assertTrue(self.storage.reddit_user_exists("terpin"))

    def test_scrape_storage(self):
        scrapes = reddit.scrape_reddit(subreddit="AdviceAnimals",
                                       post_count=1,
                                       limit=1)
        for info in scrapes:
            self.storage.add_reddit_scraped_info(info)

            # Run every scraped image through the Vision API, then store
            # one vision record per (post, image_url, response) triple.
            responses = self.vision.detect_images_info(
                info.image_urls)['responses']
            for post, url, vision_json in zip(info.posts,
                                              info.image_urls,
                                              responses):
                self.storage.add_vision_info(reddit.get_post_id(post),
                                             image_url=url,
                                             vision_json=vision_json)
Example #2
0
def main(subreddit: str,
         post_count: int,
         limit: int = None,
         after: str = None,
         log_level: str = None):
    """Scrape a subreddit, run vision + sentiment analysis on the scraped
    images, and persist everything through Storage.

    Args:
        subreddit: Name of the subreddit to scrape. Required.
        post_count: Number of posts to fetch. Required.
        limit: Optional per-request post limit forwarded to the scraper.
        after: Optional reddit fullname to resume scraping after.
        log_level: "INFO" for info-level logging; anything else (including
            None) falls back to DEBUG.

    Exits the process if required arguments are missing (usage message) or
    the scraper yields None (exit code 2). Raises RuntimeError if a sampled
    post was already stored.
    """
    if subreddit is None or post_count is None:
        print(
            'main.py -s <subreddit> -p <post_count> '
            '(optional: -l <limit>, -a <after>, -g <loglevel: {DEBUG (default), INFO}>)'
        )
        sys.exit()

    # set up logger — DEBUG is the default unless INFO was explicitly asked for
    if log_level != "INFO":
        log_level = "DEBUG"
        level = logging.DEBUG
    else:
        # BUGFIX: use the numeric logging constant rather than the raw
        # string "INFO", keeping `level` consistent with the DEBUG branch.
        level = logging.INFO
    log_file_name = datetime.now().strftime(
        '../logs/popularitybot_%H_%M_%d_%m_%Y_{}.log'.format(log_level))
    logging.basicConfig(filename=log_file_name,
                        level=level,
                        format='%(asctime)s %(message)s')

    # init vision and storage
    vision = VisionApi()
    language = LanguageApi()
    storage = Storage()

    # scrape reddit
    for scraped_info in reddit.scrape_reddit(subreddit=subreddit,
                                             post_count=post_count,
                                             after=after,
                                             limit=limit):
        if scraped_info is None:
            logging.critical("ScrapedInfo is None. Exiting.")
            sys.exit(2)
        # clean any scraped info that has None in their posts, image_urls, or user_info
        ScrapedRedditPost.clean(scraped_info)
        # BUGFIX: guard against limit=None (the default) — comparing or
        # subtracting None would raise TypeError on every default run.
        if limit is not None and len(scraped_info.posts) < limit:
            removed = limit - len(scraped_info.posts)
            print("Removed {} posts from this scrape.".format(removed))
            logging.info("Removed {} posts from this scrape.".format(removed))
        # Sanity check: a randomly sampled post from this batch must be unseen.
        rand_post_id = random.choice(scraped_info.posts)['data']['id']
        if storage.reddit_post_exists(rand_post_id):
            raise RuntimeError("Post already seen. Something is weird.")
        else:
            print("Post {} not yet seen. Good!".format(rand_post_id))
        # now pass in the image urls into the vision api
        vision_res = vision.detect_images_info(scraped_info.image_urls)
        storage.add_reddit_scraped_info(scraped_info)
        for post, image_url, image_info in zip(scraped_info.posts,
                                               scraped_info.image_urls,
                                               vision_res['responses']):
            reddit_post_id = reddit.get_post_id(post)
            # sentiment analysis on the post title and any OCR'd image text
            post_title = post['data']['title']
            post_text = None
            if 'fullTextAnnotation' in image_info and 'text' in image_info[
                    'fullTextAnnotation']:
                post_text = language.detect_sentiment(
                    image_info['fullTextAnnotation']['text'])
            language_result = language.get_result_for_storage(
                reddit_post_id, post_title, post_text)
            # store results
            storage.add_vision_info(reddit_post_id,
                                    image_url=image_url,
                                    vision_json=image_info)
            storage.add_language_info(language_result)