def test_save_and_load(self):
        """Check that a saved scraper can be restored into a fresh instance.

        Runs ``scrape_from_echonest``, saves the aggregator to ``test_file``,
        loads that file into a second ``SongAggregateScraper`` and compares
        ``max_num_comments``, ``aggregate[3]`` and the first passing song's
        comments between the original and the restored object.
        """
        save_path = 'test_file'

        aggr = SongAggregateScraper()
        aggr.scrape_from_echonest(BASEDIR,
                                  MIN_NUM_COMMENTS,
                                  MAX_NUM_COMMENTS,
                                  FILTER_LIST,
                                  limit=LIMIT)
        good_songs = aggr.get_passing_songs()
        aggr.save_to_file(save_path)
        # NOTE(review): nothing removes the saved file after the test; the
        # exact on-disk name depends on save_to_file, so cleanup is a TODO.

        # Disabled exact-value checks, kept for reference:
        # self.assertTrue(aggr.max_num_comments == MAX_NUM_COMMENTS, "aggregator metadata")
        # self.assertTrue(aggr.aggregate[3] == 2, "aggregate statistics")
        # self.assertTrue(len(good_songs) == 1, "scraper passing songs")
        # self.assertEqual(good_songs[0].comments[0], "Who still listening in 2018?", "passing songs comments")

        aggr2 = SongAggregateScraper()
        # assertIsNone reports the actual value on failure, unlike
        # assertTrue on an ``is None`` comparison.
        self.assertIsNone(aggr2.max_num_comments, "aggr2 is empty")

        aggr2.load_from_file(save_path)
        recover_good_songs = aggr2.get_passing_songs()
        self.assertEqual(aggr.max_num_comments, aggr2.max_num_comments,
                         "metadata preserved")
        self.assertEqual(aggr.aggregate[3], aggr2.aggregate[3],
                         "aggregate statistics preserved")

        # The remaining checks index the first passing song; fail with a
        # labelled assertion instead of a bare IndexError when there is none.
        self.assertGreater(len(good_songs), 0,
                           "scrape produced no passing songs")
        self.assertGreater(len(recover_good_songs), 0,
                           "no passing songs after reload")

        original_comments = good_songs[0].comments
        recovered_comments = recover_good_songs[0].comments
        self.assertEqual(len(original_comments),
                         len(recovered_comments),
                         "passing songs preserved")
        self.assertEqual(original_comments[0],
                         recovered_comments[0],
                         "passing songs comments preserved")
 def test_scrape_echonest(self):
     """Check that scraping from Echo Nest yields at least two passing songs."""
     aggr = SongAggregateScraper()
     aggr.scrape_from_echonest(BASEDIR,
                               MIN_NUM_COMMENTS,
                               MAX_NUM_COMMENTS,
                               FILTER_LIST,
                               limit=LIMIT)
     # aggr.print_summary()  # enable to inspect the scrape while debugging
     passing_songs = aggr.get_passing_songs()
     # assertGreaterEqual shows the actual count on failure, which
     # assertTrue on a pre-evaluated comparison cannot do.
     self.assertGreaterEqual(len(passing_songs), 2,
                             "echonest youtube scraper")
# Project Libraries
import filter as fltr
from song_aggregate import SongAggregateScraper
from five_million import random_songs

# General Libraries
""" Scrapes comments from a subset of the 5 million YouTube song dataset. """

# --- Configuration -------------------------------------------------------
# Path handed to scrape_from_echonest; it is relative, so how it resolves
# depends on where the script is launched from (TODO confirm).
BASEDIR = '../../../../Thesis/data_sample/W/D'
# Passed to both scrapers; presumably the lower/upper bound on comments per
# song -- confirm against SongAggregateScraper.
MIN_NUM_COMMENTS = 40
MAX_NUM_COMMENTS = 80
# Number of random video ids requested, and the `limit` for the Echo Nest run.
NUM_SONGS = 100
filter_list = [fltr.english_filter]

# --- Run 1: random video ids from the 5 million song dataset -------------
video_ids = random_songs(NUM_SONGS)

aggr_5m = SongAggregateScraper()
aggr_5m.scrape_video_ids(
    video_ids,
    MIN_NUM_COMMENTS,
    MAX_NUM_COMMENTS,
    filter_list,
)

# --- Run 2: songs found under BASEDIR via Echo Nest ----------------------
aggr_echo = SongAggregateScraper()
aggr_echo.scrape_from_echonest(
    BASEDIR,
    MIN_NUM_COMMENTS,
    MAX_NUM_COMMENTS,
    filter_list,
    limit=NUM_SONGS,
)

# --- Report both runs, 5 million dataset first ---------------------------
for _aggregator in (aggr_5m, aggr_echo):
    _aggregator.print_summary()