Example #1
0
    def test_run_filters_simple(self):
        """Check run_filters when only the english filter is applied."""
        scraper = SongAggregateScraper()
        scraper.load_from_file(SAVEFILE)

        # Index 2 is the third passing song; its english-filtered comments
        # are handed to the english_filter_3rd_song helper.
        third_song = scraper.get_passing_songs()[2]
        english_only = fltr.run_filters([english_filter], third_song.comments)
        self.english_filter_3rd_song(english_only)
Example #2
0
    def test_length_filter(self):
        """Length-filter a song whose comments include lyrics.

        The english filter runs first; the length filter is then applied to
        its output, and the size of both results is checked.
        """
        scraper = SongAggregateScraper()
        scraper.load_from_file(SAVEFILE)
        song = scraper.get_passing_songs()[8]

        english_comments = fltr.run_filters([english_filter], song.comments)
        shortened_comments = fltr.run_filters([length_filter],
                                              english_comments)

        # Count before and after the length filter is applied.
        self.assertEqual(26, len(english_comments),
                         "length unfiltered quantity")
        self.assertEqual(20, len(shortened_comments),
                         "length filtered quantity")
Example #3
0
    def test_run_filters_medium(self):
        """Check run_filters with the english and length filters combined."""
        scraper = SongAggregateScraper()
        scraper.load_from_file(SAVEFILE)
        song = scraper.get_passing_songs()[8]

        # Both filters are passed to a single run_filters call.
        both_filters = [english_filter, length_filter]
        surviving_comments = fltr.run_filters(both_filters, song.comments)

        self.assertEqual(20, len(surviving_comments), "run_filters 2 filters")
def load_comments():
    """Print the filtered comments of every passing song in SAVEFILE.

    Loads the saved aggregate, prints how many songs pass, then for each
    passing song runs its comments through the filter chain and prints each
    remaining comment together with its index.
    """
    scraper = SongAggregateScraper()
    scraper.load_from_file(SAVEFILE)
    songs = scraper.get_passing_songs()

    active_filters = [
        fltr.english_filter,
        fltr.length_filter,
        fltr.youtube_topics_filter,
        fltr.brutish_music_filter,
    ]

    print(len(songs))

    for song in songs:
        kept = fltr.run_filters(active_filters, song.comments)
        for position, text in enumerate(kept):
            print(position, text)
Example #5
0
    def test_substring_match_filter(self):
        """ testing substring matching filter, for the word 'sing' """

        comment1 = 'hello I am singing'
        comment2 = 'hello I like to sing'
        comment3 = 'hello I do nothing'

        filtered_comments = fltr.run_filters([brutish_music_filter],
                                             [comment1, comment2, comment3])

        # assertIn/assertNotIn report the member and the container when they
        # fail, whereas assertTrue(x in y) only says "False is not true".
        self.assertIn(comment1, filtered_comments,
                      "substring match filter")
        self.assertIn(comment2, filtered_comments,
                      "substring match filter")
        self.assertNotIn(comment3, filtered_comments,
                         "substring match filter")
Example #6
0
    def fetch_youtube_comments(self, min_num_comments, max_num_comments,
                               filter_list):
        """
        Fetches top-level comments for self.video_id page by page, filters
        each page and appends the surviving comments to self.comments.
        Assumes video id has already been populated.

        Fetching stops when self.comments holds max_num_comments entries,
        when a page comes back with no comments (ERROR_NO_COMMENTS is
        recorded through set_error), or when the response carries no
        'nextPageToken'.

        :param min_num_comments: the minimum number of comments wanted; if the
            pages run out before this many are collected, ERROR_NO_TOKEN is
            recorded through set_error
        :param max_num_comments: the cap on len(self.comments)
        :param filter_list: the list of filters to impose on comments, while fetching them
        """

        # Obtain Comments
        next_page_token = None
        while True:
            comments_parameters = {
                'part': 'snippet',
                'videoId': self.video_id,
                'maxResults': 100,
                'order': 'relevance'
            }
            if next_page_token is not None:
                comments_parameters['pageToken'] = next_page_token

            query_comments = query(YOUTUBE_COMMENTS_URL, comments_parameters)

            comments = [
                x['snippet']['topLevelComment']['snippet']['textOriginal']
                for x in query_comments['items']
            ]
            if not comments:
                self.set_error(FetchingError.ERROR_NO_COMMENTS)
                return

            # Filter Comments While Obtaining Them
            comments = fltr.run_filters(filter_list, comments)

            # Clamp at zero: when self.comments already exceeds the cap, a
            # negative slice bound would drop items from the END of the page
            # and still append the rest instead of appending nothing.
            remaining = max(0, max_num_comments - len(self.comments))
            self.comments += comments[:remaining]

            # ">=" rather than "==" so an already over-full list stops the
            # loop instead of paging through every remaining result.
            if len(self.comments) >= max_num_comments:
                # Done
                break
            if 'nextPageToken' not in query_comments:
                if len(self.comments) < min_num_comments:
                    self.set_error(FetchingError.ERROR_NO_TOKEN)
                break
            next_page_token = query_comments['nextPageToken']
    # NOTE(review): the statement that encloses this indented section is
    # outside the visible part of the file.

    # Names of the two data files, read from the SYS_DATA mapping.
    basics_filename = SYS_DATA['BasicsFilename']
    ratings_filename = SYS_DATA['RatingsFilename']

    # Fetch both files through the build helper.
    build.download_imdb_data(basics_filename)
    build.download_imdb_data(ratings_filename)

    # Read each file, then merge the two results.
    basics = build.read_data(basics_filename)
    ratings = build.read_data(ratings_filename)

    merged = build.merge(basics, ratings)

    # NOTE(review): the return value is ignored, so sanitize presumably
    # modifies `merged` in place -- confirm.
    build.sanitize(merged)

    # NOTE(review): if `merged` is a pandas DataFrame, `.size` is the number
    # of cells (rows * columns), not the number of records -- confirm.
    print('Got {} records'.format(merged.size))
    build.write_file(merged, DATA_FILE_NAME)

# Build the Filter object; note that this rebinds the name `filter`
filter = filter.Filter(
    CONFIG_FILE,
    DATA_FILE_NAME,
    ENCODING,
    IMDB_ID,
    SYS_DATA['BaseBrowserURL'],
)

# Apply the filters once, before prompting
filter.run_filters()

# Prompt forever: a bare ENTER picks a film, any other input just re-prompts
while True:
    text = input('press ENTER for a movie...')
    if text != '':
        continue
    filter.pick()