def test_run_filters_simple(self):
    """Run only the English filter over the comments of passing song #2."""
    scraper = SongAggregateScraper()
    scraper.load_from_file(SAVEFILE)

    # Index 2 is the third entry of the passing-songs list.
    third_song = scraper.get_passing_songs()[2]
    english_only = fltr.run_filters([english_filter], third_song.comments)

    # The expectations for this song live in the english_filter_3rd_song helper.
    self.english_filter_3rd_song(english_only)
def test_length_filter(self):
    """Apply the length filter on top of English-filtered comments.

    Uses passing song #8 from the save file; the original note on this test
    describes it as a song with lyrics as part of its comments.
    """
    scraper = SongAggregateScraper()
    scraper.load_from_file(SAVEFILE)
    target_song = scraper.get_passing_songs()[8]

    # Stage 1: English filter only. Stage 2: length filter on stage 1 output.
    english_only = fltr.run_filters([english_filter], target_song.comments)
    english_and_length = fltr.run_filters([length_filter], english_only)

    self.assertEqual(26, len(english_only), "length unfiltered quantity")
    self.assertEqual(20, len(english_and_length), "length filtered quantity")
def test_run_filters_medium(self):
    """Run the English and length filters together in one run_filters call."""
    scraper = SongAggregateScraper()
    scraper.load_from_file(SAVEFILE)
    target_song = scraper.get_passing_songs()[8]

    # Both filters are handed over as a single list.
    active_filters = [english_filter, length_filter]
    surviving = fltr.run_filters(active_filters, target_song.comments)

    self.assertEqual(20, len(surviving), "run_filters 2 filters")
def load_comments():
    """Print the filtered comments of every passing song in the save file.

    First prints how many passing songs were loaded, then, song by song,
    each surviving comment together with its index within that song.
    """
    scraper = SongAggregateScraper()
    scraper.load_from_file(SAVEFILE)
    songs = scraper.get_passing_songs()

    active_filters = [
        fltr.english_filter,
        fltr.length_filter,
        fltr.youtube_topics_filter,
        fltr.brutish_music_filter,
    ]

    print(len(songs))
    for song in songs:
        # Numbering restarts at 0 for every song.
        for position, text in enumerate(fltr.run_filters(active_filters, song.comments)):
            print(position, text)
def test_substring_match_filter(self):
    """Check the substring-matching filter against the word 'sing'.

    Two comments containing 'sing' ('singing', 'sing') must survive
    brutish_music_filter; the comment without it ('nothing') must be dropped.
    """
    comment1 = 'hello I am singing'
    comment2 = 'hello I like to sing'
    comment3 = 'hello I do nothing'

    filtered_comments = fltr.run_filters(
        [brutish_music_filter], [comment1, comment2, comment3]
    )

    # assertIn/assertNotIn report the container contents on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertIn(comment1, filtered_comments, "substring match filter")
    self.assertIn(comment2, filtered_comments, "substring match filter")
    self.assertNotIn(comment3, filtered_comments, "substring match filter")
def fetch_youtube_comments(self, min_num_comments, max_num_comments, filter_list):
    """
    Fetch top-level comments for ``self.video_id`` page by page, filter each
    page, and append the survivors to ``self.comments``.

    Paging stops as soon as ``self.comments`` holds at least
    ``max_num_comments`` entries, or when a response carries no
    ``nextPageToken``. Assumes the video id has already been populated.

    :param min_num_comments: minimum number of comments wanted; if the pages
        run out with fewer than this collected, ERROR_NO_TOKEN is recorded
    :param max_num_comments: upper bound on how many comments this call lets
        ``self.comments`` grow to
    :param filter_list: the list of filters to impose on comments, while
        fetching them (passed to ``fltr.run_filters`` for every page)
    :return: None; results accumulate in ``self.comments`` and failures are
        reported through ``self.set_error``
    """
    next_page_token = None
    while True:
        comments_parameters = {
            'part': 'snippet',
            'videoId': self.video_id,
            'maxResults': 100,
            'order': 'relevance'
        }
        if next_page_token is not None:
            comments_parameters['pageToken'] = next_page_token
        query_comments = query(YOUTUBE_COMMENTS_URL, comments_parameters)

        # Keep only the original text of each top-level comment.
        comments = [
            x['snippet']['topLevelComment']['snippet']['textOriginal']
            for x in query_comments['items']
        ]
        if not comments:
            # An empty page (before filtering) is treated as "no comments".
            self.set_error(FetchingError.ERROR_NO_COMMENTS)
            return

        # Filter comments while obtaining them.
        comments = fltr.run_filters(filter_list, comments)

        # Clamp at zero: a negative count would turn the slice below into
        # comments[:-k] and append entries even though the cap is exceeded.
        remaining = max(0, max_num_comments - len(self.comments))
        self.comments += comments[:remaining]

        # ">=" rather than "==" so an already over-full list still stops.
        if len(self.comments) >= max_num_comments:
            break

        if 'nextPageToken' not in query_comments:
            # No more pages; flag the shortfall if the minimum was not met.
            if len(self.comments) < min_num_comments:
                self.set_error(FetchingError.ERROR_NO_TOKEN)
            break
        next_page_token = query_comments['nextPageToken']
# Fetch the basics and ratings data files, merge them, and write the result.
basics_filename = SYS_DATA['BasicsFilename']
ratings_filename = SYS_DATA['RatingsFilename']
build.download_imdb_data(basics_filename)
build.download_imdb_data(ratings_filename)
basics = build.read_data(basics_filename)
ratings = build.read_data(ratings_filename)
merged = build.merge(basics, ratings)
# Return value is ignored; presumably sanitize() cleans `merged` in place - TODO confirm.
build.sanitize(merged)
# NOTE(review): if `merged` is a pandas DataFrame, .size counts cells
# (rows x columns), not rows - confirm this is the intended "records" figure.
print('Got {} records'.format(merged.size))
build.write_file(merged, DATA_FILE_NAME)

# Setup filters.
# The instance gets its own name: rebinding `filter` would hide the `filter`
# module (and the builtin), and inside a function it would make
# `filter.Filter` raise UnboundLocalError.
movie_filter = filter.Filter(CONFIG_FILE, DATA_FILE_NAME, ENCODING, IMDB_ID,
                             SYS_DATA['BaseBrowserURL'])

# Run filters
movie_filter.run_filters()

# Run app, pick a film for the user on request.
# Any non-empty input is ignored and the prompt is shown again.
while True:
    text = input('press ENTER for a movie...')
    if text == '':
        movie_filter.pick()