def determine_topics(df, keywords_map):
    '''Attach a 'topics' column to df, derived from each row's full_text.

    Every row's full_text is passed to keywords_based_classifier together
    with keywords_map. The topics it yields are stored per id_str and then
    mapped back onto the frame through the id_str column.

    df is modified in place (the 'topics' column is added or overwritten)
    and the same object is returned. If several rows share an id_str, all
    of them receive the list built for the last such row.
    '''
    topics_by_id = {}
    for _, record in df.iterrows():
        matched = keywords_based_classifier(record.full_text, keywords_map)
        # Writing the topics to a BigQuery table needs a list of dicts,
        # each keyed by 'topic'. The list always opens with an
        # empty-topic entry, followed by one entry per detected topic.
        topics_by_id[record.id_str] = (
            [{'topic': ''}] + [{'topic': name} for name in matched]
        )
    df[u'topics'] = df['id_str'].map(topics_by_id)
    return df
def test_word_endings(self):
    '''Classifying 'Test sentence wordabc' must yield the 'word ending' topic.'''
    topics = keywords_based_classifier('Test sentence wordabc', self.keywords_map)
    self.assertIn('word ending', topics)
def test_beginings(self):
    '''Classifying 'Test sentence abcword' must yield the 'word begining' topic.'''
    topics = keywords_based_classifier('Test sentence abcword', self.keywords_map)
    self.assertIn('word begining', topics)
def test_word_combinations_and_phrase(self):
    '''Classifying 'Test sentence abc def' must yield both the
    'word combinations' and the 'single phrase' topics.
    '''
    topics = keywords_based_classifier('Test sentence abc def', self.keywords_map)
    # Checked in this order so the first missing topic is the one reported.
    for expected_topic in ('word combinations', 'single phrase'):
        self.assertIn(expected_topic, topics)
def test_word_combinations(self):
    '''Classifying 'Test sentence def abc' must yield the 'word combinations' topic.'''
    topics = keywords_based_classifier('Test sentence def abc', self.keywords_map)
    self.assertIn('word combinations', topics)
def test_single_phrase(self):
    '''Classifying 'Test sentence abc def' must yield the 'single phrase' topic.'''
    topics = keywords_based_classifier('Test sentence abc def', self.keywords_map)
    self.assertIn('single phrase', topics)
def test_multiple_matches(self):
    '''Classifying 'Test sentence abc' must yield both the
    'multiple single words' and the 'single word' topics.
    '''
    topics = keywords_based_classifier('Test sentence abc', self.keywords_map)
    # Checked in this order so the first missing topic is the one reported.
    for expected_topic in ('multiple single words', 'single word'):
        self.assertIn(expected_topic, topics)
def test_single_match(self):
    '''Classifying 'Test sentence def' must return exactly
    ['multiple single words'].
    '''
    topics = keywords_based_classifier('Test sentence def', self.keywords_map)
    self.assertEqual(topics, ['multiple single words'])
def test_no_matches(self):
    '''Classifying 'Test sentence' must return an empty list.'''
    topics = keywords_based_classifier('Test sentence', self.keywords_map)
    self.assertEqual(topics, [])
def test_empty_text_input(self):
    '''Classifying the empty string must return an empty list.'''
    topics = keywords_based_classifier('', self.keywords_map)
    self.assertEqual(topics, [])