class TopicModelTest(unittest.TestCase):
    """Smoke test that runs TopicModel over the lines of ``brown.txt``."""

    def setUp(self):
        """Create a fresh TopicModel before each test."""
        self.op = TopicModel()

    def test_run(self):
        """Tokenize each line of ``brown.txt`` and check the model returns a result."""
        # The context manager closes the file as soon as the lines are read;
        # a bare open() leaves the handle for the garbage collector to close.
        with open('brown.txt', 'r') as corpus_file:
            lines = [line.strip() for line in corpus_file]

        # One document per line: lower-cased and split on whitespace.
        self.test_data = [line.lower().split() for line in lines]

        # NOTE(review): this calls TopicModel.test_run, not run — confirm that
        # is the intended entry point on the model.
        self.assertIsNotNone(self.op.test_run(self.test_data))
class TopicModelTest(unittest.TestCase):
    """Checks the shape of TopicModel.run output on a sample of ``brown.txt``."""

    def setUp(self):
        """Build a TopicModel configured for ``self.num_topics`` topics."""
        self.num_topics = 10
        self.op = TopicModel(self.num_topics)

    def test_run(self):
        """Run the model on the first 100 lines and verify the result shape."""
        # The context manager closes the file once the sample is read.
        # zip() stops as soon as range(100) is exhausted, so only the first
        # 100 lines are read and wrapped instead of the whole file.
        with open('brown.txt', 'r') as corpus_file:
            test_data = [
                Corpus(str(num), "", line)
                for num, line in zip(range(100), corpus_file)
            ]

        results = self.op.run(test_data)

        self.assertIsNotNone(results)  # Result is returned
        self.assertEqual(len(results), self.num_topics)  # Correct number of topics returned
        for topic in results.values():
            self.assertEqual(len(topic), 10)  # Each topic has 10 words
def get_operation_handler(operation):
    """Return a newly constructed handler for the named operation.

    Args:
        operation: Identifier of the requested operation, e.g. ``'tfidf'``.

    Returns:
        A new instance of the handler class registered for ``operation``.

    Raises:
        TransactionException: If ``operation`` is not a known identifier.
    """
    # Each value is a zero-argument callable, so only the requested handler
    # is ever instantiated.
    factories = {
        'lemmatize_wordnet': LemmatizerWordNet,
        'pos_tag': PosTag,
        'removecapsgreedy': RemoveCapsGreedy,
        'removecapsnnp': RemoveCapsPreserveNNP,
        'removepunct': RemovePunct,
        'remove_stopwords': RemoveStopwords,
        'sentence_tokenize': SentenceTokenize,
        'stem_porter': StemmerPorter,
        'stem_lancaster': StemmerLancaster,
        'stem_snowball': StemmerSnowball,
        'tfidf': Tfidf,
        'topic_model': TopicModel,
        'wordcloudop': WordCloudOp,
        'word_tokenize_treebank': WordTokenizeTreebank,
        'word_tokenize_whitespace_punct': WordTokenizeWhitespacePunct,
        'word_tokenize_stanford': WordTokenizeStanford,
        'word_tokenize_spaces': WordTokenizeSpaces,
        'word_tokenize_tabs': WordTokenizeTabs,
        'nlp-pos': lambda: StanfordCoreNLP(['pos']),
        'nlp-ner': lambda: StanfordCoreNLP(['pos', 'ner']),
        'noop': NoOp,
    }

    try:
        factory = factories[operation]
    except (KeyError, TypeError):
        # TypeError covers unhashable values (e.g. a list); they can never
        # name an operation, so they are reported like any unknown name.
        factory = None

    if factory is None:
        # Name the rejected operation so the failure can be diagnosed.
        raise TransactionException(
            f'The requested operation "{operation}" does not exist.')
    return factory()
def get_operation_handler(operation):
    """Look up ``operation`` and return a new handler instance for it.

    Args:
        operation: Identifier of the requested operation, e.g. ``'tfidf'``.

    Returns:
        A new instance of the handler class registered for ``operation``.

    Raises:
        TransactionException: If ``operation`` is not a known identifier.
    """

    def splat(handler_cls):
        """Wrap a handler class so the banner is printed before it is built."""
        def build():
            print("YOU GOT SPLATTED")
            return handler_cls()
        return build

    # Zero-argument builders keyed by operation name.
    builders = {
        'lemmatize_wordnet': LemmatizerWordNet,
        'removecapsgreedy': RemoveCapsGreedy,
        'removecapsnnp': RemoveCapsPreserveNNP,
        'removepunct': RemovePunct,
        'removesilence': RemoveSilence,
        'remove_stopwords': RemoveStopwords,
        'sentence_tokenize': SentenceTokenize,
        'removehashtags': RemoveHashtags,
        'removequotes': RemoveQuotes,
        'stem_porter': StemmerPorter,
        'stop_words': RemoveStopwords,
        'tfidf': Tfidf,
        'wordcloudop': WordCloudOp,
        'word_tokenize_treebank': WordTokenizeTreebank,
        'word_tokenize_whitespace_punct': WordTokenizeWhitespacePunct,
        'word_tokenize_stanford': WordTokenizeStanford,
        'nlp-pos': lambda: StanfordCoreNLP('pos'),
        'nlp-ner': lambda: StanfordCoreNLP('ner'),
        'nlp-sentiment': lambda: StanfordCoreNLP('sentiment'),
        'nlp-coref': lambda: StanfordCoreNLP('coref'),
        'nlp-relation': lambda: StanfordCoreNLP('relation'),
        'splat-disfluency': splat(SplatDisfluency),
        'splat-ngrams': splat(SplatNGrams),
        'splat-complexity': splat(SplatComplexity),
        'splat-pos': splat(SplatPOSFrequencies),
        'splat-syllables': splat(SplatSyllables),
        'splat-pronouns': splat(SplatPronouns),
        'char-ngrams': CharNgrams,
        'length-stats': LengthStatistics,
        'topic-model-10': lambda: TopicModel(10),
        'topic-model-30': lambda: TopicModel(30),
        'word-vector': WordVector,
        'unsup-morph': UnsupervisedMorphology,
        'bigram-array': BigramArray,
        'speech-token-stats': SpeechTokenStatistics,
        'extract_transcript': ExtractTranscript,
        'noop': NoOp,
    }

    try:
        build = builders[operation]
    except (KeyError, TypeError):
        # Unhashable values (TypeError) cannot equal any operation name, so
        # they take the same "unknown operation" path as a missing key.
        build = None

    if build is None:
        raise TransactionException(
            f'The requested operation "{operation}" does not exist.')
    return build()
def get_operation_handler(operation):
    """Return a newly constructed handler for the named operation.

    Args:
        operation: Identifier of the requested operation, e.g. ``'tfidf'``.

    Returns:
        A new instance of the handler class registered for ``operation``.

    Raises:
        TransactionException: If ``operation`` is not a known identifier.
    """

    def splat(handler_cls):
        """Wrap a handler class so the banner is printed before it is built."""
        def build():
            # Existing stdout banner, kept so observable output is unchanged.
            print("YOU GOT SPLATTED")
            return handler_cls()
        return build

    # Each value is a zero-argument callable, so only the requested handler
    # is ever instantiated.
    factories = {
        'lemmatize_wordnet': LemmatizerWordNet,
        'pos_tag': PosTag,
        'removecapsgreedy': RemoveCapsGreedy,
        'removecapsnnp': RemoveCapsPreserveNNP,
        'removepunct': RemovePunct,
        'removesilence': RemoveSilence,
        'remove_stopwords': RemoveStopwords,
        'sentence_tokenize': SentenceTokenize,
        'stem_porter': StemmerPorter,
        'stop_words': RemoveStopwords,
        'tfidf': Tfidf,
        'topic_model': TopicModel,
        'wordcloudop': WordCloudOp,
        'word_tokenize_treebank': WordTokenizeTreebank,
        'word_tokenize_whitespace_punct': WordTokenizeWhitespacePunct,
        'word_tokenize_stanford': WordTokenizeStanford,
        'word_tokenize_spaces': WordTokenizeSpaces,
        'word_tokenize_tabs': WordTokenizeTabs,
        'nlp-pos': lambda: StanfordCoreNLP(['pos']),
        'nlp-ner': lambda: StanfordCoreNLP(['pos', 'ner']),
        'nlp-sentiment': lambda: StanfordCoreNLP(['parse', 'sentiment']),
        'nlp-parse': lambda: StanfordCoreNLP(['parse']),
        'nlp-coref': lambda: StanfordCoreNLP(['tokenize', 'ssplit', 'coref']),
        'nlp-relation': lambda: StanfordCoreNLP(['parse', 'relation']),
        'splat-disfluency': splat(SplatDisfluency),
        'splat-ngrams': splat(SplatNGrams),
        'splat-complexity': splat(SplatComplexity),
        'splat-pos': splat(SplatPOSFrequencies),
        'splat-syllables': splat(SplatSyllables),
        'splat-pronouns': splat(SplatPronouns),
        'noop': NoOp,
    }

    try:
        factory = factories[operation]
    except (KeyError, TypeError):
        # TypeError covers unhashable values (e.g. a list); they can never
        # name an operation, so they are reported like any unknown name.
        factory = None

    if factory is None:
        # Name the rejected operation so the failure can be diagnosed.
        raise TransactionException(
            f'The requested operation "{operation}" does not exist.')
    return factory()
def setUp(self):
    """Build the TopicModel under test with its default arguments."""
    model = TopicModel()
    self.op = model
def setUp(self):
    """Record the topic count and build a TopicModel configured with it."""
    topic_count = 10
    self.num_topics = topic_count
    self.op = TopicModel(topic_count)