def test_run(self):
    """Check the per-term frequencies reported by ``WordCloudOp.run``.

    The fixture is a single ``Corpus`` whose last argument contains "hello"
    three times, "world" twice and "test" once.  Returned entries and
    expected entries are compared in both directions, so an empty or
    incomplete result cannot pass unnoticed.  Ordering is not checked.
    """
    self.op = WordCloudOp()
    self.test_data = [
        Corpus("0", "hello", "hello world hello hello world test")
    ]
    desired_results = [
        {"term": "hello", "frequency": 3},
        {"term": "world", "frequency": 2},
        {"term": "test", "frequency": 1},
    ]

    # Materialise once: the output is iterated and then searched.
    results = list(self.op.run(self.test_data))

    # Nothing unexpected was produced ...
    for result in results:
        self.assertIn(result, desired_results)
    # ... and nothing expected is missing.  A one-directional check would
    # pass vacuously if ``run`` returned an empty collection.
    for expected in desired_results:
        self.assertIn(expected, results)
def test_run(self):
    """Verify that ``WordCloudOp.run`` yields the expected term counts.

    One ``Corpus`` is built whose last argument repeats "hello" three
    times, "world" twice and "test" once.  The test passes only when every
    returned entry is expected AND every expected entry was returned; the
    order of the entries is irrelevant.
    """
    self.op = WordCloudOp()
    self.test_data = [
        Corpus("0", "hello", "hello world hello hello world test")
    ]
    desired_results = [
        {"term": "hello", "frequency": 3},
        {"term": "world", "frequency": 2},
        {"term": "test", "frequency": 1},
    ]

    # Turn the output into a list so it can be both iterated and searched.
    results = list(self.op.run(self.test_data))

    # Every returned entry must be one of the expected ones.
    for result in results:
        self.assertIn(result, desired_results)
    # Every expected entry must have been returned; without this second
    # pass an empty result would satisfy the test.
    for expected in desired_results:
        self.assertIn(expected, results)
class WordCloudOpTest(unittest.TestCase):
    """Unit tests for ``WordCloudOp``."""

    def setUp(self):
        """Give every test its own ``WordCloudOp`` instance."""
        self.op = WordCloudOp()

    def test_run(self):
        """Run the op on a one-document corpus and compare the counts.

        The value stored under ``"sentences"`` in the result must equal the
        expected list exactly, including order.
        """
        corpora = [Corpus("0", "hello", "hello world hello hello world test")]

        # The return value is discarded; presumably this tokenizes the
        # corpora in place before the op runs -- TODO confirm.
        tokenizer = WordTokenizeWhitespacePunct()
        tokenizer.run(corpora)

        expected_counts = [
            {"term": term, "frequency": count}
            for term, count in (("hello", 3), ("world", 2), ("test", 1))
        ]

        output = self.op.run(corpora)
        self.assertEqual(output["sentences"], expected_counts)
def get_operation_handler(operation):
    """Return a newly constructed handler for the named operation.

    Args:
        operation: Name of the requested operation, e.g. ``'tfidf'``.

    Returns:
        A fresh instance of the handler class registered under that name.

    Raises:
        TransactionException: If ``operation`` matches none of the names.
    """
    # Ordered (name, factory) pairs.  A factory is only called for the entry
    # that matches, so exactly one handler is constructed per call.
    registry = (
        ('lemmatize_wordnet', LemmatizerWordNet),
        ('pos_tag', PosTag),
        ('removecapsgreedy', RemoveCapsGreedy),
        ('removecapsnnp', RemoveCapsPreserveNNP),
        ('removepunct', RemovePunct),
        ('remove_stopwords', RemoveStopwords),
        ('sentence_tokenize', SentenceTokenize),
        ('stem_porter', StemmerPorter),
        ('stem_lancaster', StemmerLancaster),
        ('stem_snowball', StemmerSnowball),
        ('tfidf', Tfidf),
        ('topic_model', TopicModel),
        ('wordcloudop', WordCloudOp),
        ('word_tokenize_treebank', WordTokenizeTreebank),
        ('word_tokenize_whitespace_punct', WordTokenizeWhitespacePunct),
        ('word_tokenize_stanford', WordTokenizeStanford),
        ('word_tokenize_spaces', WordTokenizeSpaces),
        ('word_tokenize_tabs', WordTokenizeTabs),
        ('nlp-pos', lambda: StanfordCoreNLP(['pos'])),
        ('nlp-ner', lambda: StanfordCoreNLP(['pos', 'ner'])),
        ('noop', NoOp),
    )

    # Compare with ``==`` in declaration order, as an if/elif chain would.
    for name, build in registry:
        if operation == name:
            return build()

    raise TransactionException("The requested operation does not exist.")
def get_operation_handler(operation):
    """Return a newly constructed handler for the named operation.

    Args:
        operation: Name of the requested operation, e.g. ``'tfidf'``.

    Returns:
        A fresh instance of the handler class registered under that name.
        ``'remove_stopwords'`` and ``'stop_words'`` both yield a
        ``RemoveStopwords`` instance.

    Raises:
        TransactionException: If ``operation`` matches none of the names;
            the message quotes the rejected name.
    """

    def core_nlp(annotator):
        # Factory for a StanfordCoreNLP handler built with one annotator name.
        return lambda: StanfordCoreNLP(annotator)

    def announced(handler_cls):
        # Factory that writes the marker line to stdout before constructing.
        # NOTE(review): the print looks like leftover debug output -- confirm
        # before removing, callers may rely on it.
        def build():
            print("YOU GOT SPLATTED")
            return handler_cls()
        return build

    # Ordered (name, factory) pairs.  A factory is only called for the entry
    # that matches, so exactly one handler is constructed per call.
    registry = (
        ('lemmatize_wordnet', LemmatizerWordNet),
        ('removecapsgreedy', RemoveCapsGreedy),
        ('removecapsnnp', RemoveCapsPreserveNNP),
        ('removepunct', RemovePunct),
        ('removesilence', RemoveSilence),
        ('remove_stopwords', RemoveStopwords),
        ('sentence_tokenize', SentenceTokenize),
        ('removehashtags', RemoveHashtags),
        ('removequotes', RemoveQuotes),
        ('stem_porter', StemmerPorter),
        ('stop_words', RemoveStopwords),
        ('tfidf', Tfidf),
        ('wordcloudop', WordCloudOp),
        ('word_tokenize_treebank', WordTokenizeTreebank),
        ('word_tokenize_whitespace_punct', WordTokenizeWhitespacePunct),
        ('word_tokenize_stanford', WordTokenizeStanford),
        ('nlp-pos', core_nlp('pos')),
        ('nlp-ner', core_nlp('ner')),
        ('nlp-sentiment', core_nlp('sentiment')),
        ('nlp-coref', core_nlp('coref')),
        ('nlp-relation', core_nlp('relation')),
        ('splat-disfluency', announced(SplatDisfluency)),
        ('splat-ngrams', announced(SplatNGrams)),
        ('splat-complexity', announced(SplatComplexity)),
        ('splat-pos', announced(SplatPOSFrequencies)),
        ('splat-syllables', announced(SplatSyllables)),
        ('splat-pronouns', announced(SplatPronouns)),
        ('char-ngrams', CharNgrams),
        ('length-stats', LengthStatistics),
        ('topic-model-10', lambda: TopicModel(10)),
        ('topic-model-30', lambda: TopicModel(30)),
        ('word-vector', WordVector),
        ('unsup-morph', UnsupervisedMorphology),
        ('bigram-array', BigramArray),
        ('speech-token-stats', SpeechTokenStatistics),
        ('extract_transcript', ExtractTranscript),
        ('noop', NoOp),
    )

    # Compare with ``==`` in declaration order, as an if/elif chain would.
    for name, build in registry:
        if operation == name:
            return build()

    raise TransactionException(
        f'The requested operation "{operation}" does not exist.')
def setUp(self):
    """Instantiate ``WordCloudOp`` and store it on ``self.op``.

    NOTE(review): presumably the unittest per-test fixture hook; the
    enclosing class is not visible from here -- confirm.
    """
    self.op = WordCloudOp()
def get_operation_handler(operation):
    """Return a newly constructed handler for the named operation.

    Args:
        operation: Name of the requested operation, e.g. ``'tfidf'``.

    Returns:
        A fresh instance of the handler class registered under that name.
        ``'remove_stopwords'`` and ``'stop_words'`` both yield a
        ``RemoveStopwords`` instance.

    Raises:
        TransactionException: If ``operation`` matches none of the names.
    """

    def core_nlp(*annotators):
        # Factory for a StanfordCoreNLP handler; a new annotator list is
        # created on every call.
        return lambda: StanfordCoreNLP(list(annotators))

    def announced(handler_cls):
        # Factory that writes the marker line to stdout before constructing.
        # NOTE(review): the print looks like leftover debug output -- confirm
        # before removing, callers may rely on it.
        def build():
            print("YOU GOT SPLATTED")
            return handler_cls()
        return build

    # Ordered (name, factory) pairs.  A factory is only called for the entry
    # that matches, so exactly one handler is constructed per call.
    registry = (
        ('lemmatize_wordnet', LemmatizerWordNet),
        ('pos_tag', PosTag),
        ('removecapsgreedy', RemoveCapsGreedy),
        ('removecapsnnp', RemoveCapsPreserveNNP),
        ('removepunct', RemovePunct),
        ('removesilence', RemoveSilence),
        ('remove_stopwords', RemoveStopwords),
        ('sentence_tokenize', SentenceTokenize),
        ('stem_porter', StemmerPorter),
        ('stop_words', RemoveStopwords),
        ('tfidf', Tfidf),
        ('topic_model', TopicModel),
        ('wordcloudop', WordCloudOp),
        ('word_tokenize_treebank', WordTokenizeTreebank),
        ('word_tokenize_whitespace_punct', WordTokenizeWhitespacePunct),
        ('word_tokenize_stanford', WordTokenizeStanford),
        ('word_tokenize_spaces', WordTokenizeSpaces),
        ('word_tokenize_tabs', WordTokenizeTabs),
        ('nlp-pos', core_nlp('pos')),
        ('nlp-ner', core_nlp('pos', 'ner')),
        ('nlp-sentiment', core_nlp('parse', 'sentiment')),
        ('nlp-parse', core_nlp('parse')),
        ('nlp-coref', core_nlp('tokenize', 'ssplit', 'coref')),
        ('nlp-relation', core_nlp('parse', 'relation')),
        ('splat-disfluency', announced(SplatDisfluency)),
        ('splat-ngrams', announced(SplatNGrams)),
        ('splat-complexity', announced(SplatComplexity)),
        ('splat-pos', announced(SplatPOSFrequencies)),
        ('splat-syllables', announced(SplatSyllables)),
        ('splat-pronouns', announced(SplatPronouns)),
        ('noop', NoOp),
    )

    # Compare with ``==`` in declaration order, as an if/elif chain would.
    for name, build in registry:
        if operation == name:
            return build()

    raise TransactionException("The requested operation does not exist.")