def get_operation_handler(operation):
    """Return a new handler object for the requested operation name.

    Args:
        operation: Identifier of the operation, matched with ``==`` against
            the known names listed below (e.g. ``'pos_tag'``, ``'nlp-ner'``).

    Returns:
        The object produced by calling the handler registered under the
        first name that equals ``operation``.

    Raises:
        TransactionException: If ``operation`` equals none of the known
            names.
    """
    # Ordered (name, factory) pairs. They are scanned with ``==`` instead of
    # being looked up in a dict so that any value of ``operation`` -- even an
    # unhashable one -- simply falls through to the error at the bottom.
    registry = (
        ('lemmatize_wordnet', LemmatizerWordNet),
        ('pos_tag', PosTag),
        ('removecapsgreedy', RemoveCapsGreedy),
        ('removecapsnnp', RemoveCapsPreserveNNP),
        ('removepunct', RemovePunct),
        ('remove_stopwords', RemoveStopwords),
        ('sentence_tokenize', SentenceTokenize),
        ('stem_porter', StemmerPorter),
        ('stem_lancaster', StemmerLancaster),
        ('stem_snowball', StemmerSnowball),
        ('tfidf', Tfidf),
        ('topic_model', TopicModel),
        ('wordcloudop', WordCloudOp),
        ('word_tokenize_treebank', WordTokenizeTreebank),
        ('word_tokenize_whitespace_punct', WordTokenizeWhitespacePunct),
        ('word_tokenize_stanford', WordTokenizeStanford),
        ('word_tokenize_spaces', WordTokenizeSpaces),
        ('word_tokenize_tabs', WordTokenizeTabs),
        # CoreNLP handlers take the annotator list as their only argument;
        # the lambdas build a fresh list on every call.
        ('nlp-pos', lambda: StanfordCoreNLP(['pos'])),
        ('nlp-ner', lambda: StanfordCoreNLP(['pos', 'ner'])),
        ('noop', NoOp),
    )

    for known_name, make_handler in registry:
        if operation == known_name:
            return make_handler()

    raise TransactionException("The requested operation does not exist.")
class SentenceTokenizeTest(unittest.TestCase):
    """Exercises SentenceTokenize.run on a small hand-written corpus."""

    def setUp(self):
        """Create the SentenceTokenize instance used by the test."""
        self.op = SentenceTokenize()

    def test_run(self):
        """run() must yield the expected sentence list for every corpus."""
        # NOTE(review): the positional Corpus arguments look like
        # (id, title, contents) -- confirm against the Corpus definition.
        test_data = [
            Corpus(
                "0",
                "",
                "hello world. Will you say goodbye, world? I'll say hello.")
        ]
        desired_results = {
            "0": [
                "hello world.",
                "Will you say goodbye, world?",
                "I'll say hello."
            ]
        }

        results = self.op.run(test_data)
        self.assertIsNotNone(results)

        # Materialise once so the results can be checked for coverage and
        # then iterated again, even if run() returns a one-shot iterable.
        results = list(results)

        # Guard against a vacuous pass: with an empty result the comparison
        # loop below would never execute and the test would succeed without
        # checking anything.
        self.assertEqual(
            {result['corpus_id'] for result in results},
            set(desired_results))

        for result in results:
            self.assertEqual(
                result['sentences'],
                desired_results[result['corpus_id']])
def get_operation_handler(operation):
    """Return a new handler object for the requested operation name.

    Args:
        operation: Identifier of the operation, matched with ``==`` against
            the known names listed below (e.g. ``'tfidf'``, ``'nlp-ner'``,
            ``'splat-ngrams'``).

    Returns:
        The object produced by calling the handler registered under the
        first name that equals ``operation``.

    Raises:
        TransactionException: If ``operation`` equals none of the known
            names; the message embeds the requested value.

    Side effects:
        Every ``splat-*`` operation prints ``YOU GOT SPLATTED`` to stdout
        before its handler is constructed.
    """

    def announced(handler_cls):
        # Wrap a handler so the banner is printed right before construction.
        # NOTE(review): this print looks like leftover debug output --
        # confirm nobody relies on it before removing.
        def build():
            print("YOU GOT SPLATTED")
            return handler_cls()
        return build

    # Ordered (name, factory) pairs. They are scanned with ``==`` instead of
    # being looked up in a dict so that any value of ``operation`` -- even an
    # unhashable one -- simply falls through to the error at the bottom.
    registry = (
        ('lemmatize_wordnet', LemmatizerWordNet),
        ('removecapsgreedy', RemoveCapsGreedy),
        ('removecapsnnp', RemoveCapsPreserveNNP),
        ('removepunct', RemovePunct),
        ('removesilence', RemoveSilence),
        ('remove_stopwords', RemoveStopwords),
        ('sentence_tokenize', SentenceTokenize),
        ('removehashtags', RemoveHashtags),
        ('removequotes', RemoveQuotes),
        ('stem_porter', StemmerPorter),
        # 'stop_words' resolves to the same handler as 'remove_stopwords'.
        ('stop_words', RemoveStopwords),
        ('tfidf', Tfidf),
        ('wordcloudop', WordCloudOp),
        ('word_tokenize_treebank', WordTokenizeTreebank),
        ('word_tokenize_whitespace_punct', WordTokenizeWhitespacePunct),
        ('word_tokenize_stanford', WordTokenizeStanford),
        # CoreNLP handlers receive a single annotator name as a string.
        ('nlp-pos', lambda: StanfordCoreNLP('pos')),
        ('nlp-ner', lambda: StanfordCoreNLP('ner')),
        ('nlp-sentiment', lambda: StanfordCoreNLP('sentiment')),
        ('nlp-coref', lambda: StanfordCoreNLP('coref')),
        ('nlp-relation', lambda: StanfordCoreNLP('relation')),
        ('splat-disfluency', announced(SplatDisfluency)),
        ('splat-ngrams', announced(SplatNGrams)),
        ('splat-complexity', announced(SplatComplexity)),
        ('splat-pos', announced(SplatPOSFrequencies)),
        ('splat-syllables', announced(SplatSyllables)),
        ('splat-pronouns', announced(SplatPronouns)),
        ('char-ngrams', CharNgrams),
        ('length-stats', LengthStatistics),
        # The two topic-model variants differ only in the integer passed in.
        ('topic-model-10', lambda: TopicModel(10)),
        ('topic-model-30', lambda: TopicModel(30)),
        ('word-vector', WordVector),
        ('unsup-morph', UnsupervisedMorphology),
        ('bigram-array', BigramArray),
        ('speech-token-stats', SpeechTokenStatistics),
        ('extract_transcript', ExtractTranscript),
        ('noop', NoOp),
    )

    for known_name, make_handler in registry:
        if operation == known_name:
            return make_handler()

    raise TransactionException(
        f'The requested operation "{operation}" does not exist.')
def setUp(self):
    """Instantiate SentenceTokenize and store it on the instance as ``op``."""
    self.op = SentenceTokenize()
def get_operation_handler(operation):
    """Return a new handler object for the requested operation name.

    Args:
        operation: Identifier of the operation, matched with ``==`` against
            the known names listed below (e.g. ``'pos_tag'``,
            ``'nlp-parse'``, ``'splat-pos'``).

    Returns:
        The object produced by calling the handler registered under the
        first name that equals ``operation``.

    Raises:
        TransactionException: If ``operation`` equals none of the known
            names.

    Side effects:
        Every ``splat-*`` operation prints ``YOU GOT SPLATTED`` to stdout
        before its handler is constructed.
    """

    def announced(handler_cls):
        # Wrap a handler so the banner is printed right before construction.
        # NOTE(review): this print looks like leftover debug output --
        # confirm nobody relies on it before removing.
        def build():
            print("YOU GOT SPLATTED")
            return handler_cls()
        return build

    def corenlp(*annotators):
        # Defer construction; each call passes a fresh list of annotators.
        return lambda: StanfordCoreNLP(list(annotators))

    # Ordered (name, factory) pairs. They are scanned with ``==`` instead of
    # being looked up in a dict so that any value of ``operation`` -- even an
    # unhashable one -- simply falls through to the error at the bottom.
    registry = (
        ('lemmatize_wordnet', LemmatizerWordNet),
        ('pos_tag', PosTag),
        ('removecapsgreedy', RemoveCapsGreedy),
        ('removecapsnnp', RemoveCapsPreserveNNP),
        ('removepunct', RemovePunct),
        ('removesilence', RemoveSilence),
        ('remove_stopwords', RemoveStopwords),
        ('sentence_tokenize', SentenceTokenize),
        ('stem_porter', StemmerPorter),
        # 'stop_words' resolves to the same handler as 'remove_stopwords'.
        ('stop_words', RemoveStopwords),
        ('tfidf', Tfidf),
        ('topic_model', TopicModel),
        ('wordcloudop', WordCloudOp),
        ('word_tokenize_treebank', WordTokenizeTreebank),
        ('word_tokenize_whitespace_punct', WordTokenizeWhitespacePunct),
        ('word_tokenize_stanford', WordTokenizeStanford),
        ('word_tokenize_spaces', WordTokenizeSpaces),
        ('word_tokenize_tabs', WordTokenizeTabs),
        ('nlp-pos', corenlp('pos')),
        ('nlp-ner', corenlp('pos', 'ner')),
        ('nlp-sentiment', corenlp('parse', 'sentiment')),
        ('nlp-parse', corenlp('parse')),
        ('nlp-coref', corenlp('tokenize', 'ssplit', 'coref')),
        ('nlp-relation', corenlp('parse', 'relation')),
        ('splat-disfluency', announced(SplatDisfluency)),
        ('splat-ngrams', announced(SplatNGrams)),
        ('splat-complexity', announced(SplatComplexity)),
        ('splat-pos', announced(SplatPOSFrequencies)),
        ('splat-syllables', announced(SplatSyllables)),
        ('splat-pronouns', announced(SplatPronouns)),
        ('noop', NoOp),
    )

    for known_name, make_handler in registry:
        if operation == known_name:
            return make_handler()

    raise TransactionException("The requested operation does not exist.")