Esempio n. 1
0
 def test_run(self):
     # Run PosTag over two lowercase sentences and compare against the
     # expected (token, tag) pairs; the expected list contains no
     # punctuation tokens.
     self.op = PosTag()
     self.test_data = 'the old man the boat. john ate an old sandwich, unfortunately.'
     actual_pairs = self.op.run(self.test_data)
     expected_pairs = [
         ('the', 'DT'),
         ('old', 'JJ'),
         ('man', 'NN'),
         ('the', 'DT'),
         ('boat', 'NN'),
         ('john', 'NN'),
         ('ate', 'VBD'),
         ('an', 'DT'),
         ('old', 'JJ'),
         ('sandwich', 'NN'),
         ('unfortunately', 'RB'),
     ]
     self.assertEqual(actual_pairs, expected_pairs)
Esempio n. 2
0
def get_operation_handler(operation):
    """Return a new handler instance for the named operation.

    Args:
        operation: Name of the requested operation, e.g. 'pos_tag' or 'noop'.

    Returns:
        A freshly constructed handler object for ``operation``.

    Raises:
        TransactionException: If ``operation`` is not one of the known names.
    """
    # Every entry is a zero-argument factory, so only the handler that was
    # actually requested gets looked up and constructed.
    factories = {
        'lemmatize_wordnet': lambda: LemmatizerWordNet(),
        'pos_tag': lambda: PosTag(),
        'removecapsgreedy': lambda: RemoveCapsGreedy(),
        'removecapsnnp': lambda: RemoveCapsPreserveNNP(),
        'removepunct': lambda: RemovePunct(),
        'remove_stopwords': lambda: RemoveStopwords(),
        'sentence_tokenize': lambda: SentenceTokenize(),
        'stem_porter': lambda: StemmerPorter(),
        'stem_lancaster': lambda: StemmerLancaster(),
        'stem_snowball': lambda: StemmerSnowball(),
        'tfidf': lambda: Tfidf(),
        'topic_model': lambda: TopicModel(),
        'wordcloudop': lambda: WordCloudOp(),
        'word_tokenize_treebank': lambda: WordTokenizeTreebank(),
        'word_tokenize_whitespace_punct': lambda: WordTokenizeWhitespacePunct(),
        'word_tokenize_stanford': lambda: WordTokenizeStanford(),
        'word_tokenize_spaces': lambda: WordTokenizeSpaces(),
        'word_tokenize_tabs': lambda: WordTokenizeTabs(),
        'nlp-pos': lambda: StanfordCoreNLP(['pos']),
        'nlp-ner': lambda: StanfordCoreNLP(['pos', 'ner']),
        'noop': lambda: NoOp(),
    }

    try:
        build = factories.get(operation)
    except TypeError:
        # Unhashable arguments cannot be dictionary keys; they match no name.
        build = None

    if build is None:
        raise TransactionException("The requested operation does not exist.")
    return build()
Esempio n. 3
0
 def test_run(self):
     # Run PosTag over two lowercase sentences and compare against the
     # expected (token, tag) pairs, grouped below by source sentence.
     self.op = PosTag()
     self.test_data = "the old man the boat. john ate an old sandwich, unfortunately."
     tagged = self.op.run(self.test_data)

     first_sentence = [
         ("the", "DT"), ("old", "JJ"), ("man", "NN"),
         ("the", "DT"), ("boat", "NN"),
     ]
     second_sentence = [
         ("john", "NN"), ("ate", "VBD"), ("an", "DT"),
         ("old", "JJ"), ("sandwich", "NN"), ("unfortunately", "RB"),
     ]
     self.assertEqual(tagged, first_sentence + second_sentence)
Esempio n. 4
0
 def setUp(self):
     """Instantiate a PosTag operation and store it on ``self.op``.

     NOTE(review): the enclosing class is not visible here; presumably this
     is a unittest fixture hook run before each test - confirm.
     """
     self.op = PosTag()
Esempio n. 5
0
def get_operation_handler(operation):
    """Return a new handler instance for the named operation.

    The ``splat-*`` operations additionally print "YOU GOT SPLATTED" to
    standard output before their handler is constructed.

    Args:
        operation: Name of the requested operation, e.g. 'pos_tag' or 'noop'.

    Returns:
        A freshly constructed handler object for ``operation``.

    Raises:
        TransactionException: If ``operation`` is not one of the known names.
    """
    def announced(factory):
        # Wrap a factory so the splat message is printed before construction.
        def build_with_message():
            print("YOU GOT SPLATTED")
            return factory()
        return build_with_message

    # Every entry is a zero-argument factory, so only the handler that was
    # actually requested gets looked up and constructed.
    factories = {
        'lemmatize_wordnet': lambda: LemmatizerWordNet(),
        'pos_tag': lambda: PosTag(),
        'removecapsgreedy': lambda: RemoveCapsGreedy(),
        'removecapsnnp': lambda: RemoveCapsPreserveNNP(),
        'removepunct': lambda: RemovePunct(),
        'removesilence': lambda: RemoveSilence(),
        'remove_stopwords': lambda: RemoveStopwords(),
        'sentence_tokenize': lambda: SentenceTokenize(),
        'stem_porter': lambda: StemmerPorter(),
        # 'stop_words' maps to the same handler as 'remove_stopwords'.
        'stop_words': lambda: RemoveStopwords(),
        'tfidf': lambda: Tfidf(),
        'topic_model': lambda: TopicModel(),
        'wordcloudop': lambda: WordCloudOp(),
        'word_tokenize_treebank': lambda: WordTokenizeTreebank(),
        'word_tokenize_whitespace_punct': lambda: WordTokenizeWhitespacePunct(),
        'word_tokenize_stanford': lambda: WordTokenizeStanford(),
        'word_tokenize_spaces': lambda: WordTokenizeSpaces(),
        'word_tokenize_tabs': lambda: WordTokenizeTabs(),
        'nlp-pos': lambda: StanfordCoreNLP(['pos']),
        'nlp-ner': lambda: StanfordCoreNLP(['pos', 'ner']),
        'nlp-sentiment': lambda: StanfordCoreNLP(['parse', 'sentiment']),
        'nlp-parse': lambda: StanfordCoreNLP(['parse']),
        'nlp-coref': lambda: StanfordCoreNLP(['tokenize', 'ssplit', 'coref']),
        'nlp-relation': lambda: StanfordCoreNLP(['parse', 'relation']),
        'splat-disfluency': announced(lambda: SplatDisfluency()),
        'splat-ngrams': announced(lambda: SplatNGrams()),
        'splat-complexity': announced(lambda: SplatComplexity()),
        'splat-pos': announced(lambda: SplatPOSFrequencies()),
        'splat-syllables': announced(lambda: SplatSyllables()),
        'splat-pronouns': announced(lambda: SplatPronouns()),
        'noop': lambda: NoOp(),
    }

    try:
        build = factories.get(operation)
    except TypeError:
        # Unhashable arguments cannot be dictionary keys; they match no name.
        build = None

    if build is None:
        raise TransactionException("The requested operation does not exist.")
    return build()
Esempio n. 6
0
 def setUp(self):
     """Instantiate a PosTag operation and store it on ``self.op``.

     NOTE(review): the enclosing class is not visible here; presumably this
     is a unittest fixture hook run before each test - confirm.
     """
     self.op = PosTag()