Example #1
0
def main():
    '''Tag every line piped in on stdin and print it next to its tagger output.

    Example usage:

    echo "The Fulton County Grand Jury said Friday an investigation of Atlanta's recent primary election produced no evidence that any irregularities took place." | python -m tweedr.ark.__init__

    For each input line, prints `[input] <stripped line>` followed by
    `[output] <result of tagger.predict(line)>`.

    If stdin is a terminal (nothing was piped in), logs an error and exits
    with status 1.
    '''
    if sys.stdin.isatty():
        logger.error('You must pipe in a string')
        # `sys.exit` is the supported way to end a program; the bare `exit`
        # builtin is only installed by the `site` module for interactive use.
        sys.exit(1)

    # Imported here, after the tty check, so the early-exit path above never
    # loads the tagger module.
    from tweedr.ark.java import TwitterNLP
    tagger = TwitterNLP()

    for line in sys.stdin:
        # Single-argument print(...) yields identical output whether `print`
        # is a statement (Python 2) or a function (Python 3).
        print('[input] %s' % line.strip())
        # The raw line (trailing newline included) is what gets tagged.
        tag_line = tagger.predict(line)
        # Wrapped in a 1-tuple so a tuple result is formatted as one value.
        print('[output] %s' % (tag_line,))
Example #2
0
def main():
    '''Tag every line piped in on stdin and print it next to its tagger output.

    Example usage:

    echo "The Fulton County Grand Jury said Friday an investigation of Atlanta's recent primary election produced no evidence that any irregularities took place." | python -m tweedr.ark.__init__

    For each input line, prints `[input] <stripped line>` followed by
    `[output] <result of tagger.predict(line)>`.

    If stdin is a terminal (nothing was piped in), logs an error and exits
    with status 1.
    '''
    if sys.stdin.isatty():
        logger.error('You must pipe in a string')
        # `sys.exit` is the supported way to end a program; the bare `exit`
        # builtin is only installed by the `site` module for interactive use.
        sys.exit(1)

    # Imported here, after the tty check, so the early-exit path above never
    # loads the tagger module.
    from tweedr.ark.java import TwitterNLP
    tagger = TwitterNLP()

    for line in sys.stdin:
        # Single-argument print(...) yields identical output whether `print`
        # is a statement (Python 2) or a function (Python 3).
        print('[input] %s' % line.strip())
        # The raw line (trailing newline included) is what gets tagged.
        tag_line = tagger.predict(line)
        # Wrapped in a 1-tuple so a tuple result is formatted as one value.
        print('[output] %s' % (tag_line,))
Example #3
0
class POSTagger(Mapper):
    '''Callable `Mapper` subclass that adds `tokens` and `pos` entries to a tweet dict.'''
    INPUT = TweetDictProtocol
    OUTPUT = TweetDictProtocol

    def __init__(self):
        # One TwitterNLP instance is created per POSTagger and reused by every call.
        self.tagger = TwitterNLP()

    def __call__(self, tweet):
        '''Add POS information to `tweet`, reading nothing but tweet["text"].

        The text is handed to the tagger's `tokenize_and_tag`; the two values
        it returns are stored under the "tokens" and "pos" keys, e.g.:

            {
                ...
                "tokens": "@Donnie I hear ya and I hate earthquakes in Cali too ! But I still love living in LA ! :)",
                "pos": "@ O V O & O V N P ^ R , & O R V V P ^ ,",
                ...
            }

        Both values are meant to be split on whitespace, giving equal-length
        lists of strings.
        NOTE(review): the sample `pos` string above has 20 tags for 21 sample
        tokens (the tag for the trailing emoticon looks to be missing) -- confirm.

        The same `tweet` object is modified in place and returned.
        '''
        text = tweet['text']
        tagged = self.tagger.tokenize_and_tag(text)
        # Unpacks the two-item result straight into the tweet: tokens first, then tags.
        tweet['tokens'], tweet['pos'] = tagged
        return tweet
Example #4
0
 def __init__(self):
     '''Create a TwitterNLP instance and keep it on the object as `tagger`.'''
     nlp = TwitterNLP()
     self.tagger = nlp
Example #5
0
# Module-level singleton: importing this module builds one TwitterNLP instance
# and exposes it under the name `tagger`.
from tweedr.ark.java import TwitterNLP

import logging
logger = logging.getLogger(__name__)

# Logged at import time, immediately before the tagger is constructed below.
logger.debug('The TwitterNLP POS tagger is being loaded as a module singleton')

# Simply by importing this module, the TwitterNLP tagger will be started up and
# made available to other scripts as `tagger`. Python caches imported modules,
# so repeated imports within one process share this single instance.
# NOTE(review): what construction actually starts (presumably a backing tagger
# process) is defined in tweedr.ark.java -- confirm there.
tagger = TwitterNLP()