Example #1
import nltk  # assumed installed, along with its default English tagger data


def tokens2pos(sep, tokens):
    '''Tokenize words into their parts of speech. Each item
    is the original word with its role as the second part
    of the item. Punctuation is considered a separate token.'''

    # read_tokens, data_item, and output are helpers defined elsewhere
    # in the surrounding module.
    content = read_tokens(tokens)
    nltk.data.path.append(data_item())
    tags = nltk.pos_tag(content)
    # Emit one "word,tag" line per token; this variant ignores sep and
    # hard-codes a comma as the delimiter.
    for word, tag in tags:
        output("{},{}".format(word, tag))
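For context, here is a minimal self-contained sketch of the same tagging step, assuming only that NLTK and its default English tagger data are installed (read_tokens, data_item, and output belong to the surrounding module and are not reproduced here):

import nltk

# Requires the tagger model, e.g. nltk.download('averaged_perceptron_tagger').
words = ['The', 'quick', 'brown', 'fox', 'jumps', '.']
for word, tag in nltk.pos_tag(words):
    print('{},{}'.format(word, tag))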
Example #2
import nltk  # assumed installed, along with its default English tagger data


def tokens2pos(sep, tokens):
    '''Tokenize words into their parts of speech. Output contains the
    word token followed by its part-of-speech tag, separated by the
    character specified by --sep.
    '''

    # read_tokens, data_item, and write_csv are helpers defined
    # elsewhere in the surrounding module.
    content = read_tokens(tokens)
    nltk.data.path.append(data_item())
    tags = nltk.pos_tag(content)
    # pos_tag returns (word, tag) pairs; write_csv renders one pair
    # per line with sep as the delimiter.
    write_csv(tags, str(sep))
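write_csv itself is not shown in these snippets; a plausible stand-in built on the standard csv module might look like the following, with the signature inferred from the call above (an assumption, not the project's actual helper):

import csv
import sys

def write_csv(rows, sep):
    # Hypothetical stand-in: write each (word, tag) pair to stdout,
    # one pair per line, with sep as the single-character delimiter
    # the csv module expects.
    writer = csv.writer(sys.stdout, delimiter=sep)
    for row in rows:
        writer.writerow(row)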
Example #3
def get_stopwords(stopword_name):
    '''Return the stop word list stored under the given name.'''
    # data_item and read_tokens are helpers defined elsewhere in the
    # surrounding module.
    path = data_item('/stopwords/' + stopword_name + '.txt')
    with open(path) as stopword_file:
        return read_tokens(stopword_file)
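A quick usage sketch, assuming a stopwords/english.txt file exists under the data directory that data_item resolves (the list name and the filtering step are illustrative, not from the original module):

tokens = ['the', 'quick', 'brown', 'fox']
stopwords = get_stopwords('english')  # reads .../stopwords/english.txt
filtered = [word for word in tokens if word.lower() not in stopwords]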