def most_similar(tok, num_similar=20):
    """ FIXME: Use annoy to index word vectors and find most similar words to token str, id, or vector """
    raise NotImplementedError("Work in Progress, FIXME!")
    if _parse is None:
        nlp("hello", lang='en_core_web_lg')  # FIXME: is the _parse variable updated by this, will WV and other globals work?
    stem = None
    idx = None
    vec = None
    if isinstance(tok, str):
        # TODO: if tok is long or has punctuation/whitespace in it, use the spacy pipeline to tokenize it and compute the docvec
        stem = tok
        while stem not in WV_IDS:
            stem = stem[:-1]  # FIXME: fails if no prefix of tok is in the vocabulary (stem shrinks to '')
        idx = WORD2ID[stem]
    elif isinstance(tok, int):
        idx = tok
        stem = VOCAB[idx].text
    else:
        vec = np.array(tok)
        if len(vec.shape) == 2:
            # average a 2-D array of word vectors into a single query vector, whichever way it is oriented
            if vec.shape[0] == WV.shape[1]:
                vec = vec.mean(axis=1)  # (dim, num_words): vectors are columns, average across columns
            else:
                vec = vec.mean(axis=0)  # (num_words, dim): vectors are rows, average across rows
        idx = ANN.get_nns_by_vector(vec, 1)[0]
        stem = VOCAB[idx].text
    if idx is not None:
        vec = WV[idx]
        stem = VOCAB[idx].text
    return ANN.get_nns_by_vector(vec, num_similar)
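
# The FIXME above depends on a module-level AnnoyIndex (`ANN`) built over the word-vector
# matrix `WV`. The helper below is a hedged sketch of how such an index could be built
# with the documented annoy API; `build_annoy_index` is a hypothetical name, not part of
# the original module.
def build_annoy_index(word_vectors, n_trees=10, metric='angular'):
    """ Index the rows of a (num_words, dim) word-vector array with annoy (hypothetical helper) """
    from annoy import AnnoyIndex

    dim = word_vectors.shape[1]
    index = AnnoyIndex(dim, metric)       # 'angular' metric approximates cosine distance
    for i, vec in enumerate(word_vectors):
        index.add_item(i, list(vec))      # item id i == row number in the word-vector matrix
    index.build(n_trees)                  # more trees => better recall, slower build and larger index
    return index
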
def split_sentences_spacy(text, language_model='en'):
    r""" Split text into sentences using the SpaCy parser's sentence segmenter

    You must first download a SpaCy language model with `python -m spacy download en`

    The default English language model for SpaCy tends to be a lot more aggressive than NLTK's punkt:

    >>> split_sentences_nltk("Hi Ms. Lovelace.\nI'm a wanna-\nbe human @ I.B.M. ;) --Watson 2.0")
    ['Hi Ms. Lovelace.', "I'm a wanna-\nbe human @ I.B.M.", ';) --Watson 2.0']
    >>> split_sentences_spacy("Hi Ms. Lovelace.\nI'm a wanna-\nbe human @ I.B.M. ;) --Watson 2.0")
    ['Hi Ms. Lovelace.', "I'm a wanna-", 'be human @', 'I.B.M. ;) --Watson 2.0']

    >>> split_sentences_spacy("Hi Ms. Lovelace. I'm at I.B.M. --Watson 2.0")
    ['Hi Ms. Lovelace.', "I'm at I.B.M. --Watson 2.0"]
    >>> split_sentences_nltk("Hi Ms. Lovelace. I'm at I.B.M. --Watson 2.0")
    ['Hi Ms. Lovelace.', "I'm at I.B.M.", '--Watson 2.0']
    """
    doc = nlp(text)
    if not hasattr(doc, 'sents'):
        logger.warning("Using NLTK sentence tokenizer because SpaCy language model hasn't been loaded")
        return split_sentences_nltk(text)
    sentences = []
    for span in doc.sents:
        sent = ''.join(doc[i].string for i in range(span.start, span.end)).strip()
        if len(sent):
            sentences.append(sent)
    return sentences
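
# `split_sentences_spacy` relies on a module-level `nlp` callable. The helper below is a
# hedged sketch (assuming SpaCy v3's API) of one way to obtain such a callable;
# `load_sentence_splitter` is a hypothetical name, not this module's actual loader,
# which wraps SpaCy and accepts a `lang=` keyword.
def load_sentence_splitter(model='en_core_web_sm'):
    """ Return a SpaCy pipeline whose `doc.sents` works (hypothetical helper) """
    import spacy

    try:
        return spacy.load(model)      # requires: python -m spacy download en_core_web_sm
    except OSError:
        blank = spacy.blank('en')     # tokenizer-only pipeline, no statistical parser
        blank.add_pipe('sentencizer')  # rule-based sentence boundaries so doc.sents works
        return blank
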
def get_anki_vocab(lang=['eng'], limit=None, filename='anki_en_vocabulary.csv'):
    """ Get all the vocab words + tags + word vectors for the tokens in the Anki translation corpus

    Returns a DataFrame with columns: word, pos, tag, dep, ent, ent_iob, sentiment, vector
    """
    texts = get_anki_phrases(lang=lang, limit=limit)
    docs = nlp(texts, lang=lang)
    vocab = get_vocab(docs)
    vocab['vector'] = get_word_vectors(vocab)  # TODO: turn this into a KeyedVectors object
    if filename:
        vocab.to_csv(os.path.join(BIGDATA_PATH, filename))
    return vocab
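
# A possible follow-up to the TODO above: pack the vocab DataFrame's words and vectors into
# a gensim KeyedVectors object. This is a hedged sketch assuming gensim >= 4.0 and that the
# DataFrame has a `word` column alongside the `vector` column; `vocab_to_keyedvectors` is a
# hypothetical name, not part of the original module.
def vocab_to_keyedvectors(vocab):
    """ Build a gensim KeyedVectors index from a vocab DataFrame (hypothetical helper) """
    import numpy as np
    from gensim.models import KeyedVectors

    vectors = np.vstack(vocab['vector'].values)      # stack per-word vectors into a (num_words, dim) matrix
    kv = KeyedVectors(vector_size=vectors.shape[1])
    kv.add_vectors(vocab['word'].tolist(), vectors)  # register each word with its vector
    return kv
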