コード例 #1
0
def getSentenceFeatures(postDictList):
    """Collect part-of-speech and dependency features for every post.

    Args:
        postDictList: iterable of dicts. The values of each dict are passed
            through ``convertListInUnicode`` and handed to spaCy as an
            already-tokenized sentence (presumably the dicts preserve token
            order -- confirm against the caller).

    Returns:
        A list with one ``OrderedDict`` per post. Each maps the 1-based
        token position to ``[pos_, pos, tag, tag_, dep, dep_]`` as reported
        by spaCy for that token.
    """
    # NOTE(review): a new English pipeline is built on every call; consider
    # sharing one instance if this function is called repeatedly.
    nlp = English()

    poSpeechDictList = []

    for postDict in postDictList:
        unicodeTokens = convertListInUnicode(postDict.values())

        doc = nlp.tokenizer.tokens_from_list(unicodeTokens)
        # Tag first, then parse: spaCy's own pipeline runs the tagger ahead
        # of the parser so the parser can use the assigned tags. The
        # previous order (parser, then tagger) parsed an untagged doc.
        nlp.tagger(doc)
        nlp.parser(doc)

        poSpeechDict = OrderedDict()
        for index, token in enumerate(doc, 1):
            poSpeechDict[index] = [
                token.pos_,
                token.pos,
                token.tag,
                token.tag_,
                token.dep,
                token.dep_,
            ]

        poSpeechDictList.append(poSpeechDict)

    return poSpeechDictList
コード例 #2
0
ファイル: tagger.py プロジェクト: omidn/UBO_SS2017_NLP
import spacy
from spacy.en import English
import sys

nlp = English()

# Python 2 script: stdin yields bytes, decoded once here at the input boundary.
doc = nlp(sys.stdin.read().decode('UTF-8'))
# NOTE(review): nlp(...) presumably already ran the tagger; kept as in the
# original -- confirm whether this second pass is needed.
nlp.tagger(doc)

with open("./pos.txt", "w") as f, open("posner.txt", "w") as f2:
    # Pass 1 -> pos.txt: every token, with "_<POS>" appended for proper
    # nouns, nouns and verbs, separated by single spaces.
    for word in doc:
        only_tags = word.text
        if word.pos_ in ('PROPN', 'NOUN', 'VERB'):
            only_tags += "_" + word.pos_
        only_tags += " "
        f.write(only_tags.encode("UTF-8"))

    print(doc.ents)

    # Collapse each labelled entity span into a single token so the second
    # pass sees one token per entity.
    for ent in doc.ents:
        if ent.label_:
            ent.merge(ent.root.tag_, ent.text, ent.label_)

    # Pass 2 -> posner.txt: entity tokens become "<text>_<ENT_TYPE>" (inner
    # spaces replaced by underscores); all other tokens are formatted as in
    # pass 1.
    for word in doc:
        if word.ent_type_:
            with_ner = "%s_%s" % (word.text.replace(" ", "_"), word.ent_type_)
        else:
            with_ner = word.text
            if word.pos_ in ('PROPN', 'NOUN', 'VERB'):
                with_ner += "_" + word.pos_
        with_ner += " "
        # The original built with_ner but never wrote it, leaving posner.txt
        # empty; write it the same way pass 1 writes to pos.txt.
        f2.write(with_ner.encode("UTF-8"))
コード例 #3
0
# In[4]:

import spacy.util
from spacy.en import English

from spacy.syntax.parser import OracleError
from spacy.syntax.util import Config


# In[6]:

nlp = English()


# In[33]:

# Pre-tokenized input: one list element per token. The original had a
# misplaced quote (u'you, 'u'hate'), which Python silently concatenated into
# the single token u'you, hate'.
tokens = nlp.tokenizer.tokens_from_list([u'Me', u'and', u'you', u'hate', u'pizza'])
# Tag, then parse, the same doc in place.
a = nlp.tagger(tokens)
a = nlp.parser(tokens)


# In[51]:

# Syntactic head of the second token (u'and').
print(tokens[1].head)


# In[ ]: