def ner_tag(sents, silent=True):
    """Named Entity Recognition for sentences.

    Keyword arguments:
    sents -- sentence string, list of sentences, or list of tokens.
    silent -- when False, print the resulting tags (default True).

    Returns:
    List of (word, ner-tag) pairs that aims to preserve the structure
    of the sents input argument.
    """
    # Empty input (None, '' or []) tags to nothing; the original
    # len()-based check crashed on None.
    if not sents:
        return []
    # Cache the tagger as a module global so it is not recreated on every
    # call -- constructing a Stanford NERTagger is expensive.
    if 'ner_tagger' not in globals():
        global ner_tagger
        ner_tagger = NERTagger(stanford_ner_classifier, stanford_ner)
    # Raw string: split into tokenized sentences first.
    if isinstance(sents, (str, unicode)):
        sents = tokenize(sents, 'sw')
    # List of strings: either untokenized sentences or one token list.
    elif isinstance(sents[0], (str, unicode)):
        if ' ' in sents[0]:
            # Elements contain spaces -> treat them as whole sentences.
            sents = [tokenize(s, 'w') for s in sents]
        else:
            # Elements are single tokens -> wrap as one sentence.
            sents = [sents]
    tagged = ner_tagger.tag_sents(sents)
    if not silent:
        print('ner-tags:', tagged)
    return tagged
def ner_tag(sents, silent=True) : if sents == '' or sents == [] : return [] # saves ner_tagger as global variable, # such that it is not recreated everytime ner_tag is executed if not 'ner_tagger' in globals(): global ner_tagger ner_tagger = NERTagger(conf.stanford_ner_classifier, conf.stanford_ner) # if sentence not tokenized if type(sents) in [str,unicode] : sents = tokenize(sents,'sw') # bring input sents in right form elif type(sents[0]) in [str,unicode] : if ' ' in sents[0] : sents = [tokenize(s,'w') for s in sents] else : sents = [sents] tagged = ner_tagger.tag_sents(sents) if not silent : print 'ner-tags:',tagged return tagged
def add_ner(self,target): all_token = self.get_token(target); st = \ NERTagger('../stanford-ner-2015-04-20/classifiers/english.all.3class.distsim.crf.ser.gz','../stanford-ner-2015-04-20/stanford-ner.jar'); ner_result = st.tag_sents(all_token); w = open('ner_%s'%target,'wb'); for num,row in enumerate(ner_result): for item in row: w.write(item[0]+'\n'); w.write('\n'); #end for print len(ner_result),len(all_token); return;
def run_tagger(self, payload):
    """
    Tag *payload* via :py:meth:`nltk.tag.stanford.NERTagger.tag_sents`
    (http://www.nltk.org/api/nltk.tag.html#nltk.tag.stanford.NERTagger.tag_sents)

    :param payload: Fulltext payload.
    :type payload: string
    :return: List of parsed sentences, or None when NERTagger is unavailable.
    """
    # Bail out when the Stanford tagger could not be imported.
    if NERTagger is None:
        return None
    # Drop non-ASCII characters, whitespace-split, and tag the tokens
    # as a single sentence.
    tokens = payload.encode('ascii', 'ignore').split()
    stanford = NERTagger(self.classifier, self.jarfile)
    return stanford.tag_sents([tokens])
def add_ner(self, target): all_token = self.get_token(target) st = \ NERTagger('../stanford-ner-2015-04-20/classifiers/english.all.3class.distsim.crf.ser.gz','../stanford-ner-2015-04-20/stanford-ner.jar') ner_result = st.tag_sents(all_token) w = open('ner_%s' % target, 'wb') for num, row in enumerate(ner_result): for item in row: w.write(item[0] + '\n') w.write('\n') #end for print len(ner_result), len(all_token) return
list_of_sentences.extend(tkzd_sentences) i+=1 except Exception as error: if "utf" in str(error): pass else: print "SOMETHING HAPPENED" print "\nxxxxxxxxxxx-------------xxxxxxxxxxx\n" print len(list_of_sentences) print i # raw_input("...continue?") IOB_sentences = tagger.tag_sents(list_of_sentences) print len(IOB_sentences) twitter_ners = {} for ne_tagged_sent in IOB_sentences: named_entities = get_continuous_chunks(ne_tagged_sent) named_entities_str = [" ".join([token for token, tag in ne]) for ne in named_entities] named_entities_str_tag = [(" ".join([token for token, tag in ne]), ne[0][1]) for ne in named_entities] if len(named_entities_str_tag)>0: for string, tag in named_entities_str_tag: try: twitter_ners[tag.lower()].append(string.lower()) except: twitter_ners[tag.lower()] = [string.lower()] for k,v in twitter_ners.items():