def get_dependency(self, row_file, target):
    """Parse every <sentence> block in row_file and write its Stanford
    dependencies to 'dependency_<target>' as CSV rows."""
    stanford_parser = parser.Parser()
    row_str = ''
    f = open(row_file, 'rb')
    for row in f:
        row_str += row
    f.close()
    soup = BeautifulSoup(row_str)
    self.soup = soup
    sentences = soup.find_all('sentence')
    all_sentences = list()
    for block in sentences:
        text = block.text.strip()
        all_sentences.append(text)
    #end for
    temp_csv = csv.writer(open('dependency_%s' % target, 'wb'))
    for sentence in all_sentences:
        temp_list = stanford_parser.parseToStanfordDependencies(sentence)
        for item in temp_list:
            temp_csv.writerow(item)
        # Blank row marks the end of one sentence's dependencies.
        temp_csv.writerow([])
    return
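# A hedged usage sketch, not part of the original class: it reads back one of the
# 'dependency_<target>' files written above, splitting on the blank rows that
# separate sentences. The helper name is hypothetical; it only assumes the CSV
# layout produced by get_dependency and the `csv` module already used there.
def load_dependency_file(target):
    groups, current = [], []
    for row in csv.reader(open('dependency_%s' % target, 'rb')):
        if not row:  # blank row ends one sentence's dependency list
            groups.append(current)
            current = []
        else:
            current.append(row)
    if current:
        groups.append(current)
    return groups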
def get_whole(self, sentence):
    """Build CRF-style feature lines for every token in `sentence` and
    write them to the file 'sentence_test'."""
    # Load the opinion lexicon: True for positive words, False for negative.
    opinion_dict = dict()
    pos_f = open('../opinion-lexicon-English/positive-words.txt', 'rb')
    neg_f = open('../opinion-lexicon-English/negative-words.txt', 'rb')
    # Skip the 35 header lines at the top of each lexicon file.
    for _ in xrange(35):
        pos_f.readline()
        neg_f.readline()
    for word in pos_f:
        opinion_dict[word.strip()] = True
    for word in neg_f:
        opinion_dict[word.strip()] = False
    pos_f.close()
    neg_f.close()

    stemmer = PorterStemmer()
    stanford_parser = parser.Parser()
    stanford_tagger = POSTagger(
        '../stanford-postagger-full-2015-01-30/models/english-bidirectional-distsim.tagger',
        '../stanford-postagger-full-2015-01-30/stanford-postagger.jar')

    w = open('sentence_test', 'wb')
    text_token = self.tf.stanford_tokenize(sentence)
    text_pos = stanford_tagger.tag(text_token)
    print text_pos  # debug: inspect tagger output
    text_dependency = stanford_parser.parseToStanfordDependencies(sentence)
    # Mark the governor/dependent tokens of every 'amod' relation.
    temp_list = ['none'] * len(text_token)
    for dep in text_dependency:
        if dep[0] == 'amod':
            temp_list[int(dep[1])] = '%s_1' % dep[0]
            temp_list[int(dep[2])] = '%s_2' % dep[0]
    #end for
    # One feature line per token: word, POS, stem, lowercase form,
    # opinion/order flag, amod marker, and the default 'O' label.
    for num, item in enumerate(text_pos[0]):
        temp_str = 'order'
        if item[0] in opinion_dict:
            temp_str = 'opinion'
        feature_list = [item[0], item[1], stemmer.stem(item[0]), item[0].lower(),
                        temp_str, temp_list[num], 'O']
        w.write(' '.join(feature_list) + '\n')
    w.close()
#-*-coding:utf-8-*-
import pandas as pd
from stanford_parser import parser
from nltk.tree import Tree
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import warnings
warnings.filterwarnings(action='ignore')

path = './data/'
out_path = './train_data/'

# Build the syntactic parsing tool once at module level.
standford_parser = parser.Parser()

# Get features from the structure of the parse tree.
def getDepTree(x):
    # Parse the sentence and return its tree string plus the POS tags.
    tokens, tree = standford_parser.parse(unicode(x))
    posTag = standford_parser.getPosTag(tree)
    return str(tree), posTag

def getDepTreeHeight(x):
    # x is a bracketed tree string; rebuild the NLTK tree and return its height.
    #_, ret = standford_parser.parse(unicode(x))
    t = Tree.fromstring(str(x))
    return t.height()
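# A minimal usage sketch, not from the original module: assuming a CSV under
# `path` with a 'text' column, the helpers above could be applied to a pandas
# DataFrame like this. The file name 'train.csv' and the column name are
# hypothetical; only pd, path, out_path, getDepTree and getDepTreeHeight
# defined above are relied on.
def build_tree_features(csv_name='train.csv'):
    df = pd.read_csv(path + csv_name)
    # getDepTree returns (tree_string, pos_tags); keep the tree string here.
    df['tree'] = df['text'].apply(lambda s: getDepTree(s)[0])
    df['tree_height'] = df['tree'].apply(getDepTreeHeight)
    df.to_csv(out_path + csv_name, index=False)
    return df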
                (distance, sent_str))
        except JavaException:
            # print "Failure: sentence is too long (len = %i)" % len(sent)
            pass
        except AssertionError:
            # print "Failure: could not find root"
            pass

    # The best summary sentence is the one whose feature/opinion pair is closest.
    summary_sents_with_feature_opinion_dist.sort()
    if len(summary_sents_with_feature_opinion_dist) > 0:
        return summary_sents_with_feature_opinion_dist[0][1]
    else:
        return None


# To summarize movie review(s) not included in the NLTK corpus:
#   $ python summarizer.py filename1.txt filename2.txt ... etc.
if __name__ == '__main__':
    parser = sp.Parser()
    if len(sys.argv) > 1:
        for fname in sys.argv[1:]:
            print "\nReview: %s" % fname
            print "Summary: %s\n" % find_summary_sentence(parser, localfile=fname)
    else:
        for fileid in movie_reviews.fileids():
            print "\nReview:", fileid
            print "Summary:\n", find_summary_sentence(parser, fileid=fileid)
def __init__(self):
    self.__parser_ = parser.Parser()
def setUpClass(cls):
    jarPathName = normpath(join(parser.MODULE_PATH, "./CoreNLP"))
    cls._parser = parser.Parser(
        jarPathName,
        'edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
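# A hypothetical follow-on test, not from the original suite: it assumes the
# Parser.parse() API used elsewhere in this repo, i.e. parse() returning a
# (tokens, tree) pair for a plain sentence string.
def test_parse_simple_sentence(self):
    tokens, tree = self._parser.parse("Pick up the tire pallet.")
    self.assertTrue(len(tokens) > 0)
    self.assertIsNotNone(tree)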
__author__ = 'simon.hughes'

from stanford_parser import parser

txt = "Pick up the tire pallet."

p = parser.Parser()
dependencies = p.parseToStanfordDependencies(txt)
# Each dependency is a (relation, governor, dependent) triple; keep the token
# text together with its character span.
tupleResult = [(rel, (gov.text, gov.start, gov.end), (dep.text, dep.start, dep.end))
               for rel, gov, dep in dependencies.dependencies]
tokens, tree = p.parse(txt)
kids = tree.children

for tup in tupleResult:
    print tup

print ""
print "\n".join(map(str, dependencies.dependencies[0]))


def extract_dependencies(txt):
    # Return (relation, governor span, dependent span) triples for a sentence.
    dependencies = p.parseToStanfordDependencies(txt)
    return [(rel, (gov.text, gov.start, gov.end), (dep.text, dep.start, dep.end))
            for rel, gov, dep in dependencies.dependencies]

deps = extract_dependencies(txt)