Example #1
 def get_dependency(self, row_file, target):
     # parse each <sentence> block in row_file and write its Stanford
     # dependencies to a CSV, with a blank row between sentences
     stanford_parser = parser.Parser()
     f = open(row_file, 'rb')
     row_str = f.read()
     f.close()
     soup = BeautifulSoup(row_str)
     self.soup = soup
     sentences = soup.find_all('sentence')
     all_sentences = list()
     for block in sentences:
         text = block.text.strip()
         all_sentences.append(text)
     #end for
     out_f = open('dependency_%s' % target, 'wb')
     temp_csv = csv.writer(out_f)
     for sentence in all_sentences:
         temp_list = stanford_parser.parseToStanfordDependencies(sentence)
         for item in temp_list:
             temp_csv.writerow(item)
         temp_csv.writerow([])  # blank row marks the end of a sentence
     out_f.close()
     return
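A minimal driver sketch for the method above; the owning class name and both file names are assumptions, not part of the original snippet:

# hypothetical usage; 'Extractor' and the file names are invented for illustration
extractor = Extractor()
extractor.get_dependency('reviews.xml', 'reviews')  # writes dependency_reviews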
Example #2
 def get_whole(self, sentence):
     # build an opinion lexicon: True for positive words, False for negative
     opinion_dict = dict()
     pos_f = open('../opinion-lexicon-English/positive-words.txt', 'rb')
     neg_f = open('../opinion-lexicon-English/negative-words.txt', 'rb')
     for _ in xrange(35):
         # skip the header lines at the top of each lexicon file
         pos_f.readline()
         neg_f.readline()
     for word in pos_f:
         opinion_dict[word.strip()] = True
     for word in neg_f:
         opinion_dict[word.strip()] = False
     pos_f.close()
     neg_f.close()
     stemmer = PorterStemmer()
     stanford_parser = parser.Parser()
     stanford_tagger = \
         POSTagger('../stanford-postagger-full-2015-01-30/models/english-bidirectional-distsim.tagger',
                   '../stanford-postagger-full-2015-01-30/stanford-postagger.jar')
     w = open('sentence_test', 'wb')
     text_token = self.tf.stanford_tokenize(sentence)
     text_pos = stanford_tagger.tag(text_token)
     print text_pos
     text_dependency = stanford_parser.parseToStanfordDependencies(sentence)
     # mark both ends of every adjectival-modifier (amod) dependency
     temp_list = ['none'] * len(text_token)
     for dep in text_dependency:
         if dep[0] == 'amod':
             temp_list[int(dep[1])] = '%s_1' % dep[0]
             temp_list[int(dep[2])] = '%s_2' % dep[0]
     #end for
     # write one feature line per token: token, POS tag, stem,
     # lowercase form, opinion/order flag, amod marker, 'O' label
     for num, item in enumerate(text_pos[0]):
         temp_str = 'order'
         if item[0] in opinion_dict:
             temp_str = 'opinion'
         feature_list = [item[0], item[1], stemmer.stem(item[0]),
                         item[0].lower(), temp_str, temp_list[num], 'O']
         w.write(' '.join(feature_list) + '\n')
     w.close()
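Each line of sentence_test is a space-separated feature vector in the order shown above. A purely hypothetical line for the token "great", tagged JJ and sitting on the dependent end of an amod relation, would look like:

great JJ great great opinion amod_2 O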
Example #3
# -*- coding: utf-8 -*-
import pandas as pd

from stanford_parser import parser
from nltk.tree import Tree
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

import warnings
warnings.filterwarnings(action='ignore')

path = './data/'
out_path = './train_data/'
stanford_parser = parser.Parser()


# get features from the structure of the dependency tree
def getDepTree(x):
    # run the Stanford parser and read POS tags off the resulting tree
    tokens, tree = stanford_parser.parse(unicode(x))
    posTag = stanford_parser.getPosTag(tree)
    return str(tree), posTag


def getDepTreeHeight(x):
    # x is a bracketed tree string, e.g. the first value returned by getDepTree
    #_, ret = stanford_parser.parse(unicode(x))
    t = Tree.fromstring(str(x))
    return t.height()
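
A short usage sketch for the two helpers above; the input sentence is illustrative, not from the original:

# hedged usage sketch; the sentence is an assumption
tree_str, pos_tags = getDepTree("Pick up the tire pallet.")
print pos_tags
print getDepTreeHeight(tree_str)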

Example #4
                    (distance, sent_str))
        except JavaException:
            # print "Failure: sentence is too long (len = %i)" % len(sent)
            pass
        except AssertionError:
            # print "Failure: could not find root"
            pass

    # the best summary sentence is the one with the closest feature/opinion distance
    summary_sents_with_feature_opinion_dist.sort()
    if len(summary_sents_with_feature_opinion_dist) > 0:
        return summary_sents_with_feature_opinion_dist[0][1]
    else:
        return None

    #To summarize movie review(s) not included in the NLTK:
    # $ python summarizer.py filename1.txt filename2.txt ... etc.


if __name__ == '__main__':
    parser = sp.Parser()
    if len(sys.argv) > 1:
        for fname in sys.argv[1:]:
            print "\nReview: %s" % fname
            print "Summary: %s\n" % find_summary_sentence(parser,
                                                          localfile=fname)
    else:
        for fileid in movie_reviews.fileids():
            print "\nReview:", fileid
            print "Summary:\n", find_summary_sentence(parser, fileid=fileid)
Example #5
 def __init__(self):
     self.__parser_ = parser.Parser()
Example #6
 def setUpClass(cls):
     # build the parser once and share it across every test in the class
     jarPathName = normpath(join(parser.MODULE_PATH, "./CoreNLP"))
     cls._parser = parser.Parser(jarPathName, 'edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
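For context, the classmethod above slots into a standard unittest class. A minimal sketch, assuming the test class name and the (tokens, tree) return signature seen in Examples #3 and #7:

import unittest
from os.path import join, normpath

from stanford_parser import parser


class ParserSmokeTest(unittest.TestCase):  # hypothetical class name

    @classmethod
    def setUpClass(cls):
        jarPathName = normpath(join(parser.MODULE_PATH, "./CoreNLP"))
        cls._parser = parser.Parser(jarPathName,
                                    'edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

    def test_parse_returns_tokens_and_tree(self):
        # parse() returning (tokens, tree) follows the other examples here
        tokens, tree = self._parser.parse(u"Pick up the tire pallet.")
        self.assertTrue(len(tokens) > 0)


if __name__ == '__main__':
    unittest.main()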
Example #7
__author__ = 'simon.hughes'

from stanford_parser import parser

txt = "Pick up the tire pallet."
p = parser.Parser()

dependencies = p.parseToStanfordDependencies(txt)
tupleResult = [(rel, (gov.text, gov.start, gov.end), (dep.text, dep.start, dep.end))
               for rel, gov, dep in dependencies.dependencies]

tokens, tree = p.parse(txt)
kids = tree.children  # immediate children of the parse-tree root

for tup in tupleResult:  # avoid shadowing the built-in 'tuple'
    print tup
print ""
print "\n".join(map(str, dependencies.dependencies[0]))


def extract_dependencies(txt):
    # return (relation, (governor text, start, end), (dependent text, start, end)) triples
    dependencies = p.parseToStanfordDependencies(txt)
    return [(rel, (gov.text, gov.start, gov.end), (dep.text, dep.start, dep.end))
            for rel, gov, dep in dependencies.dependencies]


deps = extract_dependencies(txt)
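
A brief, purely illustrative follow-up showing one way to consume the triples returned above:

# hedged follow-up: print each (relation, governor, dependent) triple
for rel, gov, dep in deps:
    print '%s  gov=%s  dep=%s' % (rel, gov, dep)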