# Satellite-clause extraction built on the Stanford parser (NLTK wrapper).
# NOTE(review): this chunk was collapsed onto a single physical line;
# reformatted here without changing any code tokens. The final function is
# cut off mid-body by the chunk boundary.
import os
from nltk.parse import stanford
import sys
import codecs
from nltk.tokenize import word_tokenize, sent_tokenize
import re
from path_reader import PathReader

# Resolve Stanford parser jar/model locations from the local PATHS file.
pathreader = PathReader("./PATHS")
os.environ['STANFORD_PARSER'] = pathreader.get_path('PARSER')
os.environ['STANFORD_MODELS'] = pathreader.get_path('PARSER')
parser = stanford.StanfordParser()


def get_longest_cand(cands):
    """Return the longest string in *cands*, or '' when *cands* is empty.

    Ties are broken in favor of the earliest candidate (strictly-greater
    comparison below).
    """
    maxlen = 0
    bestcand = ''
    for cand in cands:
        if len(cand) > maxlen:
            maxlen = len(cand)
            bestcand = cand
    return bestcand


def extract_sat_clause(tree, is_root=True):
    """Recursively search *tree* (an nltk Tree) for S/SBAR clause material.

    NOTE(review): `type(tree) == unicode` relies on the Python 2 `unicode`
    builtin; this name is undefined on Python 3 — confirm the target
    interpreter before porting.
    """
    # Leaves (unicode strings) and empty subtrees contribute nothing.
    if len(tree) == 0 or type(tree) == unicode:
        return ""
    elif tree.label() == 'S' or tree.label() == 'SBAR':
        if is_root:
            # (definition continues beyond this chunk — truncated here)
# POS-driven feature extraction using the Stanford POS tagger (NLTK wrapper).
# NOTE(review): this chunk was collapsed onto a single physical line;
# reformatted here without changing any code tokens. `get_features` is cut
# off mid-body by the chunk boundary.
from nltk.tag import StanfordPOSTagger
from nltk.tokenize import word_tokenize
import re
import os
from path_reader import PathReader

# Point NLTK at the Stanford tagger jar and its bundled models via PATHS.
pathreader = PathReader("./PATHS")
os.environ['CLASSPATH'] = pathreader.get_path('TAGGER')
os.environ['STANFORD_MODELS'] = pathreader.get_path('TAGGER') + "/models"


class FeatureProcessing(object):
    """Turns a phrase into a bag of lexical/POS features."""

    def __init__(self):
        # feature string -> integer index (presumably populated as
        # features are seen; not visible in this chunk — verify).
        self.feat_index = {}
        # Small hand-picked lexicons consulted during feature extraction.
        self.implication_words = ["demonstrate", "suggest", "indicate"]
        self.hyp_words = ["possible"]
        self.method_words = ["probe", "detect"]
        self.pos_tagger = StanfordPOSTagger(
            'english-bidirectional-distsim.tagger')

    def get_features(self, phrase, filter_feature='0'):
        """Tokenize *phrase*, POS-tag it, and accumulate string features.

        `filter_feature` selects a feature family to ablate ('1' drops the
        POS features below; other codes presumably handled further down —
        truncated in this chunk).
        """
        words = word_tokenize(phrase)
        pos_tags = self.pos_tagger.tag(words)
        features = []
        for word, tag in pos_tags:
            wl = word.lower()
            # Feat 1: POS features
            if filter_feature != '1':
                if tag != ',' and tag != '.':
                    # (definition continues beyond this chunk — truncated)
# POS-driven feature extraction using the Stanford POS tagger (NLTK wrapper).
# NOTE(review): near-duplicate of the preceding chunk, collapsed onto one
# physical line; reformatted without changing code tokens. `get_features`
# is cut off mid-body by the chunk boundary.
from nltk.tag import StanfordPOSTagger
from nltk.tokenize import word_tokenize
import re
import os
from path_reader import PathReader

# Point NLTK at the Stanford tagger jar and its bundled models via PATHS.
pathreader = PathReader("./PATHS")
os.environ['CLASSPATH'] = pathreader.get_path('TAGGER')
os.environ['STANFORD_MODELS'] = pathreader.get_path('TAGGER') + "/models"


class FeatureProcessing(object):
    """Turns a phrase into a bag of lexical/POS features."""

    def __init__(self):
        # feature string -> integer index (presumably populated as
        # features are seen; not visible in this chunk — verify).
        self.feat_index = {}
        # Small hand-picked lexicons consulted during feature extraction.
        self.implication_words = ["demonstrate", "suggest", "indicate"]
        self.hyp_words = ["possible"]
        self.method_words = ["probe", "detect"]
        self.pos_tagger = StanfordPOSTagger('english-bidirectional-distsim.tagger')

    def get_features(self, phrase, filter_feature='0'):
        """Tokenize *phrase*, POS-tag it, and accumulate string features.

        `filter_feature` selects a feature family to ablate ('1' drops the
        POS features below; other codes presumably handled further down —
        truncated in this chunk).
        """
        words = word_tokenize(phrase)
        pos_tags = self.pos_tagger.tag(words)
        features = []
        for word, tag in pos_tags:
            wl = word.lower()
            # Feat 1: POS features
            if filter_feature != '1':
                # Skip punctuation tags; emit the POS tag itself as a feature.
                if tag != ',' and tag != '.':
                    features.append(tag)
            # Feat 2: Verb and adverb identity
            # (definition continues beyond this chunk — truncated)
# Satellite-clause extraction built on the Stanford parser (NLTK wrapper).
# NOTE(review): near-duplicate of the first chunk, collapsed onto one
# physical line; reformatted without changing any code tokens. The final
# function is cut off at a dangling `else:` by the chunk boundary.
import os
from nltk.parse import stanford
import sys
import codecs
from nltk.tokenize import word_tokenize, sent_tokenize
import re
from path_reader import PathReader

# Resolve Stanford parser jar/model locations from the local PATHS file.
pathreader = PathReader("./PATHS")
os.environ['STANFORD_PARSER'] = pathreader.get_path('PARSER')
os.environ['STANFORD_MODELS'] = pathreader.get_path('PARSER')
parser = stanford.StanfordParser()


def get_longest_cand(cands):
    """Return the longest string in *cands*, or '' when *cands* is empty.

    Ties are broken in favor of the earliest candidate (strictly-greater
    comparison below).
    """
    maxlen = 0
    bestcand = ''
    for cand in cands:
        if len(cand) > maxlen:
            maxlen = len(cand)
            bestcand = cand
    return bestcand


def extract_sat_clause(tree, is_root=True):
    """Recursively pick the longest S/SBAR clause material under *tree*.

    At the root of an S/SBAR, recurses into every child and keeps the
    longest result via get_longest_cand. NOTE(review): `type(tree) ==
    unicode` relies on the Python 2 `unicode` builtin; undefined on
    Python 3 — confirm the target interpreter.
    """
    # Leaves (unicode strings) and empty subtrees contribute nothing.
    if len(tree) == 0 or type(tree) == unicode:
        return ""
    elif tree.label() == 'S' or tree.label() == 'SBAR':
        if is_root:
            return get_longest_cand([extract_sat_clause(t, is_root=False) for t in tree])
        else:
            # (definition continues beyond this chunk — truncated)