def extract_patterns(self, config):
    """Build the BEF/BET/AFT context vectors for this instance.

    The words between the two entities (``self.bet_words``) are run through
    ``Reverb.extract_reverb_patterns_ptb``. When a usable pattern comes back,
    ``self.bet_vector`` is built from that pattern; otherwise it is built from
    the raw between-words. ``self.bef_vector`` and ``self.aft_vector`` are
    only assigned when the corresponding word lists are non-empty.

    :param config: configuration object forwarded unchanged to the
        vector-construction helpers.
    """
    tagged_pattern = Reverb.extract_reverb_patterns_ptb(self.bet_words)
    pattern_found = len(tagged_pattern) > 0

    if pattern_found:
        # NOTE(review): passive-voice detection goes through ``self.config``
        # rather than the ``config`` argument -- presumably the same object;
        # confirm against the constructor.
        self.passive_voice = self.config.reverb.detect_passive_voice(
            tagged_pattern)

    # Forced hack: "'s" is always tagged as VBZ, which makes ReVerb report a
    # pattern that is wrong. When the first pattern token is "'s", behave as
    # if no pattern had been extracted at all.
    if pattern_found and tagged_pattern[0][0] != "'s":
        self.bet_vector = self.construct_pattern_vector(tagged_pattern, config)
    else:
        self.bet_vector = self.construct_words_vectors(self.bet_words, config)

    # Context preceding the first entity.
    if len(self.bef_words) > 0:
        self.bef_vector = self.construct_words_vectors(self.bef_words, config)

    # Context following the second entity.
    if len(self.aft_words) > 0:
        self.aft_vector = self.construct_words_vectors(self.aft_words, config)
def extract_patterns(self, config):
    """Compute the before/between/after vectors of this instance.

    ``self.bet_vector`` is derived from the ReVerb pattern found in
    ``self.bet_words`` when one exists (and is not the spurious "'s" match);
    in every other case it is derived from the between-words themselves.
    ``self.passive_voice`` is set whenever ReVerb returns any pattern.
    ``self.bef_vector`` / ``self.aft_vector`` are assigned only for non-empty
    ``self.bef_words`` / ``self.aft_words``.

    :param config: configuration object handed through to
        ``construct_words_vectors`` and ``construct_pattern_vector``.
    """
    reverb_tags = Reverb.extract_reverb_patterns_ptb(self.bet_words)

    if len(reverb_tags) == 0:
        # ReVerb found nothing: fall back to the plain between-words.
        self.bet_vector = self.construct_words_vectors(self.bet_words, config)
    else:
        # NOTE(review): uses ``self.config`` here, not the ``config``
        # parameter -- presumably identical; verify against the caller.
        self.passive_voice = self.config.reverb.detect_passive_voice(
            reverb_tags)

        # Forced hack: "'s" is always tagged VBZ (u"'s", 'VBZ'), so ReVerb
        # identifies a pattern that is wrong. Ignore the extracted pattern
        # in that case and use the between-words instead.
        leading_token = reverb_tags[0][0]
        if leading_token == "'s":
            self.bet_vector = self.construct_words_vectors(
                self.bet_words, config)
        else:
            self.bet_vector = self.construct_pattern_vector(
                reverb_tags, config)

    # Words before the first entity and after the second entity are each
    # vectorised independently, and only when present.
    if len(self.bef_words) != 0:
        self.bef_vector = self.construct_words_vectors(self.bef_words, config)
    if len(self.aft_words) != 0:
        self.aft_vector = self.construct_words_vectors(self.aft_words, config)
def __init__(self, config_file, seeds_file, negative_seeds, sentences_file, similarity, confidance):
    """Read the configuration, load the seeds and obtain the tf-idf model.

    :param config_file: path of a ``key=value`` parameters file; lines that
        start with ``#`` and empty lines are skipped.
    :param seeds_file: passed to ``self.read_seeds``.
    :param negative_seeds: passed to ``self.read_negative_seeds``.
    :param sentences_file: corpus used to build the vector space model when
        no cached ``vsm.pkl`` can be opened.
    :param similarity: stored as ``self.threshold_similarity``.
    :param confidance: stored as ``self.instance_confidance``.
    :raises AssertionError: if alpha + beta + gamma do not sum to 1.
    """
    self.seed_tuples = set()
    self.negative_seed_tuples = set()
    self.e1_type = None
    self.e2_type = None
    self.stopwords = stopwords.words('english')
    self.threshold_similarity = similarity
    self.instance_confidance = confidance
    self.reverb = Reverb()

    def parse_flag(text):
        # bool("False\n") is True, so the textual value has to be
        # interpreted explicitly instead of being passed to bool().
        return text.strip().lower() in ("true", "yes", "1", "on")

    def stripped(text):
        return text.strip()

    # Every recognised key is stored on ``self`` under the same name.
    # No key is a prefix of another, so at most one entry matches a line.
    settings = (
        ("wUpdt", float),
        ("wUnk", float),
        ("wNeg", float),
        ("number_iterations", int),
        ("use_RlogF", parse_flag),
        ("min_pattern_support", int),
        ("max_tokens_away", int),
        ("min_tokens_away", int),
        ("context_window_size", int),
        ("use_reverb", stripped),
        ("alpha", float),
        ("beta", float),
        ("gamma", float),
    )

    for line in fileinput.input(config_file):
        if line.startswith("#") or len(line) == 1:
            continue
        for key, convert in settings:
            if line.startswith(key):
                setattr(self, key, convert(line.split("=")[1]))
                break

    # Compare with a tolerance: an exact ``== 1`` rejects valid weights
    # such as 0.1 + 0.2 + 0.7 because of binary floating-point rounding.
    weights_sum = self.alpha + self.beta + self.gamma
    assert abs(weights_sum - 1.0) < 1e-9, \
        "alpha + beta + gamma must sum to 1, got %r" % (weights_sum,)

    self.read_seeds(seeds_file)
    self.read_negative_seeds(negative_seeds)
    fileinput.close()

    print("\nConfiguration parameters")
    print("========================")
    print("Relationship Representation")
    print("e1 type :", self.e1_type)
    print("e2 type :", self.e2_type)
    print("context window :", self.context_window_size)
    print("max tokens away :", self.max_tokens_away)
    print("min tokens away :", self.min_tokens_away)
    print("use ReVerb :", self.use_reverb)

    print("\nVectors")
    print("alpha :", self.alpha)
    print("beta :", self.beta)
    print("gamma :", self.gamma)

    print("\nSeeds:")
    print("positive seeds :", len(self.seed_tuples))
    print("negative seeds :", len(self.negative_seed_tuples))
    print("negative seeds wNeg :", self.wNeg)
    print("unknown seeds wUnk :", self.wUnk)

    print("\nParameters and Thresholds")
    print("threshold_similarity :", self.threshold_similarity)
    print("instance confidence :", self.instance_confidance)
    print("min_pattern_support :", self.min_pattern_support)
    print("iterations :", self.number_iterations)
    print("iteration wUpdt :", self.wUpdt)
    print("\n")

    try:
        # Pickle data is binary: the cache is written with "wb" below, so it
        # must be read back with "rb". ``with`` closes the handle even when
        # unpickling fails.
        with open("vsm.pkl", "rb") as f:
            print("\nLoading tf-idf model from disk...")
            # NOTE: pickle.load can execute arbitrary code; "vsm.pkl" must
            # only ever be a cache file produced by this program.
            self.vsm = pickle.load(f)
    except IOError:
        # The cache could not be opened/read: rebuild the model and store it.
        print("\nGenerating tf-idf model from sentences...")
        self.vsm = VectorSpaceModel.VectorSpaceModel(
            sentences_file, self.stopwords)
        print("\nWriting generated model to disk...")
        with open("vsm.pkl", "wb") as f:
            pickle.dump(self.vsm, f)