def arg_proc(self, id, point, sep):
    """Build `Token(...)` / `ARG(...)` predicate strings for every verb
    argument in *point* and return them as a list.

    For each ARG of each verb, emits one Token predicate per word form,
    lemma and POS tag (text and hypothesis tokens processed as a single
    concatenated sequence), plus Token predicates for Lin-thesaurus
    expansions and for up to 3 WordNet hypernym levels per word, and
    finally one ARG predicate per argument.

    id    -- pair identifier, embedded in every predicate string
    point -- parsed pair dict; expects point['verbs'][i]['ARG'][type]
             with 'wordform-t/h', 'lemma-t/h', 'pos-t/h' etc. fields
    sep   -- unused in this variant (kept for interface compatibility
             with the '>arg' variant of arg_proc)

    Returns self.a_predicates (the list is also stored on the instance).
    """
    self.a_predicates = []
    # Hoisted out of the per-token loop: the original constructed a fresh
    # Lin() and WNTools() for every single token, repeating any setup work
    # those tools do.  Behavior is unchanged as long as the tools are
    # stateless lookup helpers -- presumably so; confirm in their classes.
    lin = Lin()
    wn = WNTools()
    if 'verbs' in point:
        verbs = point['verbs']
        for i, verb in verbs.iteritems():
            i = '%s.%s' % (id, i)  # qualify the verb index with the pair id
            (vt, vh) = verb['tokens']
            if 'ARG' in verb:
                args = verb['ARG']
                for type, arg in args.items():
                    w_t = arg['wordform-t'].split()
                    w_h = arg['wordform-h'].split()
                    l_t = arg['lemma-t'].split()
                    l_h = arg['lemma-h'].split()
                    p_t = arg['pos-t'].split()
                    p_h = arg['pos-h'].split()
                    c_t = arg['chunk-t'].split()
                    c_h = arg['chunk-h'].split()
                    n_t = arg['ne-t'].split()
                    n_h = arg['ne-h'].split()
                    score = arg['score']
                    # Treat text + hypothesis tokens as one sequence so the
                    # loop below covers both sides with a single index.
                    w_t.extend(w_h)
                    l_t.extend(l_h)
                    p_t.extend(p_h)
                    #TODO quit stop words
                    for j, word in enumerate(w_t):
                        word_arg = 'Token(%s, %s, "%s")' % (type, id, self.clean_str(word))
                        lemma_arg = 'Token(%s, %s, "%s")' % (type, id, self.clean_str(l_t[j]))
                        pos_arg = 'Token(%s, %s, "%s")' % (type, id, self.clean_str(p_t[j]))
                        sim_words = lin.expand_w(word)
                        hyps = wn.get_mfs_hypernyms((l_t[j], p_t[j]))
                        self.a_predicates.append(word_arg)
                        self.a_predicates.append(lemma_arg)
                        self.a_predicates.append(pos_arg)
                        for sim_word in sim_words:
                            lin_arg = 'Token(%s, %s, "%s")' % (type, id, self.clean_str(sim_word))
                            self.a_predicates.append(lin_arg)
                        for key, tree in hyps:
                            # keep at most the 3 closest hypernym levels
                            t = 0
                            for category in tree:
                                if t >= 3:
                                    break
                                hyp_arg = 'Token(%s, %s, "%s")' % (type, id, self.clean_str(category))
                                self.a_predicates.append(hyp_arg)
                                t += 1
                    arg_id = 'ARG(%s, %s, %s)' % (type, i, id)
                    self.a_predicates.append(arg_id)
    return self.a_predicates
def arg_proc(self, id, point, sep):
    """Build '>token_*' and '>arg' predicate strings for every verb
    argument in *point* and return them as a list.

    For each ARG of each verb, emits '>token_word', '>token_lemma' and
    '>token_pos' lines per token (text and hypothesis tokens processed
    as one concatenated sequence), '>token_lin' lines for Lin-thesaurus
    expansions, '>token_wn' lines for up to n WordNet hypernym levels,
    and one '>arg' line per argument joining the verb's text/hypothesis
    tokens with *sep*.

    id    -- pair identifier, embedded in the '>arg' predicate
    point -- parsed pair dict; expects point['verbs'][i]['ARG'][type]
             with 'wordform-t/h', 'lemma-t/h', 'pos-t/h' etc. fields
    sep   -- separator placed between the verb's text and hypothesis
             tokens in the '>arg' predicate

    Returns self.a_predicates (the list is also stored on the instance).
    """
    self.a_predicates = []
    n = 3  # levels in wn tree
    # Hoisted out of the per-token loop: the original constructed a fresh
    # Lin() and WNTools() for every single token, repeating any setup work
    # those tools do.  Behavior is unchanged as long as the tools are
    # stateless lookup helpers -- presumably so; confirm in their classes.
    lin = Lin()
    wn = WNTools()
    if 'verbs' in point:
        verbs = point['verbs']
        for i, verb in verbs.iteritems():
            i = '%s.%s' % (id, i)  # qualify the verb index with the pair id
            (vt, vh) = verb['tokens']
            if 'ARG' in verb:
                args = verb['ARG']
                for type, arg in args.items():
                    w_t = arg['wordform-t'].split()
                    w_h = arg['wordform-h'].split()
                    l_t = arg['lemma-t'].split()
                    l_h = arg['lemma-h'].split()
                    p_t = arg['pos-t'].split()
                    p_h = arg['pos-h'].split()
                    c_t = arg['chunk-t'].split()
                    c_h = arg['chunk-h'].split()
                    n_t = arg['ne-t'].split()
                    n_h = arg['ne-h'].split()
                    score = arg['score']
                    # Treat text + hypothesis tokens as one sequence so the
                    # loop below covers both sides with a single index.
                    w_t.extend(w_h)
                    l_t.extend(l_h)
                    p_t.extend(p_h)
                    #TODO quit stop words
                    for j, word in enumerate(w_t):
                        word_arg = '>token_word\n"%s" %s "%s"' % (type, j, self.clean_str(word))
                        lemma_arg = '>token_lemma\n"%s" %s "%s"' % (type, j, self.clean_str(l_t[j]))
                        pos_arg = '>token_pos\n"%s" %s "%s"' % (type, j, self.clean_str(p_t[j]))
                        sim_words = lin.expand_w(word)
                        hyps = wn.get_mfs_hypernyms((l_t[j], p_t[j]))
                        self.a_predicates.append(word_arg)
                        self.a_predicates.append(lemma_arg)
                        self.a_predicates.append(pos_arg)
                        # 'k' and 'level' replace reuses of 'j' that shadowed
                        # the token index above; the emitted index values are
                        # unchanged (both started from 0 per inner loop).
                        for k, sim_word in enumerate(sim_words):
                            lin_arg = '>token_lin\n"%s" %s "%s"' % (type, k, self.clean_str(sim_word))
                            self.a_predicates.append(lin_arg)
                        for key, tree in hyps:
                            # keep at most the n closest hypernym levels
                            for level, category in enumerate(tree[:n]):
                                hyp_arg = '>token_wn\n"%s" %s "%s"' % (type, level, self.clean_str(category))
                                self.a_predicates.append(hyp_arg)
                    arg_id = '>arg\n"%s" "%s%s%s" %s' % (type, self.clean_str(vt), sep, self.clean_str(vh), id)
                    self.a_predicates.append(arg_id)
    return self.a_predicates
def main(args): pickle_file = args[0] print 'loading file:',pickle_file with open(pickle_file, 'r') as pf: pairs = pickle.load(pf) k = 0 for pair in pairs: print 'id:', pair.get_id() print 's1:', pair.get_text() print 's2:', pair.get_hypo() print 'features:', pair.get_features_text_type() print 'set-metrics, cos test' lemmas_text = pair.get_feature_text('lemmas') lemmas_hypo = pair.get_feature_hypo('lemmas') set_th = SetMetrics(lemmas_text, lemmas_hypo) cos = set_th.cosine() #print cos print 'SRL tools' frames_text = pair.get_feature_text('frames') print frames_text print '################' srl = SRLTools(lemmas_text, frames_text) word_to_frame = srl.get_words_frame() print word_to_frame print '################' print srl.get_verbs() print '################' #print 'verb-metrics, ' pos_text = pair.get_feature_text('pos') pos_hypo = pair.get_feature_hypo('pos') verbs = VerbMetrics() lin = Lin() vectors = VectorMetrics() hyper = WNTools() for i, pos_tuple_t in enumerate(pos_text): (token, pos_t) = pos_tuple_t if pos_t.startswith('V'): for j, pos_tuple_h in enumerate(pos_hypo): (token, pos_h) = pos_tuple_h if pos_h.startswith('V'): verbs.set_text_verb(lemmas_text[i]) verbs.set_hypo_verb(lemmas_hypo[j]) #print 'verbs test t:%s h:%s'%(lemmas_text[i], lemmas_hypo[j]) vn_isec = verbs.vn_isec() #print 'verb net isec: %d'%vn_isec #print 'lin(%s):'%lemmas_text[i], '\n', lin.n_similar_words(lemmas_text[i]) #print 'lin(%s):'%lemmas_hypo[j], '\n', lin.n_similar_words(lemmas_hypo[j]) t_sim = lin.n_similar_words(lemmas_text[i]) h_sim = lin.n_similar_words(lemmas_hypo[j]) t_score = [float(score) for word,score in t_sim] h_score = [float(score) for word,score in h_sim] vectors.set_vectors(t_score, h_score) #print 'cos_vect: ', vectors.cosine() elif pos_h.startswith('N'): #print 'wn test hypernyms' trees = hyper.get_mfs_hypernyms((lemmas_hypo[j], pos_h)) #print trees k += 1 if k >= 10: break pf.close return
def arg_proc(self, id, point, sep):
    """Build '>token_*' and '>arg' predicate strings for every verb
    argument in *point* and return them as a list.

    For each ARG of each verb, emits '>token_word', '>token_lemma' and
    '>token_pos' lines per token (text and hypothesis tokens processed
    as one concatenated sequence), '>token_lin' lines for Lin-thesaurus
    expansions, '>token_wn' lines for up to n WordNet hypernym levels,
    and one '>arg' line per argument joining the verb's text/hypothesis
    tokens with *sep*.

    id    -- pair identifier, embedded in the '>arg' predicate
    point -- parsed pair dict; expects point['verbs'][i]['ARG'][type]
             with 'wordform-t/h', 'lemma-t/h', 'pos-t/h' etc. fields
    sep   -- separator placed between the verb's text and hypothesis
             tokens in the '>arg' predicate

    Returns self.a_predicates (the list is also stored on the instance).
    """
    self.a_predicates = []
    n = 3 #levels in wn tree
    if 'verbs' in point:
        verbs = point['verbs']
        for i, verb in verbs.iteritems():
            # qualify the verb index with the pair id
            i = '%s.%s' % (id, i)
            (vt, vh) = verb['tokens']
            if 'ARG' in verb:
                args = verb['ARG']
                for type, arg in args.items():
                    w_t = arg['wordform-t'].split()
                    w_h = arg['wordform-h'].split()
                    l_t = arg['lemma-t'].split()
                    l_h = arg['lemma-h'].split()
                    p_t = arg['pos-t'].split()
                    p_h = arg['pos-h'].split()
                    c_t = arg['chunk-t'].split()
                    c_h = arg['chunk-h'].split()
                    n_t = arg['ne-t'].split()
                    n_h = arg['ne-h'].split()
                    score = arg['score']
                    # Treat text + hypothesis tokens as one sequence so the
                    # loop below covers both sides with a single index.
                    w_t.extend(w_h)
                    l_t.extend(l_h)
                    p_t.extend(p_h)
                    #TODO quit stop words
                    for j, word in enumerate(w_t):
                        word_arg = '>token_word\n"%s" %s "%s"' % (
                            type, j, self.clean_str(word))
                        lemma_arg = '>token_lemma\n"%s" %s "%s"' % (
                            type, j, self.clean_str(l_t[j]))
                        pos_arg = '>token_pos\n"%s" %s "%s"' % (
                            type, j, self.clean_str(p_t[j]))
                        lin = Lin()
                        sim_words = lin.expand_w(word)
                        wn = WNTools()
                        hyps = wn.get_mfs_hypernyms((l_t[j], p_t[j]))
                        self.a_predicates.append(word_arg)
                        self.a_predicates.append(lemma_arg)
                        self.a_predicates.append(pos_arg)
                        # NOTE(review): the two inner loops below reuse 'j',
                        # clobbering the token index.  Harmless today because
                        # 'j' is not read again before enumerate() rebinds it,
                        # but fragile -- consider distinct names.
                        for j, sim_word in enumerate(sim_words):
                            lin_arg = '>token_lin\n"%s" %s "%s"' % (
                                type, j, self.clean_str(sim_word))
                            self.a_predicates.append(lin_arg)
                        for key, tree in hyps:
                            # keep at most the n closest hypernym levels
                            j = 0
                            for category in tree[:n]:
                                hyp_arg = '>token_wn\n"%s" %s "%s"' % (
                                    type, j, self.clean_str(category))
                                j += 1
                                self.a_predicates.append(hyp_arg)
                    arg_id = '>arg\n"%s" "%s%s%s" %s' % (
                        type, self.clean_str(vt), sep, self.clean_str(vh), id)
                    self.a_predicates.append(arg_id)
    return self.a_predicates