Example #1
    def args_syn_wn(self, lemmas_t, lemmas_h, score, id, type):
        # Emit an ArgRelSyn predicate: 1 if the WordNet-expanded lemma
        # differences of text and hypothesis overlap (or one side has no
        # content words left after filtering), 0 otherwise.
        result = 0
        wn = WNTools()
        tool = RTETools()

        # Remove punctuation and stop words from the text-side lemmas.
        tool.set_tokens(lemmas_t.split())
        tool.quit_punct()
        lemmas_t = tool.quit_sw()

        # Remove punctuation and stop words from the hypothesis-side lemmas.
        tool.set_tokens(lemmas_h.split())
        tool.quit_punct()
        lemmas_h = tool.quit_sw()

        # One-sided lemma differences, expanded with WordNet synonyms.
        (diff_ht, diff_th) = self.diff(lemmas_t, lemmas_h)
        expand_diff_ht = wn.expand_bow_syns(list(diff_ht))
        expand_diff_th = wn.expand_bow_syns(list(diff_th))

        if len(expand_diff_ht) != 0 and len(expand_diff_th) != 0:
            sim = SetMetrics(expand_diff_ht, expand_diff_th)
            if sim.cosine() > 0:
                result = 1
        else:
            result = 1

        predicate = 'ArgRelSyn(%s, %s, %s)' % (id, type, result)

        return predicate
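
A minimal standalone sketch of the comparison step used above, with illustrative data. The commented import lines are assumptions (the snippets do not show module paths); the call shapes of WNTools and SetMetrics are taken from the example itself.

# Hedged sketch of the core of args_syn_wn: expand the one-sided lemma
# differences with WordNet synonyms and test whether the two bags overlap.
#from wn_tools import WNTools        # hypothetical module path
#from set_metrics import SetMetrics  # hypothetical module path

wn = WNTools()
diff_ht = ['buy', 'car']              # one-sided lemma difference (illustrative)
diff_th = ['purchase', 'automobile']  # the other direction (illustrative)
expand_ht = wn.expand_bow_syns(diff_ht)
expand_th = wn.expand_bow_syns(diff_th)
result = 0
if expand_ht and expand_th:
    if SetMetrics(expand_ht, expand_th).cosine() > 0:
        result = 1   # synonym-expanded difference bags share content
else:
    result = 1       # nothing left to compare on one side
predicate = 'ArgRelSyn(%s, %s, %s)' % (1, 'A1', result)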
Example #2
 def __init__(self,
              frames_text={},
              tokens_text=[],
              chunks_text=[],
              frames_hypo={},
              tokens_hypo=[],
              chunks_hypo=[],
              e_type='simple',
              verbose=1,
              entailment=-1):
     self.e_type = e_type
     self.srl_t = SRLTools()
     self.srl_h = SRLTools()
     self.verb_net = VerbMetrics()
     self.arg_sim = SetMetrics()
     self.lin = Lin()
     self.wn = WNTools()
     self.frames_text = frames_text
     self.frames_hypo = frames_hypo
     self.tokens_hypo = tokens_hypo
     self.tokens_text = tokens_text
     self.chunks_hypo = chunks_hypo
     self.chunks_text = chunks_text
     self.args_text = {}
     self.args_hypo = {}
     self.verbs_text = []
     self.verbs_hypo = []
     self.edit_score = 0
     self.verb_score = 0
     self.chunk_score = 0
     self.oper_type = {'del': 0, 'in': 0, 'sub': 0}
     self.verbose = verbose
     self.entailment = entailment
     return
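
A hedged construction sketch for the class whose __init__ appears above. The class name and the argument values are illustrative assumptions; only the keyword names come from the signature shown.

# Hedged sketch: instantiating the class defined above. The class name
# EditEntailment is hypothetical; the keyword names mirror the __init__.
engine = EditEntailment(
    frames_text={},                                   # illustrative values
    tokens_text=['the', 'man', 'buys', 'a', 'car'],
    chunks_text=['B-NP', 'I-NP', 'B-VP', 'B-NP', 'I-NP'],
    frames_hypo={},
    tokens_hypo=['the', 'man', 'purchases', 'a', 'vehicle'],
    chunks_hypo=['B-NP', 'I-NP', 'B-VP', 'B-NP', 'I-NP'],
    e_type='simple',
    verbose=0,
    entailment=1)

Passing explicit containers also sidesteps the usual Python pitfall that the mutable defaults ({} and []) in the signature are created once and shared across calls.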
Example #3
    def arg_proc(self, id, point, sep):
        self.a_predicates = []
        if 'verbs' in point:
            verbs = point['verbs']
            for i, verb in verbs.iteritems():
                i = '%s.%s'%(id, i)
                (vt, vh) = verb['tokens']
                if 'ARG' in verb:
                    args = verb['ARG']
                    for type, arg in args.items():
                        w_t = arg['wordform-t'].split()
                        w_h = arg['wordform-h'].split()
                        l_t = arg['lemma-t'].split()
                        l_h = arg['lemma-h'].split()
                        p_t = arg['pos-t'].split()
                        p_h = arg['pos-h'].split()
                        c_t = arg['chunk-t'].split()
                        c_h = arg['chunk-h'].split()
                        n_t = arg['ne-t'].split()
                        n_h = arg['ne-h'].split()
                        score = arg['score']
                        w_t.extend(w_h)
                        l_t.extend(l_h)
                        p_t.extend(p_h)
                        #TODO quit stop words

                        # Emit Token predicates for the wordform, lemma and
                        # POS of every argument token, plus Lin-similar words
                        # and the first WordNet hypernym levels.
                        for j, word in enumerate(w_t):
                            word_arg = 'Token(%s, %s, "%s")'%(type, id, self.clean_str(word))
                            lemma_arg = 'Token(%s, %s, "%s")'%(type, id, self.clean_str(l_t[j]))
                            pos_arg = 'Token(%s, %s, "%s")'%(type, id, self.clean_str(p_t[j]))
                            lin = Lin()
                            sim_words = lin.expand_w(word)
                            wn = WNTools()
                            hyps = wn.get_mfs_hypernyms((l_t[j], p_t[j]))

                            self.a_predicates.append(word_arg)
                            self.a_predicates.append(lemma_arg)
                            self.a_predicates.append(pos_arg)
                            
                            for sim_word in sim_words:
                                lin_arg = 'Token(%s, %s, "%s")'%(type, id, self.clean_str(sim_word))
                                self.a_predicates.append(lin_arg)

                            # Keep only the first three hypernym levels.
                            for key, tree in hyps:
                                for category in tree[:3]:
                                    hyp_arg = 'Token(%s, %s, "%s")'%(type, id, self.clean_str(category))
                                    self.a_predicates.append(hyp_arg)

                        arg_id = 'ARG(%s, %s, %s)'%(type, i, id)
                        self.a_predicates.append(arg_id)
        return self.a_predicates
Example #4
 def args_hyp_wn(self, lemmas_t, pos_t, lemmas_h, pos_h, score, id, type):
     result = 0
     wn = WNTools()
     tool = RTETools()
     tool.set_tokens(lemmas_t)
     tool.quit_punct()
     lemmas_t = tool.quit_sw()
     tool.set_tokens(lemmas_h)
     tool.quit_punct()
     lemmas_h = tool.quit_sw()
     (diff_ht, diff_th) = self.diff(lemmas_t, lemmas_h)
     #TODO
     predicate = 'HypRelSyn(%s, %s, %s)'%(id, type, result)
     return predicate
Example #5
 def __init__(self, frames_text={}, tokens_text=[], frames_hypo={},
              tokens_hypo=[], sim_type='Lin', verbose=1):
     self.srl_t = SRLTools()
     self.srl_h = SRLTools()
     self.verb_net = VerbMetrics()
     self.arg_sim = SetMetrics()
     self.lin = Lin()
     self.wn = WNTools()
     self.frames_text = frames_text
     self.frames_hypo = frames_hypo
     self.tokens_hypo = tokens_hypo
     self.tokens_text = tokens_text
     self.args_text = {}
     self.args_hypo = {}
     self.verbs_text = []
     self.verbs_hypo = []
     self.tine_score = 0
     self.verb_score = 0
     self.arg_score = 0
     self.sim_type = sim_type
     self.verbose = verbose
     #self.pos_text = pos_text
     #self.pos_hypo = pos_hypo
     return
Example #6
def main(args):
    pickle_file = args[0]
    print 'loading file:',pickle_file
    with open(pickle_file, 'rb') as pf:  # binary mode for pickled data
        pairs = pickle.load(pf)
        k = 0        
        for pair in pairs:
            print 'id:', pair.get_id()
            print 's1:', pair.get_text()
            print 's2:', pair.get_hypo()
            print 'features:', pair.get_features_text_type()
            print 'set-metrics, cos test'
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')
            set_th = SetMetrics(lemmas_text, lemmas_hypo)
            cos = set_th.cosine()
            #print cos
            print 'SRL tools'
            frames_text = pair.get_feature_text('frames')
            print frames_text
            print '################'
            srl = SRLTools(lemmas_text, frames_text)
            word_to_frame = srl.get_words_frame()
            print word_to_frame
            print '################'
            print srl.get_verbs()
            print '################'
            
            #print 'verb-metrics, '
            pos_text = pair.get_feature_text('pos')
            pos_hypo = pair.get_feature_hypo('pos')
            verbs = VerbMetrics()
            lin = Lin()
            vectors = VectorMetrics()
            hyper = WNTools()
            for i, pos_tuple_t in enumerate(pos_text):
                (token, pos_t) = pos_tuple_t
                if pos_t.startswith('V'):
                    for j, pos_tuple_h in enumerate(pos_hypo):
                        (token, pos_h) = pos_tuple_h
                        if pos_h.startswith('V'):                            
                            verbs.set_text_verb(lemmas_text[i])
                            verbs.set_hypo_verb(lemmas_hypo[j])
                            #print 'verbs test t:%s h:%s'%(lemmas_text[i], lemmas_hypo[j])
                            vn_isec = verbs.vn_isec()
                            #print 'verb net isec: %d'%vn_isec
                            #print 'lin(%s):'%lemmas_text[i], '\n', lin.n_similar_words(lemmas_text[i])
                            #print 'lin(%s):'%lemmas_hypo[j], '\n', lin.n_similar_words(lemmas_hypo[j])
                            t_sim = lin.n_similar_words(lemmas_text[i])
                            h_sim = lin.n_similar_words(lemmas_hypo[j])
                            t_score = [float(score) for word,score in t_sim]
                            h_score = [float(score) for word,score in h_sim]
                            vectors.set_vectors(t_score, h_score)
                            #print 'cos_vect: ', vectors.cosine()
                        elif pos_h.startswith('N'):
                            #print 'wn test hypernyms'
                            trees = hyper.get_mfs_hypernyms((lemmas_hypo[j], pos_h))
                            #print trees


            k += 1
            if k >= 10:
                break
    return
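
The main function above expects the pickle file path as args[0]; a hedged sketch of a module-level entry point (not shown in the snippet) that would drive it:

# Hypothetical entry point for main(args) above, e.g.:
#   python inspect_pairs.py pairs.pkl   (script name is an assumption)
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])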
Example #7
    def arg_proc(self, id, point, sep):
        self.a_predicates = []
        if 'verbs' in point:
            verbs = point['verbs']
            for i, verb in verbs.iteritems():
                (vt, vh) = verb['tokens']
                if 'ARG' in verb:
                    args = verb['ARG']
                    for type, arg in args.items():
                        w_t = arg['wordform-t']
                        w_h = arg['wordform-h']
                        l_t = arg['lemma-t']
                        l_h = arg['lemma-h']
                        p_t = arg['pos-t']
                        p_h = arg['pos-h']
                        c_t = arg['chunk-t']
                        c_h = arg['chunk-h']
                        n_t = arg['ne-t']
                        n_h = arg['ne-h']
                        score = arg['score']

                        
                        # Surface-form predicates for the aligned T/H
                        # argument pair: wordforms, lemmas and POS tags.
                        token_arg = 'TokenArg(%s, %s, "%s %s %s")'%(id, type, self.clean_str(w_t), sep, self.clean_str(w_h))
                        lemma_arg = 'LemmaArg(%s, %s, "%s %s %s")'%(id, type, self.clean_str(l_t), sep, self.clean_str(l_h))
                        pos_arg = 'PosArg(%s, %s, "%s %s %s")'%(id, type, self.clean_str(p_t), sep, self.clean_str(p_h))
                        #expand content words Lin
                        sw_tool = RTETools()
                        sw_tool.set_tokens(l_t.split())
                        sw_tool.quit_sw()
                        l_tsw = sw_tool.quit_punct()
                        sw_tool.set_tokens(l_h.split())
                        sw_tool.quit_sw()
                        l_hsw = sw_tool.quit_punct()

                        content_arg = 'ContArg(%s, %s, "%s %s %s")'%(id, type, self.clean_str(' '.join(l_tsw)), 
                                sep, self.clean_str(' '.join(l_hsw)))

                        sim_diff = self.sim_dif(l_tsw, l_hsw)
                        diff_arg = 'DiffArg(%s, %s, "%s")'%(id, type, self.clean_str(' '.join(sim_diff)))

                        if score >= 0.4:
                            score_arg = 'SimArg(%s, %s, 1)'%(id, type)
                        else: #TODO find threshold from data statistics mf
                            score_arg = 'SimArg(%s, %s, 0)'%(id, type)


                        lin = Lin()
                        bow_t = lin.expand_bow(l_tsw)
                        bow_h = lin.expand_bow(l_hsw)
                        lin_arg = 'LinArg(%s, %s, "%s %s %s")'%(id, type, self.clean_str(' '.join(bow_t)), 
                                sep, self.clean_str(' '.join(bow_h)))

                        wn = WNTools()
                        wns_t = zip(l_t.split(), p_t.split())
                        wns_h = zip(l_h.split(), p_h.split())
                        bow_wnt = wn.expand_bow_tree(wns_t)
                        bow_wnh = wn.expand_bow_tree(wns_h)
                        wn_arg = 'WnArg(%s, %s, "%s %s %s")'%(id, type, self.clean_str(' '.join(bow_wnt)), 
                                sep, self.clean_str(' '.join(bow_wnh)))

                        rel_syn_args = self.args_syn_wn(l_t, l_h, score, id, type)                


                        self.a_predicates.append(token_arg)
                        self.a_predicates.append(score_arg)
                        self.a_predicates.append(lemma_arg)
                        self.a_predicates.append(rel_syn_args)
                        self.a_predicates.append(content_arg)
                        self.a_predicates.append(diff_arg)
                        self.a_predicates.append(pos_arg)
                        self.a_predicates.append(lin_arg)
                        self.a_predicates.append(wn_arg)
        return self.a_predicates
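
A hedged sketch of the nested point dictionary that arg_proc walks. Every key comes from the code above; the values, the separator, and the `extractor` instance used in the call are illustrative assumptions.

# Illustrative shape of the `point` argument read by arg_proc above.
# Keys are taken from the code; the concrete values are invented.
point = {
    'verbs': {
        0: {
            'tokens': ('buy', 'purchase'),   # (verb in T, verb in H)
            'ARG': {
                'A1': {
                    'wordform-t': 'a car', 'wordform-h': 'a vehicle',
                    'lemma-t': 'a car', 'lemma-h': 'a vehicle',
                    'pos-t': 'DT NN', 'pos-h': 'DT NN',
                    'chunk-t': 'B-NP I-NP', 'chunk-h': 'B-NP I-NP',
                    'ne-t': 'O O', 'ne-h': 'O O',
                    'score': 0.8,
                },
            },
        },
    },
}
predicates = extractor.arg_proc(1, point, '#')   # `extractor` is hypothetical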
Example #8
    def arg_proc(self, id, point, sep):
        self.a_predicates = []
        n = 3  #levels in wn tree
        if 'verbs' in point:
            verbs = point['verbs']
            for i, verb in verbs.iteritems():
                i = '%s.%s' % (id, i)
                (vt, vh) = verb['tokens']
                if 'ARG' in verb:
                    args = verb['ARG']
                    for type, arg in args.items():
                        w_t = arg['wordform-t'].split()
                        w_h = arg['wordform-h'].split()
                        l_t = arg['lemma-t'].split()
                        l_h = arg['lemma-h'].split()
                        p_t = arg['pos-t'].split()
                        p_h = arg['pos-h'].split()
                        c_t = arg['chunk-t'].split()
                        c_h = arg['chunk-h'].split()
                        n_t = arg['ne-t'].split()
                        n_h = arg['ne-h'].split()
                        score = arg['score']
                        w_t.extend(w_h)
                        l_t.extend(l_h)
                        p_t.extend(p_h)
                        #TODO quit stop words

                        for j, word in enumerate(w_t):
                            word_arg = '>token_word\n"%s" %s "%s"' % (
                                type, j, self.clean_str(word))
                            lemma_arg = '>token_lemma\n"%s" %s "%s"' % (
                                type, j, self.clean_str(l_t[j]))
                            pos_arg = '>token_pos\n"%s" %s "%s"' % (
                                type, j, self.clean_str(p_t[j]))
                            lin = Lin()
                            sim_words = lin.expand_w(word)
                            wn = WNTools()
                            hyps = wn.get_mfs_hypernyms((l_t[j], p_t[j]))

                            self.a_predicates.append(word_arg)
                            self.a_predicates.append(lemma_arg)
                            self.a_predicates.append(pos_arg)

                            # Use separate indices so the word index j from
                            # the enclosing loop is not shadowed.
                            for k, sim_word in enumerate(sim_words):
                                lin_arg = '>token_lin\n"%s" %s "%s"' % (
                                    type, k, self.clean_str(sim_word))
                                self.a_predicates.append(lin_arg)

                            for key, tree in hyps:
                                for k, category in enumerate(tree[:n]):
                                    hyp_arg = '>token_wn\n"%s" %s "%s"' % (
                                        type, k, self.clean_str(category))
                                    self.a_predicates.append(hyp_arg)

                        arg_id = '>arg\n"%s" "%s%s%s" %s' % (
                            type, self.clean_str(vt), sep, self.clean_str(vh),
                            id)
                        self.a_predicates.append(arg_id)
        return self.a_predicates