Ejemplo n.º 1
0
    def args_syn_wn(self, lemmas_t, lemmas_h, score, id, type):
        """Build the ``ArgRelSyn`` predicate for one text/hypothesis argument.

        Both lemma strings are split on whitespace and run through
        ``RTETools.quit_punct`` and ``RTETools.quit_sw`` (presumably
        punctuation and stop-word removal -- confirm against RTETools).
        The two parts returned by ``self.diff`` are expanded with
        ``WNTools.expand_bow_syns`` and compared via ``SetMetrics.cosine``.

        The flag is 1 when either expansion is empty or when the cosine is
        positive; otherwise it is 0.  ``score`` is accepted but not used.

        Returns:
            The string ``'ArgRelSyn(<id>, <type>, <flag>)'``.
        """
        wordnet = WNTools()
        token_filter = RTETools()

        # Text side: only the value returned by quit_sw() is kept.
        token_filter.set_tokens(lemmas_t.split())
        token_filter.quit_punct()
        text_lemmas = token_filter.quit_sw()

        # Hypothesis side, reusing the same filter instance.
        token_filter.set_tokens(lemmas_h.split())
        token_filter.quit_punct()
        hypo_lemmas = token_filter.quit_sw()

        only_ht, only_th = self.diff(text_lemmas, hypo_lemmas)
        syns_ht = wordnet.expand_bow_syns(list(only_ht))
        syns_th = wordnet.expand_bow_syns(list(only_th))

        if len(syns_ht) == 0 or len(syns_th) == 0:
            # Nothing to compare on at least one side: flag is set.
            related = 1
        elif SetMetrics(syns_ht, syns_th).cosine() > 0:
            related = 1
        else:
            related = 0

        return 'ArgRelSyn(%s, %s, %s)' % (id, type, related)
Ejemplo n.º 2
0
    def args_syn_wn(self, lemmas_t, lemmas_h, score, id, type):
        """Build the ``>arg_relsyn`` record for one text/hypothesis argument.

        Each lemma string is whitespace-split, then passed through
        ``RTETools.quit_punct`` and ``RTETools.quit_sw`` (presumably
        punctuation and stop-word filtering -- confirm against RTETools).
        ``self.diff`` yields two collections which are expanded with
        ``WNTools.expand_bow_syns``; their overlap is measured with
        ``SetMetrics.cosine``.

        The emitted flag is 0 only when both expansions are non-empty and
        their cosine is not positive; in every other case it is 1.
        ``score`` is part of the signature but is not read.

        Returns:
            ``'>arg_relsyn\\n<id> <type> <flag>'`` as a single string.
        """
        synonyms = WNTools()
        cleaner = RTETools()

        def _content_lemmas(raw):
            # Same call sequence as before: load tokens, drop punctuation,
            # keep whatever quit_sw() returns.
            cleaner.set_tokens(raw.split())
            cleaner.quit_punct()
            return cleaner.quit_sw()

        content_t = _content_lemmas(lemmas_t)
        content_h = _content_lemmas(lemmas_h)

        rest_ht, rest_th = self.diff(content_t, content_h)
        expanded_ht = synonyms.expand_bow_syns(list(rest_ht))
        expanded_th = synonyms.expand_bow_syns(list(rest_th))

        both_present = len(expanded_ht) != 0 and len(expanded_th) != 0
        if both_present:
            overlap = SetMetrics(expanded_ht, expanded_th).cosine()
            flag = 1 if overlap > 0 else 0
        else:
            flag = 1

        return '>arg_relsyn\n%s %s %s' % (id, type, flag)
Ejemplo n.º 3
0
def extractFeatures(options):
    """Write lemma, POS, cosine and entailment records for every pickled pair.

    Args:
        options: object exposing ``pickle_file`` (path of the pickled pair
            collection to read) and ``output_file`` (path of the text file
            to write).

    For each pair the output contains, in order: a ``>>`` separator, one
    ``>word_t`` / ``>word_h`` record per filtered text / hypothesis lemma,
    one ``>pos_t`` / ``>pos_h`` record per POS entry, a ``>cosine`` record
    and an ``>entailment`` record followed by a blank line.  Lemmas are
    filtered with ``RTETools.quit_sw`` and ``RTETools.quit_punct``
    (presumably stop-word and punctuation removal -- confirm).

    Fixes over the previous version: the output file is now always closed,
    the pickle is read in binary mode, and the pickle handle is released
    before processing starts.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use pickle
    # files from a trusted source.
    with open(options.pickle_file, 'rb') as pf:
        pairs = pickle.load(pf)

    metrics = SetMetrics()
    tool = RTETools()

    # The context manager flushes and closes the output even if a pair
    # raises part-way through.
    with open(options.output_file, 'w') as out:
        for pair in pairs:
            pair_id = pair.get_id()
            value = pair.get_value()
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')

            tool.set_tokens(lemmas_text)
            tool.quit_sw()
            lemmas_text = tool.quit_punct()

            pos_text = pair.get_feature_text('pos')
            pos_hypo = pair.get_feature_hypo('pos')

            tool.set_tokens(lemmas_hypo)
            tool.quit_sw()
            lemmas_hypo = tool.quit_punct()

            metrics.set_text(lemmas_text)
            metrics.set_hypo(lemmas_hypo)
            cos = metrics.cosine()

            out.write('>>\n')
            for i, lemma in enumerate(lemmas_text):
                out.write('>word_t\n')
                out.write('%s %s "%s"\n' % (pair_id, i, lemma))
            for i, lemma in enumerate(lemmas_hypo):
                out.write('>word_h\n')
                out.write('%s %s "%s"\n' % (pair_id, i, lemma))
            # POS entries are 2-tuples; only the second element is written.
            for i, (_, tag) in enumerate(pos_text):
                out.write('>pos_t\n')
                out.write('%s %s "%s"\n' % (pair_id, i, tag))
            for i, (_, tag) in enumerate(pos_hypo):
                out.write('>pos_h\n')
                out.write('%s %s "%s"\n' % (pair_id, i, tag))
            out.write('>cosine\n')
            out.write('%s %s\n' % (pair_id, cos))
            out.write('>entailment\n')
            # Trailing blank line separates consecutive pairs.
            out.write('%s "%s"\n\n' % (pair_id, value))
Ejemplo n.º 4
0
def extractFeatures(options):
    """Export per-pair features from a pickle file to a tagged text file.

    Args:
        options: object with a ``pickle_file`` attribute (input path) and an
            ``output_file`` attribute (output path).

    Every pair produces a ``>>`` line, then ``>word_t``, ``>word_h``,
    ``>pos_t`` and ``>pos_h`` records (one per item, indexed from 0), then
    ``>cosine`` and ``>entailment`` records and a blank line.  Lemma lists
    go through ``RTETools.quit_sw`` and ``RTETools.quit_punct`` first
    (presumably stop-word and punctuation filtering -- confirm).

    Compared with the earlier version, both files are handled by context
    managers (the output handle used to leak) and the pickle is opened in
    binary mode, as the pickle module expects.
    """
    # NOTE(review): unpickling untrusted data is unsafe; make sure the
    # pickle file comes from a trusted producer.
    with open(options.pickle_file, 'rb') as pf:
        pairs = pickle.load(pf)

    metrics = SetMetrics()
    tool = RTETools()

    with open(options.output_file, 'w') as o:

        def emit(tag, items):
            # One "<tag>" line plus one "<id> <index> "<item>"" line per item.
            for index, item in enumerate(items):
                o.write('%s\n' % tag)
                o.write('%s %s "%s"\n' % (pair_id, index, item))

        for pair in pairs:
            pair_id = pair.get_id()
            value = pair.get_value()
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')

            tool.set_tokens(lemmas_text)
            tool.quit_sw()
            lemmas_text = tool.quit_punct()

            pos_text = pair.get_feature_text('pos')
            pos_hypo = pair.get_feature_hypo('pos')

            tool.set_tokens(lemmas_hypo)
            tool.quit_sw()
            lemmas_hypo = tool.quit_punct()

            metrics.set_text(lemmas_text)
            metrics.set_hypo(lemmas_hypo)
            cos = metrics.cosine()

            o.write('>>\n')
            emit('>word_t', lemmas_text)
            emit('>word_h', lemmas_hypo)
            # POS entries are 2-tuples; only the second element is written.
            emit('>pos_t', [tag for _, tag in pos_text])
            emit('>pos_h', [tag for _, tag in pos_hypo])
            o.write('>cosine\n')
            o.write('%s %s\n' % (pair_id, cos))
            o.write('>entailment\n')
            # The extra newline leaves a blank line between pairs.
            o.write('%s "%s"\n\n' % (pair_id, value))
Ejemplo n.º 5
0
 def args_direct(self, lemmas_t, pos_t, lemmas_h, pos_h, score, id, type):
     """Placeholder for a direct argument comparison; currently always returns 0.

     The lemma lists are filtered with ``RTETools.quit_punct`` and
     ``RTETools.quit_sw`` and then handed to ``self.diff`` and ``self.sim``,
     but none of those results influence the return value yet.  ``pos_t``,
     ``pos_h``, ``score``, ``id`` and ``type`` are not read.

     Returns:
         The integer 0.
     """
     outcome = 0
     noun_tools = NounTools()  # instantiated but not used yet
     token_filter = RTETools()

     filtered = []
     for tokens in (lemmas_t, lemmas_h):
         # Text first, then hypothesis, on the same filter instance.
         token_filter.set_tokens(tokens)
         token_filter.quit_punct()
         filtered.append(token_filter.quit_sw())
     content_t, content_h = filtered

     # Results are computed but not consumed yet.
     only_ht, only_th = self.diff(content_t, content_h)
     shared = self.sim(content_t, content_h)
     return outcome
Ejemplo n.º 6
0
 def args_direct(self, lemmas_t, pos_t, lemmas_h, pos_h, score, id, type):
     """Stub for a direct text/hypothesis argument check; the result is always 0.

     Both lemma lists pass through ``RTETools.quit_punct`` and
     ``RTETools.quit_sw``; ``self.diff`` and ``self.sim`` are then called on
     the filtered lists, but their results are not used for the return
     value.  The remaining parameters are accepted and ignored.

     Returns:
         The integer 0.
     """
     nouns = NounTools()  # created but unused so far
     cleaner = RTETools()

     cleaner.set_tokens(lemmas_t)
     cleaner.quit_punct()
     text_content = cleaner.quit_sw()

     cleaner.set_tokens(lemmas_h)
     cleaner.quit_punct()
     hypo_content = cleaner.quit_sw()

     # Both calls are kept even though nothing reads their results yet.
     differences = self.diff(text_content, hypo_content)
     (rest_ht, rest_th) = differences
     overlap_th = self.sim(text_content, hypo_content)

     return 0
Ejemplo n.º 7
0
 def args_hyp_wn(self, lemmas_t, pos_t, lemmas_h, pos_h, score, id, type):
     """Build the ``>hyp_relsyn`` record for one argument pair.

     The lemma lists are filtered with ``RTETools.quit_punct`` and
     ``RTETools.quit_sw`` and split by ``self.diff``.  The comparison itself
     is still a TODO, so the flag is always 0.  ``pos_t``, ``pos_h`` and
     ``score`` are not read.

     Returns:
         ``'>hyp_relsyn\\n<id> <type> <flag>'`` as a single string.
     """
     result = 0
     wn = WNTools()
     tool = RTETools()
     tool.set_tokens(lemmas_t)
     tool.quit_punct()
     lemmas_t = tool.quit_sw()
     tool.set_tokens(lemmas_h)
     tool.quit_punct()
     lemmas_h = tool.quit_sw()
     (diff_ht, diff_th) = self.diff(lemmas_t, lemmas_h)
     # TODO: derive `result` from the diffs (presumably via WordNet; `wn`
     # is unused until then).
     # Three values need three placeholders: the old two-placeholder
     # template raised TypeError on every call.  The layout now matches
     # the '>arg_relsyn' record.
     predicate = '>hyp_relsyn\n%s %s %s' % (id, type, result)
     return predicate
Ejemplo n.º 8
0
 def args_hyp_wn(self, lemmas_t, pos_t, lemmas_h, pos_h, score, id, type):
     """Build the ``HypRelSyn`` predicate for one argument pair.

     The lemma lists are filtered with ``RTETools.quit_punct`` and
     ``RTETools.quit_sw`` and split by ``self.diff``.  The comparison itself
     is still a TODO, so the flag is always 0.  ``pos_t``, ``pos_h`` and
     ``score`` are not read.

     Returns:
         The string ``'HypRelSyn(<id>, <type>, <flag>)'``.
     """
     result = 0
     wn = WNTools()
     tool = RTETools()
     tool.set_tokens(lemmas_t)
     tool.quit_punct()
     lemmas_t = tool.quit_sw()
     tool.set_tokens(lemmas_h)
     tool.quit_punct()
     lemmas_h = tool.quit_sw()
     (diff_ht, diff_th) = self.diff(lemmas_t, lemmas_h)
     # TODO: derive `result` from the diffs (presumably via WordNet; `wn`
     # is unused until then).
     # The template previously had two placeholders for three values and
     # raised TypeError on every call; it now mirrors
     # 'ArgRelSyn(%s, %s, %s)'.
     predicate = 'HypRelSyn(%s, %s, %s)' % (id, type, result)
     return predicate
Ejemplo n.º 9
0
    def baseline(self, id_bo):
        """Collect baseline predicates for the pair stored under ``id_bo``.

        ``self.baseline_predicates`` is reset to an empty list on every call.
        When ``id_bo`` is a key of ``self.pairs``, the pair's text and
        hypothesis lemmas are filtered with ``RTETools.quit_sw`` and
        ``RTETools.quit_punct``, and for every (text lemma, hypothesis lemma)
        combination two strings are appended:

        * ``Combo(<id>, "<text>|||<hypo>")`` built from the cleaned lemmas;
        * ``Direct(<id>, <value>)`` where the value comes from
          ``NounTools(lemma_t, lemma_h).direct()``.

        A ``ComboLin`` string is also computed from the Lin neighbours of
        both lemmas, but it is not added to the result.

        Returns:
            ``self.baseline_predicates`` (empty when ``id_bo`` is unknown).
        """
        self.baseline_predicates = []
        lin = Lin()
        tools = RTETools()

        if id_bo not in self.pairs:
            return self.baseline_predicates

        pair = self.pairs[id_bo]
        value = pair.get_value()  # fetched but not used below

        tools.set_tokens(pair.get_feature_text('lemmas'))
        tools.quit_sw()
        text_lemmas = tools.quit_punct()

        tools.set_tokens(pair.get_feature_hypo('lemmas'))
        tools.quit_sw()
        hypo_lemmas = tools.quit_punct()

        for lemma_t in text_lemmas:
            for lemma_h in hypo_lemmas:
                combo_pred = 'Combo(%s, "%s|||%s")' % (
                    id_bo, self.clean_str(lemma_t), self.clean_str(lemma_h))

                # Keep only the first element of each of the 10 neighbours.
                neighbours_t = lin.n_similar_words(lemma_t, 10)
                neighbours_h = lin.n_similar_words(lemma_h, 10)
                words_t = [word for word, _ in neighbours_t]
                words_h = [word for word, _ in neighbours_h]

                # Computed but not emitted: the ComboLin append is disabled.
                lin_cosine = SetMetrics(words_t, words_h).cosine()
                lin_pred = 'ComboLin(%s, %s)' % (id_bo, lin_cosine)

                direct_value = NounTools(lemma_t, lemma_h).direct()
                direct_pred = 'Direct(%s, %s)' % (id_bo, direct_value)

                self.baseline_predicates.append(combo_pred)
                self.baseline_predicates.append(direct_pred)

        return self.baseline_predicates
Ejemplo n.º 10
0
    def arg_proc(self, id, point, sep):
        """Produce the per-argument ``>tag`` records for every verb in ``point``.

        ``self.a_predicates`` is reset on every call.  For each verb under
        ``point['verbs']`` that has an ``'ARG'`` entry, and for each argument
        in it, nine strings are appended in this order: ``>token_arg``,
        ``>sim_arg``, ``>lemma_arg``, the result of ``self.args_syn_wn``,
        ``>cont_arg``, ``>diff_arg``, ``>pos_arg``, ``>lin_arg`` and
        ``>wn_arg``.  Paired text/hypothesis values are joined with ``sep``.

        Args:
            id: identifier written at the start of every record.
            point: mapping that may hold a ``'verbs'`` mapping.
            sep: separator placed between the text and hypothesis parts.

        Returns:
            ``self.a_predicates`` (empty when ``point`` has no ``'verbs'``).
        """
        self.a_predicates = []
        if 'verbs' not in point:
            return self.a_predicates

        for _, verb in point['verbs'].iteritems():
            # The unpacking is kept although the values are unused, so a
            # malformed 'tokens' entry still fails here as before.
            (_tok_t, _tok_h) = verb['tokens']
            if 'ARG' not in verb:
                continue

            for arg_type, arg in verb['ARG'].items():
                word_t = arg['wordform-t']
                word_h = arg['wordform-h']
                lem_t = arg['lemma-t']
                lem_h = arg['lemma-h']
                tag_t = arg['pos-t']
                tag_h = arg['pos-h']
                # Chunk and NE fields are looked up but not used; the
                # lookups are kept so their absence is still an error.
                _chunk_t = arg['chunk-t']
                _chunk_h = arg['chunk-h']
                _ne_t = arg['ne-t']
                _ne_h = arg['ne-h']
                score = arg['score']

                token_arg = '>token_arg\n%s %s "%s%s%s"' % (
                    id, arg_type, self.clean_str(word_t), sep, self.clean_str(word_h))
                lemma_arg = '>lemma_arg\n%s %s "%s%s%s"' % (
                    id, arg_type, self.clean_str(lem_t), sep, self.clean_str(lem_h))
                pos_arg = '>pos_arg\n%s %s "%s%s%s"' % (
                    id, arg_type, self.clean_str(tag_t), sep, self.clean_str(tag_h))

                # Filtered lemma lists for each side.
                sw_tool = RTETools()
                sw_tool.set_tokens(lem_t.split())
                sw_tool.quit_sw()
                content_t = sw_tool.quit_punct()
                sw_tool.set_tokens(lem_h.split())
                sw_tool.quit_sw()
                content_h = sw_tool.quit_punct()

                content_arg = '>cont_arg\n%s %s "%s%s%s"' % (
                    id, arg_type, self.clean_str(' '.join(content_t)),
                    sep, self.clean_str(' '.join(content_h)))

                sim_diff = self.sim_dif(content_t, content_h)
                diff_arg = '>diff_arg\n%s %s "%s"' % (
                    id, arg_type, self.clean_str(' '.join(sim_diff)))

                # TODO find threshold from data statistics mf
                score_template = ('>sim_arg\n%s %s 1' if score >= 0.4
                                  else '>sim_arg\n%s %s 0')
                score_arg = score_template % (id, arg_type)

                lin = Lin()
                lin_t = lin.expand_bow(content_t)
                lin_h = lin.expand_bow(content_h)
                lin_arg = '>lin_arg\n%s %s "%s%s%s"' % (
                    id, arg_type, self.clean_str(' '.join(lin_t)),
                    sep, self.clean_str(' '.join(lin_h)))

                # The WordNet expansion works on (lemma, pos) pairs of the
                # unfiltered lemma strings.
                wn = WNTools()
                wn_t = wn.expand_bow_tree(zip(lem_t.split(), tag_t.split()))
                wn_h = wn.expand_bow_tree(zip(lem_h.split(), tag_h.split()))
                wn_arg = '>wn_arg\n%s %s "%s%s%s"' % (
                    id, arg_type, self.clean_str(' '.join(wn_t)),
                    sep, self.clean_str(' '.join(wn_h)))

                rel_syn_args = self.args_syn_wn(lem_t, lem_h, score, id, arg_type)

                self.a_predicates.extend((
                    token_arg,
                    score_arg,
                    lemma_arg,
                    rel_syn_args,
                    content_arg,
                    diff_arg,
                    pos_arg,
                    lin_arg,
                    wn_arg,
                ))
        return self.a_predicates
Ejemplo n.º 11
0
    def arg_proc(self, id, point, sep):
        """Produce the per-argument logic-style predicates for every verb in ``point``.

        ``self.a_predicates`` is reset on every call.  For each verb under
        ``point['verbs']`` that has an ``'ARG'`` entry, and for each argument
        in it, nine strings are appended in this order: ``TokenArg``,
        ``SimArg``, ``LemmaArg``, the result of ``self.args_syn_wn``,
        ``ContArg``, ``DiffArg``, ``PosArg``, ``LinArg`` and ``WnArg``.
        Paired text/hypothesis values are written as ``"<text> <sep> <hypo>"``.

        Args:
            id: identifier used as the first predicate argument.
            point: mapping that may hold a ``'verbs'`` mapping.
            sep: separator placed between the text and hypothesis parts.

        Returns:
            ``self.a_predicates`` (empty when ``point`` has no ``'verbs'``).
        """
        self.a_predicates = []
        if 'verbs' not in point:
            return self.a_predicates

        def _filtered(tool, lemma_string):
            # Same sequence as the inline code: load, quit_sw, keep quit_punct.
            tool.set_tokens(lemma_string.split())
            tool.quit_sw()
            return tool.quit_punct()

        for _, verb in point['verbs'].iteritems():
            # Still unpacked (though unused) so a malformed 'tokens' entry
            # raises exactly where it used to.
            (_verb_t, _verb_h) = verb['tokens']
            if 'ARG' not in verb:
                continue

            for role, arg in verb['ARG'].items():
                form_t = arg['wordform-t']
                form_h = arg['wordform-h']
                lemma_t = arg['lemma-t']
                lemma_h = arg['lemma-h']
                pos_t = arg['pos-t']
                pos_h = arg['pos-h']
                # Looked up but unused; kept so a missing key is still an error.
                _chunks = (arg['chunk-t'], arg['chunk-h'])
                _entities = (arg['ne-t'], arg['ne-h'])
                score = arg['score']

                token_arg = 'TokenArg(%s, %s, "%s %s %s")' % (
                    id, role, self.clean_str(form_t), sep, self.clean_str(form_h))
                lemma_arg = 'LemmaArg(%s, %s,"%s %s %s")' % (
                    id, role, self.clean_str(lemma_t), sep, self.clean_str(lemma_h))
                pos_arg = 'PosArg(%s, %s, "%s %s %s")' % (
                    id, role, self.clean_str(pos_t), sep, self.clean_str(pos_h))

                sw_tool = RTETools()
                kept_t = _filtered(sw_tool, lemma_t)
                kept_h = _filtered(sw_tool, lemma_h)

                content_arg = 'ContArg(%s, %s, "%s %s %s")' % (
                    id, role, self.clean_str(' '.join(kept_t)),
                    sep, self.clean_str(' '.join(kept_h)))

                sim_diff = self.sim_dif(kept_t, kept_h)
                diff_arg = 'DiffArg(%s, %s, "%s")' % (
                    id, role, self.clean_str(' '.join(sim_diff)))

                # TODO find threshold from data statistics mf
                if score < 0.4:
                    score_arg = 'SimArg(%s, %s, 0)' % (id, role)
                else:
                    score_arg = 'SimArg(%s, %s, 1)' % (id, role)

                lin = Lin()
                expanded_t = lin.expand_bow(kept_t)
                expanded_h = lin.expand_bow(kept_h)
                lin_arg = 'LinArg(%s, %s, "%s %s %s")' % (
                    id, role, self.clean_str(' '.join(expanded_t)),
                    sep, self.clean_str(' '.join(expanded_h)))

                # The WordNet expansion takes (lemma, pos) pairs built from
                # the unfiltered strings.
                wn = WNTools()
                pairs_t = zip(lemma_t.split(), pos_t.split())
                pairs_h = zip(lemma_h.split(), pos_h.split())
                tree_t = wn.expand_bow_tree(pairs_t)
                tree_h = wn.expand_bow_tree(pairs_h)
                wn_arg = 'WnArg(%s, %s, "%s %s %s")' % (
                    id, role, self.clean_str(' '.join(tree_t)),
                    sep, self.clean_str(' '.join(tree_h)))

                rel_syn_args = self.args_syn_wn(lemma_t, lemma_h, score, id, role)

                for predicate in (token_arg, score_arg, lemma_arg,
                                  rel_syn_args, content_arg, diff_arg,
                                  pos_arg, lin_arg, wn_arg):
                    self.a_predicates.append(predicate)
        return self.a_predicates