Esempio n. 1
0
    def args_syn_wn(self, lemmas_t, lemmas_h, score, id, type):
        """Build the ``ArgRelSyn`` predicate for a text/hypothesis argument pair.

        Both whitespace-separated lemma strings are tokenised and passed
        through ``RTETools`` (``quit_punct`` then ``quit_sw``).  The two
        differences returned by ``self.diff`` are expanded with
        ``WNTools.expand_bow_syns`` and compared with ``SetMetrics.cosine``.

        The flag in the predicate is 1 when either expanded difference is
        empty or when the cosine is greater than zero, and 0 otherwise.

        Args:
            lemmas_t: whitespace-separated lemmas of the text argument.
            lemmas_h: whitespace-separated lemmas of the hypothesis argument.
            score: accepted for interface compatibility; not used here.
            id: first value interpolated into the predicate.
            type: second value interpolated into the predicate.

        Returns:
            The string ``'ArgRelSyn(<id>, <type>, <flag>)'``.
        """
        wordnet = WNTools()
        cleaner = RTETools()

        cleaner.set_tokens(lemmas_t.split())
        cleaner.quit_punct()
        text_lemmas = cleaner.quit_sw()

        cleaner.set_tokens(lemmas_h.split())
        cleaner.quit_punct()
        hypo_lemmas = cleaner.quit_sw()

        diff_ht, diff_th = self.diff(text_lemmas, hypo_lemmas)
        syns_ht = wordnet.expand_bow_syns(list(diff_ht))
        syns_th = wordnet.expand_bow_syns(list(diff_th))

        if len(syns_ht) == 0 or len(syns_th) == 0:
            # Nothing left to compare on at least one side.
            flag = 1
        elif SetMetrics(syns_ht, syns_th).cosine() > 0:
            flag = 1
        else:
            flag = 0

        return 'ArgRelSyn(%s, %s, %s)' % (id, type, flag)
Esempio n. 2
0
    def args_syn_wn(self, lemmas_t, lemmas_h, score, id, type):
        """Emit the ``>arg_relsyn`` record for a text/hypothesis argument pair.

        Each lemma string is split on whitespace and filtered with
        ``RTETools`` (``quit_punct`` followed by ``quit_sw``).  The pair of
        differences from ``self.diff`` is expanded through
        ``WNTools.expand_bow_syns``; the flag is 1 if either expansion is
        empty or their ``SetMetrics`` cosine is positive, else 0.

        Args:
            lemmas_t: whitespace-separated lemmas of the text argument.
            lemmas_h: whitespace-separated lemmas of the hypothesis argument.
            score: unused; kept so existing callers keep working.
            id: first field of the record line.
            type: second field of the record line.

        Returns:
            A two-line string: ``'>arg_relsyn'`` followed by
            ``'<id> <type> <flag>'``.
        """
        wordnet = WNTools()
        cleaner = RTETools()

        filtered = []
        for raw in (lemmas_t, lemmas_h):
            # Same filtering pipeline for both sides, text first.
            cleaner.set_tokens(raw.split())
            cleaner.quit_punct()
            filtered.append(cleaner.quit_sw())

        diff_ht, diff_th = self.diff(filtered[0], filtered[1])
        syns_ht = wordnet.expand_bow_syns(list(diff_ht))
        syns_th = wordnet.expand_bow_syns(list(diff_th))

        flag = 1
        if len(syns_ht) != 0 and len(syns_th) != 0:
            # Only a non-positive cosine between two non-empty bags yields 0.
            if not SetMetrics(syns_ht, syns_th).cosine() > 0:
                flag = 0

        return '>arg_relsyn\n%s %s %s' % (id, type, flag)
Esempio n. 3
0
def extractFeatures(options):
    """Dump lemma, POS, cosine and entailment records for every pair.

    Loads the pickled pairs from ``options.pickle_file`` and writes one
    group of records per pair to ``options.output_file``.  Each group starts
    with a ``>>`` line and is followed by ``>word_t`` / ``>word_h`` /
    ``>pos_t`` / ``>pos_h`` records (one per item, with a running index),
    a ``>cosine`` record and an ``>entailment`` record, then a blank line.

    Lemmas are filtered with ``RTETools`` (``quit_sw`` then ``quit_punct``)
    before being written and before the cosine is computed.

    Args:
        options: object exposing ``pickle_file`` and ``output_file`` paths.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(options.pickle_file, 'r') as pf:
        pairs = pickle.load(pf)

    metrics = SetMetrics()
    tool = RTETools()

    # The context manager guarantees the output is flushed and closed,
    # including when a pair raises midway through the loop.
    with open(options.output_file, 'w') as o:
        for pair in pairs:
            pair_id = pair.get_id()
            value = pair.get_value()
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')

            tool.set_tokens(lemmas_text)
            tool.quit_sw()
            lemmas_text = tool.quit_punct()

            pos_text = pair.get_feature_text('pos')
            pos_hypo = pair.get_feature_hypo('pos')

            tool.set_tokens(lemmas_hypo)
            tool.quit_sw()
            lemmas_hypo = tool.quit_punct()

            metrics.set_text(lemmas_text)
            metrics.set_hypo(lemmas_hypo)
            cos = metrics.cosine()

            o.write('>>\n')
            for i, lemma in enumerate(lemmas_text):
                o.write('>word_t\n')
                o.write('%s %s "%s"\n' % (pair_id, i, lemma))
            for i, lemma in enumerate(lemmas_hypo):
                o.write('>word_h\n')
                o.write('%s %s "%s"\n' % (pair_id, i, lemma))
            # POS entries are 2-item sequences; only the second item is written.
            for i, (_, tag) in enumerate(pos_text):
                o.write('>pos_t\n')
                o.write('%s %s "%s"\n' % (pair_id, i, tag))
            for i, (_, tag) in enumerate(pos_hypo):
                o.write('>pos_h\n')
                o.write('%s %s "%s"\n' % (pair_id, i, tag))
            o.write('>cosine\n')
            o.write('%s %s\n' % (pair_id, cos))
            o.write('>entailment\n')
            # Trailing blank line separates consecutive pairs.
            o.write('%s "%s"\n\n' % (pair_id, value))
Esempio n. 4
0
def extractFeatures(options):
    """Write word, POS, cosine and entailment records for each pickled pair.

    Reads the pair list from ``options.pickle_file`` and produces
    ``options.output_file``.  For every pair the output contains a ``>>``
    separator, indexed ``>word_t`` / ``>word_h`` / ``>pos_t`` / ``>pos_h``
    records, a ``>cosine`` record and an ``>entailment`` record followed by
    an empty line.

    Lemma lists go through ``RTETools.quit_sw`` and ``RTETools.quit_punct``
    before they are written or compared.

    Args:
        options: object exposing ``pickle_file`` and ``output_file`` paths.
    """
    # NOTE: unpickling is unsafe on untrusted input; keep this to local data.
    with open(options.pickle_file, 'r') as pf:
        pairs = pickle.load(pf)

    metrics = SetMetrics()
    tool = RTETools()

    # Opened via ``with`` so the handle is always closed (it used to leak).
    with open(options.output_file, 'w') as o:

        def emit(tag, pair_id, values):
            # One header line plus one indexed record line per value.
            for index, item in enumerate(values):
                o.write('>%s\n' % tag)
                o.write('%s %s "%s"\n' % (pair_id, index, item))

        for pair in pairs:
            pair_id = pair.get_id()
            value = pair.get_value()
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')

            tool.set_tokens(lemmas_text)
            tool.quit_sw()
            lemmas_text = tool.quit_punct()

            pos_text = pair.get_feature_text('pos')
            pos_hypo = pair.get_feature_hypo('pos')

            tool.set_tokens(lemmas_hypo)
            tool.quit_sw()
            lemmas_hypo = tool.quit_punct()

            metrics.set_text(lemmas_text)
            metrics.set_hypo(lemmas_hypo)
            cos = metrics.cosine()

            o.write('>>\n')
            emit('word_t', pair_id, lemmas_text)
            emit('word_h', pair_id, lemmas_hypo)
            # POS entries are 2-item sequences; only the second item is written.
            emit('pos_t', pair_id, (tag for _, tag in pos_text))
            emit('pos_h', pair_id, (tag for _, tag in pos_hypo))
            o.write('>cosine\n')
            o.write('%s %s\n' % (pair_id, cos))
            o.write('>entailment\n')
            # Extra newline leaves a blank line between pairs.
            o.write('%s "%s"\n\n' % (pair_id, value))
Esempio n. 5
0
    def baseline(self, id_bo):
        """Collect ``Combo`` and ``Direct`` predicates for one pair.

        ``self.baseline_predicates`` is reset on every call.  When ``id_bo``
        is a key of ``self.pairs``, the pair's text and hypothesis lemmas are
        filtered with ``RTETools`` (``quit_sw`` then ``quit_punct``) and, for
        every (text lemma, hypothesis lemma) combination, two predicates are
        appended: ``Combo(id, "t|||h")`` and ``Direct(id, value)`` where the
        value comes from ``NounTools(t, h).direct()``.

        A ``ComboLin`` predicate is also computed from the Lin neighbours of
        both lemmas but is intentionally not added to the result.

        Args:
            id_bo: key of the pair inside ``self.pairs``.

        Returns:
            ``self.baseline_predicates`` (empty if ``id_bo`` is unknown).
        """
        self.baseline_predicates = []
        lin = Lin()
        tools = RTETools()

        if id_bo not in self.pairs:
            return self.baseline_predicates

        pair = self.pairs[id_bo]
        value = pair.get_value()  # fetched as before; not used below

        tools.set_tokens(pair.get_feature_text('lemmas'))
        tools.quit_sw()
        lemmas_text = tools.quit_punct()

        tools.set_tokens(pair.get_feature_hypo('lemmas'))
        tools.quit_sw()
        lemmas_hypo = tools.quit_punct()

        for lemma_t in lemmas_text:
            for lemma_h in lemmas_hypo:
                combo_pred = 'Combo(%s, "%s|||%s")' % (
                    id_bo, self.clean_str(lemma_t), self.clean_str(lemma_h))

                similar_t = lin.n_similar_words(lemma_t, 10)
                similar_h = lin.n_similar_words(lemma_h, 10)
                # Keep only the words of the (word, score) entries.
                words_t = [word for word, _ in similar_t]
                words_h = [word for word, _ in similar_h]

                cos = SetMetrics(words_t, words_h).cosine()
                # Built for parity with the disabled output; never emitted.
                lin_pred = 'ComboLin(%s, %s)' % (id_bo, cos)

                direct_pred = 'Direct(%s, %s)' % (
                    id_bo, NounTools(lemma_t, lemma_h).direct())

                self.baseline_predicates.extend([combo_pred, direct_pred])

        return self.baseline_predicates
Esempio n. 6
0
def main(args):
    pickle_file = args[0]
    print 'loading file:',pickle_file
    with open(pickle_file, 'r') as pf:
        pairs = pickle.load(pf)
        k = 0        
        for pair in pairs:
            print 'id:', pair.get_id()
            print 's1:', pair.get_text()
            print 's2:', pair.get_hypo()
            print 'features:', pair.get_features_text_type()
            print 'set-metrics, cos test'
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')
            set_th = SetMetrics(lemmas_text, lemmas_hypo)
            cos = set_th.cosine()
            #print cos
            print 'SRL tools'
            frames_text = pair.get_feature_text('frames')
            print frames_text
            print '################'
            srl = SRLTools(lemmas_text, frames_text)
            word_to_frame = srl.get_words_frame()
            print word_to_frame
            print '################'
            print srl.get_verbs()
            print '################'
            
            #print 'verb-metrics, '
            pos_text = pair.get_feature_text('pos')
            pos_hypo = pair.get_feature_hypo('pos')
            verbs = VerbMetrics()
            lin = Lin()
            vectors = VectorMetrics()
            hyper = WNTools()
            for i, pos_tuple_t in enumerate(pos_text):
                (token, pos_t) = pos_tuple_t
                if pos_t.startswith('V'):
                    for j, pos_tuple_h in enumerate(pos_hypo):
                        (token, pos_h) = pos_tuple_h
                        if pos_h.startswith('V'):                            
                            verbs.set_text_verb(lemmas_text[i])
                            verbs.set_hypo_verb(lemmas_hypo[j])
                            #print 'verbs test t:%s h:%s'%(lemmas_text[i], lemmas_hypo[j])
                            vn_isec = verbs.vn_isec()
                            #print 'verb net isec: %d'%vn_isec
                            #print 'lin(%s):'%lemmas_text[i], '\n', lin.n_similar_words(lemmas_text[i])
                            #print 'lin(%s):'%lemmas_hypo[j], '\n', lin.n_similar_words(lemmas_hypo[j])
                            t_sim = lin.n_similar_words(lemmas_text[i])
                            h_sim = lin.n_similar_words(lemmas_hypo[j])
                            t_score = [float(score) for word,score in t_sim]
                            h_score = [float(score) for word,score in h_sim]
                            vectors.set_vectors(t_score, h_score)
                            #print 'cos_vect: ', vectors.cosine()
                        elif pos_h.startswith('N'):
                            #print 'wn test hypernyms'
                            trees = hyper.get_mfs_hypernyms((lemmas_hypo[j], pos_h))
                            #print trees


            k += 1
            if k >= 10:
                break
        pf.close
    return
Esempio n. 7
0
class TineVN:
    """TINE-style score built from verb similarity and argument overlap.

    For every (text verb, hypothesis verb) combination whose similarity is
    exactly 1, the matching-tag arguments are expanded (``Lin`` or ``WN``)
    and compared with a cosine; the per-verb results are summed and divided
    by the number of text verbs.
    """

    def __init__(self, frames_text=None, tokens_text=None, frames_hypo=None,
                 tokens_hypo=None, sim_type='Lin', verbose=1):
        """Create the helper objects and store the initial inputs.

        Args:
            frames_text: frames of the text (defaults to a fresh ``{}``).
            tokens_text: tokens of the text (defaults to a fresh ``[]``).
            frames_hypo: frames of the hypothesis (defaults to a fresh ``{}``).
            tokens_hypo: tokens of the hypothesis (defaults to a fresh ``[]``).
            sim_type: argument expansion, ``'Lin'`` or ``'WN'``.
            verbose: when 1, trace messages are written to stderr.
        """
        self.srl_t = SRLTools()
        self.srl_h = SRLTools()
        self.verb_net = VerbMetrics()
        self.arg_sim = SetMetrics()
        self.lin = Lin()
        self.wn = WNTools()
        # ``None`` defaults avoid sharing one mutable object across instances.
        # Text and hypothesis frames used to be stored swapped here.
        self.frames_text = frames_text if frames_text is not None else {}
        self.frames_hypo = frames_hypo if frames_hypo is not None else {}
        self.tokens_text = tokens_text if tokens_text is not None else []
        self.tokens_hypo = tokens_hypo if tokens_hypo is not None else []
        self.args_text = {}
        self.args_hypo = {}
        self.verbs_text = []
        self.verbs_hypo = []
        self.tine_score = 0
        self.verb_score = 0
        self.arg_score = 0
        self.sim_type = sim_type
        self.verbose = verbose

    def get_tine_score(self, frames_text=None, tokens_text=None,
                       frames_hypo=None, tokens_hypo=None):
        """Compute and return the TINE score.

        Any non-empty argument replaces the corresponding stored value; empty
        or omitted arguments leave the previously stored value in place.

        Returns:
            The summed argument scores of all similar verb pairs divided by
            the number of text verbs, or ``0.0`` when the text has no verbs.
        """
        if frames_text:
            self.frames_text = frames_text
        if frames_hypo:
            self.frames_hypo = frames_hypo
        if tokens_text:
            self.tokens_text = tokens_text
        if tokens_hypo:
            self.tokens_hypo = tokens_hypo

        self.srl_t.set_frames(self.frames_text)
        self.srl_h.set_frames(self.frames_hypo)

        self.srl_t.set_tokens(self.tokens_text)
        self.srl_h.set_tokens(self.tokens_hypo)

        self.args_text = self.srl_t.get_words_frame()
        self.args_hypo = self.srl_h.get_words_frame()
        sum_verb = 0
        # NOTE(review): this local was previously named ``num_verbs_h`` while
        # counting *text* verbs; the normaliser is kept as-is - confirm which
        # side is intended.
        num_verbs = len(self.args_text)
        self.__p_stderr('TINE VerbNet\n')
        self.__p_stderr('T: %s \n H: %s\n'%(self.args_text, self.args_hypo))
        self.__p_stderr('T: %s \n H: %s\n'%(self.args_text.keys(), self.args_hypo.keys()))

        for verb_t, args_t in self.args_text.items():
            for verb_h, args_h in self.args_hypo.items():
                if self.__simVerbs(verb_t, verb_h) == 1:
                    self.__p_stderr('verbs(%s, %s)\n'%(verb_t, verb_h))
                    sum_verb += self.__simArgs(args_t, args_h)

        if num_verbs == 0:
            # No text verbs: score 0 instead of raising ZeroDivisionError,
            # mirroring the empty-argument guard in __simArgs.
            self.tine_score = 0.0
        else:
            self.tine_score = float(sum_verb) / num_verbs
        self.__p_stderr('score:%s\n'%(self.tine_score))
        return self.tine_score

    def __simVerbs(self, verb_t='', verb_h=''):
        """Return 1 for identical verbs, else the ``VerbMetrics`` verdict.

        ``vn_isec()`` is tried first; when it yields 0 the result of ``vo()``
        is returned instead.
        """
        if verb_t == verb_h:
            return 1
        self.verb_net.set_text_verb(verb_t)
        self.verb_net.set_hypo_verb(verb_h)
        isec = self.verb_net.vn_isec()
        if isec == 0:
            return self.verb_net.vo()
        return isec

    def __simArgs(self, args_t=(), args_h=()):
        """Average cosine of same-tag arguments over the hypothesis arguments.

        Args:
            args_t: iterable of ``(tag, tokens)`` for the text verb.
            args_h: sized iterable of ``(tag, tokens)`` for the hypothesis verb.

        Returns:
            0 when ``args_h`` is empty, otherwise the summed cosines of all
            tag-matching argument pairs divided by ``len(args_h)``.
        """
        sum_args = 0
        num_args_h = len(args_h)
        for tag_t, tokens_t in args_t:
            for tag_h, tokens_h in args_h:
                if tag_t != tag_h:
                    continue
                # Any sim_type other than 'Lin' / 'WN' leaves both bags empty.
                expand_t = []
                expand_h = []
                if self.sim_type == 'Lin':
                    expand_t = self.lin.expand_bow(tokens_t)
                    expand_h = self.lin.expand_bow(tokens_h)
                elif self.sim_type == 'WN':
                    expand_t = self.wn.expand_bow_tree(tokens_t)
                    expand_h = self.wn.expand_bow_tree(tokens_h)
                self.arg_sim.set_text(expand_t)
                self.arg_sim.set_hypo(expand_h)
                self.arg_score = self.arg_sim.cosine()
                self.__p_stderr('\t[%s|%s] %s %s\n'%(tag_t, self.arg_score, expand_t, expand_h))
                sum_args += self.arg_score
        if num_args_h == 0:
            return 0
        self.verb_score = float(sum_args) / num_args_h
        return self.verb_score

    def __p_stderr(self, text=''):
        """Write ``text`` to stderr when ``self.verbose`` is 1."""
        if self.verbose == 1:
            sys.stderr.write(text)

    def get_verb_score(self):
        """Return the most recent non-empty per-verb argument average."""
        return self.verb_score

    def get_arg_score(self):
        """Return the cosine of the last argument pair that was compared."""
        return self.arg_score
Esempio n. 8
0
def extractFeatures(options):
    """Compute the per-pair feature table and dump it.

    Loads the pickled pairs from ``options.pickle_file`` and, for each pair,
    stores the cosine, F1, recall, precision, BLEU, TINE, named-entity and
    edit-distance scores together with the pair's value.  The table is then
    passed through ``addMeteor`` and written by ``dumpFeatures`` to
    ``options.output_file`` using ``options.out_type``.

    Args:
        options: object exposing ``pickle_file``, ``output_file``,
            ``out_type`` and ``sim_type``.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(options.pickle_file, 'r') as pf:
        pairs = pickle.load(pf)

    metrics = SetMetrics()
    meteor_h = []
    meteor_t = []
    feature_values = {}
    if options.sim_type:
        tine = TineVN(sim_type=options.sim_type)
    else:
        tine = TineVN()
    ne = NEMetrics()
    ed = Edistance(verbose=0)

    for pair in pairs:
        pair_id = pair.get_id()
        value = pair.get_value()
        lemmas_text = pair.get_feature_text('lemmas')
        lemmas_hypo = pair.get_feature_hypo('lemmas')
        frames_text = pair.get_feature_text('frames')
        frames_hypo = pair.get_feature_hypo('frames')
        ne_text = pair.get_feature_text('ne')
        ne_hypo = pair.get_feature_hypo('ne')
        pos_text = pair.get_feature_text('pos')
        pos_hypo = pair.get_feature_hypo('pos')
        chunk_text = pair.get_feature_text('chunks')
        chunk_hypo = pair.get_feature_hypo('chunks')

        tokens_text = []
        tokens_hypo = []
        if options.sim_type == 'WordNet':
            tokens_text = toWN(lemmas_text, pos_text)
            tokens_hypo = toWN(lemmas_hypo, pos_hypo)
        elif options.sim_type == 'Lin' or not options.sim_type:
            # With no sim_type, TineVN() falls back to 'Lin', so the tokens
            # must follow the 'Lin' branch instead of staying empty.
            tokens_text = lemmas_text
            tokens_hypo = lemmas_hypo

        metrics.set_text(lemmas_text)
        metrics.set_hypo(lemmas_hypo)
        ne.set_pairs_text(ne_text)
        ne.set_pairs_hypo(ne_hypo)
        ne_score = ne.get_score_lin()
        cos = metrics.cosine()
        rec = metrics.get_recall()
        prec = metrics.get_precision()
        f1 = metrics.get_f1()
        bleu = metrics.bleu()
        meteor_t.append((pair_id, lemmas_text))
        meteor_h.append((pair_id, lemmas_hypo))

        tine_score = tine.get_tine_score(frames_text, tokens_text, frames_hypo, tokens_hypo)

        ed_score = ed.get_edistance_micai(frames_text, tokens_text, chunk_text, frames_hypo, tokens_hypo, chunk_hypo, entailment = value)

        entry = feature_values.setdefault(pair_id, {})
        entry['cos'] = cos
        entry['f1'] = f1
        entry['rec'] = rec
        entry['prec'] = prec
        entry['bleu'] = bleu
        entry['value'] = value
        entry['tine'] = tine_score
        entry['ne'] = ne_score
        entry['ed'] = ed_score

    feature_values = addMeteor(feature_values, metrics, meteor_h, meteor_t)
    dumpFeatures(feature_values, options.output_file, options.out_type)