예제 #1
0
 def baseline(self, id_bo):
     """Build the baseline predicates for the pair stored under ``id_bo``.

     ``self.baseline_predicates`` is replaced by a fresh list on every call.
     When ``id_bo`` is a key of ``self.pairs``, a single string is appended:
     the literal ``>overlap``, a newline, then ``id_bo`` and the value that
     ``SetMetrics.get_isec()`` reports for the pair's ``'lemmas'`` text and
     hypothesis features (presumably an intersection measure -- confirm
     against ``SetMetrics``).

     Returns ``self.baseline_predicates`` (empty when ``id_bo`` is unknown).
     """
     self.baseline_predicates = []
     # TODO syntax and word based predicates
     if id_bo not in self.pairs:
         return self.baseline_predicates

     pair = self.pairs[id_bo]
     pair.get_value()  # result is not used yet; call kept so behaviour is unchanged
     text_lemmas = pair.get_feature_text('lemmas')
     hypo_lemmas = pair.get_feature_hypo('lemmas')
     overlap = SetMetrics(text=text_lemmas, hypo=hypo_lemmas).get_isec()
     self.baseline_predicates.append('>overlap\n%s %s' % (id_bo, overlap))
     return self.baseline_predicates
예제 #2
0
 def baseline(self, id_bo):
     """Return the baseline predicate list for the pair keyed by ``id_bo``.

     The list is stored on ``self.baseline_predicates`` and is rebuilt from
     scratch on each call. If ``id_bo`` is present in ``self.pairs``, one
     ``Overlap(<id_bo>, <isec>)`` string is added, where ``isec`` comes from
     ``SetMetrics.get_isec()`` over the pair's ``'lemmas'`` text and
     hypothesis features. Unknown ids yield an empty list.
     """
     predicates = []
     self.baseline_predicates = predicates
     # TODO syntax and word based predicates
     if id_bo in self.pairs:
         entry = self.pairs[id_bo]
         entry.get_value()  # value is fetched but not consumed here
         metric = SetMetrics(
             text=entry.get_feature_text('lemmas'),
             hypo=entry.get_feature_hypo('lemmas'),
         )
         predicates.append('Overlap(%s, %s)' % (id_bo, metric.get_isec()))
     return self.baseline_predicates
예제 #3
0
 def backoff(self, id_bo):
     """Build the back-off predicates for the pair stored under ``id_bo``.

     ``self.backoff_predicates`` is reset to a new list on every call. For
     an id found in ``self.pairs`` one string is appended: the literal
     ``>overlap``, a newline, then ``id_bo`` and the ``get_isec()`` value
     computed by ``SetMetrics`` from the pair's ``'lemmas'`` text and
     hypothesis features.

     Returns ``self.backoff_predicates`` (empty when ``id_bo`` is unknown).
     """
     self.backoff_predicates = []
     # TODO syntax and word based predicates
     # TODO backoff with cosine metric instead of intersection
     if id_bo not in self.pairs:
         return self.backoff_predicates

     pair = self.pairs[id_bo]
     pair.get_value()  # result currently unused; call preserved as-is
     text_lemmas = pair.get_feature_text('lemmas')
     hypo_lemmas = pair.get_feature_hypo('lemmas')
     overlap = SetMetrics(text=text_lemmas, hypo=hypo_lemmas).get_isec()
     self.backoff_predicates.append('>overlap\n%s %s' % (id_bo, overlap))
     return self.backoff_predicates
예제 #4
0
 def backoff(self, id_bo):
     """Return the back-off predicate list for the pair keyed by ``id_bo``.

     The list lives on ``self.backoff_predicates`` and is recreated on each
     call. When ``self.pairs`` contains ``id_bo``, a single overlap entry is
     added (``>overlap``, newline, ``id_bo``, space, ``isec``), with ``isec``
     taken from ``SetMetrics.get_isec()`` on the pair's ``'lemmas'``
     features. Otherwise the list stays empty.
     """
     collected = []
     self.backoff_predicates = collected
     # TODO syntax and word based predicates
     # TODO backoff with cosine metric instead of intersection
     if id_bo in self.pairs:
         entry = self.pairs[id_bo]
         entry.get_value()  # fetched but not consumed by this method
         metric = SetMetrics(
             text=entry.get_feature_text('lemmas'),
             hypo=entry.get_feature_hypo('lemmas'),
         )
         collected.append('>overlap\n%s %s' % (id_bo, metric.get_isec()))
     return self.backoff_predicates
예제 #5
0
def loadData(file_data, subset):
    """Load pickled pairs and build a one-feature data set for selected ids.

    Args:
        file_data: Path of a pickle file containing an iterable of pair
            objects (each offering ``get_id``, ``get_value``,
            ``get_feature_text`` and ``get_feature_hypo``).
        subset: Container of pair ids to keep; membership is tested with
            ``in``.

    Returns:
        A tuple ``(X, Y)``. ``X`` holds one ``[isec]`` row per selected
        pair, where ``isec`` is ``SetMetrics.get_isec()`` for the pair's
        ``'lemmas'`` text and hypothesis features. ``Y`` holds ``1`` when
        the pair's value equals ``'TRUE'`` and ``0`` otherwise.
    """
    # Pickle streams are bytes: binary mode is required on Python 3 and
    # avoids newline translation of the stream on Windows.
    # NOTE(security): pickle.load can execute arbitrary code, so only point
    # this at trusted files.
    with open(file_data, 'rb') as pickled:
        pairs = pickle.load(pickled)

    metric = SetMetrics()  # one instance, re-pointed at each pair below
    X = []
    Y = []
    for pair in pairs:
        if pair.get_id() not in subset:
            continue
        value = pair.get_value()
        metric.set_text(pair.get_feature_text('lemmas'))
        metric.set_hypo(pair.get_feature_hypo('lemmas'))
        X.append([metric.get_isec()])
        Y.append(1 if value == 'TRUE' else 0)
    return (X, Y)
예제 #6
0
def load_file(file):
    """Load pickled pairs and summarise their lemma-overlap feature.

    Args:
        file: Path of a pickle file containing an iterable of pair objects
            (each offering ``get_value``, ``get_feature_text`` and
            ``get_feature_hypo``).

    Returns:
        A tuple ``(isec_t, values, X, Y)``:

        * ``isec_t`` -- the ``SetMetrics.get_isec()`` value of every pair,
          in file order;
        * ``values`` -- dict mapping each distinct ``isec`` value to the
          number of pairs that produced it;
        * ``X`` -- one ``[isec]`` row per pair;
        * ``Y`` -- ``1`` where the pair's value equals ``'TRUE'``, else ``0``.
    """
    # Pickle streams are bytes: binary mode is required on Python 3 and
    # avoids newline translation of the stream on Windows.
    # NOTE(security): pickle.load can execute arbitrary code, so only point
    # this at trusted files.
    with open(file, 'rb') as pickled:
        pairs = pickle.load(pickled)

    metric = SetMetrics()  # one instance, re-pointed at each pair below
    isec_t = []
    values = {}
    X = []
    Y = []
    for pair in pairs:
        value = pair.get_value()
        metric.set_text(pair.get_feature_text('lemmas'))
        metric.set_hypo(pair.get_feature_hypo('lemmas'))
        isec = metric.get_isec()

        isec_t.append(isec)
        values[isec] = values.get(isec, 0) + 1  # histogram of isec values
        X.append([isec])
        Y.append(1 if value == 'TRUE' else 0)

    return (isec_t, values, X, Y)
예제 #7
0
    def baseline(self, id_bo):
        """Build the baseline predicates for the pair stored under ``id_bo``.

        ``self.baseline_predicates`` is reset to a fresh list on every call.
        When ``id_bo`` is a key of ``self.pairs`` the list receives:

        * for every ``(word, pos)`` item of the pair's ``'pos'`` text
          feature followed by its ``'pos'`` hypothesis feature, one
          ``>token_back_lemma`` entry for the cleaned word and one
          ``>toke_back_pos`` entry for the cleaned tag;
        * one final ``>overlap`` entry carrying ``SetMetrics.get_isec()``
          for the pair's ``'lemmas'`` text and hypothesis features.

        Returns ``self.baseline_predicates`` (empty when ``id_bo`` is
        unknown).
        """
        self.baseline_predicates = []
        # TODO syntax and word based predicates
        if id_bo not in self.pairs:
            return self.baseline_predicates

        pair = self.pairs[id_bo]
        predicates = self.baseline_predicates

        # Concatenate into a new list instead of extending the text feature
        # in place: if get_feature_text hands back the pair's own list, an
        # in-place extend would grow the stored feature on every call.
        tagged_tokens = (list(pair.get_feature_text('pos'))
                         + list(pair.get_feature_hypo('pos')))
        for word, pos in tagged_tokens:
            lemma_predicate = '>token_back_lemma\n%s "%s"' % (id_bo, self.clean_str(word))
            # NOTE(review): 'toke_back_pos' looks like a typo for
            # 'token_back_pos'; left untouched because the name is emitted
            # at runtime and may be matched elsewhere -- confirm.
            pos_predicate = '>toke_back_pos\n%s "%s"' % (id_bo, self.clean_str(pos))
            predicates.extend((lemma_predicate, pos_predicate))

        metric = SetMetrics(text=pair.get_feature_text('lemmas'),
                            hypo=pair.get_feature_hypo('lemmas'))
        predicates.append('>overlap\n%s %s' % (id_bo, metric.get_isec()))

        return self.baseline_predicates
예제 #8
0
    def baseline(self, id_bo):
        """Return the baseline predicate list for the pair keyed by ``id_bo``.

        The list is stored on ``self.baseline_predicates`` and rebuilt on
        each call. For an id present in ``self.pairs`` it contains:

        * two ``TokenBack(<id_bo>, "<text>")`` entries per ``(word, pos)``
          item -- one for the cleaned word, one for the cleaned tag --
          taken from the pair's ``'pos'`` text feature followed by its
          ``'pos'`` hypothesis feature;
        * one closing ``Overlap(<id_bo>, <isec>)`` entry, with ``isec`` from
          ``SetMetrics.get_isec()`` over the pair's ``'lemmas'`` features.

        Unknown ids yield an empty list.
        """
        self.baseline_predicates = []
        # TODO syntax and word based predicates
        if id_bo not in self.pairs:
            return self.baseline_predicates

        pair = self.pairs[id_bo]
        predicates = self.baseline_predicates

        # Build a separate combined list rather than extending the text
        # feature in place: if get_feature_text returns the pair's own list,
        # an in-place extend would append the hypothesis tokens to it again
        # on every call.
        tagged_tokens = (list(pair.get_feature_text('pos'))
                         + list(pair.get_feature_hypo('pos')))
        for word, pos in tagged_tokens:
            # NOTE(review): word and tag share the same 'TokenBack' predicate
            # name; confirm that this is intended.
            lemma_predicate = 'TokenBack(%s, "%s")' % (id_bo, self.clean_str(word))
            pos_predicate = 'TokenBack(%s, "%s")' % (id_bo, self.clean_str(pos))
            predicates.extend((lemma_predicate, pos_predicate))

        metric = SetMetrics(text=pair.get_feature_text('lemmas'),
                            hypo=pair.get_feature_hypo('lemmas'))
        predicates.append('Overlap(%s, %s)' % (id_bo, metric.get_isec()))

        return self.baseline_predicates
예제 #9
0
def loadData(file_data, subset):
    """Read pickled pairs and return overlap features and labels for ``subset``.

    Args:
        file_data: Path of a pickle file containing an iterable of pair
            objects (each offering ``get_id``, ``get_value``,
            ``get_feature_text`` and ``get_feature_hypo``).
        subset: Container of pair ids to include; pairs whose id is not
            ``in`` it are skipped.

    Returns:
        A tuple ``(X, Y)`` of equally long lists: ``X`` has one ``[isec]``
        row per included pair (``isec`` being ``SetMetrics.get_isec()`` for
        the pair's ``'lemmas'`` text and hypothesis features), and ``Y`` has
        ``1`` when the pair's value equals ``'TRUE'``, otherwise ``0``.
    """
    # Binary mode: pickle data is bytes, so text mode fails on Python 3 and
    # may corrupt the stream through newline translation on Windows.
    # NOTE(security): unpickling runs arbitrary code from the file; use
    # trusted inputs only.
    with open(file_data, 'rb') as pf1:
        tmp_pairs = pickle.load(pf1)

    metric = SetMetrics()  # reused for every pair via set_text/set_hypo
    X = []
    Y = []
    for pair in tmp_pairs:
        pair_id = pair.get_id()
        if pair_id in subset:
            value = pair.get_value()
            lemmas_text = pair.get_feature_text('lemmas')
            lemmas_hypo = pair.get_feature_hypo('lemmas')
            metric.set_text(lemmas_text)
            metric.set_hypo(lemmas_hypo)

            X.append([metric.get_isec()])
            Y.append(1 if value == 'TRUE' else 0)
    return (X, Y)