def baseline(self, id_bo):
    """Build the baseline predicates for the pair stored under ``id_bo``.

    The list is stored on ``self.baseline_predicates`` (reset on every
    call) and is also returned.  When ``id_bo`` is not a key of
    ``self.pairs`` the list stays empty.  Otherwise it holds one
    ``>overlap`` predicate carrying ``id_bo`` and the value reported by
    ``SetMetrics.get_isec()`` for the pair's text and hypothesis lemmas
    (presumably the size of their intersection -- confirm against
    ``SetMetrics``).

    :param id_bo: key of the pair in ``self.pairs``.
    :return: list of predicate strings, possibly empty.
    """
    # TODO syntax and word based predicates
    self.baseline_predicates = []
    if id_bo not in self.pairs:
        return self.baseline_predicates

    pair = self.pairs[id_bo]  # single lookup instead of one per feature
    metric = SetMetrics(
        text=pair.get_feature_text('lemmas'),
        hypo=pair.get_feature_hypo('lemmas'),
    )
    overlap = '>overlap\n%s %s' % (id_bo, metric.get_isec())
    self.baseline_predicates.append(overlap)
    return self.baseline_predicates
def baseline(self, id_bo):
    """Build the baseline predicates for the pair stored under ``id_bo``.

    The list is stored on ``self.baseline_predicates`` (reset on every
    call) and is also returned.  When ``id_bo`` is not a key of
    ``self.pairs`` the list stays empty.  Otherwise it holds one
    ``Overlap(id, value)`` predicate, where ``value`` is what
    ``SetMetrics.get_isec()`` reports for the pair's text and hypothesis
    lemmas (presumably the size of their intersection -- confirm against
    ``SetMetrics``).

    :param id_bo: key of the pair in ``self.pairs``.
    :return: list of predicate strings, possibly empty.
    """
    # TODO syntax and word based predicates
    self.baseline_predicates = []
    if id_bo not in self.pairs:
        return self.baseline_predicates

    pair = self.pairs[id_bo]  # single lookup instead of one per feature
    metric = SetMetrics(
        text=pair.get_feature_text('lemmas'),
        hypo=pair.get_feature_hypo('lemmas'),
    )
    overlap = 'Overlap(%s, %s)' % (id_bo, metric.get_isec())
    self.baseline_predicates.append(overlap)
    return self.baseline_predicates
def backoff(self, id_bo):
    """Build the back-off predicates for the pair stored under ``id_bo``.

    The list is stored on ``self.backoff_predicates`` (reset on every
    call) and is also returned.  When ``id_bo`` is not a key of
    ``self.pairs`` the list stays empty.  Otherwise it holds one
    ``>overlap`` predicate carrying ``id_bo`` and the intersection value
    that ``SetMetrics.get_isec()`` reports for the pair's text and
    hypothesis lemmas.

    :param id_bo: key of the pair in ``self.pairs``.
    :return: list of predicate strings, possibly empty.
    """
    # TODO syntax and word based predicates
    # TODO backoff with cosine metric instead of intersection
    self.backoff_predicates = []
    if id_bo not in self.pairs:
        return self.backoff_predicates

    pair = self.pairs[id_bo]  # single lookup instead of one per feature
    metric = SetMetrics(
        text=pair.get_feature_text('lemmas'),
        hypo=pair.get_feature_hypo('lemmas'),
    )
    overlap = '>overlap\n%s %s' % (id_bo, metric.get_isec())
    self.backoff_predicates.append(overlap)
    return self.backoff_predicates
def backoff(self, id_bo):
    """Return the back-off predicates for the pair keyed by ``id_bo``.

    ``self.backoff_predicates`` is reset to a fresh list on every call and
    the same list is returned.  It stays empty when ``id_bo`` is not a key
    of ``self.pairs``.  Otherwise one ``>overlap`` predicate is appended,
    built from ``id_bo`` and the intersection value returned by
    ``SetMetrics.get_isec()`` over the pair's text and hypothesis lemmas.

    :param id_bo: key of the pair in ``self.pairs``.
    :return: list of predicate strings, possibly empty.
    """
    # TODO syntax and word based predicates
    # TODO backoff with cosine metric instead of intersection
    predicates = []
    self.backoff_predicates = predicates
    if id_bo in self.pairs:
        pair = self.pairs[id_bo]  # fetch once, reuse for both features
        lemmas_text = pair.get_feature_text('lemmas')
        lemmas_hypo = pair.get_feature_hypo('lemmas')
        metric = SetMetrics(text=lemmas_text, hypo=lemmas_hypo)
        predicates.append('>overlap\n%s %s' % (id_bo, metric.get_isec()))
    return predicates
def loadData(file_data, subset):
    """Load pickled pairs and build overlap features for a subset of them.

    Every pair whose ``get_id()`` is contained in ``subset`` contributes
    one sample: the feature vector is ``[isec]``, where ``isec`` is the
    value of ``SetMetrics.get_isec()`` for the pair's text and hypothesis
    lemmas, and the label is ``1`` when ``get_value()`` equals ``'TRUE'``
    and ``0`` otherwise.

    :param file_data: path of the pickle file holding an iterable of pairs.
    :param subset: container of pair ids to keep (tested with ``in``).
    :return: tuple ``(X, Y)`` of parallel lists (features, labels).
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling -- only call this on files from a trusted source.
    # Pickle data is a byte stream, so the file must be opened in binary
    # mode; text mode fails on Python 3 and can corrupt data on Windows.
    with open(file_data, 'rb') as pf1:
        tmp_pairs = pickle.load(pf1)

    # One metric object is reused; its inputs are replaced for every pair.
    metric = SetMetrics()
    X = []
    Y = []
    for pair in tmp_pairs:
        if pair.get_id() not in subset:
            continue
        metric.set_text(pair.get_feature_text('lemmas'))
        metric.set_hypo(pair.get_feature_hypo('lemmas'))
        X.append([metric.get_isec()])
        Y.append(1 if pair.get_value() == 'TRUE' else 0)
    return (X, Y)
def load_file(file):
    """Load pickled pairs and compute overlap features and statistics.

    For every pair in the pickle file the value of
    ``SetMetrics.get_isec()`` over the pair's text and hypothesis lemmas is
    computed.  The label is ``1`` when ``get_value()`` equals ``'TRUE'``
    and ``0`` otherwise.

    :param file: path of the pickle file holding an iterable of pairs.
    :return: tuple ``(isec_t, values, X, Y)`` where ``isec_t`` is the list
        of overlap values in input order, ``values`` maps each overlap
        value to the number of pairs that produced it, ``X`` is the list
        of one-element feature vectors ``[isec]`` and ``Y`` the list of
        labels.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling -- only call this on files from a trusted source.
    # Pickle data is a byte stream, so the file must be opened in binary
    # mode; text mode fails on Python 3 and can corrupt data on Windows.
    with open(file, 'rb') as pf1:
        tmp_pairs = pickle.load(pf1)

    # One metric object is reused; its inputs are replaced for every pair.
    metric = SetMetrics()
    isec_t = []
    values = {}
    X = []
    Y = []
    for pair in tmp_pairs:
        metric.set_text(pair.get_feature_text('lemmas'))
        metric.set_hypo(pair.get_feature_hypo('lemmas'))
        isec = metric.get_isec()

        isec_t.append(isec)
        values[isec] = values.get(isec, 0) + 1  # histogram of overlaps
        X.append([isec])
        Y.append(1 if pair.get_value() == 'TRUE' else 0)

    # NOTE: a linear-kernel SVM fit over (X, Y) used to be sketched here in
    # commented-out code; training is left to the caller.
    return (isec_t, values, X, Y)
def baseline(self, id_bo):
    """Build token and overlap predicates for the pair stored under ``id_bo``.

    The list is stored on ``self.baseline_predicates`` (reset on every
    call) and is also returned.  When ``id_bo`` is not a key of
    ``self.pairs`` the list stays empty.  Otherwise it contains, in order:

    * for every ``(word, pos)`` item of the pair's text ``'pos'`` feature
      followed by its hypothesis ``'pos'`` feature, one
      ``>token_back_lemma`` predicate for the cleaned word and one
      ``>toke_back_pos`` predicate for the cleaned tag;
    * one ``>overlap`` predicate with the value reported by
      ``SetMetrics.get_isec()`` for the text and hypothesis lemmas.

    :param id_bo: key of the pair in ``self.pairs``.
    :return: list of predicate strings, possibly empty.
    """
    # TODO syntax and word based predicates
    self.baseline_predicates = []
    if id_bo not in self.pairs:
        return self.baseline_predicates

    pair = self.pairs[id_bo]

    # Work on a copy: extending the list returned by the pair in place
    # could grow the pair's own data on every call.
    tagged_tokens = list(pair.get_feature_text('pos'))
    tagged_tokens.extend(pair.get_feature_hypo('pos'))
    for word, pos in tagged_tokens:
        lemma_predicate = '>token_back_lemma\n%s "%s"' % (
            id_bo, self.clean_str(word))
        # NOTE(review): 'toke_back_pos' (no 'n') is kept as-is because
        # consumers may match on it -- confirm whether it is a typo.
        pos_predicate = '>toke_back_pos\n%s "%s"' % (
            id_bo, self.clean_str(pos))
        self.baseline_predicates.append(lemma_predicate)
        self.baseline_predicates.append(pos_predicate)

    metric = SetMetrics(
        text=pair.get_feature_text('lemmas'),
        hypo=pair.get_feature_hypo('lemmas'),
    )
    overlap = '>overlap\n%s %s' % (id_bo, metric.get_isec())
    self.baseline_predicates.append(overlap)
    return self.baseline_predicates
def baseline(self, id_bo):
    """Build token and overlap predicates for the pair stored under ``id_bo``.

    The list is stored on ``self.baseline_predicates`` (reset on every
    call) and is also returned.  When ``id_bo`` is not a key of
    ``self.pairs`` the list stays empty.  Otherwise it contains, in order:

    * for every ``(word, pos)`` item of the pair's text ``'pos'`` feature
      followed by its hypothesis ``'pos'`` feature, one ``TokenBack``
      predicate for the cleaned word and one for the cleaned tag;
    * one ``Overlap`` predicate with the value reported by
      ``SetMetrics.get_isec()`` for the text and hypothesis lemmas.

    :param id_bo: key of the pair in ``self.pairs``.
    :return: list of predicate strings, possibly empty.
    """
    # TODO syntax and word based predicates
    self.baseline_predicates = []
    if id_bo not in self.pairs:
        return self.baseline_predicates

    pair = self.pairs[id_bo]

    # Work on a copy: extending the list returned by the pair in place
    # could grow the pair's own data on every call.
    tagged_tokens = list(pair.get_feature_text('pos'))
    tagged_tokens.extend(pair.get_feature_hypo('pos'))
    for word, pos in tagged_tokens:
        # NOTE(review): word and tag share the same 'TokenBack' predicate
        # name -- confirm that they are not meant to be distinguishable.
        lemma_predicate = 'TokenBack(%s, "%s")' % (
            id_bo, self.clean_str(word))
        pos_predicate = 'TokenBack(%s, "%s")' % (
            id_bo, self.clean_str(pos))
        self.baseline_predicates.append(lemma_predicate)
        self.baseline_predicates.append(pos_predicate)

    metric = SetMetrics(
        text=pair.get_feature_text('lemmas'),
        hypo=pair.get_feature_hypo('lemmas'),
    )
    overlap = 'Overlap(%s, %s)' % (id_bo, metric.get_isec())
    self.baseline_predicates.append(overlap)
    return self.baseline_predicates