def predict(self, title, content):
    """Score a document given its title and content.

    The title and content are cleaned with ``self.deal_title`` and
    ``self.deal_content``, segmented with ``Segmentor.Segment`` in
    ``SEG_BASIC`` mode, folded into a single feature map, normalized with
    ``self.normalizer`` and handed to ``self.predictor``.

    Args:
        title: raw title, passed to ``self.deal_title``.
        content: raw content, passed to ``self.deal_content``.

    Returns:
        ``1 - float(self.predictor.Predict(normalized_vector))``.
    """
    Segmentor.Init()

    cleaned_title = self.deal_title(title)
    cleaned_content = self.deal_content(content)
    title_tokens = Segmentor.Segment(cleaned_title, SEG_BASIC)
    content_tokens = Segmentor.Segment(cleaned_content, SEG_BASIC)

    features = libtrate.id_map()
    vocab_size = self.identifer.size()

    # The title is parsed with 0 as the fourth argument and the content with
    # the identifier size (presumably an id offset that keeps title and
    # content features apart -- confirm against TextPredictor.Prase).
    for tokens, offset in ((title_tokens, 0), (content_tokens, vocab_size)):
        libtrate.TextPredictor.Prase(
            tokens, features, self.identifer, offset, ngram=1, skip=0)

    normalized = self.normalizer.NormalizeCopy(libtrate.Vector(features))

    # self.calibrator is not applied here; the complement of the predictor's
    # output is returned instead.
    return 1 - float(self.predictor.Predict(normalized))
def predict(self, title, content):
    """Score a document with both the linear and the DNN predictor.

    The title and content are cleaned with ``self.deal_title`` and
    ``self.deal_content``, segmented with ``Segmentor.Segment`` in
    ``SEG_BASIC`` mode and folded into one feature vector. This method does
    not call ``self.normalizer`` on that vector before predicting.

    Args:
        title: raw title, passed to ``self.deal_title``.
        content: raw content, passed to ``self.deal_content``.

    Returns:
        A tuple ``(score, dnn_score, adjusted_dnn_score)`` where ``score`` is
        the value returned by ``self.predictor.Predict``, ``dnn_score`` is
        ``float(self.dnn_predictor.Predict(...))`` and ``adjusted_dnn_score``
        is ``self.calibrator.PredictProbability(dnn_score)``.
    """
    Segmentor.Init()

    cleaned_title = self.deal_title(title)
    cleaned_content = self.deal_content(content)
    title_tokens = Segmentor.Segment(cleaned_title, SEG_BASIC)
    content_tokens = Segmentor.Segment(cleaned_content, SEG_BASIC)

    features = libtrate.id_map()
    vocab_size = self.identifer.size()

    # No ngram/skip arguments are passed, so whatever defaults Prase has
    # apply. The fourth argument is 0 for the title and the identifier size
    # for the content (presumably an id offset -- confirm against Prase).
    for tokens, offset in ((title_tokens, 0), (content_tokens, vocab_size)):
        libtrate.TextPredictor.Prase(tokens, features, self.identifer, offset)

    vector = libtrate.Vector(features)

    linear_score = self.predictor.Predict(vector)
    raw_dnn_score = float(self.dnn_predictor.Predict(vector))
    calibrated_dnn_score = self.calibrator.PredictProbability(raw_dnn_score)

    return (linear_score, raw_dnn_score, calibrated_dnn_score)
def predict(classifer, file):
    """Print the label and classifier score for each line of a feature file.

    Every line is split on whitespace. Field 1 is used as the label and
    fields 3 onward are re-joined with tabs and passed to
    ``libtrate.Vector``. One output line is printed per input line: the
    label, three spaces, then the score.

    Blank (whitespace-only) lines are skipped. The file is closed when the
    function returns or raises.

    Args:
        classifer: object exposing ``Predict(vector)``.
        file: path of the file to read.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    with open(file) as lines:
        for line in lines:
            fields = line.strip().split()
            if not fields:
                # Nothing to score on an empty line.
                continue
            label = fields[1]
            vector = libtrate.Vector('\t'.join(fields[3:]))
            score = classifer.Predict(vector)
            # Same text as the Python 2 statement `print label, ' ', score`,
            # written so it also parses under Python 3.
            print(' '.join((label, ' ', str(score))))
def id2key_map(self, id_val_map):
    """Translate a feature-id map into readable keys and a linear score.

    A vector is built from ``id_val_map`` and normalized in place with the
    predictor's normalizer. Features whose weight is 0 are ignored. Ids
    below ``self.identifer.size()`` are reported as ``'t:<key>'``; all other
    ids are shifted down by that size and reported as ``'c:<key>'``.

    Args:
        id_val_map: feature map accepted by ``libtrate.Vector``.

    Returns:
        A tuple ``(key_val_map, total)``. ``key_val_map`` is a
        ``defaultdict(float)`` mapping each readable key to the sum of the
        weights of its features. ``total`` is the sum of ``value * weight``
        over the kept features plus ``self.predictor.bias()``.
    """
    vector = libtrate.Vector(id_val_map)
    self.predictor.GetNormalizer().Normalize(vector)

    weights = self.predictor.weights()
    vocab_size = self.identifer.size()
    readable = collections.defaultdict(float)
    weighted_sum = 0

    for pos in xrange(vector.indices.size()):
        feature_id = vector.indices[pos]
        weight = weights[feature_id]
        if weight == 0:
            continue

        if feature_id < vocab_size:
            prefix, local_id = 't:', feature_id
        else:
            prefix, local_id = 'c:', feature_id - vocab_size

        # NOTE(review): only the weight is accumulated per key, not
        # value * weight -- confirm this is intended.
        readable[prefix + self.identifer.key(local_id)] += weight
        weighted_sum += vector.values[pos] * weight

    # Bias is added last, after all feature contributions.
    weighted_sum += self.predictor.bias()
    return readable, weighted_sum
# Build one feature map from the segmented title and content. Both calls use
# ngram=3 and skip=2; the fourth argument is 0 for the title and num_words
# for the content (presumably an id offset -- confirm against Prase).
id_val_map = libtrate.id_map()
num_words = identifer.size()
libtrate.TextPredictor.Prase(
    title_words, id_val_map, identifer, 0, ngram=3, skip=2)
libtrate.TextPredictor.Prase(
    content_words, id_val_map, identifer, num_words, ngram=3, skip=2)
print(id_val_map.size())

# Dump the vector before and after normalization.
fe = libtrate.Vector(id_val_map)
print(fe.str())

# FIXME (carried over): normalizer.NormalizeCopy(fe) is not used here; the
# earlier note asks "why wrong core?" (presumably it crashed -- cause not
# investigated). The normalizer owned by lpredictor is used instead.
fe = lpredictor.GetNormalizer().NormalizeCopy(fe)
print(fe.str())

# Score with bc, then show the raw and calibrated values.
score = float(bc.Predict(fe))
print(score)
print('score:{}, adjusted_score:{}'.format(
    score, calibrator.PredictProbability(score)))

# Cross-check: let TextPredictor.Predict do the whole pipeline with lpredictor.
print(' '.join((
    'score2:',
    str(libtrate.TextPredictor.Predict(
        title_words, content_words, identifer, lpredictor)),
)))