def predict(self, title, content):
    """Score a (title, content) pair with the linear and the DNN predictor.

    Both texts are cleaned with ``deal_title`` / ``deal_content``, segmented
    with ``SEG_BASIC`` and parsed into one shared ``libtrate.id_map``.  The
    title is parsed with a start value of 0 and the content with
    ``self.identifer.size()`` (presumably an id offset that keeps title and
    content features apart -- confirm against ``TextPredictor.Prase``).

    The feature vector is passed to both predictors as-is; this method does
    not call ``self.normalizer``.

    Args:
        title: raw title text.
        content: raw content text.

    Returns:
        A 3-tuple ``(score, dnn_score, adjusted_dnn_score)``:
        the value returned by ``self.predictor.Predict``, the DNN output
        converted to ``float``, and that float passed through
        ``self.calibrator.PredictProbability``.
    """
    Segmentor.Init()

    clean_title = self.deal_title(title)
    clean_content = self.deal_content(content)
    title_tokens = Segmentor.Segment(clean_title, SEG_BASIC)
    content_tokens = Segmentor.Segment(clean_content, SEG_BASIC)

    features_by_id = libtrate.id_map()
    content_offset = self.identifer.size()
    # Title first, then content, both accumulating into the same id map.
    for tokens, offset in ((title_tokens, 0), (content_tokens, content_offset)):
        libtrate.TextPredictor.Prase(tokens, features_by_id, self.identifer, offset)

    feature_vector = libtrate.Vector(features_by_id)
    linear_score = self.predictor.Predict(feature_vector)
    raw_dnn = float(self.dnn_predictor.Predict(feature_vector))
    return (linear_score, raw_dnn, self.calibrator.PredictProbability(raw_dnn))
def predict(self, title, content):
    """Return ``1 - p`` where ``p`` is the predictor output for the text.

    The title and content are cleaned (``deal_title`` / ``deal_content``),
    segmented with ``SEG_BASIC`` and parsed with ``ngram=1, skip=0`` into a
    single ``libtrate.id_map``.  The title is parsed with a start value of 0
    and the content with ``self.identifer.size()`` (presumably an id offset
    -- confirm against ``TextPredictor.Prase``).  The resulting vector is
    normalised with ``self.normalizer.NormalizeCopy`` before prediction.

    No calibration is applied to the score in this method.

    Args:
        title: raw title text.
        content: raw content text.

    Returns:
        ``1 - float(self.predictor.Predict(normalised_vector))``.
    """
    Segmentor.Init()

    clean_title = self.deal_title(title)
    clean_content = self.deal_content(content)
    title_tokens = Segmentor.Segment(clean_title, SEG_BASIC)
    content_tokens = Segmentor.Segment(clean_content, SEG_BASIC)

    features_by_id = libtrate.id_map()
    content_offset = self.identifer.size()
    # Title first, then content, both accumulating into the same id map.
    for tokens, offset in ((title_tokens, 0), (content_tokens, content_offset)):
        libtrate.TextPredictor.Prase(
            tokens, features_by_id, self.identifer, offset, ngram=1, skip=0)

    normalised = self.normalizer.NormalizeCopy(libtrate.Vector(features_by_id))
    # The caller receives the complement of the raw predictor output.
    return 1 - float(self.predictor.Predict(normalised))
def predict_debug(self, title, content):
    """Predict a score and collect a human-readable breakdown of it.

    Args:
        title: raw title text (cleaned with ``deal_title`` before use).
        content: raw content text (cleaned with ``deal_content`` before use).

    Returns:
        ``(score, debug_info)`` where ``score`` is the value returned by
        ``libtrate.TextPredictor.Predict`` and ``debug_info`` is a
        ``DebugInfo`` object carrying these attributes:

        * ``title_seg_result`` / ``content_seg_result``: segmented words
          joined with ``'/'``.
        * ``map_info``: text report listing every entry of the key/value map
          from ``self.id2key_map`` twice -- sorted by value, then by
          absolute value, both descending.
        * ``title`` / ``content``: the cleaned texts.
        * ``total``: second element returned by ``self.id2key_map``.
        * ``output``: ``self.predictor.Output(id_val_map)``.
        * ``score``: ``self.predictor.Predict(id_val_map)``.
    """
    import cStringIO
    import gezi

    class DebugInfo(object):
        title_seg_result = ''
        content_seg_result = ''

    def _slash_joined(words):
        # Render a segmentation result as 'w1/w2/...'.
        return '/'.join(gezi.vec2list(words))

    id_val_map = libtrate.id_map()
    title = self.deal_title(title)
    content = self.deal_content(content)

    title_words = Segmentor.Segment(title, SEG_BASIC)
    title_joined = _slash_joined(title_words)
    content_words = Segmentor.Segment(content, SEG_BASIC)
    content_joined = _slash_joined(content_words)

    # This call also receives id_val_map, which is read back below.
    score = libtrate.TextPredictor.Predict(
        title_words, content_words, self.identifer, self.predictor, id_val_map)
    key_val_map, total = self.id2key_map(id_val_map)

    # Same entries, two orderings: raw value, then magnitude.
    sections = (
        ('Per ngram weight sort by spam prob\n\n',
         lambda item: item[1]),
        ('\n\nPer ngram weight sort by impportance\n\n',
         lambda item: abs(item[1])),
    )
    report = cStringIO.StringIO()
    for header, sort_key in sections:
        report.write(header)
        ranked = sorted(key_val_map.items(), key=sort_key, reverse=True)
        for key, val in ranked:
            report.write('{}|{:40f}\n'.format(key.replace('\x01', '/'), val))

    debug_info = DebugInfo()
    debug_info.title_seg_result = title_joined
    debug_info.content_seg_result = content_joined
    debug_info.map_info = report.getvalue()
    debug_info.title = title
    debug_info.content = content
    debug_info.total = total
    debug_info.output = self.predictor.Output(id_val_map)
    debug_info.score = self.predictor.Predict(id_val_map)
    return score, debug_info
#info.content = '' content = deal_content(info.content) print content from libsegment import * from libsegment import LogHelper LogHelper.set_level(4) Segmentor.Init() title_words = Segmentor.Segment(title, SEG_BASIC) print '\x01'.join(title_words) content_words = Segmentor.Segment(content, SEG_BASIC) print '\x01'.join(content_words) id_val_map = libtrate.id_map() num_words = identifer.size() #for i in range(title_words.size()): # if title_words[i] == '害人': # title_words[i] = ' ' #title_words.clear() #content_words.clear() #title_words.push_back('害人') #content_words.push_back('害人') libtrate.TextPredictor.Prase(title_words, id_val_map, identifer, 0, ngram = 3, skip = 2) libtrate.TextPredictor.Prase(content_words, id_val_map, identifer, num_words, ngram = 3, skip = 2) #libtrate.TextPredictor.Prase(title_words, id_val_map, identifer, 0) #libtrate.TextPredictor.Prase(content_words, id_val_map, identifer, num_words) print id_val_map.size() fe = libtrate.Vector(id_val_map)