Exemple #1
0
 def predict(self, title, content):
     """Score a (title, content) pair with both configured predictors.

     The title and content are cleaned with ``self.deal_title`` /
     ``self.deal_content``, segmented with ``Segmentor.Segment`` using
     ``SEG_BASIC``, and parsed into one shared ``libtrate.id_map``: title
     tokens are parsed with offset 0, content tokens with an offset equal
     to ``self.identifer.size()``.  The resulting ``libtrate.Vector`` is
     passed as-is (no normalizer is applied) to both predictors.

     Returns:
         A 3-tuple ``(score, dnn_score, adjusted_dnn_score)`` where
         ``score`` is ``self.predictor.Predict(...)``, ``dnn_score`` is
         ``float(self.dnn_predictor.Predict(...))`` and
         ``adjusted_dnn_score`` is
         ``self.calibrator.PredictProbability(dnn_score)``.
     """
     Segmentor.Init()

     cleaned_title = self.deal_title(title)
     cleaned_content = self.deal_content(content)
     title_tokens = Segmentor.Segment(cleaned_title, SEG_BASIC)
     content_tokens = Segmentor.Segment(cleaned_content, SEG_BASIC)

     feature_map = libtrate.id_map()
     content_offset = self.identifer.size()
     # Title first (offset 0), then content (offset = identifer size),
     # both accumulating into the same feature map.
     for tokens, offset in ((title_tokens, 0),
                            (content_tokens, content_offset)):
         libtrate.TextPredictor.Prase(tokens, feature_map, self.identifer,
                                      offset)

     features = libtrate.Vector(feature_map)
     base_score = self.predictor.Predict(features)
     raw_dnn = float(self.dnn_predictor.Predict(features))
     calibrated_dnn = self.calibrator.PredictProbability(raw_dnn)
     return (base_score, raw_dnn, calibrated_dnn)
Exemple #2
0
    def predict(self, title, content):
        """Return ``1 - p`` where ``p`` is the predictor's score for the text.

        The title and content are cleaned with ``self.deal_title`` /
        ``self.deal_content``, segmented with ``Segmentor.Segment`` using
        ``SEG_BASIC``, and parsed (``ngram=1``, ``skip=0``) into one shared
        ``libtrate.id_map``: title tokens with offset 0, content tokens with
        an offset equal to ``self.identifer.size()``.  The feature vector is
        normalized with ``self.normalizer.NormalizeCopy`` before being passed
        to ``self.predictor.Predict``.

        Returns:
            ``1 - float(self.predictor.Predict(normalized_features))``.
        """
        Segmentor.Init()

        cleaned_title = self.deal_title(title)
        cleaned_content = self.deal_content(content)
        title_tokens = Segmentor.Segment(cleaned_title, SEG_BASIC)
        content_tokens = Segmentor.Segment(cleaned_content, SEG_BASIC)

        feature_map = libtrate.id_map()
        content_offset = self.identifer.size()
        # Title first (offset 0), then content (offset = identifer size),
        # both accumulating into the same feature map.
        for tokens, offset in ((title_tokens, 0),
                               (content_tokens, content_offset)):
            libtrate.TextPredictor.Prase(tokens,
                                         feature_map,
                                         self.identifer,
                                         offset,
                                         ngram=1,
                                         skip=0)

        normalized = self.normalizer.NormalizeCopy(
            libtrate.Vector(feature_map))
        raw_score = float(self.predictor.Predict(normalized))
        # The caller receives the complement of the raw predictor score.
        return 1 - raw_score
Exemple #3
0
    def predict_debug(self, title, content):
        """Score a (title, content) pair and collect debugging details.

        The title and content are cleaned with ``self.deal_title`` /
        ``self.deal_content``, segmented with ``Segmentor.Segment`` using
        ``SEG_BASIC`` and scored via ``libtrate.TextPredictor.Predict``,
        which also receives ``id_val_map`` (presumably filled with the
        per-feature values -- confirm against libtrate).

        NOTE(review): this method does not call ``Segmentor.Init()`` itself;
        it assumes the segmentor has already been initialized -- confirm.

        Returns:
            A 2-tuple ``(score, debug_info)``.  ``debug_info`` carries:

            * ``title`` / ``content``: the cleaned inputs.
            * ``title_seg_result`` / ``content_seg_result``: the segmented
              tokens joined with ``'/'``.
            * ``map_info``: a text report listing every entry of the map
              returned by ``self.id2key_map`` twice -- sorted by value
              (descending), then by absolute value (descending).
            * ``total``: the second value returned by ``self.id2key_map``.
            * ``output``: ``self.predictor.Output(id_val_map)``.
            * ``score``: ``self.predictor.Predict(id_val_map)``.
        """
        # Function-local imports are kept so the block does not depend on
        # the file's import header; cStringIO because this file is Python 2.
        import cStringIO
        import gezi

        sep = '/'
        id_val_map = libtrate.id_map()
        title = self.deal_title(title)
        content = self.deal_content(content)

        title_words = Segmentor.Segment(title, SEG_BASIC)
        title_seg_result = sep.join(gezi.vec2list(title_words))
        content_words = Segmentor.Segment(content, SEG_BASIC)
        content_seg_result = sep.join(gezi.vec2list(content_words))
        score = libtrate.TextPredictor.Predict(title_words, content_words,
                                               self.identifer, self.predictor,
                                               id_val_map)

        key_val_map, total = self.id2key_map(id_val_map)
        weights = list(key_val_map.items())
        map_info = cStringIO.StringIO()

        def write_section(header, sort_key):
            # Emit one report section: header, then "key|value" lines in
            # descending order of sort_key.  '\x01' inside keys is shown
            # as '/'.
            map_info.write(header)
            for key, val in sorted(weights, key=sort_key, reverse=True):
                map_info.write('{}|{:40f}\n'.format(
                    key.replace('\x01', '/'), val))

        write_section('Per ngram weight sort by spam prob\n\n',
                      lambda item: item[1])
        write_section('\n\nPer ngram weight sort by importance\n\n',
                      lambda item: abs(item[1]))

        class DebugInfo(object):
            # Plain attribute bag; remaining fields are attached below.
            title_seg_result = ''
            content_seg_result = ''

        debug_info = DebugInfo()
        debug_info.title_seg_result = title_seg_result
        debug_info.content_seg_result = content_seg_result
        debug_info.map_info = map_info.getvalue()
        debug_info.title = title
        debug_info.content = content

        debug_info.total = total
        debug_info.output = self.predictor.Output(id_val_map)
        debug_info.score = self.predictor.Predict(id_val_map)

        return score, debug_info
Exemple #4
0
# Debugging script fragment (Python 2): clean the content, segment the title
# and content, print the tokens, then parse both token lists into a single
# id map and build a feature vector from it.
# NOTE(review): `info`, `title`, `identifer`, `deal_content` and `libtrate`
# are not defined in this fragment; presumably they are set up earlier in
# the script -- confirm.
#info.content = ''
content = deal_content(info.content)
print content

# NOTE(review): Segmentor and SEG_BASIC presumably come from this wildcard
# import -- confirm.
from libsegment import *
from libsegment import LogHelper

# NOTE(review): the meaning of level 4 is defined by libsegment's LogHelper
# -- confirm.
LogHelper.set_level(4)

Segmentor.Init()
title_words = Segmentor.Segment(title, SEG_BASIC)
# Print the title tokens joined by the '\x01' control character.
print '\x01'.join(title_words)
content_words = Segmentor.Segment(content, SEG_BASIC)
# Print the content tokens joined by the '\x01' control character.
print '\x01'.join(content_words)
id_val_map =  libtrate.id_map()
# Used below as the offset argument when parsing the content tokens.
num_words = identifer.size()

# Disabled experiments that blank out or inject a single token before
# parsing ('害人' is a Chinese word, roughly "harming people"):
#for i in range(title_words.size()):
#	if title_words[i] == '害人':
#		title_words[i] = ' '
#title_words.clear()
#content_words.clear()
#title_words.push_back('害人')
#content_words.push_back('害人')
# Parse title tokens (offset 0) and content tokens (offset num_words) into
# the same id map, with ngram = 3 and skip = 2.
libtrate.TextPredictor.Prase(title_words, id_val_map, identifer, 0, ngram = 3, skip = 2)
libtrate.TextPredictor.Prase(content_words, id_val_map, identifer, num_words, ngram = 3, skip = 2)
# Disabled variant of the two calls above without the ngram/skip arguments:
#libtrate.TextPredictor.Prase(title_words, id_val_map, identifer, 0)
#libtrate.TextPredictor.Prase(content_words, id_val_map, identifer, num_words)
print id_val_map.size()
# Build the feature vector from the accumulated id map.
fe = libtrate.Vector(id_val_map)