Exemple #1
0
    def predict(self, title, content):
        """Score a document from its title and content.

        Both texts are cleaned with ``deal_title`` / ``deal_content``,
        segmented with ``SEG_BASIC`` and parsed into one shared feature map
        (unigrams, no skips).  The resulting vector is normalized and fed to
        ``self.predictor``.

        Returns:
            ``1 - p`` where ``p`` is the predictor output converted to float.
        """
        Segmentor.Init()
        cleaned_title = self.deal_title(title)
        cleaned_content = self.deal_content(content)
        segmented = (
            Segmentor.Segment(cleaned_title, SEG_BASIC),
            Segmentor.Segment(cleaned_content, SEG_BASIC),
        )

        features = libtrate.id_map()
        # Title is parsed with 0 and content with the identifer size as the
        # fourth argument (presumably a feature-id offset that keeps the two
        # vocabularies apart -- confirm against libtrate).
        offsets = (0, self.identifer.size())
        for words, offset in zip(segmented, offsets):
            libtrate.TextPredictor.Prase(words,
                                         features,
                                         self.identifer,
                                         offset,
                                         ngram=1,
                                         skip=0)

        vector = libtrate.Vector(features)
        raw_score = float(
            self.predictor.Predict(self.normalizer.NormalizeCopy(vector)))

        # The calibrator is intentionally not applied here; the complement of
        # the raw score is what callers receive.
        return 1 - raw_score
Exemple #2
0
 def predict(self, title, content):
     """Score a document with both the linear and the DNN predictor.

     Title and content are cleaned, segmented with ``SEG_BASIC`` and parsed
     into one shared feature map using the default ``Prase`` settings.  The
     un-normalized feature vector is passed to both predictors.

     Returns:
         A tuple ``(score, dnn_score, adjusted_dnn_score)``: the raw output
         of ``self.predictor``, the float output of ``self.dnn_predictor``
         and that DNN output passed through
         ``self.calibrator.PredictProbability``.
     """
     Segmentor.Init()
     cleaned_title = self.deal_title(title)
     cleaned_content = self.deal_content(content)
     title_tokens = Segmentor.Segment(cleaned_title, SEG_BASIC)
     content_tokens = Segmentor.Segment(cleaned_content, SEG_BASIC)

     features = libtrate.id_map()
     # Fourth Prase argument: 0 for the title, identifer size for the content
     # (presumably a feature-id offset -- confirm against libtrate).
     content_offset = self.identifer.size()
     for tokens, offset in ((title_tokens, 0),
                            (content_tokens, content_offset)):
         libtrate.TextPredictor.Prase(tokens, features, self.identifer, offset)

     vector = libtrate.Vector(features)
     linear_score = self.predictor.Predict(vector)
     dnn_score = float(self.dnn_predictor.Predict(vector))
     calibrated = self.calibrator.PredictProbability(dnn_score)
     return (linear_score, dnn_score, calibrated)
Exemple #3
0
def predict(classifer, file):
    """Print the label and classifier score for every sample in a file.

    Each non-blank line is split on whitespace.  Field 1 is used as the
    label; fields 3 onward are re-joined with tabs and parsed into a
    ``libtrate.Vector`` that is passed to ``classifer.Predict``.  One
    ``label   score`` line is printed per sample.

    Args:
        classifer: object exposing ``Predict(vector)``.
        file: path of the feature file to read.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    # The context manager guarantees the handle is released even when
    # parsing or prediction raises part-way through the file.
    with open(file) as samples:
        for line in samples:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at EOF): nothing to score.
                continue
            label = fields[1]
            fe = libtrate.Vector('\t'.join(fields[3:]))
            score = classifer.Predict(fe)
            # Same text as `print label, ' ', score`: three spaces between
            # the label and the score.
            print('%s   %s' % (label, score))
Exemple #4
0
    def id2key_map(self, id_val_map):
        """Map active feature ids back to readable keys and recompute the score.

        The feature map is turned into a vector and normalized in place with
        the predictor's normalizer.  Features whose weight is zero are
        ignored.

        Returns:
            A pair ``(key_val_map, total)``.  ``key_val_map`` maps each key
            to the accumulated weight of its feature; keys are prefixed with
            ``'t:'`` when the id is below ``identifer.size()`` and ``'c:'``
            otherwise (presumably title vs. content features -- confirm
            against the feature builder).  ``total`` is the sum of
            ``value * weight`` over the kept features plus the predictor
            bias.
        """
        fe = libtrate.Vector(id_val_map)
        self.predictor.GetNormalizer().Normalize(fe)

        weights = self.predictor.weights()
        key_weights = collections.defaultdict(float)
        score = 0
        for pos in xrange(fe.indices.size()):
            feature_id = fe.indices[pos]
            value = fe.values[pos]
            weight = weights[feature_id]
            if weight == 0:
                continue

            vocab_size = self.identifer.size()
            if feature_id >= vocab_size:
                key = 'c:' + self.identifer.key(feature_id - vocab_size)
            else:
                key = 't:' + self.identifer.key(feature_id)

            key_weights[key] += weight
            score += value * weight

        score += self.predictor.bias()
        return key_weights, score
Exemple #5
0
# Build one shared feature map from the title and content tokens using
# 3-grams with skip 2.  The fourth Prase argument is 0 for the title and the
# identifer size for the content (presumably a feature-id offset -- confirm).
id_val_map = libtrate.id_map()
num_words = identifer.size()
for words, offset in ((title_words, 0), (content_words, num_words)):
    libtrate.TextPredictor.Prase(words, id_val_map, identifer, offset,
                                 ngram=3, skip=2)
print(id_val_map.size())

# Dump the raw feature vector before normalization.
fe = libtrate.Vector(id_val_map)
print(fe.str())

# FIXME: the original note says using a standalone `normalizer.NormalizeCopy(fe)`
# here went wrong ("wrong core", presumably a crash/core dump -- cause unknown),
# so the normalizer owned by `lpredictor` is used instead.
fe = lpredictor.GetNormalizer().NormalizeCopy(fe)
print(fe.str())

# Score the normalized vector, then show the calibrated probability and the
# result of the one-shot TextPredictor.Predict path.
score = float(bc.Predict(fe))
print(score)
adjusted_score = calibrator.PredictProbability(score)
print('score:{}, adjusted_score:{}'.format(score, adjusted_score))
score2 = libtrate.TextPredictor.Predict(title_words, content_words, identifer, lpredictor)
print('score2: {}'.format(score2))