Beispiel #1
0
def get_feature_avg(svm_in):
    """
    Average the exponentiated feature values of positive and non-positive records.

    Records are read with ``load_svm_feature(svm_in)``. Every feature value
    is replaced in place by ``math.exp(value)``, or by 0 when the value is
    below -20. Records whose ``'score'`` is greater than 0 are accumulated
    into the first result, all others into the second, using
    ``add_svm_feature`` (presumably a key-wise sum -- confirm against its
    definition). Each accumulated value is then divided by the number of
    records in its group.

    :param svm_in: argument forwarded unchanged to ``load_svm_feature``
    :return: tuple ``(h_rel_feature, h_irrel_feature)``
    """
    h_rel_feature, h_irrel_feature = {}, {}
    rel_cnt = irrel_cnt = 0

    for record in load_svm_feature(svm_in):
        label = record['score']
        h_feature = record['feature']
        # Only values are rewritten; the key set stays the same.
        for name in list(h_feature):
            raw = h_feature[name]
            h_feature[name] = 0 if raw < -20 else math.exp(raw)

        if label > 0:
            h_rel_feature = add_svm_feature(h_rel_feature, h_feature)
            rel_cnt += 1
        else:
            h_irrel_feature = add_svm_feature(h_irrel_feature, h_feature)
            irrel_cnt += 1

    # Float divisors keep the division from truncating on integer totals.
    groups = (
        (h_rel_feature, float(rel_cnt)),
        (h_irrel_feature, float(irrel_cnt)),
    )
    for h_total, divisor in groups:
        for name in h_total:
            h_total[name] /= divisor
    return h_rel_feature, h_irrel_feature
Beispiel #2
0
 def _load_svm_ltr_feature(self):
     """
     Fill ``self.h_qid_docno_ltr_feature`` from the file at ``self.ltr_f_in``.

     Each record returned by ``load_svm_feature`` is stored under the key
     ``qid + '\\t' + comment``, where the record's ``'comment'`` field is
     used as the docno. The stored value is the record's ``'feature'``.
     """
     logging.info('loading svm ltr feature [%s]', self.ltr_f_in)
     for record in load_svm_feature(self.ltr_f_in):
         pair_key = record['qid'] + '\t' + record['comment']
         self.h_qid_docno_ltr_feature[pair_key] = record['feature']
     pair_cnt = len(self.h_qid_docno_ltr_feature)
     logging.info('loaded [%d] pairs of pre extracted ltr feature', pair_cnt)
Beispiel #3
0
def load_multiple_svm_and_feature(svm_files_in):
    """
    Load several SVM feature files together with their feature-name maps.

    ``svm_files_in`` is a text file with one entry per line. The text before
    the first tab on each line is taken as the path of an SVM feature file.
    For every such path, the records are read with ``load_svm_feature`` and
    the JSON file at ``<path>_name.json`` is parsed.

    :param svm_files_in: path of the file listing the SVM feature files
    :return: tuple ``(ll_svm_data, l_h_feature_name)``; both lists follow the
        order of the lines in ``svm_files_in``
    """
    # Context managers close every handle, including when parsing fails.
    with open(svm_files_in) as list_file:
        l_names = [
            line.split('\t')[0] for line in list_file.read().splitlines()
        ]

    ll_svm_data = [load_svm_feature(name) for name in l_names]

    l_h_feature_name = []
    for name in l_names:
        with open(name + '_name.json') as name_file:
            l_h_feature_name.append(json.load(name_file))
    return ll_svm_data, l_h_feature_name
Beispiel #4
0
import sys

# Convert an SVM feature file to CSV.
# Arguments: svm feature file, feature-name JSON file, output CSV path.
if 4 != len(sys.argv):
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print("convert svm feature to csv format")
    print("3 para: svm in + feature name in + out")
    sys.exit(-1)

# Close the JSON handle as soon as it is parsed.
with open(sys.argv[2]) as name_file:
    h_feature_name = json.load(name_file)

# Header columns follow the order of the values stored in the name map.
# sorted() replaces items() + list.sort(), which only works on Python 2.
l_feature_name = [
    item[0]
    for item in sorted(h_feature_name.items(), key=lambda item: item[1])
]
head_str = 'qid,docno,label,' + ','.join(l_feature_name)

# The with-block closes the output even if loading or formatting fails.
with open(sys.argv[3], 'w') as out:
    out.write(head_str + '\n')

    l_svm_data = load_svm_feature(sys.argv[1])
    for svm_data in l_svm_data:
        line = svm_data['qid'] + ',' + svm_data[
            'comment'] + ',%d,' % svm_data['score']
        # NOTE(review): row values are ordered by feature key, while the
        # header is ordered by the name-map values. This presumably lines up
        # only when every record carries every feature -- confirm.
        l_feature = sorted(svm_data['feature'].items(),
                           key=lambda item: item[0])
        line += ','.join(['%.4f' % item[1] for item in l_feature])
        out.write(line + '\n')

print("finished")
from knowledge4ir.utils import load_svm_feature, dump_svm_feature
import sys

# Read an SVM feature file with load_svm_feature and write the loaded
# records back out with dump_svm_feature.
# Arguments: input path, output path.
if len(sys.argv) != 3:
    print("svm in + out")
    sys.exit()

svm_in, svm_out = sys.argv[1], sys.argv[2]
dump_svm_feature(load_svm_feature(svm_in), svm_out)