Example #1
import numpy as np
from sklearn import decomposition
import pickle

# project-local module; goto_wiki_dataset() presumably switches into the
# wiki dataset working directory
import config

config.goto_wiki_dataset()

files = ["imfs.bin", "imfs_vfc7.bin"]

n = 500

data1 = pickle.load(open("./work/ddl12_temp/pickle/imfs_fc7.bin"))
data2 = pickle.load(open("./work/ddl12_temp/pickle/imfs_vfc7.bin"))

traindata = np.array(data1['feature'])
testdata = np.array(data2['feature'])

# fit PCA on the first feature set, then project both with the same basis
pca = decomposition.PCA(n_components=n)
pca.fit(traindata)
traindata = pca.transform(traindata)
testdata = pca.transform(testdata)

data1['feature'] = traindata
data2['feature'] = testdata

open("./work/ddl12_temp/pickle/imfs_fc7_d%d.bin" % n,
     'w').write(pickle.dumps(data1))

open("./work/ddl12_temp/pickle/imfs_vfc7_d%d.bin" % n,
     'w').write(pickle.dumps(data2))
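
Example #1 fixes n = 500 up front. To check how much variance such a projection actually retains, scikit-learn's fitted PCA exposes explained_variance_ratio_; a minimal sketch reusing the pca object fitted above (the 0.95 threshold is a hypothetical choice, not from the original code):

# sanity check: cumulative variance retained by the n-dimensional projection
retained = np.cumsum(pca.explained_variance_ratio_)
print("variance retained by %d components: %.4f" % (n, retained[-1]))

# smallest number of components keeping 95% of the variance
# (only valid if retained[-1] >= 0.95)
k95 = int(np.searchsorted(retained, 0.95)) + 1
print("components for 95%% of the variance: %d" % k95)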
Example #2
import os
import pickle

import numpy as np

# config, wiki_dataset, and myrunner are project-level helpers that this
# snippet assumes are available from the surrounding codebase
def run(trainer,
        predictor,
        relat_calc,
        dump_name,
        opt,
        ratio=1.0,
        need_dump=True):
    config.goto_wiki_dataset()

    if opt == 'img':
        traindata = wiki_dataset.traindata
        testdata = wiki_dataset.testdata
    elif opt == 'text':
        traindata = wiki_dataset.traindata2
        testdata = wiki_dataset.testdata2
    elif opt == 'both':
        traindata = np.concatenate(
            (wiki_dataset.traindata2, wiki_dataset.traindata * ratio), axis=1)
        testdata = np.concatenate(
            (wiki_dataset.testdata2, wiki_dataset.testdata * ratio), axis=1)
    else:
        raise ValueError("unknown opt: %s" % opt)

    testlabel = wiki_dataset.testlabel
    trainlabel = wiki_dataset.trainlabel
    groundtruth = wiki_dataset.groundtruth

    svms = []
    ok = 0
    if need_dump and os.path.exists(dump_name):
        print "load model from dump file %s" % dump_name
        ok = 1
        # pickled models are binary; open in 'rb' mode
        svms = pickle.load(open(dump_name, 'rb'))

    global result, dbresult
    result = []
    dbresult = []
    for i in range(10):
        result.append([])
        dbresult.append([])

    # one-vs-rest: train (or reuse) one binary classifier per class,
    # score the test set, and keep the binary train labels for retrieval
    def runner(i):
        print("learn begin %s" % i)

        if ok:
            clf = svms[i][1]
        else:
            clf = trainer(traindata, trainlabel == (i + 1))
            svms.append((i, clf))

        result[i] = predictor(clf, testdata, testlabel == (i + 1))
        #dbresult[i] = predictor(clf, traindata, trainlabel == (i+1))
        dbresult[i] = (trainlabel == (i + 1))

        print("learn done %s" % i)

    print "training"
    myrunner(runner)

    svms = sorted(svms, key=lambda x: x[0])
    if need_dump:
        # pickle output must be written in binary mode
        s = pickle.dumps(svms)
        open(dump_name, 'wb').write(s)

    result = np.array(result)
    dbresult = np.array(dbresult)

    result = np.rollaxis(result, 0, 2)
    dbresult = np.rollaxis(dbresult, 0, 2)

    # add broadcast axes: result -> (n_test, 1, n_class),
    # dbresult -> (1, n_train, n_class)
    result.shape = (result.shape[0], 1) + result.shape[1:]
    dbresult.shape = (1, dbresult.shape[0]) + dbresult.shape[1:]

    print "calc relation %s %s" % (str(result.shape), str(dbresult.shape))
    global prediction
    prediction = relat_calc(result, dbresult)

    print "calc mAP"
    global ap
    ap = []

    def runner2(i):
        # split the queries into config.num_thread contiguous chunks;
        # worker i handles rows [l, r)
        n = prediction.shape[0]
        m = n // config.num_thread + 1
        l = i * m
        r = min(l + m, n)
        global ap

        for q in range(l, r):
            # rank database items by predicted relevance to query q
            answer = sorted(enumerate(prediction[q]),
                            key=lambda d: d[1],
                            reverse=True)
            apsum = float(0)
            rightsum = 0
            for j in range(prediction.shape[1]):
                if groundtruth[q][answer[j][0]] == 1:
                    rightsum += 1
                    apsum += rightsum * 1.0 / (j + 1)
            # average precision for query q
            ap.append(apsum / sum(groundtruth[q]))

    myrunner(runner2)
    MeanAP = sum(ap) / len(ap)
    print 'MAP = ' + str(MeanAP)
    print len(ap)

    # element-wise agreement between prediction and groundtruth; this is
    # only meaningful when relat_calc returns binary values
    groundtruth = np.array(groundtruth)
    count = groundtruth.size
    right = np.sum(groundtruth == prediction)

    print right * 1.0 / count
    return MeanAP
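
myrunner is not defined in this snippet; from both call sites it takes a callback and invokes it once per worker index. A minimal thread-based sketch consistent with that contract (a guess at the project helper, not its actual code); note that the training pass reaches classes 0..9 through the same mechanism, which implies config.num_thread == 10 here:

import threading

def myrunner(fn):
    # hypothetical stand-in for the project's myrunner helper: run
    # fn(0) .. fn(config.num_thread - 1) on parallel threads and wait
    threads = [threading.Thread(target=fn, args=(i, ))
               for i in range(config.num_thread)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

Plain list.append is atomic under CPython's GIL, which is why runner and runner2 can share svms and ap across threads without an explicit lock.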
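
trainer, predictor, and relat_calc are likewise supplied by the caller. For illustration only, one hypothetical wiring uses a linear SVM per class and a dot product over the class axis as the relation; every name below (svm_trainer, score_predictor, dot_relat, the dump path) is an assumption, not the project's actual code:

import numpy as np
from sklearn.svm import LinearSVC

def svm_trainer(data, labels):
    # one binary linear SVM per class (illustrative choice)
    clf = LinearSVC()
    clf.fit(data, labels)
    return clf

def score_predictor(clf, data, labels):
    # real-valued decision scores for the test set; the labels argument
    # is unused here but kept to match run()'s call signature
    return clf.decision_function(data)

def dot_relat(result, dbresult):
    # result: (n_test, 1, n_class), dbresult: (1, n_train, n_class);
    # broadcasting yields an (n_test, n_train) relevance matrix
    return np.sum(result * dbresult, axis=2)

# hypothetical call:
# run(svm_trainer, score_predictor, dot_relat, "./svm_dump.bin", opt='img')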