Esempio n. 1
0
def classify(nnuf, X, Y, train_index, test_index):
    """Fit a LinearSVC on bag-of-words features and score it on a test split.

    Each element ``xi`` of a sample ``X[i]`` is looked up with
    ``nnuf.index(xi, detail=True)``; the indices returned for one sample are
    combined into a single feature vector by ``util.bow(..., D_atoms)``.
    ``D_atoms`` is read from the enclosing module scope.

    Args:
        nnuf: object whose ``index(xi, detail=True)`` returns a 3-tuple
            ``(idx, mag, num_c)``.
        X: indexable collection of samples; every ``X[i]`` is iterable.
        Y: labels, indexed directly with ``train_index`` and ``test_index``.
        train_index: sample indices used to fit the classifier.
        test_index: sample indices used to evaluate the classifier.

    Returns:
        A pair ``(accuracy, num_candidates)``: the ``accuracy_score`` of the
        predictions on the test split, and the sum of every ``num_c``
        reported by ``nnuf.index`` over both splits.
    """
    def encode(sample_ids, running_total):
        # Build one bag-of-words vector per sample and keep accumulating the
        # candidate counter in the same order as the lookups are issued.
        features = []
        for sample_id in sample_ids:
            atom_ids = []
            for descriptor in X[sample_id]:
                atom_id, _, n_cand = nnuf.index(descriptor, detail=True)
                running_total += n_cand
                atom_ids.append(atom_id)
            features.append(util.bow(atom_ids, D_atoms))
        return features, running_total

    train_features, num_candidates = encode(train_index, 0)

    svm = LinearSVC()
    svm.fit(train_features, Y[train_index])

    # The test split is encoded only after the classifier has been fitted.
    test_features, num_candidates = encode(test_index, num_candidates)
    Y_pred = svm.predict(test_features)

    return accuracy_score(Y[test_index], Y_pred), num_candidates
Esempio n. 2
0
storage (e.g. 'mini' or 'half')
output_path
chunk_size (size in bytes, -1 for no chunks)
'''
# All run parameters arrive as one JSON object in the first CLI argument.
args = json.loads(sys.argv[1])
storage = name_to_storage(args['storage'])
# Upper bound on the number of stacked rows of X fed to KMeans further down.
KMeans_tr_size = 200000
X, Y, X_normal = read_dataset(args['tr_folder_path'], args['dtype'])


# NOTE(review): the meaning of the positional arguments (100, 12) is defined
# by pscgen.Pipeline and is not visible here.
pipe = pscgen.Pipeline(100, 12)
pipe.fit(X, Y, args['D_atoms'], args['alpha'], args['beta'], storage)

# Per-sample predictions obtained three different ways:
#   cl1 - pipe.svm.predict on the bag-of-words vector,
#   cl2 - pipe.svm.classes_ indexed by the result of classify(...),
#   cl3 - pipe.classify on the corresponding X_normal entry.
cl1, cl2, cl3 = [], [], []
for i in xrange(len(X)):
    x = util.bow(pipe.nnu.index(X[i]), args['D_atoms'])
    cl1.append(pipe.svm.predict(x)[0])
    # classify is defined outside this fragment; the role of the trailing 13
    # is not visible here.
    cl2.append(pipe.svm.classes_[classify(x, pipe.svm.coef_,
                                 pipe.svm.intercept_, 13)])
    cl3.append(pipe.classify(X_normal[i]))


# Number of samples on which cl1 and cl3 agree. cl2 is collected above but is
# not used in the visible part of the script.
print len([i for i, j in zip(cl1, cl3) if i == j])

# Hard stop: always raises AssertionError, so nothing below runs (unless the
# interpreter is started with -O, which strips assert statements).
assert False

# Stack the entries of X vertically and keep the first KMeans_tr_size rows.
X_Kmeans = np.vstack(X)[:KMeans_tr_size]

# Train D using KMeans
# NOTE(review): init_size suggests KMeans may be a mini-batch variant imported
# under this name - confirm against the file's imports.
D = KMeans(n_clusters=args['D_atoms'], init_size=args['D_atoms']*3)
D.fit(X_Kmeans)
Esempio n. 3
0
# nns: nearest-neighbour experiment, run once for every dictionary size N in Ns.
print 'Nearest Neighbor'
for i, N in enumerate(Ns):
    # Build the dictionary D from the N KMeans cluster centres, pass it
    # through util.normalize, then subtract its mean taken along axis 0.
    D = KMeans(n_clusters=N, init_size=N*3)
    D.fit(X_tr_Kmeans)
    D = D.cluster_centers_
    D = util.normalize(D)
    D_mean = np.mean(D, axis=0)
    D = D - D_mean

    # Bag-of-words feature vectors for the training and the test samples.
    svm_nns_xs_tr, svm_nns_xs_t = [], []
    for x in X_tr:
        # Each sample gets the same preprocessing as D (normalize, then
        # subtract D_mean).
        x = util.normalize(x)
        x = x - D_mean
        # For every column of D . x^T pick the row of D with the largest
        # absolute dot product.
        # NOTE(review): presumably x holds one descriptor per row - confirm.
        nbrs = np.argmax(np.abs(np.dot(D, x.T)), axis=0)
        svm_nns_xs_tr.append(util.bow(nbrs, N))

    # Same encoding for the test samples.
    for x in X_t:
        x = util.normalize(x)
        x = x - D_mean
        nbrs = np.argmax(np.abs(np.dot(D, x.T)), axis=0)
        svm_nns_xs_t.append(util.bow(nbrs, N))

    # NOTE(review): util.predict_chi2 is opaque here; its result is treated as
    # the score for this N - confirm what it returns in util.
    acc = util.predict_chi2(svm_nns_xs_tr, Y_tr, svm_nns_xs_t, Y_t)
    print N, acc
    nns_dists.append(acc)


# Refit KMeans with NNU_N clusters and rebind D to the new cluster centres.
D = KMeans(n_clusters=NNU_N, init_size=NNU_N*3)
D.fit(X_tr_Kmeans)
D = D.cluster_centers_