def testing(prefix, classifier, n_classes=4):
    """Evaluate *classifier* on the test split and dump results to <prefix>.txt.

    The report contains, in order:
      1. the accuracy score as a percentage,
      2. the n_classes x n_classes confusion matrix
         (row = true label, column = predicted label),
      3. one "predict label name" line per test sample.

    prefix     -- output file path without the '.txt' extension
    classifier -- fitted estimator exposing predict() and score()
    n_classes  -- number of label classes (default 4, matching the original
                  hard-coded value)
    """
    feature, label, name = data.get_matrix('test', False)
    predict = classifier.predict(feature)
    # 'with' guarantees the file is closed even if a write or score() raises
    # (the original open()/close() pair leaked the handle on error).
    with open(prefix + '.txt', 'w') as res_file:
        res_file.write(str(classifier.score(feature, label) * 100.0) + '\n')
        # Confusion matrix: count samples whose true label is i and whose
        # prediction is j.  range() replaces the Python-2-only xrange().
        for i in range(n_classes):
            row = (np.logical_and(label == i, predict == j).sum()
                   for j in range(n_classes))
            res_file.write(' '.join(map(str, row)) + '\n')
        res_file.write('predict label name\n')
        for x in zip(predict, label, name):
            res_file.write(' '.join(map(str, x)) + '\n')
def training(model_name):
    """Fit the model registered under *model_name* on the training split.

    Returns the fitted classifier.
    """
    train_features, train_labels, _ = data.get_matrix('train', True)
    estimator = model.get_model(model_name)
    estimator.fit(train_features, train_labels)
    return estimator
def run():
    """Parse the CLI arguments, load the requested matrix, and process it."""
    group, name, index = parse_argv()
    loaded_matrix = data.get_matrix(group, name, index)
    matrix.run_operations_on_matrix(loaded_matrix)
from sklearn import manifold import torch.nn.functional as F import matplotlib.lines as mlines import pandas as pd import umap import sys #sys.path.append('../dataset') #import graph_similarity_matrix as gsm import data as mdata #dpath1='/Users/iqbal/multiview3d/dataset_3D/clusters_dataset/dist_2.csv' dpath1 = '/Users/iqbal/multiview3d/dataset_3D/123_dataset_new/250/data_mat_1_250.csv' #dpath1='/Users/iqbal/multiview3d/dataset_3D/sq_cir_tr_dataset/350/data_mat_sq_350.csv' D1 = mdata.get_matrix(dpath1) #for i in range(1, 30): # for j in range(1 ) pr = 20 ex = 12 lr = 1 filename = "input_squire_perplexity_" + str(pr) #tsne = manifold.TSNE(n_components=3,perplexity=10.0, early_exaggeration=12.0, learning_rate=200.0, n_iter=1000, n_iter_without_progress=300, min_grad_norm=1e-07, metric='euclidean', init='random', verbose=1, random_state=None, method='barnes_hut', angle=0.5) tsne = manifold.TSNE(n_components=2, perplexity=pr, early_exaggeration=ex, learning_rate=lr, n_iter=1000,
plt.savefig('precision_ml100k.png')
plt.show()

# NOTE(review): in the whitespace-mangled source the two plt calls above sit
# immediately before the main guard; presumably they close out an evaluation/
# plotting routine defined earlier in the file -- confirm their indentation.

if __name__ == '__main__':
    # ml100k run (kept for reference, disabled):
    # nb_user=943
    # nb_item=1682
    # top_k = 5
    # train_set_dict, test_set_dict = read_ml100k('dataset/ml-100k/u1.base', 'dataset/ml-100k/u1.test', sep='\t', header=None)
    # train_set, test_set = get_matrix(train_set_dict, test_set_dict, nb_user=nb_user, nb_item=nb_item)
    # train_CFGAN(train_set, nb_item, epoches=300, batch_size=32, nb_zr=128, nb_pm=128, alpha=0.1, test_set_dict=test_set_dict, top_k=top_k)

    # ml1m data -- hyperparameters differ from the ml100k run above.
    nb_user = 6040  # user-axis size passed to get_matrix
    nb_item = 3952  # item-axis size passed to get_matrix and train_CFGAN
    top_k = 5       # ranking cut-off forwarded to train_CFGAN
    train_set_dict, test_set_dict = read_ml1m('dataset/ml-1m/ratings.dat')
    train_set, test_set = get_matrix(train_set_dict, test_set_dict, nb_user=nb_user, nb_item=nb_item)
    train_CFGAN(train_set, nb_item, epoches=2000, batch_size=128, nb_zr=512, nb_pm=512, alpha=1, test_set_dict=test_set_dict, top_k=top_k)
diff = np.sum(np.square(vi - vj)) d = 0 if diff < eps else np.sqrt(diff) cost = cost + np.square(d - W[i][j]) if abs(d - W[i][j]) > eps else cost return cost #dotpath='../dataset/game_of_thrones_consistent.dot' #M, G, nodes_index=gsm.get_similarity_matrix(dotpath) #X=np.zeros((6,2)) #X=np.random.rand(6,2) #D1, D2, D3=data() D1 = mdata.get_matrix('../dataset/dist_1.csv') D2 = mdata.get_matrix('../dataset/dist_2.csv') #D3=mdata.get_matrix('../dataset/dist_2.csv') D3 = np.zeros((len(D1), len(D1))) A = np.random.rand(len(D1) * dim, 1) #B=A.copy() #print(A) pos1 = multiview_autograd(alpha, A, steps, dim, stopping_eps) #pos2=mds_sklearn(alpha,A,steps,dim) pos1 = pos1.reshape(int(len(pos1) / dim), dim) #pos2=pos2.reshape(int(len(ZZ)/dim),dim) fig = plt.figure() ax = plt.axes(projection='3d')
# Updated for python3
# CS 251 Project 6
#
# PCA test function
#

import numpy as np
import data
import analysis
import sys

if __name__ == "__main__":
    # The data file path is the single required CLI argument.
    if len(sys.argv) < 2:
        print('Usage: python %s <data file>' % (sys.argv[0]))
        # sys.exit(1): signal failure to the shell; the bare exit() used
        # before is site-injected and returns status 0.
        sys.exit(1)

    # Renamed from 'data' to avoid shadowing the imported 'data' module.
    dataset = data.Data(sys.argv[1])
    pcadata = analysis.pca(dataset, dataset.get_headers(), False)

    print("\nOriginal Data Headers")
    print(pcadata.get_original_headers())
    print("\nOriginal Data")
    print(dataset.get_matrix(dataset.get_headers()))
    print("\nOriginal Data Means")
    print(pcadata.get_original_means())
    print("\nEigenvalues")
    print(pcadata.get_eigenvalues())
    print("\nEigenvectors")
    print(pcadata.get_eigenvectors())
    print("\nProjected Data")
    print(pcadata.get_matrix(pcadata.get_headers()))
else: d = np.sqrt(diff) #print("diss",d) if abs(d - M[i][j]) > eps: cost = cost + np.square(d - M[i][j]) #dis[j][i]=dis[i][j] #dis = euclidean_distances(X) #cost1=np.square(dis-M) #cost=np.sum(cost1) return cost #dotpath='../dataset/game_of_thrones_consistent.dot' #M, G, nodes_index=gsm.get_similarity_matrix(dotpath) M = mdata.get_matrix('../dataset/dist_2.csv') M = M[0:29, 0:29] print(M.shape) #X=np.zeros((6,2)) #X=np.random.rand(6,2) #M=data() A = np.random.rand(len(M) * dim, 1) #B=A.copy() #print(A) ZZ = mds_autograd(alpha, A, M, steps, dim) Z = mds_sklearn(alpha, A, M, steps, dim) fig = plt.figure()
if len(sys.argv) != 8: print( "uses: multiview_distance_matrix.py nameofdataset datapath1 datapath2 datapath3 learning_rate maxsteps" ) sys.exit() name_data_set = sys.argv[1] dpath1 = sys.argv[2] dpath2 = sys.argv[3] dpath3 = sys.argv[4] alpha = float(sys.argv[5]) steps = int(sys.argv[6]) outputpath = sys.argv[7] D1 = mdata.get_matrix(dpath1) D2 = mdata.get_matrix(dpath2) D3 = mdata.get_matrix(dpath3) #D1=D1[0:10,0:10] #D2=D2[0:10,0:10] #D3=D3[0:10,0:10] P1 = np.random.rand(4, 1) P2 = np.random.rand(4, 1) P3 = np.random.rand(4, 1) print("number of data points", len(D1)) A = np.random.rand(len(D1) * dim, 1) mview = multiview(D1, D2, D3, dim, eps) pos1, costs, P1, P2, P3 = mview.multiview_mds_projection(