def bi_item(m, tuples, k, func, func_w):
    """Item-based bicluster rating prediction.

    For each (movie, user) query pair: find the movie's centroid cluster,
    take the k most similar clusters, and predict the user's rating,
    re-centred by +3 and clamped to the 1..5 rating scale.

    Parameters:
        m: raw rating matrix (currently unused -- kept for interface
           compatibility with callers; TODO confirm it can be dropped).
        tuples: iterable of (movie_id, user_id) pairs; pair[0] is looked up
            in m2M via findCentroid, pair[1] indexes the prediction vector.
        k: number of nearest neighbours passed to model.knn.
        func: similarity function selector forwarded to model.similarityPair.
        func_w: weighting function selector forwarded to model.predict.

    Returns:
        List of predicted ratings, one per query pair, each in [1, 5].
    """
    # Pickle caches are binary: open in 'rb' and use pickle.load directly.
    # (The original opened in text mode 'r' and called pickle.loads on the
    # str it read, which fails under Python 3.)
    # NOTE(review): pickle.load on untrusted files is unsafe -- these are
    # assumed to be locally generated cache files.
    with open('m2M.txt', 'rb') as f1:
        m2M = pickle.load(f1)
    with open('u2Mcluster.txt', 'rb') as f2:
        u2Mcluster = pickle.load(f2)

    # Column-major (Fortran) layout to speed up the column slicing the
    # model helpers perform.
    u2Mcluster = np.asarray(u2Mcluster, order='F')
    simPair = model.similarityPair(u2Mcluster, func)

    res = []
    for pair in tuples:
        c = findCentroid(m2M, pair[0])      # centroid cluster of this movie
        sim = simPair[c]
        temp = model.knn(sim, k)            # k nearest clusters by similarity
        prediction = model.predict(u2Mcluster, temp, sim, func_w)
        # Ratings are stored centred on 0: shift back by +3, clamp to 1..5.
        res.append(min(5, max(1, prediction[pair[1]] + 3)))
    return res
def predict():
    """Flask endpoint: score a comma-separated skill/value query.

    Reads 'skill' (comma-separated column names) and 'data' (comma-separated
    floats) from the JSON request body, aligns the query row to the training
    columns, runs the KNN pipeline, and returns the non-zero prediction
    columns as a JSON object keyed by row index.
    """
    skill_names = [str(name) for name in request.json['skill'].split(',')]
    skill_values = [float(value) for value in request.json['data'].split(',')]

    train_df = data_reader()
    # Align the incoming row to the full training column set; skills the
    # caller did not supply are filled with 0.
    empty_frame = pd.DataFrame(columns=list(train_df.columns))
    query_row = pd.DataFrame([skill_values], columns=skill_names, dtype=float)
    query_df = pd.concat([empty_frame, query_row], axis=0, sort=False)
    query_df.fillna(0, inplace=True)

    X_tr, X_te, X_tr_sc, X_te_sc = preprocess(train_df, query_df)
    fitted_model = knn(X_tr_sc, 5, 0.4)
    results = prediction(fitted_model, X_tr, X_te, X_tr_sc, X_te_sc)
    # Drop columns that are all-zero before serialising the response.
    results = results.loc[:, (results != 0).any(axis=0)]
    return json.dumps(json.loads(results.to_json(orient='index')))
def bi_user(m, tuples, k, func, func_w):
    """User-based bicluster rating prediction (mirror of bi_item).

    For each (movie, user) query pair: find the user's centroid cluster,
    take the k most similar clusters, and predict the rating for the movie,
    re-centred by +3 and clamped to the 1..5 rating scale.

    Parameters:
        m: raw rating matrix (currently unused -- kept for interface
           compatibility with callers; TODO confirm it can be dropped).
        tuples: iterable of (movie_id, user_id) pairs; pair[1] is looked up
            in u2U via findCentroid, pair[0] indexes the prediction vector.
        k: number of nearest neighbours passed to model.knn.
        func: similarity function selector forwarded to model.similarityPair.
        func_w: weighting function selector forwarded to model.predict.

    Returns:
        List of predicted ratings, one per query pair, each in [1, 5].
    """
    # Pickle caches are binary: open in 'rb' and use pickle.load directly
    # (text mode 'r' + pickle.loads(str) fails under Python 3).
    # NOTE(review): pickle.load on untrusted files is unsafe -- these are
    # assumed to be locally generated cache files.
    with open('u2U.txt', 'rb') as f1:
        u2U = pickle.load(f1)
    with open('m2Ucluster.txt', 'rb') as f2:
        m2Ucluster = pickle.load(f2)

    # Work user-major: transpose, then use column-major (Fortran) layout
    # for the column slicing the model helpers perform.
    m2Ucluster = np.asarray(m2Ucluster.transpose(), order='F')
    simPair = model.similarityPair(m2Ucluster, func)

    res = []
    for pair in tuples:
        c = findCentroid(u2U, pair[1])      # centroid cluster of this user
        sim = simPair[c]
        temp = model.knn(sim, k)            # k nearest clusters by similarity
        prediction = model.predict(m2Ucluster, temp, sim, func_w)
        # Ratings are stored centred on 0: shift back by +3, clamp to 1..5.
        res.append(min(5, max(1, prediction[pair[0]] + 3)))
    return res
# --- Script fragment: fit and score several regressors on the prepared data.
# NOTE(review): Q, LM_selected, T_selected, std_dataset, mean_dataset, vm,
# my_plt and model are defined earlier in the file (outside this view);
# targets appear to be standardised, hence the *std + mean rescaling below.
print("""\n# =============================================================================
# linear model
# =============================================================================\n""")
m_lin = model.linear_regression(Q)
m_lin.train(LM_selected)
# Map predictions and error back to the original target scale
# (undo standardisation: multiply by std, add mean of the target column).
y_lin = m_lin.evaluate(T_selected)*std_dataset[-1]+mean_dataset[-1]
e_lin = m_lin.error()*std_dataset[-1]
print('linear error = ',e_lin)
print("""\n# =============================================================================
# knn model
# =============================================================================\n""")
my_knn = model.knn()
# meta_find selects k (presumably by a validation search -- TODO confirm);
# the returned error curve is rescaled to original target units.
k_opt,error_array_knn = my_knn.meta_find(LM_selected,vm.default)
error_array_knn=error_array_knn*std_dataset[-1]
print("k_opt = {}".format(k_opt))
print("error_array = ")
print(error_array_knn)
my_plt.plt_knn_meta(my_knn,error_array_knn)
my_knn.train(LM_selected)
y_knn = my_knn.evaluate(T_selected)*std_dataset[-1]+mean_dataset[-1]
e_knn = my_knn.error()*std_dataset[-1]
print('error = ',e_knn)
print("""\n# =============================================================================
# rbfn model
print(data.shape)
print(label.shape)'''
# --- Script fragment: build flat (batch, centre, neighbour) index vectors
# for KNN grouping over a point-cloud dataset. The leading ''' above closes
# a block comment opened before this view; those prints are not executed.
B = 9840   # number of clouds kept from the training split
N = 1024   # points kept per cloud
K = 20     # neighbours gathered per point
data, _ = load_data('train')
print('data type:', type(data))
print('data shape:', data.shape)
data = data[:B, :N, :]
print('data shape:', data.shape) #(10,1024,3)
data = torch.from_numpy(data)
# Reshape to (B, C, N) -- channels-middle layout; assumes knn expects
# coordinates along dim 1 -- TODO confirm against knn's signature.
data = data.view(B, -1, N)
ne_idx = knn(data, K)
print('ne_idx shape:', ne_idx.shape)
#print('ne_idx:\n', ne_idx)
# Flatten neighbour indices so they pair element-wise with the centre and
# batch index vectors built below.
ne_idx = ne_idx.reshape(B * N * K)
print('ne_idx:', ne_idx)
#device = torch.device('cuda')
#center_idx = torch.arange(0, N, device=device).view(1, N, 1).repeat(B, 1, K).reshape(B*N*K)
# Each centre point index repeated K times, tiled across the whole batch.
center_idx = torch.arange(0, N).view(1, N, 1).repeat(B, 1, K).reshape(B * N * K)
print('center_idx shape:', center_idx.shape)
print('center_idx:\n', center_idx)
# Matching batch index for every (centre, neighbour) pair.
batch_idx = torch.arange(0, B).view(B, 1, 1).repeat(1, N, K).reshape(B * N * K)
print('batch_idx shape:', batch_idx.shape)
# --- Sanity checks on the dense rating matrix (Python 2 print statements).
M = m.toarray()
# User row 4321: total rating count, how many 1s/3s/5s, and the mean of the
# non-zero (i.e. actually rated) entries.
u4321 = M[4321]
print u4321.nonzero()[0].size
print np.where(u4321 == 1)[0].shape
print np.where(u4321 == 3)[0].shape
print np.where(u4321 == 5)[0].shape
print np.average(u4321[u4321.nonzero()[0].tolist()])
# Movie column 3: the same statistics along the item axis.
m3 = M[:, 3]
print m3.nonzero()[0].size
print np.where(m3 == 1)[0].shape
print np.where(m3 == 3)[0].shape
print np.where(m3 == 5)[0].shape
print np.average(m3[m3.nonzero()[0].tolist()])
# Reload the training matrix and spot-check k-NN under both similarity
# measures, for user 4321 (rows) and movie 3 (columns via transpose).
M = readHelper.readTrain('train.csv')
sim = model.similarity(M, 4321, 'dotp')
print model.knn(sim, 5)
sim = model.similarity(M, 4321, 'cosine')
print model.knn(sim, 5)
sim = model.similarity(M.transpose(), 3, 'dotp')
print model.knn(sim, 5)
sim = model.similarity(M.transpose(), 3, 'cosine')
print model.knn(sim, 5)
# --- Train and compare a suite of classifiers on the same train/test split.
# NOTE(review): 'xtext' is presumably a typo for 'xtest', but it is used
# consistently and must be defined earlier in the file -- confirm upstream
# before renaming.
# SVM
print('SVM classifier')
svmmodel = m.svm((xtrain, ytrain), (xtext, ytest))
# Decision Tree
print('Decision Tree')
dtmodel = m.decisionTree((xtrain, ytrain), (xtext, ytest))
# Random Tree
print('Random Tree')
rtmodel = m.randomTree((xtrain, ytrain), (xtext, ytest))
# Logistic Classifier
print('Logistic classifier')
logistimodel = m.logisticClassifer((xtrain, ytrain), (xtext, ytest))
# KNN
print('KNN')
knn = m.knn((xtrain, ytrain), (xtext, ytest))
# AdaBoost
print('Adaboosting')
adaboosting = m.adaboosting((xtrain, ytrain), (xtext, ytest))
# Voting ensemble over all fitted models above, as (label, estimator) pairs.
print('Voting')
classifiers = [('svm', svmmodel), ('dt', dtmodel), ('rt', rtmodel),
               ('lt', logistimodel), ('knn', knn), ('addboosting', adaboosting)]
votingmodel = m.voting((xtrain, ytrain), (xtext, ytest), classifiers)
# --- Sanity checks on the dense rating matrix (Python 2 print statements).
# NOTE(review): near-duplicate of an earlier fragment in this file; consider
# deduplicating at the project level.
M = m.toarray()
# User row 4321: total rating count, how many 1s/3s/5s, and the mean of the
# non-zero (i.e. actually rated) entries.
u4321 = M[4321]
print u4321.nonzero()[0].size
print np.where(u4321 == 1)[0].shape
print np.where(u4321 == 3)[0].shape
print np.where(u4321 == 5)[0].shape
print np.average(u4321[u4321.nonzero()[0].tolist()])
# Movie column 3: the same statistics along the item axis.
m3 = M[:,3]
print m3.nonzero()[0].size
print np.where(m3 == 1)[0].shape
print np.where(m3 == 3)[0].shape
print np.where(m3 == 5)[0].shape
print np.average(m3[m3.nonzero()[0].tolist()])
# Reload the training matrix and spot-check k-NN under both similarity
# measures, for user 4321 (rows) and movie 3 (columns via transpose).
M = readHelper.readTrain('train.csv')
sim = model.similarity(M, 4321, 'dotp')
print model.knn(sim,5)
sim = model.similarity(M, 4321, 'cosine')
print model.knn(sim,5)
sim = model.similarity(M.transpose(), 3, 'dotp')
print model.knn(sim,5)
sim = model.similarity(M.transpose(), 3, 'cosine')
print model.knn(sim,5)