def PHDMF(res_dir, train_user, train_item, valid_user, test_user, R, CNN_X, aSDAE, vocab_size, init_W=None, give_item_weight=True, max_iter=50, lambda_u=1, lambda_v=100, dimension=50, dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    """Train user/item latent factors by alternating least squares coupled
    with two deep modules (Python 2 code: uses `print` statements / `xrange`).

    Each iteration performs:
      1. a closed-form ridge update of every user vector U[i], regularized
         toward the aSDAE middle-layer output `alpha`;
      2. retraining of the aSDAE on (R, aSDAE side info) with U as target,
         then refreshing `alpha`;
      3. a closed-form update of every item vector V[j], regularized toward
         the CNN document projection `theta`;
      4. retraining of the CNN on the documents with V as target, then
         refreshing `theta`.
    Early stopping: training ends after `endure_count` iterations without a
    one-step validation-RMSE improvement; the best-seen factors/weights are
    written under `res_dir`.

    Parameters
    ----------
    res_dir : output directory for logs, saved factors and model weights.
    train_user, train_item, valid_user, test_user :
        pairs (index lists, rating lists) per user / per item
        (element [0] holds the index arrays, element [1] the rating arrays).
    R : sparse user-item rating matrix (only .shape and .toarray() are used).
    CNN_X : tokenized item documents fed to the CNN module.
    aSDAE : sparse user side-information matrix for the aSDAE module.
    vocab_size : CNN vocabulary size.
    init_W : optional pretrained word embeddings for the CNN.
    give_item_weight : if True, per-item loss weights ~ sqrt(#ratings).
    max_iter : maximum number of alternating iterations.
    lambda_u, lambda_v : regularization strengths pulling U toward alpha
        and V toward theta.
    dimension : latent-factor dimensionality.
    dropout_rate, emb_dim, max_len, num_kernel_per_ws : CNN hyperparameters.

    Returns
    -------
    None. Side effects only: files written under `res_dir` and progress
    printed to stdout / appended to `state.log`.
    """
    # explicit setting
    # a / b are the observed / unobserved confidence weights of weighted
    # matrix factorization (a=1, b=0: only observed entries contribute).
    a = 1
    b = 0
    aSDAE_encoder_dimension = 100
    # NOTE(review): hard-coded width of the aSDAE side-information input —
    # presumably must match aSDAE.shape[1]; confirm against the data.
    user_feature = 47
    num_user = R.shape[0]
    num_item = R.shape[1]
    # NOTE(review): tiny initial value makes the first `converge` figure
    # astronomically large and therefore meaningless; it is only a seed for
    # the relative-change formula below.
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    # NOTE(review): f1 leaks if any exception is raised before the final
    # f1.close(); a try/finally (or `with`) would be safer.
    f1 = open(res_dir + '/state.log', 'w')

    # Per-user and per-item rating lists, plus held-out splits.
    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        # Weight each item by sqrt of its rating count, normalized so the
        # weights sum to num_item (popular items get larger CNN loss weight).
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    # theta: CNN projection of the item documents (one row per item).
    theta = cnn_module.get_projection_layer(CNN_X)

    asdae_module = aSDAE_module(aSDAE_encoder_dimension, dimension,
                                num_item, user_feature)
    # alpha: aSDAE middle-layer encoding of each user (one row per user).
    alpha = asdae_module.get_middle_layer(R.toarray(), aSDAE.toarray())

    #np.random.seed(133)
    #U = np.random.uniform(size=(num_user, dimension))
    # Initialize the factors directly from the deep-module outputs.
    U = alpha
    V = theta

    endure_count = 5  # early-stopping patience
    count = 0
    for iteration in xrange(max_iter):
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # Precompute the shared part of every user's normal equations:
        # b * V^T V + lambda_u * I (with b = 0 this is just the ridge term).
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        #sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            # Items rated by user i and the corresponding ratings.
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            # Solve (VV + (a-b) V_i^T V_i) U[i] = a V_i^T R_i + lambda_u alpha[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)
                 ).sum(0) + lambda_u * alpha[i]
            U[i] = np.linalg.solve(A, B)
            #sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])
        #loss = loss + np.sum(sub_loss)

        # Retrain the aSDAE toward the freshly solved U, then refresh alpha.
        asdae_seed = np.random.randint(100000)
        asdae_history = asdae_module.train(R.toarray(), aSDAE.toarray(),
                                           U, asdae_seed)
        alpha = asdae_module.get_middle_layer(R.toarray(), aSDAE.toarray())
        # Fold the aSDAE's final training loss into the joint objective.
        loss = loss - 0.5 * lambda_u * asdae_history.history['loss'][-1]

        sub_loss = np.zeros(num_item)
        # Shared part of every item's normal equations (b = 0 -> zero matrix).
        UU = b * (U.T.dot(U))
        for j in xrange(num_item):
            # Users who rated item j and the corresponding ratings.
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            # Solve (UU + (a-b) U_j^T U_j + lambda_v w_j I) V[j]
            #     = a U_j^T R_j + lambda_v w_j theta[j]
            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            # Accumulate -0.5 * weighted squared rating error, expanded as
            # -0.5||R_j||^2 + U_j V_j . R_j - 0.5 V_j^T (U_j^T U_j) V_j.
            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        # Retrain the CNN toward the freshly solved V, then refresh theta.
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        # Relative change of the objective versus the previous iteration.
        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if (val_eval < pre_val_eval):
            # Validation improved over the PREVIOUS iteration: checkpoint.
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            np.savetxt(res_dir + '/alpha.dat', alpha)
        else:
            count = count + 1
        # NOTE(review): pre_val_eval is updated unconditionally, so the
        # comparison above is against the LAST iteration, not best-so-far;
        # checkpoints can be overwritten by a worse-than-best model and the
        # patience counter never resets. Confirm this matches the intent.
        pre_val_eval = val_eval

        # ('Elpased' typo is part of the runtime string; left untouched.)
        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write(
            "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count == endure_count):
            break

        PREV_LOSS = loss

    f1.close()
# ---------------------------------------------------------------------------
# Smoke test: train aSDAE_module on real pickled rating / side-information
# matrices and dump its three outputs for inspection.
# Fixes vs. original: the two pickle files are now opened via `with` so the
# handles are closed (they were leaked by `pickl.load(open(...))`), the
# unused local `user_feature = 200` is removed (the call sites use
# S.shape[1]), and dead commented-out code is dropped.
# ---------------------------------------------------------------------------
output_dimension_module = 30   # width of the middle (encoding) layer
first_dimension_module = 100   # width of the first hidden layer

# S: user side-information matrix; R: user-item rating matrix (both sparse —
# only .shape and .toarray() are used below).
with open("/home/daicoolb/Desktop/CopeData/test_0717/side.all", "rb") as f_side:
    S = pickl.load(f_side)
with open("/home/daicoolb/Desktop/CopeData/test_0717/ratings.all", "rb") as f_ratings:
    R = pickl.load(f_ratings)

aSDAE_out = aSDAE_module(first_dimension_module, output_dimension_module,
                         R.shape[1], S.shape[1])

# Random latent-factor target for the reconstruction objective.
# NOTE(review): this draw is not seeded, so the run is non-deterministic —
# confirm whether a fixed seed was intended (cf. the synthetic test below).
feature_1 = np.random.uniform(size=(R.shape[0], output_dimension_module))

history = aSDAE_out.train(R.toarray(), S.toarray(), feature_1, 112)
module_output = aSDAE_out.get_middle_layer(R.toarray(), S.toarray())
print("done")

np.savetxt("./test/sdae_output.dat", module_output[0])
np.savetxt("./test/train_user_output.dat", module_output[1])
np.savetxt("./test/feature_output.dat", module_output[2])
#conding=utf-8 from text_analysis.aSDAE import aSDAE_module import numpy as np num_item = 1000 num_user = 100 user_feature = 200 output_dimension_module = 30 first_dimension_module = 100 np.random.seed(112) SDAE = np.random.uniform(size=(num_user, num_item)) np.savetxt('./test/sdae.dat', SDAE) np.random.seed(112) Train_user = np.random.uniform(size=(num_user, user_feature)) np.savetxt('./test/train_user.dat', Train_user) aSDAE_out = aSDAE_module(first_dimension_module, output_dimension_module, num_item, user_feature) feature_1 = np.random.uniform(size=(num_user, output_dimension_module)) np.savetxt('./test/feature_input.dat', feature_1) history = aSDAE_out.train(SDAE, Train_user, feature_1, 112) module_output = aSDAE_out.get_middle_layer(SDAE, Train_user) np.savetxt("./test/sdae_output.dat", module_output[0]) np.savetxt("./test/train_user_output.dat", module_output[1]) np.savetxt("./test/feature_output.dat", module_output[2])