# Shared dependencies for the ConvMF trainers below. The project-local import
# paths (text_analysis.models, util) follow the original ConvMF repo layout
# and may differ in forks.
import math
import os
import sys
import time

import numpy as np

from text_analysis.models import CNN_module
from util import eval_RMSE


def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if val_eval < pre_val_eval:
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
              % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
                 % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count == endure_count:
            break

        PREV_LOSS = loss

    f1.close()
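# eval_RMSE is imported from the project's util module; as a reading aid, here
# is a minimal sketch of what it presumably computes, assuming index_list[i]
# holds the column ids rated by row i and R_target[i] the matching ratings
# (the name eval_RMSE_sketch and its exact semantics are illustrative only):
def eval_RMSE_sketch(R_target, U, V, index_list):
    squared_error = 0.0
    num_ratings = 0
    for i, idx in enumerate(index_list):
        if len(idx) == 0:
            continue
        pred = U[i].dot(V[idx].T)                  # predictions for observed pairs
        diff = np.asarray(R_target[i]) - pred
        squared_error += np.square(diff).sum()
        num_ratings += len(idx)
    return math.sqrt(squared_error / num_ratings)  # RMSE over all observed ratings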
def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    '''
    Build and train the convolutional matrix factorization (ConvMF) model.

    :param res_dir: directory for result files
    :param train_user: training-set user sparse rating vectors (libSVM format)
    :param train_item: training-set item sparse rating vectors (libSVM format)
    :param valid_user: validation-set user sparse rating vectors (libSVM format)
    :param test_user: test-set user sparse rating vectors (libSVM format)
    :param R: raw rating data, format: user id::item id::rating
    :param CNN_X: item description word sequences
    :param vocab_size: vocabulary size
    :param init_W: if None, word embedding weights are trained from scratch
    :param give_item_weight: if True, weight each item by its number of ratings
    :param max_iter: maximum number of iterations
    :param lambda_u: user-side regularization coefficient
    :param lambda_v: item-side regularization coefficient
    :param dimension: latent-factor dimension
    :param dropout_rate: dropout rate
    :param emb_dim: word embedding dimension
    :param max_len: maximum length of an item's description sequence
    :param num_kernel_per_ws: number of CNN kernels per window size
    :return: None
    '''
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]   # e.g., 6040
    num_item = R.shape[1]   # e.g., 3544
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]   # user rating lists
    Train_R_J = train_item[1]   # item rating lists
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    # init the CNN module
    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    # user latent matrix
    U = np.random.uniform(size=(num_user, dimension))
    # item latent matrix
    V = theta

    endure_count = 5   # stop after 5 iterations without improvement
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        # update the user latent matrix and accumulate its loss term
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        # update the item latent matrix and accumulate its loss term
        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        # get the CNN loss
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        # RMSE evaluation
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # save U, V, theta and CNN weights when validation RMSE improves
        if val_eval < pre_val_eval:
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
              % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
                 % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count == endure_count:
            break

        PREV_LOSS = loss

    f1.close()
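# Both definitions above solve the same ridge-regression subproblems in closed
# form: with a = 1 and b = 0 the user step reduces to
# u_i = (V_i^T V_i + lambda_u I)^{-1} V_i^T R_i, and the np.tile expression is
# just a roundabout way of writing V_i^T R_i. A small self-contained check of
# that equivalence (toy shapes, synthetic values; not part of the trainers):
def _demo_closed_form_user_update():
    rng = np.random.RandomState(0)
    dim, rated = 50, 7
    V_i = rng.rand(rated, dim)               # latent vectors of the rated items
    R_i = rng.rand(rated)                    # the matching ratings
    B_tiled = (V_i * np.tile(R_i, (dim, 1)).T).sum(0)
    assert np.allclose(B_tiled, V_i.T.dot(R_i))
    A = V_i.T.dot(V_i) + 1.0 * np.eye(dim)   # lambda_u = 1.0
    u_i = np.linalg.solve(A, B_tiled)        # closed-form ridge solution
    assert np.allclose(A.dot(u_i), B_tiled)
    return u_i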
# This variant logs metrics to TensorBoard. It additionally relies on
# keras.callbacks.TensorBoard and a project-local Tb_Logger summary writer
# (exact import paths are repo-specific):
# from keras.callbacks import TensorBoard
# from tb_logger import Tb_Logger

def ConvMF(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=False,
           max_iter=50, min_iter=10, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # min_iter: smallest number of iterations before early stopping may trigger
    # (default assumed here; some forks define it as a module-level constant)

    # explicit setting
    a = 1
    b = 0.01
    alpha = 40

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = -1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    # f1 = open(res_dir + '/state.log', 'w')
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')

    # log metrics into tf.summary
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)
    # indicate folder to save, plus other options
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    # save it in your callback list, where you can include other callbacks
    callbacks_list = [tensorboard]
    # then pass to fit as callback, remember to use validation_data also

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    print('Training CNN-MF ...')
    endure_count = 5
    count = 0
    # defaults in case no iteration ever improves on PREV_LOSS
    best_train_rmse = best_test_rmse = best_val_rmse = None

    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            C_i = np.diag(alpha * R_i)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))
        for j in xrange(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) \
                    + lambda_v * item_weight[j] * theta[j]
                V[j] = np.linalg.solve(A, B)

                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * (U_j.dot(V[j])) * R_j)
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                # an item with no ratings falls back to its CNN projection
                V[j] = theta[j]

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed, callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_eval < pre_val_eval):
        if loss > PREV_LOSS:
            # count = 0
            print("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count >= endure_count and iteration > min_iter:
            break
        elif iteration < min_iter:
            count = 0

        PREV_LOSS = loss
        iteration += 1

    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
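# The diagonal C_i = np.diag(alpha * R_i) above plays the role of the
# confidence weights from implicit-feedback ALS (Hu, Koren & Volinsky 2008),
# where confidence grows linearly with the observed signal, c = 1 + alpha * r;
# the baseline ones enter through the `C_i + np.eye(...)` terms. A minimal
# sketch of that scheme as a standalone helper, echoing the commented-out
# get_confidence_matrix call above (helper name and the dense-R assumption are
# illustrative only):
def linear_confidence(R, alpha=40):
    # element-wise confidence for a dense nonnegative rating/count matrix;
    # unobserved entries (r = 0) keep the baseline confidence of 1
    return 1.0 + alpha * np.asarray(R, dtype=float)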
# Python 2 version of the explicit-feedback trainer above.
def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in xrange(max_iter):
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in xrange(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if val_eval < pre_val_eval:
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count == endure_count:
            break

        PREV_LOSS = loss

    f1.close()
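# Small self-contained illustration of the give_item_weight scheme used by the
# trainers above: each item's weight is proportional to sqrt(#ratings),
# rescaled so the weights sum to num_item (toy data, values illustrative):
def _demo_item_weight():
    ratings_per_item = [[5, 4], [3], [4, 4, 2, 5]]   # toy per-item rating lists
    w = np.array([math.sqrt(len(r)) for r in ratings_per_item], dtype=float)
    w = (float(len(ratings_per_item)) / w.sum()) * w
    assert np.isclose(w.sum(), len(ratings_per_item))
    return w   # heavier weight for frequently rated items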
# This variant concatenates an attribute encoder (CAE_module, or a stacked NN
# via Stacking_NN_CNN_CAE; both project-local) with the text CNN.
def Raw_att_CNN_concat(res_dir, state_log_dir, train_user, train_item, valid_user,
                       test_user, R, attributes_X, CNN_X, vocab_size, init_W,
                       max_iter, lambda_u, lambda_v, dimension, use_CAE,
                       dropout_rate=0.2, emb_dim=200, max_len=300,
                       num_kernel_per_ws=100, a=1, b=0.01,
                       give_item_weight=False, min_iter=10):
    # min_iter: smallest number of iterations before early stopping may trigger
    # (default assumed here; some forks define it as a module-level constant)

    # explicit setting
    # a = 1
    # b = 0.01
    alpha = 40
    # confidence_matrix = get_confidence_matrix(R, 'user-dependant', alpha=40)
    num_user = R.shape[0]
    num_item = R.shape[1]
    num_features = attributes_X.shape[1]

    '''prepare paths to store results and logs'''
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')

    '''log metrics using tf.summary'''
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)
    # indicate folder to save, plus other options
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    # save it in your callback list, where you can include other callbacks
    callbacks_list = [tensorboard]
    # then pass to fit as callback, remember to use validation_data also

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    # check if the dataset has a validation set
    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    # assign a weight to each item according to how often it was rated
    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    '''initialize'''
    cnn_output_dim = 150
    att_output_dim = dimension - cnn_output_dim
    cnn_module = CNN_module(cnn_output_dim, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    if use_CAE:
        att_module = CAE_module(att_output_dim, cae_N_hidden=att_output_dim,
                                nb_features=num_features)
    else:
        att_module = Stacking_NN_CNN_CAE(input_dim=num_features,
                                         output_dimesion=att_output_dim,
                                         num_layers=1,
                                         hidden_dim=num_features * 2)

    theta = cnn_module.get_projection_layer(CNN_X)
    gamma = att_module.get_projection_layer(attributes_X)
    delta = np.concatenate((gamma, theta), axis=1)
    if not (theta.shape[1] + gamma.shape[1] == dimension):
        sys.exit("theta and gamma shapes are wrong")

    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = delta

    print('Training CNN-CAE-MF ...')
    pre_val_eval = -1e10
    PREV_LOSS = -1e-50
    endure_count = 5
    count = 0
    # defaults in case no iteration ever improves on PREV_LOSS
    best_train_rmse = best_test_rmse = best_val_rmse = None

    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # Update U
        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            C_i = np.diag(alpha * R_i)
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        # Update V
        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))
        for j in xrange(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * delta[j]
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) \
                    + lambda_v * item_weight[j] * delta[j]
                V[j] = np.linalg.solve(A, B)

                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * ((U_j.dot(V[j])) * R_j))
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                # in case the item has no ratings, fall back to its content projection
                V[j] = delta[j]

        loss = loss + np.sum(sub_loss)

        # Update theta
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V[:, att_output_dim:],
                                   item_weight=item_weight, seed=seed,
                                   callbacks_list=callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        # Update gamma
        history = att_module.train(attributes_X, V[:, :att_output_dim],
                                   item_weight, seed, callbacks_list)
        gamma = att_module.get_projection_layer(attributes_X)
        att_loss = history.history['loss'][-1]

        # Update delta
        delta = np.concatenate((gamma, theta), axis=1)

        loss = loss - 0.5 * lambda_v * (cnn_loss + att_loss) * num_item

        toc = time.time()
        elapsed = toc - tic

        '''calculate RMSE'''
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        '''write tf.summary'''
        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        '''calculate convergence and store the best values of U, V, theta'''
        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_eval < pre_val_eval):
        if loss > PREV_LOSS:
            # count = 0
            print("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            att_module.save_model(res_dir + '/Att_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            np.savetxt(res_dir + '/gamma.dat', gamma)
            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count >= endure_count and iteration > min_iter:
            break
        elif iteration < min_iter:
            count = 0

        PREV_LOSS = loss
        iteration += 1

    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
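# Toy illustration of the column split driving the two content modules above:
# the first att_output_dim columns of V supervise the attribute encoder
# (gamma) and the remaining cnn_output_dim columns supervise the text CNN
# (theta), so delta = [gamma | theta] lines up with V column-for-column
# (shapes below are illustrative only):
def _demo_delta_split(dimension=200, cnn_output_dim=150, num_items=4):
    att_output_dim = dimension - cnn_output_dim
    V_toy = np.arange(num_items * dimension, dtype=float).reshape(num_items, dimension)
    gamma_target = V_toy[:, :att_output_dim]    # target for the attribute module
    theta_target = V_toy[:, att_output_dim:]    # target for the CNN module
    rebuilt = np.concatenate((gamma_target, theta_target), axis=1)
    assert np.array_equal(rebuilt, V_toy)
    return gamma_target.shape, theta_target.shape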