def matrix_factorisation(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    """Factorise ``R`` into ``P`` and ``Q`` with regularised gradient steps.

    Only cells with ``R[i][j] > 0`` are treated as observed. For every
    observed cell the first ``K`` entries of row ``i`` of ``P`` and of
    column ``j`` of the transposed ``Q`` are nudged along the error
    gradient. ``P`` is updated first and the ``Q`` update then uses the
    freshly updated ``P`` values.

    After each step the squared error plus the regularisation term is
    printed. Once ``step > 200`` the step and loss are appended to the
    module-level ``X`` and ``Y`` lists. Once ``step > 4000`` the curve is
    drawn with ``figure.paint1`` and training stops.

    Args:
        R: 2-D ratings container indexed as ``R[i][j]``.
        P: array indexed as ``P[i, :]`` (one row per row of ``R``);
            modified in place.
        Q: array whose transpose is indexed as ``Q.T[:, j]``; for a NumPy
            array the transpose is a view, so ``Q`` is modified in place
            as well.
        K: number of leading latent factors to update.
        steps: maximum number of passes over the observed cells.
        alpha: learning rate.
        beta: regularisation weight.

    Returns:
        The tuple ``(P, Q)`` with ``Q`` transposed back to its input layout.
    """
    # NOTE(review): X and Y must already exist at module level before the
    # run reaches step 201; this function only appends to them.
    global X, Y

    Q = Q.T

    # R is never modified here, so the observed cells are located once
    # instead of rescanning the whole matrix twice per step.
    observed = [(i, j)
                for i in range(len(R))
                for j in range(len(R[i]))
                if R[i][j] > 0]

    # Float literal: under Python 2 an integer beta would otherwise be
    # floor-divided and silently drop the regularisation term from the loss.
    half_beta = beta / 2.0

    for step in range(steps):
        for i, j in observed:
            eij = R[i][j] - numpy.dot(P[i, :], Q[:, j])
            # Same arithmetic as the per-factor loop: P moves first, then Q
            # is updated against the already-updated P values.
            P[i, :K] = P[i, :K] + alpha * (2 * eij * Q[:K, j] - beta * P[i, :K])
            Q[:K, j] = Q[:K, j] + alpha * (2 * eij * P[i, :K] - beta * Q[:K, j])

        e = 0
        t = 0
        for i, j in observed:
            e = e + pow(R[i][j] - numpy.dot(P[i, :], Q[:, j]), 2)
            for k in range(K):
                t = t + half_beta * (pow(P[i][k], 2) + pow(Q[k][j], 2))
        e = e + t

        # Single-argument print keeps the exact Python 2 output text.
        print('zui: %s' % (t,))
        print('loss %s' % (e,))

        if step > 200:
            X.append(step)
            Y.append(e)
        if step > 4000:
            figure.paint1(X, Y)
            break
    return P, Q.T
def start_sample_matrix(ratio, lambdas, lr, table_name, l):
    """Run the plain MF pipeline for one table and return ``(MAE, RMSE)``.

    Steps performed:
      1. open the run log under ``../matrix/<table_name>/``;
      2. load the masked matrix through ``dm.get_Matrix_from_lc_tp``;
      3. draw random ``U``/``V`` factors with ``l`` columns each;
      4. factorise with ``simple_mat.matrix_factorization``;
      5. post-process with ``de.norma_matrix`` and score with
         ``vali.validate``;
      6. save three matrices as text, log timings and parameters, and
         draw the loss curve with ``figure.paint1``.

    Args:
        ratio: passed to the matrix loader and embedded in output names.
        lambdas: forwarded to the factorisation routine.
        lr: forwarded to the factorisation routine.
        table_name: byte string; ``'lc_'``/``'tp_'`` are prefixed to it
            and it is decoded as UTF-8 to build paths.
        l: number of columns of the random ``U`` and ``V`` factors.

    Returns:
        The ``(MAE, RMSE)`` pair produced by ``vali.validate``.
    """
    # NOTE(review): `step`, `ab` and `simlambda` are not parameters of this
    # function; they must be provided at module level or the calls below
    # raise NameError. Confirm against the rest of the file.
    time_format = '%Y-%m-%d-%H-%M-%S'
    start_time = time.strftime(time_format)
    base_dir = "../matrix/" + table_name.decode('utf-8')

    log.start_log(
        start_time,
        base_dir + "/" + "MF矩阵分解结果.txt".decode('utf-8'))
    # The log is closed even when a later stage raises.
    try:
        f = log.write_log()
        lc_table_name = 'lc_' + table_name
        tp_table_name = 'tp_' + table_name

        # --- prediction ---
        # Per the original comment, the loaded matrix has had values
        # removed from it.
        C, original_matrix, changed_zero = dm.get_Matrix_from_lc_tp(
            lc_table_name, tp_table_name, ratio, 1)
        d = C.shape
        U = np.random.rand(d[0], l)
        V = np.random.rand(d[1], l)

        print("开始矩阵分解")
        matrix, X, Y = simple_mat.matrix_factorization(
            C, U, V, lambdas, step, lr)

        # --- validation ---
        print("开始验证")
        matrix0, pre_or_mat, num = de.norma_matrix(matrix, original_matrix)
        MAE, RMSE = vali.validate(matrix, original_matrix, changed_zero)

        t = str(ratio) + "_" + str(num) + "_" + start_time + ".txt"

        # Factorised matrix.
        np.savetxt(
            base_dir + "/matrix_factorization/MF_matrix_factorization_"
            + str(ratio) + ".txt",
            matrix, fmt='%.8f')
        # Original matrix filled in with predictions.
        np.savetxt(base_dir + "/result/MF_pre_" + str(ratio) + ".txt",
                   pre_or_mat, fmt='%.8f')
        # Post-processed factorised matrix (per the original comment,
        # negatives set to 0).
        np.savetxt(base_dir + "/out/MF_" + t.decode('utf-8'),
                   matrix0, fmt='%.8f')

        end_time = time.strftime(time_format)
        # Python 2 print-chevron: these lines go to the log handle.
        print >> f, "开始时间:", start_time
        print >> f, "结束时间:", end_time

        # Plot the gradient-descent progress.
        title = 'lr:{0} alpha:{1} beta:{2} step:{3} lambdas:{4} sim:{5}'
        title = title.format(lr, ab[0], ab[1], step, lambdas, simlambda)
        print >> f, "参数:", title
        figure_path = (base_dir + "/figure/MF_" + str(ratio) + "_"
                       + start_time + ".jpg")
        figure.paint1(X, Y, title, figure_path)
    finally:
        log.close_log()
    return MAE, RMSE
def matrix_factorization(C, U, V, alpha, beta, lamdas, steps, lr, sk):
    """Iterate gradient updates on ``U``/``V`` until the loss stops improving.

    The module-level ``X`` and ``Y`` lists are reset, then filled with the
    step index and the loss after each update. Every 250th step the curve
    is redrawn via ``figure.paint1``. The loop ends when more than
    ``steps`` steps have run or when the loss improves by less than
    ``0.001``.

    NOTE(review): the visible code has no ``return`` statement, so the
    updated ``U`` and ``V`` are not handed back to the caller. Confirm
    whether that is intended.
    """
    global X, Y
    X, Y = [], []

    log_handle = log.write_log()
    previous_loss, def_v, all_sim_u, all_sim_v = getloss(
        C, U, V, alpha, beta, lamdas, sk)

    step = 0
    while step <= steps:
        U, V = gardient(C, U, V, def_v, all_sim_u, all_sim_v,
                        alpha, beta, lamdas, lr)
        current_loss, def_v, all_sim_u, all_sim_v = getloss(
            C, U, V, alpha, beta, lamdas, sk)

        # Recorded so the gradient-descent progress can be plotted.
        X.append(step)
        Y.append(current_loss)
        if step % 250 == 0:
            figure.paint1(X, Y)

        # NOTE(review): the original layout was collapsed onto one line;
        # this log write is assumed to run on every iteration. It logs the
        # loss from before this update. Python 2 print-chevron syntax.
        print >> log_handle, 'loss:', previous_loss

        if previous_loss - current_loss < 0.001:
            break
        previous_loss = current_loss
        step += 1
def getresult(l, table_name, lambdas, ab, simlambda, lr, step, sim_k, ratio):
    """Run the similarity-aware MF pipeline and return ``(MAE, RMSE, loss)``.

    Steps performed:
      1. open the run log under ``../matrix/<table_name>/``;
      2. load the masked matrix through ``dm.get_Matrix_from_lc_tp``;
      3. draw random ``U``/``V`` factors with ``l`` columns each;
      4. factorise with ``de.matrix_factorization``;
      5. post-process with ``de.norma_matrix`` and score with
         ``vali.validate``;
      6. save three matrices as text, log timings and parameters, and
         draw the loss curve with ``figure.paint1``.

    Args:
        l: number of columns of the random ``U`` and ``V`` factors.
        table_name: byte string; ``'lc_'``/``'tp_'`` are prefixed to it
            and it is decoded as UTF-8 to build paths.
        lambdas: forwarded to the factorisation routine.
        ab: indexable pair; ``ab[0]`` is passed as alpha and ``ab[1]`` as
            beta.
        simlambda: forwarded to the factorisation routine.
        lr: forwarded to the factorisation routine.
        step: forwarded to the factorisation routine.
        sim_k: forwarded to the factorisation routine.
        ratio: passed to the matrix loader and embedded in output names.

    Returns:
        ``(MAE, RMSE, loss)``: the first two come from ``vali.validate``,
        the last from ``de.matrix_factorization``.
    """
    alpha = ab[0]
    beta = ab[1]
    time_format = '%Y-%m-%d-%H-%M-%S'
    start_time = time.strftime(time_format)
    base_dir = "../matrix/" + table_name.decode('utf-8')

    log.start_log(
        start_time,
        base_dir + "/" + "矩阵分解结果.txt".decode('utf-8'))
    # The log is closed even when a later stage raises.
    try:
        f = log.write_log()
        lc_table_name = 'lc_' + table_name
        tp_table_name = 'tp_' + table_name

        # --- prediction ---
        # Per the original comment, the loaded matrix has had values
        # removed from it.
        C, original_matrix, changed_zero = dm.get_Matrix_from_lc_tp(
            lc_table_name, tp_table_name, ratio, 1)
        d = C.shape
        U = np.random.rand(d[0], l)
        V = np.random.rand(d[1], l)

        print("开始矩阵分解")
        matrix, X, Y, loss = de.matrix_factorization(
            C, U, V, lambdas, step, alpha, beta, simlambda, lr, sim_k,
            tp_table_name)

        # --- validation ---
        print("开始验证")
        matrix0, pre_or_mat, num = de.norma_matrix(matrix, original_matrix)
        MAE, RMSE = vali.validate(matrix, original_matrix, changed_zero)

        t = str(ratio) + "_" + str(num) + "_" + start_time + ".txt"

        # Factorised matrix.
        np.savetxt(
            base_dir + "/matrix_factorization/matrix_factorization_"
            + str(ratio) + ".txt",
            matrix, fmt='%.8f')
        # Original matrix filled in with predictions.
        np.savetxt(base_dir + "/result/pre_" + str(ratio) + ".txt",
                   pre_or_mat, fmt='%.8f')
        # Post-processed factorised matrix (per the original comment,
        # negatives set to 0).
        np.savetxt(base_dir + "/out/" + t.decode('utf-8'),
                   matrix0, fmt='%.8f')

        end_time = time.strftime(time_format)
        # Python 2 print-chevron: these lines go to the log handle.
        print >> f, "开始时间:", start_time
        print >> f, "结束时间:", end_time

        # Plot the gradient-descent progress.
        title = ('lr:{0} alpha:{1} beta:{2} step:{3} lambdas:{4} sim:{5} '
                 'sim_k:{6}')
        title = title.format(lr, alpha, beta, step, lambdas, simlambda, sim_k)
        print >> f, "参数:", title
        # The figure name uses the end time, unlike the matrix files above.
        figure_path = (base_dir + "/figure/" + str(ratio) + "_"
                       + end_time + ".jpg")
        figure.paint1(X, Y, title, figure_path)
    finally:
        log.close_log()
    return MAE, RMSE, loss
def showgard(title, file_path):
    """Plot the module-level ``X``/``Y`` series via ``figure.paint1``.

    Prints a marker line to stdout first. ``X`` and ``Y`` are only read,
    never rebound, so they are looked up in module scope without a
    ``global`` declaration.
    """
    print("图")
    figure.paint1(X, Y, title, file_path)
def showgard(x, y, title, file_path):
    """Show the gradient-descent progress.

    Prints a marker line to stdout, then forwards ``x``, ``y``, ``title``
    and ``file_path`` unchanged to ``figure.paint1``.
    """
    print("图")
    figure.paint1(x, y, title, file_path)