Example #1
import math
import os
import time

import numpy as np

# CNN_module and eval_RMSE are project-local helpers (not shown here).

def ConvMF(res_dir,
           train_user,
           train_item,
           valid_user,
           test_user,
           R,
           CNN_X,
           vocab_size,
           init_W=None,
           give_item_weight=True,
           max_iter=50,
           lambda_u=1,
           lambda_v=100,
           dimension=50,
           dropout_rate=0.2,
           emb_dim=200,
           max_len=300,
           num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i)) for i in Train_R_J],
                               dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count == endure_count):
            break

        PREV_LOSS = loss

    f1.close()
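
For reference, the two inner loops above implement the alternating closed-form updates of ConvMF (Kim et al., RecSys 2016). Writing C_i = diag(c_i1, ..., c_iJ) with c_ij = a for observed ratings and c_ij = b otherwise, c_j for item_weight[j], and cnn(W, X_j) for the CNN projection theta[j], each np.linalg.solve call computes

\begin{aligned}
u_i &\leftarrow \left(V^\top C_i V + \lambda_U I_K\right)^{-1} V^\top C_i R_i \\
v_j &\leftarrow \left(U^\top C_j U + \lambda_V c_j I_K\right)^{-1} \left(U^\top C_j R_j + \lambda_V c_j \,\mathrm{cnn}(W, X_j)\right)
\end{aligned}

Because a = 1 and b = 0 here, V^T C_i V collapses to b V^T V + (a - b) V_i^T V_i with V_i restricted to the items user i rated, which is exactly how the code assembles A.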
Example #2
import math
import os
import time

import numpy as np

# CNN_module and eval_RMSE are project-local helpers (not shown here).

def ConvMF(res_dir,
           train_user,
           train_item,
           valid_user,
           test_user,
           R,
           CNN_X,
           vocab_size,
           init_W=None,
           give_item_weight=True,
           max_iter=50,
           lambda_u=1,
           lambda_v=100,
           dimension=50,
           dropout_rate=0.2,
           emb_dim=200,
           max_len=300,
           num_kernel_per_ws=100):
    '''
    Build and train the convolutional matrix factorization (ConvMF) model.
    :param res_dir: directory for result files
    :param train_user: training-set user sparse rating vectors (libSVM format)
    :param train_item: training-set item sparse rating vectors (libSVM format)
    :param valid_user: validation-set user sparse rating vectors (libSVM format)
    :param test_user: test-set user sparse rating vectors (libSVM format)
    :param R: raw rating data in the format user id::item id::rating
    :param CNN_X: item description word sequences
    :param vocab_size: vocabulary size
    :param init_W: if None, word embedding weights are trained from scratch
    :param give_item_weight: if True, weight each item by how often it was rated
    :param max_iter: maximum number of iterations
    :param lambda_u: user-side regularization coefficient
    :param lambda_v: item-side regularization coefficient
    :param dimension: latent dimension
    :param dropout_rate: dropout rate
    :param emb_dim: word embedding dimension
    :param max_len: maximum length of an item's text description
    :param num_kernel_per_ws: number of CNN kernels per window size
    :return: None
    '''
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]  #6040
    num_item = R.shape[1]  #3544
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]  #user rating_list
    Train_R_J = train_item[1]  #item rating_list
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i)) for i in Train_R_J],
                               dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10
    ## init CNN model
    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    # user-latent matrix
    U = np.random.uniform(size=(num_user, dimension))
    # item-latent matrix
    V = theta

    endure_count = 5  # stop after 5 consecutive iterations without improvement
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        # update the user latent matrix and accumulate its loss term
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)
        # update the item latent matrix and accumulate its loss term
        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)

        # get cnn loss
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        # get rmse eval
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)
        # save U, V and the CNN weights when validation RMSE improves
        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval
        print(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        if (count == endure_count):
            break
        PREV_LOSS = loss

    f1.close()
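
The docstring describes train_user and train_item as sparse rating vectors; concretely, each is a pair whose first element holds per-row index arrays and whose second holds the matching rating arrays (train_user[0][i] and Train_R_I[i] above). A minimal sketch of that layout, assuming a dense rating matrix; to_rating_lists is illustrative, not part of the original code:

import numpy as np

def to_rating_lists(R):
    """Split a dense rating matrix into (index arrays, rating arrays) per row."""
    idx = [np.where(row > 0)[0] for row in R]      # ids of rated entries per row
    ratings = [row[i] for row, i in zip(R, idx)]   # the matching rating values
    return idx, ratings

R = np.array([[5.0, 0.0, 3.0],
              [0.0, 4.0, 0.0]])
train_user = to_rating_lists(R)    # per-user item indices and ratings
train_item = to_rating_lists(R.T)  # per-item user indices and ratings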
Example #3
import math
import os
import time

import numpy as np
from keras.callbacks import TensorBoard

# CNN_module, eval_RMSE and Tb_Logger are project-local helpers (not shown here).

def ConvMF(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=False,
           max_iter=50, min_iter=5, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # min_iter=5 is an assumed default; the original snippet referenced an
    # undefined global min_iter
    # explicit setting
    a = 1
    b = 0.01
    alpha = 40
    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = -1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')
    # log metrics into tf.summary
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)

    # TensorBoard callback; passed to the CNN module's train call below
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    callbacks_list = [tensorboard]

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]

    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    print('Training CNN-MF ...')

    endure_count = 5
    count = 0
    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    # initialize so the return values are defined even if the loss never improves
    best_train_rmse = best_test_rmse = best_val_rmse = None
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            C_i = np.diag(alpha * R_i)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) + lambda_v * item_weight[j] * theta[j]
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * theta[j]
                V[j] = np.linalg.solve(A, B)

                # sub_loss[j] = -0.5 * np.square(R_j * a).sum()
                # sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * (U_j.dot(V[j])) * R_j)
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                V[j] = theta[j]

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed, callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if loss > PREV_LOSS:
            print("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval

        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count >= endure_count and iteration > min_iter:
            break
        elif (iteration < min_iter):
            count = 0

        PREV_LOSS = loss
        iteration += 1
    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
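
Unlike Examples 1 and 2, this variant weights each observed rating by a confidence term in the spirit of implicit-feedback ALS (Hu, Koren & Volinsky, 2008), using C_i = diag(alpha * R_i) over the user's observed ratings instead of the constant a/b scheme. A standalone sketch of the per-user solve in the first inner loop, with illustrative names:

import numpy as np

def update_user(V, idx_item, R_i, lambda_u, alpha=40):
    """Confidence-weighted update for one user, mirroring the loop above.

    V: (num_item, dim) item factors; idx_item: indices of the items this
    user rated; R_i: the corresponding ratings.
    """
    dim = V.shape[1]
    V_i = V[idx_item]                      # factors of the rated items only
    C_i = np.diag(alpha * R_i)             # per-rating confidence weights
    A = V.T @ V + lambda_u * np.eye(dim) + V_i.T @ C_i @ V_i
    B = V_i.T @ (C_i + np.eye(len(idx_item))) @ R_i
    return np.linalg.solve(A, B)

V = np.random.rand(6, 3)  # 6 items, 3 latent dimensions
u = update_user(V, np.array([0, 2]), np.array([5.0, 3.0]), lambda_u=1.0)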
Example #4
import math
import os
import time

import numpy as np

# CNN_module and eval_RMSE are project-local helpers (not shown here).

def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count == endure_count):
            break

        PREV_LOSS = loss

    f1.close()
Example #5
import math
import os
import sys
import time

import numpy as np
from keras.callbacks import TensorBoard

# CNN_module, CAE_module, Stacking_NN_CNN_CAE, eval_RMSE and Tb_Logger are
# project-local helpers (not shown here).

def Raw_att_CNN_concat(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
                       R, attributes_X, CNN_X, vocab_size, init_W, max_iter, lambda_u, lambda_v,
                       dimension, use_CAE,
                       dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100,
                       a=1, b=0.01, give_item_weight=False, min_iter=5):
    # min_iter=5 is an assumed default; the original snippet referenced an
    # undefined global min_iter
    alpha = 40  # confidence scaling; C = diag(alpha * R) below
    num_user = R.shape[0]
    num_item = R.shape[1]

    num_features = attributes_X.shape[1]

    '''prepare path to store results and log'''
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')

    '''log metrics using tf.summary '''
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)
    # TensorBoard callback; passed to the module train calls below
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    callbacks_list = [tensorboard]

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]

    # check if the dataset has validation set
    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    # assign weights to each item according to the number of time the item was rated
    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    '''initialize'''
    cnn_output_dim = 150
    att_output_dim = dimension - cnn_output_dim
    cnn_module = CNN_module(cnn_output_dim, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    if use_CAE:
        att_module = CAE_module(att_output_dim, cae_N_hidden=att_output_dim,
                                nb_features=num_features)
    else:
        att_module = Stacking_NN_CNN_CAE(input_dim=num_features, output_dimesion=att_output_dim,
                                         num_layers=1, hidden_dim=num_features * 2)

    theta = cnn_module.get_projection_layer(CNN_X)
    gamma = att_module.get_projection_layer(attributes_X)
    delta = np.concatenate((gamma, theta), axis=1)
    if not (theta.shape[1] + gamma.shape[1] == dimension):
        sys.exit("theta and gamma shapes are wrong")
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = delta

    print('Training CNN-CAE-MF ...')
    pre_val_eval = -1e10
    PREV_LOSS = -1e-50
    endure_count = 5
    count = 0
    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    # initialize so the return values are defined even if the loss never improves
    best_train_rmse = best_test_rmse = best_val_rmse = None
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        # Update U
        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            C_i = np.diag(alpha * R_i)
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        # Update V
        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))

        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * delta[j]
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) + lambda_v * item_weight[j] * delta[j]
                V[j] = np.linalg.solve(A, B)

                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * ((U_j.dot(V[j])) * R_j))
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                # in case the item has no ratings
                V[j] = delta[j]
        loss = loss + np.sum(sub_loss)

        # Update theta
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V[:, att_output_dim:], item_weight=item_weight,
                                   seed=seed, callbacks_list=callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        # update gamma
        history = att_module.train(attributes_X, V[:, :att_output_dim], item_weight, seed, callbacks_list)
        gamma = att_module.get_projection_layer(attributes_X)
        att_loss = history.history['loss'][-1]
        # update delta
        delta = np.concatenate((gamma, theta), axis=1)
        loss = loss - 0.5 * lambda_v * (cnn_loss + att_loss) * num_item

        toc = time.time()
        elapsed = toc - tic

        '''calculate RMSE'''
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        ''' write tf.summary'''
        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        '''Calculate converge and stor best values of U,V,theta'''
        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if loss > PREV_LOSS:
            print("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            # the original snippet saved cnn_module twice here; assuming the
            # attribute module exposes the same save_model API
            att_module.save_model(res_dir + '/Att_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            np.savetxt(res_dir + '/gamma.dat', gamma)

            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval

        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))
        if count >= endure_count and iteration > min_iter:
            break
        elif (iteration < min_iter):
            count = 0

        PREV_LOSS = loss
        iteration += 1
    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
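
A detail that is easy to miss in this last example: gamma (the attribute projection) occupies the first att_output_dim columns of V and theta (the CNN projection) the remaining cnn_output_dim columns, so each module is trained against its own column slice of V. A toy check of that convention, with illustrative sizes:

import numpy as np

num_item, dimension, cnn_output_dim = 4, 200, 150
att_output_dim = dimension - cnn_output_dim        # 50, as in the code above

gamma = np.zeros((num_item, att_output_dim))       # attribute projection
theta = np.ones((num_item, cnn_output_dim))        # CNN text projection
delta = np.concatenate((gamma, theta), axis=1)     # item-factor init

# the CNN fits the theta slice; the attribute module fits the gamma slice
assert np.array_equal(delta[:, att_output_dim:], theta)
assert np.array_equal(delta[:, :att_output_dim], gamma)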