Example #1
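# Sums the preprocessed matrix along one axis, prints distribution statistics,
# saves the indices of rows/columns whose sum is 0 or 1, and plots the sums.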
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)

    # train_data = test_data;
    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    #     print('Loading training data...')
    #     now = time.time()
    #     trdata = np.loadtxt(train_data, dtype=float)
    #     n = len(trdata)
    #     print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))
    #
    #     print('Converting data to matrix...')
    #     tnow = time.time()
    #     u = trdata[:, 0]
    #     s = trdata[:, 1]
    #     u = np.array(u, int)
    #     s = np.array(s, int)
    #     R = np.full(us_shape, NoneValue, float)
    #     R[u, s] = trdata[:, 2]
    #     del trdata, u, s
    #     print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    R = np.loadtxt(origin_data, dtype=float)
    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    Preprocess.preprocess(R)
    R = preprocess(R)
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    if isUserAutoEncoder:
        x_list = np.arange(us_shape[1])
        sum_list = np.sum(R, axis=0)
    else:
        x_list = np.arange(us_shape[0])
        sum_list = np.sum(R, axis=1)

    print(np.median(sum_list), np.mean(sum_list), np.std(sum_list))

    zeros = np.array(np.where(sum_list == 0)[0])
    one = np.array(np.where(sum_list == 1)[0])
    np.savetxt(values_path + '/zero_ind.txt', zeros, '%d')
    np.savetxt(values_path + '/one_ind.txt', one, '%d')
    print(len(zeros))
    print(len(one))
    print(len(np.where(sum_list == 2)[0]))
    print(len(np.where(sum_list == 3)[0]))
    print(len(np.where(sum_list == 4)[0]))
    setFigure(x_list, sum_list, spa)
Example #2
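# Builds a 400-bin histogram of the non-zero values (range 0-20) of the
# preprocessed training matrix and plots it.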
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)

    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    Preprocess.preprocess(R)
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    r_list = np.reshape(R, (-1, ))
    r_list = r_list[np.where(r_list > 0)]
    mean = np.mean(r_list)
    std = np.std(r_list)
    print(mean, std)
    # R = (R-mean)/std;
    delta = mean / std
    step_range = 400
    step = 20.0 / step_range
    boxes = np.zeros((step_range, ), float)
    for u in range(us_shape[0]):
        for s in range(us_shape[1]):
            rt = R[u, s]
            if rt == 0.0: continue
            # clamp so that rt == 20.0 falls into the last bin instead of overflowing
            bid = min(int(rt / step), step_range - 1)
            boxes[bid] += 1

    x_list = np.arange(20, step=step)

    setFigure(x_list, boxes, spa)
Example #3
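# Fills the training matrix with a denoising autoencoder (after MF-based
# preprocessing), then runs neighborhood CF on the filled matrix with an
# exponential distance-based similarity and reports MAE/RMSE.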
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)

    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    ############################
    # Matrix-factorization fill preprocessing
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)

    ############################
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    print(np.sum(R - oriR))
    R /= 20.0
    oriR /= 20.0
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    print('Loading location info...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    global loc_tab
    loc_tab = loadLocation(loc_path)
    print('Location info loaded in %.2fs, %d records\n' %
          ((time.time() - tnow), len(loc_tab)))

    print('Training model...')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                      actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    # R = oriR;
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()
    ############# Restore PR to original scale ###############
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T


    ############# Restore PR to original scale ###############
    print('Model training finished in %.2fs\n' % (time.time() - tnow))

    global W, S
    print('Computing similarity matrix...')
    tnow = time.time()
    oR = R
    R = PR
    if isICF:
        R = R.T
    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float64)
    else:
        for i in range(axis0 - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(i + 1, axis0):
                ws = 0.0
                ws += np.sum((R[i, :] - R[j, :])**2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))

                # origin W[i,j]=W[j,i]=1.0/(ws ** (1.0/p)+1.0);
                # W[i,j]=W[j,i]=1.0/( ((ws/cot) ** (1.0/p))+1.0);

                # W[i,j]=W[j,i]= 1.0/math.exp(((ws) ** (1.0/p))/cot);
        np.savetxt(W_path, W, '%.30f')
    print('Similarity matrix computed in %.2fs\n' % (time.time() - tnow))

    print('Building neighbor lists...')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    print('Neighbor lists built in %.2fs\n' % (time.time() - tnow))

    print('Loading test data...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('Test data loaded in %.2fs, %d records\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    #     print('oR',oR);
    #     print('R',R);
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('Evaluation finished in %.2fs\n' % (time.time() - tnow))

    print('Experiment finished, total time %.2fs, sparseness=%d, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))

    print(W)
Example #4
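# Fills the training matrix with a BP autoencoder and predicts test entries
# directly from the filled matrix PR; also saves the 1000 largest test errors
# for analysis.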
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    ############################
    # Matrix-factorization fill preprocessing
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)

    ############################
    #     Preprocess.preprocessMF_random_replace(R,mf,rat=cmp_rat);
    Preprocess.preprocess(R)
    print(np.sum(R - oriR))
    R /= 20.0
    oriR /= 20.0
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    print('Loading location info...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    global loc_tab
    loc_tab = loadLocation(loc_path)
    print('Location info loaded in %.2fs, %d records\n' %
          ((time.time() - tnow), len(loc_tab)))

    print('Training model...')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.BPAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                 actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_param, repeat, None)
        encoder.saveValues(values_path)
    # R = oriR;
    PR = encoder.calFill(R)
    # R = oriR;
    print(R)
    print()
    print(PR)
    print()
    ############# Restore PR to original scale ###############
    PR = PR * 20.0
    R = R * 20
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T


    ############# Restore PR to original scale ###############
    print('Model training finished in %.2fs\n' % (time.time() - tnow))

    print('Loading test data...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('Test data loaded in %.2fs, %d records\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    ana = np.zeros(us_shape)
    R_ana = np.zeros(us_shape)
    for tc in trdata:
        if tc[2] <= 0:
            continue
        u = int(tc[0])
        s = int(tc[1])
        rt = PR[u, s]
        t = abs(rt - tc[2])
        mae += t
        ana[u, s] = t
        R_ana[u, s] = tc[2]
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)

    list_ana = ana.reshape((-1, ))
    ind = np.argsort(-list_ana)[:1000]
    ana_sorted = list_ana[ind]
    arg_list = [[int(i / us_shape[1]),
                 int(i % us_shape[1])] for i in ind]
    ori_list = [R_ana[i[0], i[1]] for i in arg_list]
    np.savetxt(values_path + '/test_ana_value.txt', np.array(ana_sorted),
               '%.6f')
    np.savetxt(values_path + '/test_ana_ind.txt', np.array(arg_list), '%d')
    np.savetxt(values_path + '/test_ana_ori_value.txt', np.array(ori_list),
               '%.6f')

    print('Evaluation finished in %.2fs\n' % (time.time() - tnow))

    print('Experiment finished, total time %.2fs, sparseness=%d, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))

    print(W)
    print(S)
Example #5
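# Computes smoothed log-probabilities Ps from the axis-wise sums of the
# preprocessed matrix, derives a per-row score Q, prints distribution
# statistics, and plots Ps.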
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)

    # train_data = test_data;
    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    Preprocess.preprocess(R)
    R = preprocess(R)
    N = np.count_nonzero(R)
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    if isUserAutoEncoder:
        x_list = np.arange(us_shape[1])
        sum_list = np.sum(R, axis=0)
    else:
        x_list = np.arange(us_shape[0])
        sum_list = np.sum(R, axis=1)

    Ps = (sum_list + 1) / (N + 2)
    Ps = np.log(Ps)
    qsum = np.sum(R * Ps, axis=1)
    # print(qsum);
    NX = np.count_nonzero(R, axis=1)
    print(NX)
    # Q = -1.0 / np.sqrt(NX) * qsum;
    Q = -1.0 * qsum
    print(Q)
    print(np.sort(Q))
    print(np.median(sum_list), np.mean(sum_list), np.std(sum_list))

    zeros = np.array(np.where(sum_list == 0)[0])
    one = np.array(np.where(sum_list == 1)[0])
    #     np.savetxt(values_path+'/zero_ind.txt',zeros,'%d');
    #     np.savetxt(values_path+'/one_ind.txt',one,'%d');
    print(len(zeros))
    print(len(one))
    print(len(np.where(sum_list == 2)[0]))
    print(len(np.where(sum_list == 3)[0]))
    print(len(np.where(sum_list == 4)[0]))
    setFigure(x_list, Ps, spa)
Example #6
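# Fills the training matrix with a denoising autoencoder, then combines
# user-side (W) and service-side (SW) CF with co-occurrence masks and feature
# weights; user and service predictions are blended with cf_w.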
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)

    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    ############################
    # Matrix-factorization fill preprocessing
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    # Fill missing entries with the MF predictions
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    ############################

    print(np.sum(R - oriR))
    R /= 20.0  # normalize
    oriR /= 20.0
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    print('Loading location info...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    global loc_tab
    loc_tab = loadLocation(loc_path)
    print('Location info loaded in %.2fs, %d records\n' %
          ((time.time() - tnow), len(loc_tab)))

    print('Training model...')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    # R = oriR;
    PR = encoder.calFill(R)
    #     print(R);
    #     print();
    #     print(PR);
    #     print();
    ############# Restore PR to original scale ###############
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T


    ############# Restore PR to original scale ###############
    print('Model training finished in %.2fs\n' % (time.time() - tnow))

    print('Random deletion...')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('Random deletion finished in %.2fs\n' % (time.time() - tnow))

    ###  oriR  original user-service matrix
    ###  R     user-service matrix after MF fill
    ###  PR    predicted user-service matrix after random deletion

    print('Building original-matrix analysis...')
    tnow = time.time()
    ## U-S part
    us_ana = get_oriR_ana(oriR)
    print('us - ana ')
    #     ## S-U part
    #     su_ana = get_oriR_ana(oriR.T);
    print('Original-matrix analysis finished in %.2fs\n' % (time.time() - tnow))

    print('Building feature weight vector...')
    tnow = time.time()
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w_us = np.exp((med - feat_cout) / w_d)
    #     feat_w_us=np.exp(np.log2(med-feat_cout));
    feat_cout = np.count_nonzero(oriR, axis=1)
    med = np.median(feat_cout)
    feat_w_su = np.exp((med - feat_cout) / sw_d)
    print('Feature weight vector built in %.2fs\n' % (time.time() - tnow))

    print('Computing similarity matrix...')
    tnow = time.time()
    mf_R = R

    # U-CF
    R = PR
    bat_size, feat_size = R.shape
    W = np.zeros((bat_size, bat_size))
    show_step = int(bat_size / 100)
    if readWcache and os.path.exists(W_path) and False:  # 'and False' disables the cache read; W is always recomputed
        del W
        W = np.loadtxt(W_path, np.float64)
    else:
        for i in range(bat_size - 1):
            if i % 30 == 0:
                print('----->u-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_us
                ws = np.sum(ws**2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
                # W[i,j]=W[j,i]= 1.0/(1+np.sqrt(ws/feat_size));
        np.savetxt(W_path, W, '%.12f')

    # S-CF
    R = PR.T
    bat_size, feat_size = R.shape
    SW = np.zeros((bat_size, bat_size))
    show_step = int(bat_size / 100)

    if readWcache and os.path.exists(SW_path):
        del SW
        SW = np.loadtxt(SW_path, np.float64)
    else:
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->s-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = get_ana_item(R.shape, a, b)
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_su
                ws = np.sum(ws**2)
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
        np.savetxt(SW_path, SW, '%.12f')

    R = PR
    print('Similarity matrix computed in %.2fs\n' % (time.time() - tnow))

    print('Building neighbor lists...')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:k]
    print('Neighbor lists built in %.2fs\n' % (time.time() - tnow))

    print('Loading test data...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('Test data loaded in %.2fs, %d records\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    #     print('oR',oR);
    #     print('R',R);
    SR = R.T
    for tc in trdata:
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict(int(tc[1]), int(tc[0]), SR, SW, SS)
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('Evaluation finished in %.2fs\n' % (time.time() - tnow))

    print('Experiment finished, total time %.2fs, sparseness=%d, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))
Example #7
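# Converts each row of the preprocessed matrix into the set of its non-zero
# column indices and mines frequent itemsets with apriori (min support 0.02).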
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)

    # train_data = test_data;
    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    #     print('Loading training data...')
    #     now = time.time()
    #     trdata = np.loadtxt(train_data, dtype=float)
    #     n = len(trdata)
    #     print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))
    #
    #     print('Converting data to matrix...')
    #     tnow = time.time()
    #     u = trdata[:, 0]
    #     s = trdata[:, 1]
    #     u = np.array(u, int)
    #     s = np.array(s, int)
    #     R = np.full(us_shape, NoneValue, float)
    #     R[u, s] = trdata[:, 2]
    #     del trdata, u, s
    #     print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    R = np.loadtxt(origin_data, dtype=float)

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    Preprocess.preprocess(R)
    R = preprocess(R)
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    if isUserAutoEncoder:
        x_list = np.arange(us_shape[1])
        sum_list = np.sum(R, axis=0)
    else:
        x_list = np.arange(us_shape[0])
        sum_list = np.sum(R, axis=1)

    print(np.median(sum_list), np.mean(sum_list), np.std(sum_list))

    dataset = []
    if not isUserAutoEncoder:
        R = R.T
    for i in range(len(R)):
        idx = np.where(R[i] > 0)[0]
        dataset.append(idx.tolist())


    #     print(dataset);

    L, sup = apriori(dataset, 0.02)
    print(L[::-1])
    print(sup)

    setFigure(x_list, sum_list, spa)
Example #8
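# Fills the training matrix with a denoising autoencoder, then runs user-side
# CF with a similarity weighted by co-occurrence masks (built inline) and
# per-feature weights.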
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)

    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('Starting experiment, sparseness=%d, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    ############################
    # Matrix-factorization fill preprocessing
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    # Fill missing entries with the MF predictions
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    ############################

    print(np.sum(R - oriR))
    R /= 20.0  # normalize
    oriR /= 20.0
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    print('Loading location info...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    global loc_tab
    loc_tab = loadLocation(loc_path)
    print('Location info loaded in %.2fs, %d records\n' %
          ((time.time() - tnow), len(loc_tab)))
    
    print('Training model...')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    # R = oriR;
    PR = encoder.calFill(R)
    #     print(R);
    #     print();
    #     print(PR);
    #     print();
    ############# Restore PR to original scale ###############
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    ############# Restore PR to original scale ###############
    print('Model training finished in %.2fs\n' % (time.time() - tnow))

    print('Random deletion...')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('Random deletion finished in %.2fs\n' % (time.time() - tnow))

    ###  oriR  original user-service matrix
    ###  R     user-service matrix after MF fill
    ###  PR    predicted user-service matrix after random deletion

    print('Building original-matrix analysis...')
    tnow = time.time()
    b_s, f_s = us_shape
    us_ana = [[] for _ in range(b_s)]
    for i in range(b_s - 1):
        a = oriR[i, :]
        a_not_none = a != NoneValue
        a_is_none = a == NoneValue
        for j in range(i + 1, b_s):
            b = oriR[j, :]
            # masks: both observed, both missing, exactly one observed
            all_have = (b != NoneValue) & a_not_none
            none_have = (b == NoneValue) & a_is_none
            any_have = np.logical_not(all_have | none_have)

            #             all_p = np.exp(-1.0*np.count_nonzero(all_have)/f_s);
            #             non_p = np.exp(-1.0*np.count_nonzero(none_have)/f_s);
            #             any_p = np.exp(-1.0*np.count_nonzero(any_have)/f_s);

            # weights inversely proportional to the fraction of features in each mask
            all_p = 1 / (np.count_nonzero(all_have) / f_s)
            non_p = 1 / (np.count_nonzero(none_have) / f_s)
            any_p = 1 / (np.count_nonzero(any_have) / f_s)

            us_ana[i].append([all_have, none_have, any_have, all_p, non_p, any_p])
            # us_ana[i].append([all_have,none_have,any_have,150.0,30.0,0.001]);
            # print(len(us_ana[i]));
    print('Original-matrix analysis finished in %.2fs\n' % (time.time() - tnow))

    print('Building feature weight vector...')
    tnow = time.time()
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w = np.exp((med - feat_cout) / w_d)
    print('Feature weight vector built in %.2fs\n' % (time.time() - tnow))

    print('Computing similarity matrix...')
    tnow = time.time()
    mf_R = R
    R = PR

    # U-CF
    bat_size, feat_size = R.shape
    W = np.zeros((bat_size, bat_size))
    show_step = int(bat_size / 100)

    if readWcache and os.path.exists(W_path):
        del W
        W = np.loadtxt(W_path, np.float64)
    else:
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]

                log_and = (a != 0) & (b != 0)

                # print([i,j]);
                ####################################
                # weighted difference: each co-occurrence mask contributes with its own weight
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w
                ws = np.sum(ws**2)
                #####################################
                #                 ws=0.0;
                #                 ana_chp= us_ana[i][j-i-1];
                #                 deta = np.subtract(a,b,out=np.zeros_like(a),
                #                                    where=log_and)
                #                 for indexk in range(3):
                #                     tmp = log_and & ana_chp[indexk];
                #                     ws+=np.multiply(deta,ana_chp[indexk+3],out=np.zeros_like(a),where=tmp);
                #                 ws=np.sum(ws**2);
                ####################################

                #                 deta = np.subtract(a,b,out=np.zeros_like(a),
                #                                    where=log_and)
                #                 ws = np.sum(deta**2);

                ###################################

                # RBF-style similarity from the mean squared weighted difference
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
                # W[i,j]=W[j,i]= 1.0/(1+np.sqrt(ws/feat_size));
        np.savetxt(W_path, W, '%.30f')
    print('Similarity matrix computed in %.2fs\n' % (time.time() - tnow))

    print('Building neighbor lists...')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    print('Neighbor lists built in %.2fs\n' % (time.time() - tnow))

    print('Loading test data...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('Test data loaded in %.2fs, %d records\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    #     print('oR',oR);
    #     print('R',R);
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('Evaluation finished in %.2fs\n' % (time.time() - tnow))

    print('Experiment finished, total time %.2fs, sparseness=%d, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))
Example #9
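# Fills the training matrix with a denoising autoencoder, then combines
# user-side and service-side CF with cached similarity matrices; returns
# (mae, rmse).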
def encoder_run(spa):
    
    global last_w_path, tmp_W, tmp_SW
    train_data = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%.1f_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%.1f_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%.1f_case%d' % (spa, case)

    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%.1f_case%d' % (spa, case)

    print('Starting experiment, sparseness=%.1f, case=%d' % (spa, case))
    print('Loading training data...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('Training data loaded in %.2fs, %d records\n' % ((time.time() - now), n))

    print('Converting data to matrix...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data converted to matrix in %.2fs\n' % (time.time() - tnow))

    print('Preprocessing data...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    ############################
    # Matrix-factorization fill preprocessing
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    print(mf_values_path)
    mf.preloadValues(mf_values_path)
    # Fill missing entries with the MF predictions
    cmp_rat = out_cmp_rat(spa)
    print(cmp_rat)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    ############################

    print(np.sum(R - oriR))
    R /= 20.0  # normalize
    oriR /= 20.0
    print('Preprocessing finished in %.2fs\n' % (time.time() - tnow))

    print('Loading location info...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    global loc_tab
    loc_tab = loadLocation(loc_path)
    print('Location info loaded in %.2fs, %d records\n' %
          ((time.time() - tnow), len(loc_tab)))
    
    print('Training model...')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    # R = oriR;
    PR = encoder.calFill(R)
    #     print(R);
    #     print();
    #     print(PR);
    #     print();
    ############# Restore PR to original scale ###############
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    ############# Restore PR to original scale ###############
    print('Model training finished in %.2fs\n' % (time.time() - tnow))

    print('Random deletion...')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('Random deletion finished in %.2fs\n' % (time.time() - tnow))

    ###  oriR  original user-service matrix
    ###  R     user-service matrix after MF fill
    ###  PR    predicted user-service matrix after random deletion

    print('Building original-matrix analysis...')
    tnow = time.time()
    ## U-S part
    us_ana = get_oriR_ana(oriR)
    print('us - ana ')
    #     ## S-U part
    #     su_ana = get_oriR_ana(oriR.T);
    print('Original-matrix analysis finished in %.2fs\n' % (time.time() - tnow))

    print('Building feature weight vector...')
    tnow = time.time()
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w_us = np.exp((med - feat_cout) / w_d)
    #     feat_w_us=np.exp(np.log2(med-feat_cout));
    feat_cout = np.count_nonzero(oriR, axis=1)
    med = np.median(feat_cout)
    feat_w_su = np.exp((med - feat_cout) / sw_d)
    print('Feature weight vector built in %.2fs\n' % (time.time() - tnow))

    print('Computing similarity matrix...')
    tnow = time.time()
    mf_R = R

    if readWcache and (last_w_path != W_path):
        last_w_path = W_path
        tmp_W = np.loadtxt(W_path, np.float64)
        tmp_SW = np.loadtxt(SW_path, np.float64)

    # U-CF
    R = PR
    bat_size, feat_size = R.shape
    W = np.zeros((bat_size, bat_size))
    show_step = int(bat_size / 100)
    if readWcache and os.path.exists(W_path):
        W = tmp_W
    else:
        for i in range(bat_size - 1):
            if i % 60 == 0:
                print('----->u-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_us
                ws = np.sum(ws**2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
                # W[i,j]=W[j,i]= 1.0/(1+np.sqrt(ws/feat_size));
        np.savetxt(W_path, W, '%.12f')

    # S-CF
    R = PR.T
    bat_size, feat_size = R.shape
    SW = np.zeros((bat_size, bat_size))
    show_step = 500

    if readWcache and os.path.exists(SW_path):
        SW = tmp_SW
    else:
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->s-cf step%d' % (i))
            a = R[i, :]
            oria = oriR[:, i]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                orib = oriR[:, j]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = get_ana_item(R.shape, oria, orib)
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_su
                ws = np.sum(ws**2)
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.power(ws / feat_size, 1.0 / 3))
        np.savetxt(SW_path, SW, '%.12f')

    R = PR
    print('Similarity matrix computed in %.2fs\n' % (time.time() - tnow))

    print('Building neighbor lists...')
    tnow = time.time()
    #     k  = get_cf_k(spa);
    #     sk = get_cf_sk(spa);
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:sk]
    print('Neighbor lists built in %.2fs\n' % (time.time() - tnow))

    print('Loading test data...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('Test data loaded in %.2fs, %d records\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    #     print('oR',oR);
    #     print('R',R);
    SR = R.T
    for tc in trdata:
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict(int(tc[1]), int(tc[0]), SR, SW, SS)
        # blend user-side and service-side predictions
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('Evaluation finished in %.2fs\n' % (time.time() - tnow))

    print('Experiment finished, total time %.2fs, sparseness=%.1f, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))
    return mae, rmse