Beispiel #1
0
def plot_contourf_overlap(result=None, title=None, color=None):
    """Draw a two-colour filled contour plot of ``result`` and save it as EPS.

    The matrix is passed through ``normalize_matrix_full`` and plotted over
    the unit square with a colormap that runs from white to ``color``. One
    black contour line is drawn on top, at the second-to-last value of the
    filled contour set's mappable array.

    Args:
        result: 2-D matrix-like accepted by ``csr_matrix``.
        title: File name stem of the saved figure. It is concatenated into
            the output path, so it must be a string.
        color: Matplotlib colour used as the upper end of the colormap.

    Side effects:
        Saves ``<title>.eps`` into the hard-coded output directory and then
        shows the current figure with ``plt.show()``.
    """
    result = normalize_matrix_full(csr_matrix1=csr_matrix(result))

    # Prepare the plotting data. The return type of normalize_matrix_full is
    # not visible here, so the result is converted back to CSR first.
    result = csr_matrix(result)
    n_rows, n_cols = result.shape
    # meshgrid(x, y) yields arrays of shape (len(y), len(x)), so x follows the
    # columns and y the rows; this also works for non-square input.
    x = np.linspace(0, 1, n_cols)
    y = np.linspace(0, 1, n_rows)
    X, Y = np.meshgrid(x, y)
    # .toarray() replaces the sparse ``.A`` shorthand, which newer SciPy
    # releases no longer provide.
    Z = result.toarray()

    # Colormap named 'mylist' with exactly one entry per listed colour.
    colorslist = ['white', color]
    cmap = col.LinearSegmentedColormap.from_list('mylist',
                                                 colorslist,
                                                 N=len(colorslist))

    # Filled contours: the colormap distinguishes regions of different height.
    filled = plt.contourf(X, Y, Z, 1, alpha=1.0, cmap=cmap)
    # Outline a single level, read through the public accessor rather than
    # the private ``_A`` attribute.
    outline_level = filled.get_array()[-2]
    plt.contour(X,
                Y,
                Z, [outline_level],
                linewidths=1.0,
                alpha=1.0,
                colors='black')

    foo_fig = plt.gcf()  # 'get current figure'
    # Raw string: the backslashes are literal path separators, not escapes.
    foo_fig.savefig(r'D:\第二个工作-实验数据\overlap//' + title + '.eps',
                    format='eps')
    plt.show()
Beispiel #2
0
def plot_contourf(result=None, title=None, binNum=10):
    """Draw a filled contour plot of ``result`` with a colour bar and save it.

    The matrix is passed through ``normalize_matrix_full`` and plotted over
    the unit square with ``binNum`` requested contour levels. One black
    contour line is drawn on top, at index ``binNum - 2`` of the filled
    contour set's mappable array.

    Args:
        result: 2-D matrix-like accepted by ``csr_matrix``.
        title: Axes title and file name stem of the saved PNG. It is
            concatenated into the output path, so it must be a string.
        binNum: Number of levels requested from ``plt.contourf``.

    Side effects:
        Saves ``./figures//<title>.png`` at 600 dpi and then shows the
        current figure with ``plt.show()``.
    """
    result = normalize_matrix_full(csr_matrix1=csr_matrix(result))

    # Prepare the plotting data. The return type of normalize_matrix_full is
    # not visible here, so the result is converted back to CSR first.
    result = csr_matrix(result)
    n_rows, n_cols = result.shape
    # meshgrid(x, y) yields arrays of shape (len(y), len(x)), so x follows the
    # columns and y the rows; this also works for non-square input.
    x = np.linspace(0, 1, n_cols)
    y = np.linspace(0, 1, n_rows)
    X, Y = np.meshgrid(x, y)
    # .toarray() replaces the sparse ``.A`` shorthand, which newer SciPy
    # releases no longer provide.
    Z = result.toarray()

    # Colormap named 'mylist', interpolated to 50 entries per listed colour.
    # (Earlier seaborn palettes were always overwritten by this colormap, so
    # they are no longer built.)
    colorslist = [
        'GhostWhite', 'LightGray', 'LightBLue', 'SkyBlue',
        'LightGoldenrodYellow', 'OrangeRed'
    ]
    cmap = col.LinearSegmentedColormap.from_list('mylist',
                                                 colorslist,
                                                 N=len(colorslist) * 50)

    # Filled contours: the colormap distinguishes regions of different height.
    filled = plt.contourf(X, Y, Z, binNum, alpha=1.0, cmap=cmap)
    plt.colorbar()
    # Outline a single level, read through the public accessor rather than
    # the private ``_A`` attribute.
    outline_level = filled.get_array()[binNum - 2]
    plt.contour(X,
                Y,
                Z, [outline_level],
                linewidths=1.0,
                alpha=1.0,
                colors='black')

    # Title the axes that hold the contours. plt.axes() with no arguments
    # adds a new full-window Axes on current Matplotlib, which would be drawn
    # over the plot, so the existing axes are fetched with plt.gca() instead.
    ax = plt.gca()
    ax.set_title(title, fontsize=18, position=(0.5, 1.05))
    foo_fig = plt.gcf()  # 'get current figure'
    foo_fig.savefig('./figures//' + title + '.png', format='png', dpi=600)
    plt.show()
Beispiel #3
0
def _as_dense_array(matrix):
    """Return ``matrix`` as a plain ndarray (stand-in for the sparse ``.A``)."""
    if hasattr(matrix, "toarray"):
        return matrix.toarray()
    return np.asarray(matrix)


def _build_classifier(model_name):
    """Return an unfitted classifier for ``model_name``, or None if unknown."""
    if model_name == "mlp":
        # hidden_layer_sizes=(10, 20): two hidden layers with 10 and 20 units.
        return MLPClassifier(hidden_layer_sizes=(10, 20),
                             activation='relu',
                             solver='adam',
                             max_iter=200,
                             alpha=0.01,
                             batch_size=256,
                             learning_rate='constant',
                             learning_rate_init=0.001,
                             shuffle=False,
                             random_state=2020,
                             early_stopping=True,
                             validation_fraction=0.2,
                             beta_1=0.9,
                             beta_2=0.999,
                             epsilon=1e-08,
                             n_iter_no_change=10)
    if model_name == "svm":
        # probability=True is required for predict_proba to be available on
        # an SVC; without it the probability calls fail.
        return SVC(C=5, probability=True, random_state=42)
    if model_name == "lr":
        # penalty may be 'l1' or 'l2'. The L1 penalty needs a solver that
        # supports it; the default 'lbfgs' solver rejects penalty='l1'.
        return LogisticRegression(C=5,
                                  penalty='l1',
                                  solver='liblinear',
                                  tol=1e-6,
                                  random_state=42)
    if model_name == "lgbm":
        return LGBMClassifier(num_leaves=31,
                              learning_rate=0.1,
                              n_estimators=64,
                              random_state=42,
                              n_jobs=-1)
    if model_name == "xgb":
        return XGBClassifier(max_depth=5,
                             learning_rate=0.1,
                             n_jobs=-1,
                             nthread=-1,
                             gamma=0.06,
                             min_child_weight=5,
                             subsample=1,
                             colsample_bytree=0.9,
                             reg_alpha=0,
                             reg_lambda=0.5,
                             random_state=42)
    if model_name == "ld":
        return LinearDiscriminantAnalysis(solver='lsqr')
    if model_name == "rf":
        return RandomForestClassifier(n_estimators=50,
                                      max_depth=20,
                                      min_samples_split=2,
                                      min_samples_leaf=5,
                                      max_features="log2",
                                      random_state=12)
    return None


def auto_DNN(prex=None,
             graph_name=None,
             emb_method_name1=None,
             emb_method_name2=None,
             model_name=None,
             DNN_binNum=None):
    """Train a classifier on the scores of two methods and evaluate it.

    Loads the two ``*_scores.mat`` files for ``graph_name``, builds a
    two-feature training set (one normalized score per method) from the
    existing training edges and from ``negative_samples``, fits the selected
    model, then saves, rasterizes, plots and evaluates the predicted hybrid
    scores. If either score file is missing, a message is printed and the
    function returns without doing anything else.

    Args:
        prex: Path fragment appended to the hard-coded base directories.
        graph_name: Dataset name used in file names, titles and reports.
        emb_method_name1: Name of the first scoring method.
        emb_method_name2: Name of the second scoring method.
        model_name: One of "mlp", "svm", "lr", "lgbm", "xgb", "ld", "rf".
        DNN_binNum: Bin count forwarded to ``rasterization_grids`` and
            ``save_DNN_raster_scores``.

    Raises:
        ValueError: If both score files exist but ``model_name`` is not one
            of the supported names.
    """
    print('----------------------------------------------------------')
    print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 +
          "," + emb_method_name2)

    # Raw strings: the backslashes are literal path separators, not escapes.
    results_base_dir = r'D:\hybridrec//results//'
    all_file_dir = r'D:\hybridrec\dataset\split_train_test//' + prex
    results_dir = r'D:\hybridrec/results//' + prex
    graph_results_dir = results_dir + graph_name + '//'
    # Observation on facebook_combined: the smaller the ratio, the higher the
    # prediction accuracy on positive and negative samples and the shorter
    # the run time.
    ratio = 1  # negatives = ratio * positives; tune here

    scores_prefix = results_base_dir + prex + graph_name + "//" + graph_name + "_"
    path_scores_method1 = scores_prefix + emb_method_name1 + "_scores.mat"
    path_scores_method2 = scores_prefix + emb_method_name2 + "_scores.mat"

    # Initialize the model; tune the hyper-parameters in _build_classifier.
    model = _build_classifier(model_name)

    if not (os.path.exists(path_scores_method1)
            and os.path.exists(path_scores_method2)):
        print("dataset: " + graph_name + '----' + "baselines:" +
              emb_method_name1 + "," + emb_method_name2 + ': 分数未完全计算')
        return

    if model is None:
        # Fail before the expensive loading instead of hitting an unbound
        # ``model`` at fit time.
        raise ValueError("unsupported model_name: " + repr(model_name))

    # Load the raw scores and normalize them.
    scores_matrix_one = loadmat(path_scores_method1)['scores']
    scores_matrix_two = loadmat(path_scores_method2)['scores']
    if emb_method_name1 not in all_embedding_methods:
        # k=1 keeps the strict upper triangle (diagonal excluded).
        scores_matrix_one = csr_matrix(
            np.triu(_as_dense_array(scores_matrix_one), k=1))
    if emb_method_name2 not in all_embedding_methods:
        scores_matrix_two = csr_matrix(
            np.triu(_as_dense_array(scores_matrix_two), k=1))
    scores_matrix_one_norm = normalize_matrix(
        csr_matrix1=csr_matrix(scores_matrix_one))
    scores_matrix_two_norm = normalize_matrix(
        csr_matrix1=csr_matrix(scores_matrix_two))

    # Build train_binary and test_binary.
    graph_train_path = get_trainset_path(base_dir=all_file_dir,
                                         graph_name=graph_name,
                                         connected_pattern='undirected',
                                         from_zeros_one='0')
    graph_test_path = get_testset_path(base_dir=all_file_dir,
                                       graph_name=graph_name)
    G = read_graph(weighted=0, input=graph_train_path, directed=0)
    # NOTE(review): to_scipy_sparse_matrix no longer exists in NetworkX 3.x;
    # confirm the pinned NetworkX version before upgrading.
    train_binary = csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G))
    train_binary = csr_matrix(np.triu(train_binary.toarray(), k=1))
    test_binary = get_test_matrix_binary(graph_test_path=graph_test_path,
                                         N=train_binary.shape[0])

    del scores_matrix_one, scores_matrix_two
    gc.collect()

    # Scores of the positive samples. train_binary is already strictly upper
    # triangular, so it is used directly as the mask of existing edges.
    positive_mask = train_binary > 0
    exist_scores_one_list = (np.array(scores_matrix_one_norm[positive_mask],
                                      dtype=float))[0]
    exist_scores_two_list = (np.array(scores_matrix_two_norm[positive_mask],
                                      dtype=float))[0]

    # Build the training samples (positives + negatives).
    X_train_1 = (np.array([exist_scores_one_list, exist_scores_two_list])).T
    X_train_0 = negative_samples(
        train_binary=train_binary,
        test_binary=test_binary,
        scores_matrix_one_norm=scores_matrix_one_norm,
        scores_matrix_two_norm=scores_matrix_two_norm,
        ratio=ratio)
    # Constant label vectors: 1 for positives, 0 for negatives.
    Y_train_1 = np.ones(X_train_1.shape[0], dtype=int)
    Y_train_0 = np.zeros(X_train_0.shape[0], dtype=int)
    X_train = np.vstack((np.array(X_train_1), np.array(X_train_0)))
    Y_train = (np.hstack((Y_train_1, Y_train_0))).T

    time_start = time.time()

    # Train the model.
    model.fit(X_train, Y_train)

    # Sanity check on the training data: number of predicted positives among
    # the negative and the positive samples.
    preds_0 = model.predict(X_train_0)
    preds_1 = model.predict(X_train_1)
    print(np.sum(preds_0))
    print(np.sum(preds_1))

    # Predict the hybrid scores and store them.
    scores_matrix_DNN = predicted_scores_DNN(
        model=model,
        train_binary=train_binary,
        test_binary=test_binary,
        scores_matrix_one_norm=scores_matrix_one_norm,
        scores_matrix_two_norm=scores_matrix_two_norm)
    save_DNN_hybrid_scores(scores_matrix_DNN=scores_matrix_DNN,
                           method1=emb_method_name1,
                           method2=emb_method_name2,
                           graph_results_dir=graph_results_dir,
                           dataset_name=graph_name,
                           model_name=model_name)
    scores_matrix_DNN_norm = normalize_matrix(csr_matrix1=scores_matrix_DNN)

    # Compute the rasterization grids of the hybrid model. (A log10 transform
    # was tried here but failed because of -inf values.)
    DNN_raster_grids = rasterization_grids(
        binNum=DNN_binNum,
        train_binary=train_binary,
        scores_matrix_DNN=scores_matrix_DNN_norm,
        scores_matrix_one_norm=scores_matrix_one_norm,
        scores_matrix_two_norm=scores_matrix_two_norm)
    DNN_raster_grids = normalize_matrix_full(
        csr_matrix1=csr_matrix(DNN_raster_grids))
    DNN_raster_grids = better_show_grids(csr_matrix1=DNN_raster_grids)
    save_DNN_raster_scores(rastser_grids=DNN_raster_grids,
                           method1=emb_method_name1,
                           method2=emb_method_name2,
                           graph_results_dir=graph_results_dir,
                           dataset_name=graph_name,
                           model_name=model_name,
                           DNN_binNum=DNN_binNum)
    source = np.float32(_as_dense_array(DNN_raster_grids))
    result = cv2.GaussianBlur(source, (5, 5), 0)
    title = graph_name + '-' + model_name + '-' + emb_method_name1 + '-' + emb_method_name2
    plot_contourf(result=result, title=title, binNum=10)

    # Load and plot the PNR grids when they are available.
    PNR_path = results_base_dir + prex + graph_name + "//" + "PNR1_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat"
    if is_excel_file_exist(PNR_path):
        PNR_matrix = loadmat(PNR_path)["count"]
        PNR_matrix = better_show_grids(csr_matrix1=PNR_matrix)
        source = np.float32(_as_dense_array(PNR_matrix))
        # (5, 5): Gaussian kernel width and height are both 5; sigma is 0.
        result = cv2.GaussianBlur(source, (5, 5), 0)
        title = graph_name + '-PNR-' + emb_method_name1 + '-' + emb_method_name2
        plot_contourf(result=result, title=title, binNum=10)

    # Evaluate the hybrid model on the node pairs without a training edge.
    exist_binary = csr_matrix(np.triu(train_binary.toarray(), k=1))
    nonexist_binary = csr_matrix(
        np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.toarray())
    nonexist_scores_DNN_list = (np.array(
        scores_matrix_DNN[nonexist_binary > 0], dtype=float))[0]
    L_full = int(np.sum(test_binary))
    # Cut-offs at 1/20, 1/10, 1/5, 1/2 and all of the test edges.
    L_array = np.array([L_full // part for part in (20, 10, 5, 2, 1)])
    AP_DNN, AUC_DNN, Precision_DNN, Recall_DNN, F1score_DNN = evaluators(
        train_binary=train_binary,
        test_binary=test_binary,
        scores_list=nonexist_scores_DNN_list,
        L_array=L_array)

    # Write precision, recall, F1 score and AP to the Excel report.
    DNN_write_to_excel(DL_name=model_name,
                       dataset_name=graph_name,
                       method1=emb_method_name1,
                       method2=emb_method_name2,
                       precision_DL=Precision_DNN,
                       recall_DL=Recall_DNN,
                       F1score_DL=F1score_DNN,
                       AP_DL=AP_DNN)

    time_end = time.time()
    print("It takes : " + str((time_end - time_start) / 60.0) + "  mins.")