df_btest[df_btest[labels] == 1])
    # Debug output; `t` and `v` are bound above the visible part of this fragment.
    print(t, v)

    # Split the data with the project helper `data_seperate`, which returns
    # three objects: two splits derived from `df_train` plus `df_btest`.
    # NOTE(review): `size=0.3` is presumably the held-out fraction, and the
    # meaning of `cri` / `undeal_column` is not visible here - confirm
    # against the definition of `data_seperate`.
    X_train_tra, X_test_tra, df_btest = data_seperate(df_train,
                                                      df_btest,
                                                      size=0.3,
                                                      label=labels,
                                                      cri=None,
                                                      undeal_column=None)

    # Debug output: show the columns of the (unsplit) training frame.
    print(df_train.columns)

    # Split each part into an (x, y) pair using the `labels` column name(s).
    # NOTE(review): presumably x = features and y = label column - confirm
    # against `seperate_label`.
    x_train, y_train = seperate_label(X_train_tra, label=labels)
    x_test, y_test = seperate_label(X_test_tra, label=labels)
    print("x_train", x_train.shape)

    # Disabled experiment: sequential backward selection (SBS) of features
    # with a plot of score vs. number of features. The snippet refers to
    # `rf`, `x` and `y`, none of which are defined in the visible code.
    # sbs = SBS(rf, k_features=9,test_size=0.3, random_state=0,scoring=f1_score)
    # sbs.fit(x.drop(["studentNo", "teacherId"], axis=1),y)
    # k_feat = [len(k) for k in sbs.subsets_]
    # import matplotlib.pyplot as plt
    # plt.plot(k_feat, sbs.scores_, marker='o')
    # plt.ylim([0.7, 1.1])
    # plt.ylabel('Accuracy')
    # plt.xlabel('Number of features')
    # plt.grid()
    # plt.tight_layout()
    # # plt.savefig('./sbs.png', dpi=300)
# Example #2
0
    # Select rows by `order_apply_time` window, then drop the timestamp and
    # the raw `content` column from both frames:
    #   df_train: "2018-09-01" <= order_apply_time < "2019-01-10"
    #   df_btest: "2019-01-10" <= order_apply_time < "2019-01-15"
    # NOTE(review): the bounds are compared as string literals; whether the
    # column holds datetimes or ISO-formatted strings is not visible here.
    df_train = df_voice[(df_voice["order_apply_time"] >= "2018-09-01") & (
        df_voice["order_apply_time"] < "2019-01-10")].drop(["order_apply_time","content"], axis=1)
    df_btest = df_voice[(df_voice["order_apply_time"] >= "2019-01-10") & (
        df_voice["order_apply_time"] < "2019-01-15")].drop(["order_apply_time","content"], axis=1)


    # Split the data with the project helper `data_seperate`, which returns
    # three objects: two splits derived from `df_train` plus `df_btest`.
    # NOTE(review): `size=0.3` is presumably the held-out fraction - confirm
    # against the definition of `data_seperate`.
    X_train_tra, X_test_tra, df_btest = data_seperate(df_train, df_btest,
                                                      size=0.3,
                                                      label=labels,
                                                      cri=None,
                                                      undeal_column=None
                                                      )

    # Split each frame into an (x, y) pair using the `labels` column name(s).
    x_trains, y_train = seperate_label(X_train_tra, label=labels)
    x_tests, y_test = seperate_label(X_test_tra, label=labels)
    x_btests, y_btest = seperate_label(df_btest, label=labels)

    # Materialize the 'sent' column of each split as a NumPy array.
    # NOTE(review): presumably every 'sent' entry is an equal-length sequence
    # of token ids (length `maxlen`), giving a 2-D array - confirm upstream.
    x_train = np.array(list(x_trains['sent']))
    x_test = np.array(list(x_tests['sent']))
    x_btest = np.array(list(x_btests['sent']))


    print("x_train", x_train.shape)

    # Start of the Keras model: an embedding layer with 32-dim output over
    # inputs of length `maxlen`.
    # NOTE(review): `dict` must be a name bound elsewhere (e.g. a vocabulary
    # mapping) that shadows the builtin; `len()` of the builtin `dict` type
    # would raise TypeError. Consider renaming it where it is defined.
    model = Sequential()
    model.add(Embedding(input_dim=len(dict), output_dim=32, input_length=maxlen))
    # Disabled alternative embedding configuration:
    # model.add(
        # Embedding(input_dim=128, output_dim=64, input_length=maxlen))/