df_btest[df_btest[labels] == 1]) print(t, v) # #划分训练测试集 X_train_tra, X_test_tra, df_btest = data_seperate(df_train, df_btest, size=0.3, label=labels, cri=None, undeal_column=None) # # 划分label print(df_train.columns) # 划分label x_train, y_train = seperate_label(X_train_tra, label=labels) x_test, y_test = seperate_label(X_test_tra, label=labels) print("x_train", x_train.shape) #SBS # sbs = SBS(rf, k_features=9,test_size=0.3, random_state=0,scoring=f1_score) # sbs.fit(x.drop(["studentNo", "teacherId"], axis=1),y) # k_feat = [len(k) for k in sbs.subsets_] # import matplotlib.pyplot as plt # plt.plot(k_feat, sbs.scores_, marker='o') # plt.ylim([0.7, 1.1]) # plt.ylabel('Accuracy') # plt.xlabel('Number of features') # plt.grid() # plt.tight_layout() # # plt.savefig('./sbs.png', dpi=300)
# Chronological split of the voice data: rows with an application time in
# [2018-09-01, 2019-01-10) form the training pool, rows in
# [2019-01-10, 2019-01-15) form the back-test set. The time and raw-content
# columns are dropped from both frames.
# NOTE(review): the bounds are compared as strings; this presumably relies on
# "order_apply_time" being a datetime column or ISO-formatted text — confirm.
df_train = df_voice[
    (df_voice["order_apply_time"] >= "2018-09-01")
    & (df_voice["order_apply_time"] < "2019-01-10")
].drop(["order_apply_time", "content"], axis=1)
df_btest = df_voice[
    (df_voice["order_apply_time"] >= "2019-01-10")
    & (df_voice["order_apply_time"] < "2019-01-15")
].drop(["order_apply_time", "content"], axis=1)

# Split the training pool into training and test sets.
X_train_tra, X_test_tra, df_btest = data_seperate(
    df_train,
    df_btest,
    size=0.3,
    label=labels,
    cri=None,
    undeal_column=None,
)

# Separate the label column from the feature columns.
x_trains, y_train = seperate_label(X_train_tra, label=labels)
x_tests, y_test = seperate_label(X_test_tra, label=labels)
x_btests, y_btest = seperate_label(df_btest, label=labels)

# Turn the per-row "sent" values into NumPy arrays for the model.
# NOTE(review): presumably each "sent" entry is an index sequence of length
# ``maxlen`` so the result is a 2-D array — TODO confirm.
x_train = np.array(list(x_trains['sent']))
x_test = np.array(list(x_tests['sent']))
x_btest = np.array(list(x_btests['sent']))
print("x_train", x_train.shape)

# Start building the sequential model with an embedding layer.
# NOTE(review): ``dict`` must be a vocabulary object defined elsewhere in this
# file that shadows the builtin (``len()`` of the builtin type would raise
# TypeError) — confirm, and consider renaming it at its definition.
model = Sequential()
model.add(Embedding(input_dim=len(dict), output_dim=32, input_length=maxlen))
# Alternative embedding configuration (disabled), kept for reference.
# model.add(
#     Embedding(input_dim=128, output_dim=64, input_length=maxlen))