def load_condition_stacking_main():
    """Rebuild the 5-fold conditional stacking ensemble and load its saved state.

    Loads the term and character embedding matrices, registers the four
    conditional CNN base models with a shared ``ModelConfigure``, calls
    ``load()`` on the stacking wrapper and returns it.

    Returns:
        The ``stacking`` instance after ``load()`` has been called.
    """
    # Both vocabularies are embedded from the same pre-trained vector file.
    # (A word-word variant, 'data/sgns.target.word-word.dynwin5.thr10.neg5.dim300.iter5'
    # dumped to 'data/term_embed_ww.pkl', was tried for the term vocabulary.)
    embedding_file = 'data/sgns.target.word-character.char1-2.dynwin5.thr10.neg5.dim300.iter5'

    train_conf = TrainConfigure()
    term_vocab = data_utils.pickle_load(train_conf.term_dict)
    term_embed_matrix = data_utils.load_embedding(
        term_vocab, embedding_file, dump_path='data/term_embed.pkl')
    char_vocab = data_utils.pickle_load(train_conf.char_dict)
    char_embed_matrix = data_utils.load_embedding(
        char_vocab, embedding_file, dump_path='data/char_embed.pkl')

    # 5-fold setup. The earlier 3-fold setup used
    # 'model/stack_condition_model.pkl' and 'model/stack/'.
    ensemble_path = 'model/stack_condition_model5.pkl'
    weights_dir = 'model/stack5/'
    fold_count = 5

    # NOTE(review): stacking_main_condition() sets conf.PE = True before
    # registering these models; here PE is left at its default — confirm the
    # default matches the '_PE' weights being loaded.
    model_conf = conditionmodelbase.ModelConfigure()
    ensemble = stacking(fold_count, name=ensemble_path, is_condition=True)

    # NOTE(review): registration order differs from stacking_main_condition()
    # (DPCNN is second here, last there) — confirm load() does not depend on it.
    base_models = (
        (ConditionConvModel, 'conditionconvmodel_PE.h5'),
        (ConditionDPCNNModel, 'conditiondpcnnmodel_PE.h5'),
        (ConditionGatedConvModel, 'conditiongatedconvmodel_PE.h5'),
        (ConditionGatedDeepCNNModel, 'conditiongateddeepcnnmodel_PE.h5'),
    )
    for model_cls, weights_file in base_models:
        ensemble.add_model(model_cls, {
            "conf": model_conf,
            "char_embed_matrix": char_embed_matrix,
            "term_embed_matrix": term_embed_matrix,
            "name": weights_dir + weights_file,
        })

    ensemble.load()
    return ensemble
def get_model_conf(self):
    """Return a fresh ``ModelConfigure`` with ``batch_size`` set to 128."""
    conf = conditionmodelbase.ModelConfigure()
    conf.batch_size = 128  # every other setting keeps its default
    return conf
def get_model_conf(self):
    """Return a fresh ``ModelConfigure`` with ``lr`` set to 0.0005."""
    conf = conditionmodelbase.ModelConfigure()
    conf.lr = 0.0005  # every other setting keeps its default
    return conf
def stacking_main_condition():
    """Train, evaluate and save the 5-fold conditional stacking ensemble.

    Steps, in order:
      1. Load the pickled inputs referenced by ``TrainConfigure`` (char file,
         term file, feature file) plus ``data/lda_vec.pkl``.
      2. Fit a ``MinMaxScaler`` on the feature matrix, pickle the fitted scaler
         to ``tn_conf.feat_norm`` and scale the features with it.
      3. Build two index matrices whose rows are ``0..599`` and ``0..299``
         (presumably position indices consumed when ``conf.PE`` is on).
      4. Load the term and character embedding matrices.
      5. Register four conditional CNN base models, fit the ensemble on one
         part of the data, predict on the other, print accuracy and the
         confusion matrix, then save the ensemble.

    Returns:
        None. Results are printed; the scaler and the ensemble are written to
        disk.
    """
    from sklearn.preprocessing import MinMaxScaler

    embedding_file = 'data/sgns.target.word-character.char1-2.dynwin5.thr10.neg5.dim300.iter5'

    print('load data')
    tn_conf = TrainConfigure()
    data_dict = data_utils.pickle_load(tn_conf.char_file)
    y = to_categorical(data_dict['y'])
    x = data_dict['x']
    xterm = data_utils.pickle_load(tn_conf.term_file)
    xfeat = data_utils.pickle_load(tn_conf.feat_file)

    # Normalise the features and persist the fitted scaler for later reuse.
    scaler = MinMaxScaler()
    scaler.fit(xfeat)
    data_utils.pickle_dump(scaler, tn_conf.feat_norm)
    xfeat = scaler.transform(xfeat)

    # One row of 0..k-1 per sample; np.tile avoids materialising
    # n_samples * k Python ints in nested lists first.
    n_samples = y.shape[0]
    xe = np.tile(np.arange(600), (n_samples, 1))
    xe_term = np.tile(np.arange(300), (n_samples, 1))

    xtopic = data_utils.pickle_load('data/lda_vec.pkl')

    print('loading embed ...')
    # (A word-word variant, 'data/sgns.target.word-word.dynwin5.thr10.neg5.dim300.iter5'
    # dumped to 'data/term_embed_ww.pkl', was tried for the term vocabulary.)
    term_vocab_dict = data_utils.pickle_load(tn_conf.term_dict)
    term_embed_matrix = data_utils.load_embedding(
        term_vocab_dict, embedding_file, dump_path='data/term_embed.pkl')
    char_vocab_dict = data_utils.pickle_load(tn_conf.char_dict)
    char_embed_matrix = data_utils.load_embedding(
        char_vocab_dict, embedding_file, dump_path='data/char_embed.pkl')
    print('load embed done.')

    # 5-fold setup. The earlier 3-fold setup used
    # 'model/stack_condition_model.pkl' and 'model/stack/'.
    name = 'model/stack_condition_model5.pkl'
    model_dir = 'model/stack5/'
    n_fold = 5

    stk_model = stacking(n_fold, name=name, is_condition=True)

    conf = conditionmodelbase.ModelConfigure()
    conf.PE = True
    for model_cls, weights_file in (
            (ConditionConvModel, 'conditionconvmodel_PE.h5'),
            (ConditionGatedConvModel, 'conditiongatedconvmodel_PE.h5'),
            (ConditionGatedDeepCNNModel, 'conditiongateddeepcnnmodel_PE.h5'),
    ):
        stk_model.add_model(model_cls, {
            "conf": conf,
            "char_embed_matrix": char_embed_matrix,
            "term_embed_matrix": term_embed_matrix,
            "name": model_dir + weights_file,
        })

    # DPCNN trains with a lower learning rate. It gets its own config object:
    # mutating the shared `conf` would also change the rate seen by the three
    # models registered above if the ensemble reads the config after this point.
    dpcnn_conf = conditionmodelbase.ModelConfigure()
    dpcnn_conf.PE = True
    dpcnn_conf.lr = 0.0005
    stk_model.add_model(ConditionDPCNNModel, {
        "conf": dpcnn_conf,
        "char_embed_matrix": char_embed_matrix,
        "term_embed_matrix": term_embed_matrix,
        "name": model_dir + 'conditiondpcnnmodel_PE.h5',
    })

    # Sample 0.1 for testing:
    # x_tn, y_tn, x_ts, y_ts = training_utils.split([x, xe, xterm, xe_term, xfeat, xtopic], y, split_ratio=0.005, shuffle=False)
    # x_tn, y_tn, x_ts, y_ts = training_utils.split(x_tn, y_tn, shuffle=False)
    x_tn, y_tn, x_ts, y_ts = training_utils.split(
        [x, xe, xterm, xe_term, xfeat, xtopic], y, split_ratio=0.95)

    stk_model.fit(x_tn, y_tn)
    # joblib.dump(stk_model, 'model/stack_model_3.pkl')

    y_pred = stk_model.predict(x_ts)
    # Convert once and reuse for both metrics (assumes convert_y has no side
    # effects — TODO confirm).
    pred_labels = training_utils.convert_y(y_pred)
    true_labels = training_utils.convert_y(y_ts)

    # NOTE(review): predictions are passed first, as in the original code. If
    # these are sklearn's metrics (signature is (y_true, y_pred)), accuracy is
    # unaffected but the confusion matrix is transposed relative to sklearn's
    # convention. Kept as-is so the printed output does not change.
    acc = accuracy_score(pred_labels, true_labels)
    print(acc)
    cnf_matrix = confusion_matrix(pred_labels, true_labels)
    print(cnf_matrix)

    stk_model.save()