Beispiel #1
0
def load_condition_stacking_main():
    tn_conf = TrainConfigure()
    term_vocab_dict = data_utils.pickle_load(tn_conf.term_dict)
    term_embed_matrix = data_utils.load_embedding(term_vocab_dict,
                                                  'data/sgns.target.word-character.char1-2.dynwin5.thr10.neg5.dim300.iter5',
                                                  dump_path='data/term_embed.pkl')
    # term_embed_matrix = data_utils.load_embedding(term_vocab_dict,
    #                                               'data/sgns.target.word-word.dynwin5.thr10.neg5.dim300.iter5',
    #                                               dump_path='data/term_embed_ww.pkl')
    char_vocab_dict = data_utils.pickle_load(tn_conf.char_dict)
    char_embed_matrix = data_utils.load_embedding(char_vocab_dict,
                                                  'data/sgns.target.word-character.char1-2.dynwin5.thr10.neg5.dim300.iter5',
                                                  dump_path='data/char_embed.pkl')
    name = 'model/stack_condition_model.pkl'
    model_dir = 'model/stack/'
    n_fold = 3
    name = 'model/stack_condition_model5.pkl'
    model_dir = 'model/stack5/'
    n_fold = 5
    conf = conditionmodelbase.ModelConfigure()
    stk_model = stacking(n_fold, name=name, is_condition=True)
    stk_model.add_model(ConditionConvModel, {"conf":conf,"char_embed_matrix": char_embed_matrix,
                                             "term_embed_matrix": term_embed_matrix,
                                             "name": model_dir + 'conditionconvmodel_PE.h5'})
    stk_model.add_model(ConditionDPCNNModel, {"conf":conf,"char_embed_matrix": char_embed_matrix,
                                              "term_embed_matrix": term_embed_matrix,
                                              "name": model_dir + 'conditiondpcnnmodel_PE.h5'})
    stk_model.add_model(ConditionGatedConvModel, {"conf":conf,"char_embed_matrix": char_embed_matrix,
                                                  "term_embed_matrix": term_embed_matrix,
                                                  "name": model_dir + 'conditiongatedconvmodel_PE.h5'})
    stk_model.add_model(ConditionGatedDeepCNNModel, {"conf":conf,"char_embed_matrix": char_embed_matrix,
                                                     "term_embed_matrix": term_embed_matrix,
                                                     "name": model_dir + 'conditiongateddeepcnnmodel_PE.h5'})
    stk_model.load( )
    return stk_model
Beispiel #2
0
 def get_model_conf(self):
     model_conf = conditionmodelbase.ModelConfigure()
     model_conf.batch_size = 128
     return model_conf
Beispiel #3
0
 def get_model_conf(self):
     model_conf = conditionmodelbase.ModelConfigure()
     model_conf.lr = 0.0005
     return model_conf
Beispiel #4
0
def stacking_main_condition():
    print('load data')
    tn_conf = TrainConfigure()
    data_dict = data_utils.pickle_load(tn_conf.char_file)
    y = to_categorical(data_dict['y'])
    x = data_dict['x']
    xterm = data_utils.pickle_load(tn_conf.term_file)
    xfeat = data_utils.pickle_load(tn_conf.feat_file)
    # normalization
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    scaler.fit(xfeat)
    data_utils.pickle_dump(scaler, tn_conf.feat_norm)
    xfeat = scaler.transform(xfeat)
    xe = [[i for i in range(600)] for _ in range(y.shape[0])]
    xe = np.array(xe)
    xe_term = [[i for i in range(300)] for _ in range(y.shape[0])]
    xe_term = np.array(xe_term)
    xtopic = data_utils.pickle_load('data/lda_vec.pkl')

    print('loading embed ...')
    term_vocab_dict = data_utils.pickle_load(tn_conf.term_dict)
    term_embed_matrix = data_utils.load_embedding(term_vocab_dict,
                                                  'data/sgns.target.word-character.char1-2.dynwin5.thr10.neg5.dim300.iter5',
                                                  dump_path='data/term_embed.pkl')
    # term_embed_matrix = data_utils.load_embedding(term_vocab_dict,
    #                                               'data/sgns.target.word-word.dynwin5.thr10.neg5.dim300.iter5',
    #                                               dump_path='data/term_embed_ww.pkl')
    char_vocab_dict = data_utils.pickle_load(tn_conf.char_dict)
    char_embed_matrix = data_utils.load_embedding(char_vocab_dict,
                                                  'data/sgns.target.word-character.char1-2.dynwin5.thr10.neg5.dim300.iter5',
                                                  dump_path='data/char_embed.pkl')
    print('load embed done.')

    name = 'model/stack_condition_model.pkl'
    model_dir = 'model/stack/'
    n_fold = 3
    name = 'model/stack_condition_model5.pkl'
    model_dir = 'model/stack5/'
    n_fold = 5
    stk_model = stacking(n_fold, name=name, is_condition=True)
    conf = conditionmodelbase.ModelConfigure()
    conf.PE = True
    stk_model.add_model(ConditionConvModel, {"conf":conf,"char_embed_matrix":char_embed_matrix,
                            "term_embed_matrix":term_embed_matrix,
                                             "name":model_dir+'conditionconvmodel_PE.h5'})
    stk_model.add_model(ConditionGatedConvModel, {"conf":conf,"char_embed_matrix": char_embed_matrix,
                                          "term_embed_matrix": term_embed_matrix,
                                                  "name": model_dir+'conditiongatedconvmodel_PE.h5'})
    stk_model.add_model(ConditionGatedDeepCNNModel, {"conf":conf,"char_embed_matrix": char_embed_matrix,
                                          "term_embed_matrix": term_embed_matrix,
                                            "name": model_dir+'conditiongateddeepcnnmodel_PE.h5'})
    conf.lr = 0.0005
    stk_model.add_model(ConditionDPCNNModel, {"conf": conf, "char_embed_matrix": char_embed_matrix,
                                              "term_embed_matrix": term_embed_matrix,
                                              "name": model_dir + 'conditiondpcnnmodel_PE.h5'})
    #采样0.1用于测试
    # x_tn, y_tn, x_ts, y_ts = training_utils.split([x, xe, xterm, xe_term, xfeat, xtopic], y, split_ratio=0.005, shuffle=False)
    # x_tn, y_tn, x_ts, y_ts = training_utils.split(x_tn, y_tn, shuffle=False)
    x_tn, y_tn, x_ts, y_ts = training_utils.split([x, xe, xterm, xe_term, xfeat, xtopic],  y, split_ratio=0.95)
    stk_model.fit(x_tn, y_tn)
    # joblib.dump(stk_model, 'model/stack_model_3.pkl')
    y_pred = stk_model.predict(x_ts)
    acc = accuracy_score(training_utils.convert_y(y_pred), training_utils.convert_y(y_ts) )
    print(acc)
    cnf_matrix = confusion_matrix(training_utils.convert_y(y_pred), training_utils.convert_y(y_ts) )
    print(cnf_matrix)
    stk_model.save( )