# Announce the checkpoint destination only when a training run was requested.
if status == 'train':
    print("Model saved to:", save_model_dir)
# Push any buffered stdout content out immediately.
sys.stdout.flush()

if status == 'train':
    data = Data()
    data.model_name = model_name
    data.HP_gpu = gpu
    # Copy each setting from conf_dict onto the same-named attribute of data,
    # in the original assignment order. The original inline notes listed
    # 1, 100, 0.015 and 0.5 beside HP_batch_size, HP_iteration, HP_lr and
    # HP_lr_decay (presumably typical values -- confirm against the config).
    for conf_key in (
        'use_bichar',
        'HP_batch_size',
        'HP_iteration',
        'HP_lr',
        'HP_lr_decay',
        'HP_hidden_dim',
        'MAX_SENTENCE_LENGTH',
        'HP_lstm_layer',
    ):
        setattr(data, conf_key, conf_dict[conf_key])

    data_initialization(data, gaz_file, train_file, dev_file, test_file)

    # Select the instance generator that matches the configured model name;
    # an unrecognised name aborts the run with exit status 1.
    if data.model_name in ['CNN_model', 'LSTM_model']:
        build_instances = data.generate_instance_with_gaz_2
    elif data.model_name in ['WC-LSTM_model']:
        build_instances = data.generate_instance_with_gaz_3
    else:
        print("model_name is not set!")
        sys.exit(1)

    # Generate instances for the train, dev and test files, in that order.
    for corpus_path, split_name in (
        (train_file, 'train'),
        (dev_file, 'dev'),
        (test_file, 'test'),
    ):
        build_instances(corpus_path, split_name)

    data.build_char_pretrain_emb(char_emb)
print("Gaz file:", gaz_file)
# The checkpoint destination is only reported for training runs.
if status == 'train':
    print("Model saved to:", save_model_dir)
# Flush so the messages above appear before the data-loading steps below run.
sys.stdout.flush()

if status == 'train':
    # Fresh Data object configured from the script-level settings.
    data = Data()
    data.HP_gpu = gpu
    data.HP_lr = float(lr)
    data.HP_use_char = False
    data.HP_batch_size = 1
    # The bigram flag is on exactly when a bichar embedding was supplied.
    data.use_bigram = bichar_emb is not None
    data.gaz_dropout = 0.5
    data.norm_gaz_emb = False
    data.HP_fix_gaz_emb = False
    data.MAX_SENTENCE_LENGTH = maxlen

    data_initialization(
        data, gaz_file, train_file, dev_file, test_file, word_sense_map_file
    )
    data_build_gold(data, train_gold, dev_gold, test_gold)

    # Generate instances for the train, dev and test files, in that order.
    for corpus_path, split_name in (
        (train_file, 'train'),
        (dev_file, 'dev'),
        (test_file, 'test'),
    ):
        data.generate_instance_with_gaz(corpus_path, split_name)

    # Build the pretrained embeddings (word, biword, gaz), then start training.
    data.build_word_pretrain_emb(char_emb)
    data.build_biword_pretrain_emb(bichar_emb)
    data.build_gaz_pretrain_emb(gaz_file)
    train(data, save_model_dir, dataset, seg)
elif status == 'test':
    # Load the saved data settings, then apply the run-specific
    # word-sense map, sentence-length limit and gold data.
    data = load_data_setting(dset_dir)
    data.build_word_sense_map(word_sense_map_file)
    data.MAX_SENTENCE_LENGTH = maxlen
    data_build_gold(data, train_gold, dev_gold, test_gold)