# NOTE(review): this physical line is a whitespace-collapsed paste of many
# statements (the 'train' branch of an NER experiment driver). It is NOT valid
# Python as written, and it cannot be reflowed in place: it ends with an
# `elif status == 'test':` clause whose matching `if status == 'train':`
# header lies outside this chunk, and the original indentation (which decides
# what belongs to each branch) is lost. TODO: recover the original file
# layout from version control rather than re-indenting by guess.
#
# The statement sequence appears to be (order preserved from the paste):
#   print('Char Embedding: ', char_emb_file)
#   name = 'BaseLSTM'                      # tagged '# catnlp' by the author
#   config = Config()                      # hyperparameters: 2 layers, Adam,
#   config.layers = 2 ... config.lr = 0.015, hidden_dim = 200, CNN char
#   features, bidirectional, number normalization
#   data_initialization(config, train_file, dev_file, test_file)
#   config.gpu = gpu; config.word_features = name
#   config.generate_instance(<file>, <split>) for train / dev / test
#   if emb_file:            load pretrained word embeddings
#                           (note: `build_word_pretain_emb` — 'pretain' typo
#                           is presumably in the Config API itself; verify)
#   if char_emb_file != 'none':  load pretrained char embeddings
#   name = 'joint-disease'
#   train(config, name, dset_dir, save_model_dir, seg, ignore=True)
# elif status == 'test':
#   data = load_data_setting(dset_dir)
#   data.generate_instance(dev_file, 'dev')
#   load_model_decode(model_dir, data, 'dev', gpu, seg)
#
# Beware: in the collapsed form below, the first '#' (at '# catnlp') turns the
# entire remainder of the line into a comment, silently hiding the code after
# it.
print('Char Embedding: ', char_emb_file) name = 'BaseLSTM' # catnlp config = Config() config.layers = 2 config.optim = 'Adam' config.char_features = 'CNN' config.lr = 0.015 config.hidden_dim = 200 config.bid_flag = True config.number_normalized = True data_initialization(config, train_file, dev_file, test_file) config.gpu = gpu config.word_features = name print('Word features: ', config.word_features) config.generate_instance(train_file, 'train') config.generate_instance(dev_file, 'dev') config.generate_instance(test_file, 'test') if emb_file: print('load word emb file...norm: ', config.norm_word_emb) config.build_word_pretain_emb(emb_file) if char_emb_file != 'none': print('load char emb file...norm: ', config.norm_char_emb) config.build_char_pretrain_emb(char_emb_file) name = 'joint-disease' # catnlp train(config, name, dset_dir, save_model_dir, seg, ignore=True) elif status == 'test': data = load_data_setting(dset_dir) data.generate_instance(dev_file, 'dev') load_model_decode(model_dir, data, 'dev', gpu, seg)
# NOTE(review): like the line above, this is a whitespace-collapsed paste —
# here, the multi-corpus ('MultiMetaLSTM') training setup. It is not valid
# Python as written and cannot be safely reflowed here: the opening
# `test_file = os.path.join(line, args.test)` uses a `line` variable bound in
# code outside this chunk, and the collapsed text no longer shows where the
# `for line in f:` loop body ends (i.e. whether the embedding-loading `if`s
# sit inside or after the loop). TODO: recover the layout from version
# control instead of re-indenting by guess.
#
# The statement sequence appears to be (order preserved from the paste):
#   test_file = os.path.join(line, args.test)   # tail of an earlier loop?
#   print train/dev/test file paths
#   data_initialization(config, train_file, devel_file, test_file)
#   # config.fix_alphabet()                      (commented out by author)
#   config.num_corpus = count
#   with open(args.dataset, 'r') as f:
#       for line in f:                           # one corpus dir per line
#           line = line.strip()
#           train/devel/test_file = os.path.join(line, args.<split>)
#           config.generate_instance(<file>, <split>) for train / dev / test
#   if emb_file:             load pretrained word embeddings
#                            (`build_word_pretain_emb` — 'pretain' typo is
#                            presumably in the Config API itself; verify)
#   if char_emb_file != 'none':  load pretrained char embeddings
#   for label in config.label_alphabet.instances: print(label)
#   name = 'MultiMetaLSTM'
#   train(config, name, dset_dir, save_model_dir, seg)
#   # elif status == 'test':                     (commented out by author)
#
# Beware: in the collapsed form below, the first '#' (at
# '# config.fix_alphabet()') turns the entire remainder of the line into a
# comment, silently hiding the code after it.
test_file = os.path.join(line, args.test) print('Train file: ', train_file) print('Dev file: ', devel_file) print('Test file: ', test_file) data_initialization(config, train_file, devel_file, test_file) # config.fix_alphabet() config.num_corpus = count with open(args.dataset, 'r') as f: for line in f: line = line.strip() train_file = os.path.join(line, args.train) devel_file = os.path.join(line, args.devel) test_file = os.path.join(line, args.test) config.generate_instance(train_file, 'train') config.generate_instance(devel_file, 'dev') config.generate_instance(test_file, 'test') if emb_file: print('load word emb file...norm: ', config.norm_word_emb) config.build_word_pretain_emb(emb_file) if char_emb_file != 'none': print('load char emb file...norm: ', config.norm_char_emb) config.build_char_pretrain_emb(char_emb_file) for label in config.label_alphabet.instances: print(label) name = 'MultiMetaLSTM' train(config, name, dset_dir, save_model_dir, seg) # elif status == 'test':