def __train(): log_file = os.path.join(config.LOG_DIR, '{}-{}-{}-{}.log'.format(os.path.splitext( os.path.basename(__file__))[0], args.idx, str_today, config.MACHINE_NAME)) # log_file = None init_universal_logging(log_file, mode='a', to_stdout=True) logging.info('logging to {}'.format(log_file)) train_config = srlfetexp.TrainConfig(loss_name='mm', neg_scale=0.1, n_steps=70000) lstm_dim = 250 mlp_hidden_dim = 500 type_embed_dim = 500 word_vecs_file = config.WIKI_FETEL_WORDVEC_FILE # dataset = 'figer' dataset = 'bbn' datafiles = config.FIGER_FILES if dataset == 'figer' else config.BBN_FILES data_prefix = datafiles['srl-train-data-prefix'] dev_data_pkl = data_prefix + '-dev.pkl' train_data_pkl = data_prefix + '-train.pkl' test_file_tup = (datafiles['test-mentions'], datafiles['test-sents'], datafiles['test-sents-dep'], datafiles['test-srl']) single_type_path = False if dataset == 'figer' else True # output_model_file = None save_model_file_prefix = os.path.join(config.DATA_DIR, 'models/srl-{}'.format(dataset)) gres = expdata.ResData(datafiles['type-vocab'], word_vecs_file) logging.info('dataset={} {}'.format(dataset, data_prefix)) srlfetexp.train_srlfet(device, gres, train_data_pkl, dev_data_pkl, None, test_file_tup, lstm_dim, mlp_hidden_dim, type_embed_dim, train_config, single_type_path, save_model_file_prefix=save_model_file_prefix)
def __train3(): log_file = os.path.join(config.LOG_DIR, '{}-{}-{}-{}.log'.format(os.path.splitext( os.path.basename(__file__))[0], args.idx, str_today, config.MACHINE_NAME)) # log_file = None init_universal_logging(log_file, mode='a', to_stdout=True) logging.info('logging to {}'.format(log_file)) margin = 1.0 train_config = srlfetexp.TrainConfig( pos_margin=margin, neg_margin=margin, neg_scale=1.0, batch_size=128, schedule_lr=True, n_steps=70000) lstm_dim = 250 mlp_hidden_dim = 500 type_embed_dim = 500 word_vecs_file = config.WIKI_FETEL_WORDVEC_FILE dataset = 'figer' # dataset = 'bbn' datafiles = config.FIGER_FILES if dataset == 'figer' else config.BBN_FILES data_prefix = datafiles['srl-train-data-prefix'] dev_data_pkl = data_prefix + '-dev.pkl' train_data_pkl = data_prefix + '-train.pkl' test_file_tup = (datafiles['test-mentions'], datafiles['test-sents'], datafiles['test-sents-dep'], datafiles['test-srl']) single_type_path = False if dataset == 'figer' else True # output_model_file = None save_model_file_prefix = os.path.join(config.DATA_DIR, 'models/srl3-{}'.format(dataset)) val_mentions_file = os.path.join(config.DATA_DIR, 'figer/wiki-valcands-figer-mentions.json') val_sents_file = os.path.join(config.DATA_DIR, 'figer/wiki-valcands-figer-sents.json') val_srl_file = os.path.join(config.DATA_DIR, 'figer/wiki-valcands-figer-srl.txt') val_dep_file = os.path.join(config.DATA_DIR, 'figer/wiki-valcands-figer-tok-dep.txt') val_manual_label_file = os.path.join(config.DATA_DIR, 'figer/figer-dev-man-labeled.txt') # manual_val_file_tup = (val_mentions_file, val_sents_file, val_dep_file, val_srl_file, val_manual_label_file) manual_val_file_tup = None gres = expdata.ResData(datafiles['type-vocab'], word_vecs_file) logging.info('dataset={} {}'.format(dataset, data_prefix)) srlfetexp.train_srlfet( device, gres, train_data_pkl, dev_data_pkl, manual_val_file_tup, test_file_tup, lstm_dim, mlp_hidden_dim, type_embed_dim, train_config, single_type_path, save_model_file_prefix=save_model_file_prefix)