# --- Model construction -------------------------------------------------
# Backward language-model RNN layer; `rnn_map` maps a unit name (e.g. 'lstm')
# to a layer factory.  The matching forward layer (`flm_rnn_layer`) is built
# just above this chunk.  NOTE(review): both LMs share args.lm_* hyperparams.
blm_rnn_layer = rnn_map[args.lm_rnn_layer](args.lm_layer_num, args.lm_rnn_unit, args.lm_word_dim, args.lm_hid_dim, args.lm_droprate)

# Forward / backward language models over their respective vocabularies
# (`flm_map` / `blm_map` are word->index maps; second arg `None` presumably
# means "no pre-built embedding" — confirm against the LM constructor).
flm_model = LM(flm_rnn_layer, None, len(flm_map), args.lm_word_dim, args.lm_droprate, label_dim=args.lm_label_dim)
blm_model = LM(blm_rnn_layer, None, len(blm_map), args.lm_word_dim, args.lm_droprate, label_dim=args.lm_label_dim)

# Wrap each LM for sequence labeling; the boolean flags select the backward
# direction (True for blm) — verify the remaining flag against SparseSeqLM.
flm_model_seq = SparseSeqLM(flm_model, False, args.lm_droprate, False)
blm_model_seq = SparseSeqLM(blm_model, True, args.lm_droprate, False)

# Choose the sequence-labeling architecture by name.
SL_map = {'vanilla': Vanilla_SeqLabel, 'lm-aug': SeqLabel}
seq_model = SL_map[args.seq_model](
    flm_model_seq, blm_model_seq,
    len(c_map), args.seq_c_dim, args.seq_c_hid, args.seq_c_layer,
    len(gw_map), args.seq_w_dim, args.seq_w_hid, args.seq_w_layer,
    len(y_map), args.seq_droprate, unit=args.seq_rnn_unit)

# --- Load pre-trained weights -------------------------------------------
pw.info('Loading pre-trained models from {}.'.format(args.load_seq))
seq_file = wrapper.restore_checkpoint(args.load_seq)['model']
seq_model.load_state_dict(seq_file)
seq_model.to(device)

# --- Loss, decoding, evaluation -----------------------------------------
crit = CRFLoss(y_map)
decoder = CRFDecode(y_map)
evaluator = eval_wc(decoder, 'f1')

# --- Datasets ------------------------------------------------------------
pw.info('Constructing dataset.')
# Each split gets its own SeqDataset sharing the same special-token indices
# (sentence/document boundary markers and the <s>/<eof> label ids).
train_dataset, test_dataset, dev_dataset = [
    SeqDataset(tup_data, flm_map['\n'], blm_map['\n'], gw_map['<\n>'],
               c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'],
               len(y_map), args.batch_size)
    for tup_data in [train_data, test_data, dev_data]
]

# --- Optimizer -----------------------------------------------------------
pw.info('Constructing optimizer.')
# Only trainable parameters are handed to the optimizer (the frozen LM
# weights are excluded by requires_grad).  Renamed from `param_dict`: this
# is a lazy iterator of tensors, not a dict.
trainable_params = filter(lambda t: t.requires_grad, seq_model.parameters())
optim_map = {
    'Adam': optim.Adam,
    'Adagrad': optim.Adagrad,
    'Adadelta': optim.Adadelta,
    'SGD': functools.partial(optim.SGD, momentum=0.9),
}
# A non-positive --lr falls back to the optimizer's own default learning
# rate (note: plain SGD has no default lr and would require args.lr > 0).
if args.lr > 0:
    optimizer = optim_map[args.update](trainable_params, lr=args.lr)
else:
    optimizer = optim_map[args.update](trainable_params)

pw.info('Saving configurations.')  # fixed typo: was 'Saving configues.'
args.seq_c_hid, args.seq_c_layer, len(gw_map), args.seq_w_dim, args.seq_w_hid, args.seq_w_layer, len(y_map), args.seq_droprate, unit=args.seq_rnn_unit) seq_model.rand_init() seq_model.load_pretrained_word_embedding(torch.FloatTensor(emb_array)) seq_config = seq_model.to_params() seq_model.to(device) crit = CRFLoss(y_map) decoder = CRFDecode(y_map) evaluator = eval_wc(decoder, args.eval_type) pw.info('Constructing dataset') train_dataset, test_dataset, dev_dataset = [ SeqDataset(tup_data, gw_map['<\n>'], c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'], len(y_map), args.batch_size) for tup_data in [train_data, test_data, dev_data] ] pw.info('Constructing optimizer') param_dict = filter(lambda t: t.requires_grad, seq_model.parameters()) optim_map = { 'Adam': optim.Adam, 'Adagrad': optim.Adagrad,