# --- Sequence-labeling setup: model, checkpoint, datasets, optimizer ---
# (Reformatted from a whitespace-collapsed script fragment; behavior unchanged.)

# Backward language model wrapped for sequence labeling.
blm_model_seq = SparseSeqLM(blm_model, True, args.lm_droprate, False)

# Dispatch table: CLI choice -> sequence-labeling model class.
model_registry = {'vanilla': Vanilla_SeqLabel, 'lm-aug': SeqLabel}
seq_model = model_registry[args.seq_model](
    flm_model_seq, blm_model_seq,
    len(c_map), args.seq_c_dim, args.seq_c_hid, args.seq_c_layer,
    len(gw_map), args.seq_w_dim, args.seq_w_hid, args.seq_w_layer,
    len(y_map), args.seq_droprate,
    unit=args.seq_rnn_unit)

pw.info('Loading pre-trained models from {}.'.format(args.load_seq))
checkpoint_state = wrapper.restore_checkpoint(args.load_seq)['model']
seq_model.load_state_dict(checkpoint_state)
seq_model.to(device)

# CRF loss / decoder / evaluator all share the same label map.
crit = CRFLoss(y_map)
decoder = CRFDecode(y_map)
evaluator = eval_wc(decoder, 'f1')

pw.info('Constructing dataset.')
train_dataset, test_dataset, dev_dataset = [
    SeqDataset(tup_data, flm_map['\n'], blm_map['\n'], gw_map['<\n>'],
               c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'],
               len(y_map), args.batch_size)
    for tup_data in [train_data, test_data, dev_data]
]

pw.info('Constructing optimizer.')
# Only parameters left trainable (requires_grad) are handed to the optimizer.
trainable_params = filter(lambda t: t.requires_grad, seq_model.parameters())
optimizer_factories = {
    'Adam': optim.Adam,
    'Adagrad': optim.Adagrad,
    'Adadelta': optim.Adadelta,
    'SGD': functools.partial(optim.SGD, momentum=0.9),
}
# A non-positive --lr falls back to each optimizer's library default rate.
if args.lr > 0:
    optimizer = optimizer_factories[args.update](trainable_params, lr=args.lr)
else:
    optimizer = optimizer_factories[args.update](trainable_params)

pw.info('Saving configues.')
pw.save_configue(args)
pw.info('Setting up training environ.')
# NOTE(review): truncated fragment — begins mid-call (the leading
# `args.seq_w_layer, ... unit=args.seq_rnn_unit)` closes a model-constructor
# invocation whose opening is outside this view) and ends on a dangling
# `else:`. It appears to be a second, whitespace-collapsed variant of the
# setup script above (rand_init + pretrained word embeddings instead of a
# checkpoint load; eval type from args.eval_type; datasets built without the
# flm/blm newline indices). Cannot be safely reformatted without the missing
# surrounding lines — TODO recover the original file layout and restore it.
args.seq_w_layer, len(y_map), args.seq_droprate, unit=args.seq_rnn_unit) seq_model.rand_init() seq_model.load_pretrained_word_embedding(torch.FloatTensor(emb_array)) seq_config = seq_model.to_params() seq_model.to(device) crit = CRFLoss(y_map) decoder = CRFDecode(y_map) evaluator = eval_wc(decoder, args.eval_type) pw.info('Constructing dataset') train_dataset, test_dataset, dev_dataset = [ SeqDataset(tup_data, gw_map['<\n>'], c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'], len(y_map), args.batch_size) for tup_data in [train_data, test_data, dev_data] ] pw.info('Constructing optimizer') param_dict = filter(lambda t: t.requires_grad, seq_model.parameters()) optim_map = { 'Adam': optim.Adam, 'Adagrad': optim.Adagrad, 'Adadelta': optim.Adadelta, 'SGD': functools.partial(optim.SGD, momentum=0.9) } if args.lr > 0: optimizer = optim_map[args.update](param_dict, lr=args.lr) else:
# NOTE(review): truncated fragment — begins inside the body of an unseen
# parameter-freezing loop (`param.requires_grad = False` presumably under a
# `for name, param in ...:` whose header is outside this view) and ends on an
# unterminated `optim_map = {` dict literal. Appears to be a third variant of
# the setup script: evaluator additionally takes inverse word/label maps,
# train vs. test/dev datasets are built separately, and optimizer params come
# from an explicit `list(seq_model.parameters())`. Cannot be safely
# reformatted without the missing surrounding lines — TODO recover the
# original file layout and restore it.
param.requires_grad = False print("set ", name, "as False") #print(name) seq_config = seq_model.to_params() seq_model.to(device) crit = CRFLoss(y_map) decoder = CRFDecode(y_map) evaluator = eval_wc(decoder, args.eval_type, inv_w_map, inv_y_map) models = [seq_model] pw.info('Constructing dataset') train_dataset = SeqDataset(train_data, gw_map['<\n>'], c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'], len(y_map), args.batch_size) test_dataset, dev_dataset = [ SeqDataset(tup_data, gw_map['<\n>'], c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'], len(y_map), args.batch_size) for tup_data in [test_data, dev_data] ] pw.info('Constructing optimizer') #param_dict = filter(lambda t: t.requires_grad, seq_model.parameters()) #params = list(seq_model.parameters()) + list(crit.parameters()) params = list(seq_model.parameters()) param_dict = filter(lambda t: t.requires_grad, params) optim_map = {
# NOTE(review): truncated fragment — begins mid-`SeqDataset(...)` call (its
# opening and the preceding assignment are outside this view) and ends inside
# an unterminated `optim_map = {` dict literal. Appears to be a fourth variant
# of the setup script in which the train dataset additionally receives
# `y_unk`/`y_O` label indices and all datasets disable shuffling
# (`if_shuffle=False`). Cannot be safely reformatted without the missing
# surrounding lines — TODO recover the original file layout and restore it.
gw_map['<\n>'], c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'], len(y_map), args.batch_size, y_unk=y_map['<unk>'], y_O=y_map['O'], if_shuffle=False) test_dataset, dev_dataset = [ SeqDataset(tup_data, gw_map['<\n>'], c_map[' '], c_map['\n'], y_map['<s>'], y_map['<eof>'], len(y_map), args.batch_size, if_shuffle=False) for tup_data in [test_data, dev_data] ] pw.info('Constructing optimizer') #param_dict = filter(lambda t: t.requires_grad, seq_model.parameters()) #params = list(seq_model.parameters()) + list(crit.parameters()) params = list(seq_model.parameters()) param_dict = filter(lambda t: t.requires_grad, params) optim_map = { 'Adam': optim.Adam,