def init_model():
    """Build the character vocabulary, the LSTM + MLP parameters and a trainer.

    Side effects: seeds DyNet (666) and sets the module-level globals
    `int2char`, `char2int` and `VOCAB_SIZE`.

    Returns:
        ((lstm, params, pc), trainer): the LSTM builder, a dict of named
        parameters, the owning ParameterCollection, and an RMSProp trainer.
    """
    # Vocabulary: hex-ish characters plus space, with an explicit EOS token.
    characters = list("0123456789abcd ")
    characters.append("<EOS>")
    global int2char
    int2char = list(characters)
    global char2int
    char2int = {c: i for i, c in enumerate(characters)}
    global VOCAB_SIZE
    VOCAB_SIZE = len(characters)
    # Fixed seed so runs are reproducible.
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(666)
    dyparams.init()
    pc = dy.ParameterCollection()
    lstm = dy.LSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
    params = {}
    params["lookup"] = pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM))
    # LSTM output projection.
    params["R"] = pc.add_parameters((LSTM_OUTPUT_SIZE, HIDDEN_DIM))
    # NOTE: the 1-D shapes below were written as `(N)` (a bare int, not a
    # tuple); made explicit 1-tuples for clarity — same dimensions.
    params["bias"] = pc.add_parameters((LSTM_OUTPUT_SIZE,))
    # Two-layer MLP head on top of the LSTM output.
    params["w1"] = pc.add_parameters((N1, LSTM_OUTPUT_SIZE))
    params["w2"] = pc.add_parameters((MLP_OUTPUT_SIZE, N1))
    params["b1"] = pc.add_parameters((N1,))
    params["b2"] = pc.add_parameters((MLP_OUTPUT_SIZE,))
    trainer = dy.RMSPropTrainer(pc)
    return (lstm, params, pc), trainer
def get_trainer(opt, s2s):
    """Build the gradient-clipped trainer named by ``opt.trainer``.

    Unknown trainer names fall back to plain SGD with a warning on stderr.
    """
    pc = s2s.pc
    lr = opt.learning_rate
    decay = opt.learning_rate_decay
    name = opt.trainer
    if name == 'sgd':
        trainer = dy.SimpleSGDTrainer(pc, e0=lr, edecay=decay)
    elif name == 'clr':
        # Cyclical LR oscillates between lr/10 and lr.
        trainer = dy.CyclicalSGDTrainer(pc, e0_min=lr / 10.0, e0_max=lr,
                                        edecay=decay)
    elif name == 'momentum':
        trainer = dy.MomentumSGDTrainer(pc, e0=lr, edecay=decay)
    elif name == 'rmsprop':
        trainer = dy.RMSPropTrainer(pc, e0=lr, edecay=decay)
    elif name == 'adam':
        trainer = dy.AdamTrainer(pc, lr, edecay=decay)
    else:
        print('Trainer name invalid or not provided, using SGD',
              file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(pc, e0=lr, edecay=decay)
    trainer.set_clip_threshold(opt.gradient_clip)
    return trainer
def _init_optimizer(self, model, **kwargs):
    """Attach a DyNet trainer to ``self.optimizer`` from keyword options.

    Recognized kwargs: optim ('sgd'|'adadelta'|'adam'|'rmsprop'), mom,
    clip, eta/lr, and Adam's beta1/beta2/epsilon. Also records the
    learning rate on ``self.current_lr`` and disables sparse updates.
    """
    momentum = float(kwargs.get('mom', 0.0))
    algo = kwargs.get('optim', 'sgd')
    clip_threshold = kwargs.get('clip')
    # 'eta' wins over 'lr'; default 0.01.
    self.current_lr = kwargs.get('eta', kwargs.get('lr', 0.01))
    pc = model.pc
    if algo == 'adadelta':
        self.optimizer = dy.AdadeltaTrainer(pc)
    elif algo == 'adam':
        self.optimizer = dy.AdamTrainer(
            pc,
            alpha=self.current_lr,
            beta_1=kwargs.get('beta1', 0.9),
            beta_2=kwargs.get('beta2', 0.999),
            eps=kwargs.get('epsilon', 1e-8))
    elif algo == 'rmsprop':
        self.optimizer = dy.RMSPropTrainer(pc, learning_rate=self.current_lr)
    elif momentum == 0 or momentum is None:
        # Zero momentum degenerates to plain SGD.
        self.optimizer = dy.SimpleSGDTrainer(pc, learning_rate=self.current_lr)
    else:
        logging.info('Using mom %f', momentum)
        self.optimizer = dy.MomentumSGDTrainer(
            pc, learning_rate=self.current_lr, mom=momentum)
    if clip_threshold is not None:
        self.optimizer.set_clip_threshold(clip_threshold)
    self.optimizer.set_sparse_updates(False)
def add_parameters(self, dropout, lstm_size, optimizer, model_type, gru=True):
    """Create the encoder/decoder RNNs, the MLP head, attention weights
    and the trainer for this model.

    Args:
        dropout (float): dropout rate applied to every RNN builder (also
            stored in the module-level global DROPOUT).
        lstm_size (int): hidden size of the RNNs.
        optimizer (str): one of "sgd", "rms", "cyclic", "adam"; anything
            else selects Adagrad.
        model_type (str): "gru" for GRU builders, anything else for LSTM.
        gru (bool): unused; kept for interface compatibility.

    Bug fix: the trainer selection used a bare ``if optimizer == "cyclic"``
    in the middle of the chain, so "sgd" and "rms" fell through to the
    trailing ``else`` and were silently replaced by AdagradTrainer. The
    chain is now a single if/elif/else.
    """
    if model_type == "gru":
        self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                         lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                          lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        # Decoder also consumes the attention context, hence the larger input.
        self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE + lstm_size,
                                         lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)
    else:
        self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                          lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                           lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE + lstm_size,
                                          lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)
    global DROPOUT
    DROPOUT = dropout
    # Three-layer MLP: lstm_size -> 200 -> 100 -> |charset|.
    self.W1 = self.model.add_parameters((200, lstm_size))
    self.b1 = self.model.add_parameters((200, 1))
    self.W2 = self.model.add_parameters((100, 200))
    self.b2 = self.model.add_parameters((100, 1))
    self.W3 = self.model.add_parameters((len(self.C2I), 100))
    self.b3 = self.model.add_parameters((len(self.C2I), 1))
    # Attention projections (query / key / value) plus scoring weights.
    self.W_query = self.model.add_parameters((lstm_size, lstm_size))
    self.W_key = self.model.add_parameters((lstm_size, lstm_size))
    self.W_val = self.model.add_parameters((lstm_size, lstm_size))
    self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
    self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
    self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
    self.b_att = self.model.add_parameters((lstm_size, 1))
    self.b_direct = self.model.add_parameters((len(self.C2I), 1))
    # Language embeddings (7 languages).
    self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))
    if optimizer == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model)
    elif optimizer == "rms":
        self.trainer = dy.RMSPropTrainer(self.model)
    elif optimizer == "cyclic":
        self.trainer = dy.CyclicalSGDTrainer(self.model)
    elif optimizer == "adam":
        self.trainer = dy.AdamTrainer(self.model)
    else:
        self.trainer = dy.AdagradTrainer(self.model)
def training(model, train_set, val_set, val_ints, args, fsa_builder=None):
    """Training function given a training and validation set.

    Inputs:
        train_set (list of examples): List of training examples.
        val_set (list of examples): List of validation examples.
        fsa_builder (ExecutableFSA): Builder for the FSA.

    Early stopping: token accuracy drives the patience countdown;
    interaction accuracy selects which checkpoint is reloaded at the end.
    """
    trainer = dy.RMSPropTrainer(model.get_params())
    trainer.set_clip_threshold(1)
    best_token_acc = 0.0
    best_int_acc = 0.0
    best_checkpoint = None
    patience = args.patience
    countdown = patience
    for epoch in range(args.max_epochs):
        token_acc, int_acc = do_one_epoch(model, train_set, val_set, val_ints,
                                          fsa_builder, args, epoch, trainer)
        # Checkpoint every epoch so the best one can be reloaded later.
        model_file_name = '%s/model-epoch%d.dy' % (args.logdir, epoch)
        model.save_params(model_file_name)
        if token_acc <= best_token_acc:
            countdown -= 1
        else:
            # Improvement: grow the patience budget slightly and reset.
            best_token_acc = token_acc
            patience *= 1.005
            countdown = patience
            print('Validation token accuracy increased to ' + str(token_acc))
            print('Countdown reset and patience set to %f' % (patience))
        if int_acc > best_int_acc or best_checkpoint is None:
            best_checkpoint = model_file_name
            best_int_acc = int_acc
            print('Interaction accuracy increased to ' + str(int_acc))
        if countdown <= 0:
            print('Patience ran out -- stopping')
            break
    print('Loading parameters from best model: %s' % (best_checkpoint))
    model.load_params(best_checkpoint)
def set_trainer(self, optimization):
    """Select ``self.trainer`` by optimizer name; defaults to SimpleSGD.

    Bug fix: the original used four independent ``if`` statements, so the
    final ``if/else`` pair ("RMSProp" vs SimpleSGD) always ran last and
    overwrote MomentumSGD, CyclicalSGD and Adam with SimpleSGD. A single
    elif chain keeps the requested optimizer.
    """
    if optimization == 'MomentumSGD':
        self.trainer = dy.MomentumSGDTrainer(
            self.model, learning_rate=self.hp.learning_rate)
    elif optimization == 'CyclicalSGD':
        self.trainer = dy.CyclicalSGDTrainer(
            self.model,
            learning_rate_max=self.hp.learning_rate_max,
            learning_rate_min=self.hp.learning_rate_min)
    elif optimization == 'Adam':
        self.trainer = dy.AdamTrainer(self.model)
    elif optimization == 'RMSProp':
        self.trainer = dy.RMSPropTrainer(self.model)
    else:  # 'SimpleSGD'
        self.trainer = dy.SimpleSGDTrainer(
            self.model, learning_rate=self.hp.learning_rate)
def train(args, builder, params):
    """Train the stack-LSTM on freshly sampled copy-task batches.

    Runs ``args.iterations`` groups of ``args.batch_group_size`` batches,
    printing the per-symbol average loss after each group.
    """
    trainer = dynet.RMSPropTrainer(params, args.learning_rate)
    trainer.set_clip_threshold(args.clip_threshold)
    for group_idx in range(args.iterations):
        print('batch group #%d...' % (group_idx + 1))
        running_loss = 0.0
        for _ in range(args.batch_group_size):
            # Draw a fresh batch of random training sequences.
            seq_len = random.randint(*args.training_length_range)
            sequences = [
                random_sequence(seq_len, args.source_alphabet_size)
                for _ in range(args.batch_size)
            ]
            # Rearrange the input/output halves into per-timestep batches
            # of individual symbols.
            src_batches = transpose(s.input_sequence() for s in sequences)
            tgt_batches = transpose(s.output_sequence() for s in sequences)
            # Fresh computation graph per batch.
            dynet.renew_cg()
            state = builder.initial_state(args.batch_size)
            # Consume everything up to the separator; outputs are ignored.
            for symbols in src_batches:
                indices = [input_symbol_to_index(s) for s in symbols]
                state = state.next(indices, StackLSTMBuilder.INPUT_MODE)
            # Predict the output half, scoring each symbol before stepping.
            per_symbol_losses = []
            for symbols in tgt_batches:
                indices = [output_symbol_to_index(s) for s in symbols]
                per_symbol_losses.append(
                    dynet.pickneglogsoftmax_batch(state.output(), indices))
                state = state.next(indices, StackLSTMBuilder.OUTPUT_MODE)
            total_loss = dynet.sum_batches(dynet.esum(per_symbol_losses))
            # Forward, backward, update.
            running_loss += total_loss.value()
            total_loss.backward()
            trainer.update()
        mean_loss = running_loss / (args.batch_size * args.batch_group_size)
        print(' average loss: %0.2f' % mean_loss)
def __init__(self, model, optim='sgd', clip=5, mom=0.9, **kwargs):
    """Classifier trainer wrapper around a DyNet optimizer.

    Args:
        model: model exposing ``pc`` (ParameterCollection) and ``labels``.
        optim (str): 'adadelta' | 'adam' | 'rmsprop' | anything else -> SGD.
        clip (float): gradient clipping threshold.
        mom (float): momentum for the SGD fallback.
        kwargs: 'eta' (preferred) or 'lr' set the learning rate (0.01).

    Consistency fix: matching the sibling ``optimizer()`` helper, a zero
    (or None) momentum now selects plain SimpleSGD instead of
    MomentumSGDTrainer with mom=0 — numerically the same update.
    """
    super(ClassifyTrainerDynet, self).__init__()
    self.model = model
    eta = kwargs.get('eta', kwargs.get('lr', 0.01))
    print("Using eta [{:.4f}]".format(eta))
    print("Using optim [{}]".format(optim))
    self.labels = model.labels
    if optim == 'adadelta':
        self.optimizer = dy.AdadeltaTrainer(model.pc)
    elif optim == 'adam':
        self.optimizer = dy.AdamTrainer(model.pc)
    elif optim == 'rmsprop':
        self.optimizer = dy.RMSPropTrainer(model.pc, learning_rate=eta)
    elif mom == 0 or mom is None:
        self.optimizer = dy.SimpleSGDTrainer(model.pc, learning_rate=eta)
    else:
        print("using mom {:.3f}".format(mom))
        self.optimizer = dy.MomentumSGDTrainer(model.pc, learning_rate=eta,
                                               mom=mom)
    self.optimizer.set_clip_threshold(clip)
def optimizer(model, optim='sgd', eta=0.01, clip=None, mom=0.9, **kwargs):
    """Build and return a DyNet trainer over ``model.pc``.

    'lr' in kwargs overrides ``eta``. Unknown ``optim`` names fall back to
    SGD (with momentum when ``mom`` is non-zero). Sparse updates are
    disabled; clipping is applied only when ``clip`` is given.
    """
    eta = kwargs.get('lr', eta)
    print('Using eta [{:.4f}]'.format(eta))
    print('Using optim [{}]'.format(optim))
    pc = model.pc
    if optim == 'adadelta':
        opt = dy.AdadeltaTrainer(pc)
    elif optim == 'adam':
        opt = dy.AdamTrainer(pc)
    elif optim == 'rmsprop':
        opt = dy.RMSPropTrainer(pc, learning_rate=eta)
    elif mom == 0 or mom is None:
        # Zero momentum is just plain SGD.
        opt = dy.SimpleSGDTrainer(pc, learning_rate=eta)
    else:
        print('Using mom {:.3f}'.format(mom))
        opt = dy.MomentumSGDTrainer(pc, learning_rate=eta, mom=mom)
    if clip is not None:
        opt.set_clip_threshold(clip)
    opt.set_sparse_updates(False)
    return opt
def mse_loss(predictions, target):
    """Mean squared error between two DyNet expressions (scalar expression)."""
    diff = predictions - target
    square = dy.square(diff)
    mean = dy.mean_elems(square)
    return mean


# --- Module-level training script: simple 1-D linear regression with RMSProp.
m = dy.ParameterCollection()
W = m.add_parameters((1, 1))  # scalar weight
b = m.add_parameters((1, ))   # scalar bias
dy.renew_cg()
optimizer = dy.RMSPropTrainer(m)
BATCH_SIZE = 250
EPOCHS = 20000
TARGET_UPDATE = 1
# NOTE(review): generate_data is defined elsewhere; presumably returns
# index-able arrays of 5000 samples — verify against its definition.
x, y = generate_data()
# Training loop
losses = list()
for epoch in range(EPOCHS):
    # Sample a minibatch
    # NOTE(review): `random.choice(5000, BATCH_SIZE, False)` is the
    # numpy.random signature (sampling without replacement), not the stdlib
    # `random` module — `random` here is presumably numpy.random; confirm.
    indices = random.choice(5000, BATCH_SIZE, False)
    mb_x, mb_y = x[indices], y[indices]
    # NOTE(review): the line below overwrites the minibatch with the full
    # dataset and the loop body appears truncated here — confirm against
    # the original source.
    mb_x = x
def reinforcement_learning(model, train_set, val_set, val_interactions,
                           log_dir, fsa_builder, reward_fn, entropy_function,
                           args, batch_size=1, epochs=20, single_head=True,
                           explore_with_fsa=False):
    """Performs training with exploration.

    Inputs:
        model (Model): Model to train.
        train_set (list of Examples): The set of training examples.
        val_set (list of Examples): The set of validation examples.
        val_interactions (list of Interactions): Full interactions for
            validation.
        log_dir (str): Location to log.

    Bug fix: the Crayon setup used ``except ValueError or ImportError:``,
    which Python evaluates to ``except ValueError:`` only — a missing
    pycrayon package (ImportError) would crash instead of disabling
    logging. Fixed to catch the tuple ``(ValueError, ImportError)``.
    """
    trainer = dy.RMSPropTrainer(model.get_params())
    trainer.set_clip_threshold(1)
    mode = get_rl_mode(args.rl_mode)
    best_val_accuracy = 0.0
    best_val_reward = -float('inf')
    best_model = None
    # Optional TensorBoard-style logging via Crayon; disabled when the
    # package or server is unavailable.
    try:
        from pycrayon import CrayonClient
        crayon = CrayonClient(hostname="localhost")
        experiment = crayon.create_experiment(log_dir)
    except (ValueError, ImportError):
        print(
            "If you want to use Crayon, please use `pip install pycrayon` to install it. "
        )
        experiment = None
    num_batches = 0
    train_file = open(os.path.join(log_dir, "train.log"), "w")
    patience = args.patience
    countdown = patience
    for epoch in range(epochs):
        random.shuffle(train_set)
        batches = chunks(train_set, batch_size)
        num_examples = 0
        num_tokens = 0
        num_tokens_zero = 0
        progbar = progressbar.ProgressBar(maxval=len(batches),
                                          widgets=[
                                              "Epoch " + str(epoch),
                                              progressbar.Bar('=', '[', ']'),
                                              ' ',
                                              progressbar.Percentage(), ' ',
                                              progressbar.ETA()
                                          ])
        progbar.start()
        for i, batch in enumerate(batches):
            # One computation graph per batch.
            dy.renew_cg()
            prob_seqs, predictions = model.sample_sequences(
                batch,
                length=args.sample_length_limit,
                training=True,
                fsa_builder=fsa_builder)
            batch_entropy_sum = dy.inputTensor([0.])
            batch_rewards = []
            processed_predictions = []
            train_file.write("--- NEW BATCH # " + str(num_batches) + " ---\n")
            # Collect per-action probability expressions (BEG excluded).
            action_probabilities = {}
            for action in model.output_action_vocabulary:
                if action != BEG:
                    action_probabilities[action] = []
            for example, prob_seq, prediction in zip(batch, prob_seqs,
                                                     predictions):
                # Get reward (and other evaluation information)
                prediction = process_example(example, prediction, prob_seq,
                                             reward_fn, entropy_function,
                                             model, args, fsa_builder)
                for distribution in prob_seq:
                    action_probability = model.action_probabilities(
                        distribution)
                    for action, prob_exp in action_probability.items():
                        action_probabilities[action].append(prob_exp)
                batch_rewards.extend(prediction.reward_expressions)
                batch_entropy_sum += dy.esum(prediction.entropies)
                processed_predictions.append(prediction)
                num_examples += 1
            # Now backpropagate given these rewards
            # NOTE(review): batch_action_probabilities is computed but never
            # read afterwards — kept for behavioral parity; confirm intent.
            batch_action_probabilities = {}
            for action, prob_exps in action_probabilities.items():
                batch_action_probabilities[action] = dy.esum(prob_exps) / len(
                    batch_rewards)
            num_reward_exps = len(batch_rewards)
            # Policy-gradient objective, maximized -> negate for the loss.
            loss = dy.esum(batch_rewards)
            if args.entropy_coefficient > 0:
                loss += args.entropy_coefficient * batch_entropy_sum
            loss = -loss / num_reward_exps
            loss.backward()
            try:
                trainer.update()
            except RuntimeError as r:
                # Dump gradients for debugging exploded/NaN updates, then bail.
                print(loss.npvalue())
                for lookup_param in model._pc.lookup_parameters_list():
                    print(lookup_param.name())
                    print(lookup_param.grad_as_array())
                for param in model._pc.parameters_list():
                    print(param.name())
                    print(param.grad_as_array())
                print(r)
                exit()
            # Calculate metrics
            stop_tok = (EOS if single_head else (EOS, NO_ARG, NO_ARG))
            per_token_metrics = compute_metrics(processed_predictions,
                                                num_reward_exps,
                                                ["entropy", "reward"], args)
            gold_token_metrics = compute_metrics(
                processed_predictions,
                sum([len(ex.actions) for ex in batch]) + len(batch),
                ["gold_probability"],
                args,
                model=model)
            per_example_metrics = compute_metrics(
                processed_predictions,
                len(batch), [
                    "distance", "completion", "invalid", "num_tokens",
                    "prefix_length"
                ],
                args,
                model=model)
            for prediction in processed_predictions:
                train_file.write(str(prediction) + "\n")
            train_file.write("=====\n")
            log_metrics({"loss": loss.npvalue()[0]}, train_file, experiment,
                        num_batches)
            log_metrics(per_token_metrics, train_file, experiment, num_batches)
            log_metrics(gold_token_metrics, train_file, experiment,
                        num_batches)
            log_metrics(per_example_metrics, train_file, experiment,
                        num_batches)
            train_file.flush()
            num_batches += 1
            progbar.update(i)
        progbar.finish()
        # End-of-epoch evaluation on train / val / full interactions.
        train_acc, _, _ = utterance_accuracy(model,
                                             train_set,
                                             fsa_builder=fsa_builder,
                                             logfile=log_dir + "/rl-train" +
                                             str(epoch) + ".log")
        val_acc, val_reward, _ = utterance_accuracy(
            model,
            val_set,
            fsa_builder=fsa_builder,
            logfile=log_dir + "/rl-val-" + str(epoch) + ".log",
            args=args,
            reward_function=reward_fn)
        val_int_acc = interaction_accuracy(model,
                                           val_interactions,
                                           fsa_builder=fsa_builder,
                                           logfile=log_dir + "/rl-val-int-" +
                                           str(epoch) + ".log")
        log_metrics(
            {
                "train_accuracy": train_acc,
                "validation_accuracy": val_acc,
                "validation_int_acc": val_int_acc,
                "validation_reward": val_reward,
                "countdown": countdown
            }, train_file, experiment, num_batches)
        if experiment is not None:
            experiment.to_zip(
                os.path.join(log_dir, "crayon-" + str(epoch) + ".zip"))
        model_file_name = log_dir + "/model-rl-epoch" + str(epoch) + ".dy"
        model.save_params(model_file_name)
        # Best checkpoint tracks interaction accuracy; patience tracks reward.
        if val_int_acc > best_val_accuracy or best_model is None:
            best_model = model_file_name
            best_val_accuracy = val_int_acc
        if val_reward > best_val_reward:
            patience *= 1.005
            countdown = patience
            best_val_reward = val_reward
        else:
            countdown -= 1
        if countdown <= 0:
            print("Patience ran out -- stopping")
            break
    train_file.close()
    print('Loading parameters from best model: %s' % (best_model))
    model.load_params(best_model)
    model.save_params(log_dir + "/best_rl_model.dy")
    print(train_set[0])
    print(
        model.generate(train_set[0].utterance, train_set[0].initial_state,
                       train_set[0].history)[0])
def train(opt):
    """End-to-end seq2seq training loop: data loading, model creation,
    optimization, periodic train/dev loss reports and BLEU-based model
    selection.

    Bug fix: the trainer selection used two leading independent ``if``
    statements ('sgd' then 'clr'), so ``opt.trainer == 'sgd'`` also fell
    into the trailing ``else``, printed the "Trainer name invalid" warning
    and rebuilt the trainer. The chain is now a single if/elif/else.
    """
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Read vocabs (load existing dictionaries, or build and save new ones).
    if opt.dic_src:
        widss, ids2ws = data.load_dic(opt.dic_src)
    else:
        widss, ids2ws = data.read_dic(opt.train_src, max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)
    if opt.dic_dst:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    else:
        widst, ids2wt = data.read_dic(opt.train_dst, max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)
    # Read training
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)
    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(opt.emb_dim,
                               opt.hidden_dim,
                               opt.att_dim,
                               widss,
                               widst,
                               model_file=opt.model,
                               bidir=opt.bidir,
                               word_emb=opt.word_emb,
                               dropout=opt.dropout_rate,
                               max_len=opt.max_len)
    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name+'_model.txt'
    # Trainer ==========================================================
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.model,
                                        e0_min=opt.learning_rate / 10,
                                        e0_max=opt.learning_rate,
                                        edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.model,
                                    e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.model,
                                 opt.learning_rate,
                                 edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.verbose:
        print('Using '+opt.trainer+' optimizer')
    trainer.set_clip_threshold(opt.gradient_clip)
    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(widss),
                             trg_dict_size=len(widst))
        sys.stdout.flush()
    # Creat batch loaders ===============================================
    if opt.verbose:
        print('Creating batch loaders')
        sys.stdout.flush()
    trainbatchloader = data.BatchLoader(trainings_data, trainingt_data,
                                        opt.batch_size)
    devbatchloader = data.BatchLoader(valids_data, validt_data,
                                      opt.dev_batch_size)
    # Start training ====================================================
    if opt.verbose:
        print('starting training')
        sys.stdout.flush()
    start = time.time()
    train_loss = 0
    processed = 0
    best_bleu = 0
    i = 0
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            processed += sum(map(len, y))
            bsize = len(y)
            # Compute loss
            loss = s2s.calculate_loss(x, y)
            # Backward pass and parameter update
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * bsize
            if (i+1) % opt.check_train_error_every == 0:
                # Check average training error from time to time
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                elapsed = time.time()-start
                trainer.status()
                print(" Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (logloss, ppl, elapsed, processed))
                start = time.time()
                train_loss = 0
                processed = 0
                sys.stdout.flush()
            if (i+1) % opt.check_valid_error_every == 0:
                # Check generalization error on the validation set
                dev_loss = 0
                dev_processed = 0
                dev_start = time.time()
                for x, y in devbatchloader:
                    dev_processed += sum(map(len, y))
                    bsize = len(y)
                    loss = s2s.calculate_loss(x, y, test=True)
                    dev_loss += loss.scalar_value() * bsize
                dev_logloss = dev_loss/dev_processed
                dev_ppl = np.exp(dev_logloss)
                dev_elapsed = time.time()-dev_start
                print("[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (epoch, dev_logloss, dev_ppl, dev_elapsed, dev_processed))
                sys.stdout.flush()
                start = time.time()
            if (i+1) % opt.valid_bleu_every == 0:
                # Check BLEU score on the validation set from time to time
                print('Start translating validation set, buckle up!')
                sys.stdout.flush()
                bleu_start = time.time()
                with open(opt.valid_out, 'w+') as f:
                    for x in valids_data:
                        y_hat = s2s.translate(x, beam_size=opt.beam_size)
                        # Strip the BOS/EOS tokens before scoring.
                        translation = [ids2wt[w] for w in y_hat[1:-1]]
                        print(' '.join(translation), file=f)
                bleu, details = evaluation.bleu_score(opt.valid_dst,
                                                      opt.valid_out)
                bleu_elapsed = time.time()-bleu_start
                print('Finished translating validation set', bleu_elapsed,
                      'elapsed.')
                print(details)
                # Early stopping : save the latest best model
                if bleu > best_bleu:
                    best_bleu = bleu
                    print('Best BLEU score up to date, saving model to',
                          s2s.model_file)
                    s2s.save()
                sys.stdout.flush()
                start = time.time()
            i = i+1
        trainer.update_epoch()
def __init__(self, word_count, tag_count, word_dims, tag_dims, lstm_units,
             hidden_units, struct_out, label_out, droprate=0, struct_spans=4,
             label_spans=3, optimizer=1):
    """Span-based parser network: word/tag embeddings, a 2-layer BiLSTM,
    and two MLP heads scoring structural actions and labels.

    optimizer codes: 1=SimpleSGD, 2=MomentumSGD, 3=Adagrad, 4=RMSProp,
    5=Adam.
    """
    self.word_count = word_count
    self.tag_count = tag_count
    self.word_dims = word_dims
    self.tag_dims = tag_dims
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.struct_out = struct_out
    self.label_out = label_out
    self.droprate = droprate
    # NOTE(review): dynet.Model is the older name for ParameterCollection.
    self.model = dynet.Model()
    if optimizer == 1:
        self.trainer = dynet.SimpleSGDTrainer(self.model)
    elif optimizer == 2:
        self.trainer = dynet.MomentumSGDTrainer(self.model)
    elif optimizer == 3:
        self.trainer = dynet.AdagradTrainer(self.model, learning_rate=0.01,
                                            eps=0.001)
    elif optimizer == 4:
        self.trainer = dynet.RMSPropTrainer(self.model)
    elif optimizer == 5:
        self.trainer = dynet.AdamTrainer(self.model)
    # NOTE(review): any other optimizer code leaves self.trainer unset —
    # later use would raise AttributeError; confirm callers only pass 1-5.
    random.seed(1)
    self.activation = dynet.rectify
    self.word_embed = self.model.add_lookup_parameters(
        (word_count, word_dims),
    )
    self.tag_embed = self.model.add_lookup_parameters(
        (tag_count, tag_dims),
    )
    # Two stacked BiLSTM layers over concatenated word+tag embeddings.
    self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
    self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
    self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
    self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
    # Structural-action head: spans -> hidden -> struct_out scores.
    self.struct_hidden_W = self.model.add_parameters(
        (hidden_units, 4 * struct_spans * lstm_units),
        dynet.UniformInitializer(0.01),
    )
    self.struct_hidden_b = self.model.add_parameters(
        (hidden_units, ),
        dynet.ConstInitializer(0),
    )
    self.struct_output_W = self.model.add_parameters(
        (struct_out, hidden_units),
        dynet.ConstInitializer(0),
    )
    self.struct_output_b = self.model.add_parameters(
        (struct_out, ),
        dynet.ConstInitializer(0),
    )
    # Label head: spans -> hidden -> label_out scores.
    self.label_hidden_W = self.model.add_parameters(
        (hidden_units, 4 * label_spans * lstm_units),
        dynet.UniformInitializer(0.01),
    )
    self.label_hidden_b = self.model.add_parameters(
        (hidden_units, ),
        dynet.ConstInitializer(0),
    )
    self.label_output_W = self.model.add_parameters(
        (label_out, hidden_units),
        dynet.ConstInitializer(0),
    )
    self.label_output_b = self.model.add_parameters(
        (label_out, ),
        dynet.ConstInitializer(0),
    )