def training_start(self, model, data):
    training_start_time = time.time()
    logger.info("Start training")

    # Print a model summary to make sure everything is ok with it
    model_summary = torch_summarize(model)
    logger.debug(model_summary)

    evaluator = BaseEvaluator(self.config)

    logger.debug("Preparing training data")
    train_batches = data.prepare_training_data(data.train, self.batch_size)
    dev_batches = data.prepare_training_data(data.dev, self.batch_size)

    id2word = data.vocab.id2tok
    dev_lexicalizations = data.lexicalizations['dev']
    dev_multi_ref_fn = '%s.multi-ref' % data.fnames['dev']

    self.set_optimizer(model, self.config['optimizer'])
    self.set_train_criterion(len(id2word), PAD_ID)

    # Moving the model to GPU, if available
    if self.use_cuda:
        model = model.cuda()

    for epoch_idx in range(1, self.n_epochs + 1):
        epoch_start = time.time()
        pred_fn = os.path.join(self.model_dir, 'predictions.epoch%d' % epoch_idx)

        train_loss = self.train_epoch(epoch_idx, model, train_batches)
        dev_loss = self.compute_val_loss(model, dev_batches)

        predicted_ids, attention_weights = evaluator.evaluate_model(
            model, data.dev[0], data.uni_mr['dev'])
        predicted_tokens = evaluator.lexicalize_predictions(
            predicted_ids, dev_lexicalizations, id2word)

        save_predictions_txt(predicted_tokens, pred_fn)
        self.record_loss(train_loss, dev_loss)

        if self.evaluate_prediction:
            self.run_external_eval(dev_multi_ref_fn, pred_fn)

        if self.save_model:
            save_model(model, os.path.join(self.model_dir, 'weights.epoch%d' % epoch_idx))

        logger.info('Epoch %d/%d: time=%s'
                    % (epoch_idx, self.n_epochs, asMinutes(time.time() - epoch_start)))

    self.plot_lcurve()

    if self.evaluate_prediction:
        score_fname = os.path.join(self.model_dir, 'scores.csv')
        scores = self.get_scores_to_save()
        save_scores(scores, self.score_file_header, score_fname)

    self.plot_training_results()

    logger.info('End training time=%s' % (asMinutes(time.time() - training_start_time)))
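For context, the loop above might be driven as follows. This is a minimal, hypothetical usage sketch: load_config, NLGDataset, Seq2SeqModel, and Trainer are illustrative stand-ins for whatever config, data, and model classes the project actually wires together; only training_start() itself comes from the code above.

# Hypothetical driver -- every name except training_start() is an
# illustrative stand-in, not the project's actual API.
config = load_config('configs/model.yaml')  # assumed config loader
data = NLGDataset(config)                   # assumed: exposes train/dev, vocab, lexicalizations
model = Seq2SeqModel(config)
model.setup()                               # build embeddings/encoder/decoder (see below)

trainer = Trainer(config)
trainer.training_start(model, data)         # runs the epoch loop above

Each epoch leaves behind a predictions.epoch<N> file and, if self.save_model is set, a weights.epoch<N> checkpoint in self.model_dir; with external evaluation enabled, a final scores.csv is written after training.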
def setup(self):
    self.set_flags()
    self.set_embeddings()
    self.set_encoder()
    self.set_decoder()
    self.set_output_layer()

    # Print a model summary to make sure everything is as planned
    model_summary = torch_summarize(self)
    logger.debug('Model summary:\n %s', model_summary)
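Each set_* builder is expected to assign its layers as attributes on self: in PyTorch, assigning an nn.Module attribute on a parent nn.Module is what registers the sub-module, so the assembled parts show up in .parameters(), .cuda(), and the torch_summarize output. A minimal, self-contained sketch of the pattern (class, layer names, and sizes are illustrative, not the project's):

import torch.nn as nn

class TinyModel(nn.Module):
    def __init__(self, vocab_size=100, embedding_dim=16):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.setup()

    def setup(self):
        self.set_embeddings()

    def set_embeddings(self):
        # Attribute assignment registers the sub-module with the parent,
        # so its weights are picked up by .parameters() and .cuda()
        self.embeddings = nn.Embedding(self.vocab_size, self.embedding_dim)

model = TinyModel()
print(sum(p.numel() for p in model.parameters()))  # 1600 (= 100 * 16)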
def setup(self):
    assert self.src_vocabsize is not None, 'Set data-dependent params first!'

    self.embedding_dim = self.config["embedding_dim"]
    self.embedding_mat = get_embed_matrix(self.src_vocabsize, self.embedding_dim)

    # At runtime: looking up numerical ids in the embedding matrix yields a 3D tensor
    # of shape (batch_size, num_embedding_feats, embedding_dim).
    # We reshape it into a 2D matrix of shape (-1, num_embedding_feats * embedding_dim)
    # and feed it to a projection layer which reduces the dimension of the embeddings.
    self.embedding_projection_dim = self.config.get(
        'embedding_proj_dim', self.embedding_dim // 4)
    self.embedding_projection_layer = nn.Linear(
        self.num_embedding_feats * self.embedding_dim,
        self.num_embedding_feats * self.embedding_projection_dim)

    # After the embeddings are projected onto a lower-dimensional space,
    # we squeeze them further through a dense layer
    self.dense1_dim = self.config["dense1_dim"]
    self.dense1_layer = nn.Linear(
        self.num_embedding_feats * self.embedding_projection_dim,
        self.dense1_dim)

    # Activation function for dense1_layer
    self.lrelu1 = nn.LeakyReLU()

    # Output layer
    # Binary prediction task -> self.output_size units with a log-softmax on top,
    # producing log-probabilities over the classes
    self.out_layer = nn.Linear(self.dense1_dim, self.output_size)
    self.softmax = nn.LogSoftmax(dim=1)

    # Print a model summary to make sure everything is ok with it
    model_summary = torch_summarize(self)
    logger.debug('Model summary:\n %s', model_summary)
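The reshape-then-project path described in the comments corresponds to a forward pass roughly like the sketch below. This is an illustration under assumptions, not the project's actual forward(): it presumes embedding_mat behaves like an nn.Embedding applied to num_embedding_feats input ids per example, and all sizes are made up.

import torch
import torch.nn as nn

# Standalone sketch of the forward pass implied by the layer definitions above.
# All sizes are illustrative; embedding_mat is assumed to be an nn.Embedding.
batch_size, num_feats, vocab, emb_dim, proj_dim, dense_dim, out_size = 4, 3, 50, 8, 2, 16, 2

embedding_mat = nn.Embedding(vocab, emb_dim)
embedding_projection_layer = nn.Linear(num_feats * emb_dim, num_feats * proj_dim)
dense1_layer = nn.Linear(num_feats * proj_dim, dense_dim)
lrelu1 = nn.LeakyReLU()
out_layer = nn.Linear(dense_dim, out_size)
log_softmax = nn.LogSoftmax(dim=1)

ids = torch.randint(0, vocab, (batch_size, num_feats))  # (batch, num_feats)
x = embedding_mat(ids)                                  # 3D: (batch, num_feats, emb_dim)
x = x.view(-1, num_feats * emb_dim)                     # 2D: (batch, num_feats * emb_dim)
x = embedding_projection_layer(x)                       # (batch, num_feats * proj_dim)
x = lrelu1(dense1_layer(x))                             # (batch, dense_dim)
log_probs = log_softmax(out_layer(x))                   # (batch, out_size) log-probabilities
print(log_probs.shape)                                  # torch.Size([4, 2])

Since the model emits log-probabilities via LogSoftmax, the matching training criterion would be nn.NLLLoss (the combination is equivalent to nn.CrossEntropyLoss applied to raw logits).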