def training_start(self, model, data):

        training_start_time = time.time()
        logger.info("Start training")

        # Log a model summary to verify the architecture is as intended
        model_summary = torch_summarize(model)
        logger.debug(model_summary)

        evaluator = BaseEvaluator(self.config)
        logger.debug("Preparing training data")

        train_batches = data.prepare_training_data(data.train, self.batch_size)
        dev_batches = data.prepare_training_data(data.dev, self.batch_size)

        id2word = data.vocab.id2tok
        dev_lexicalizations = data.lexicalizations['dev']
        dev_multi_ref_fn = '%s.multi-ref' % data.fnames['dev']

        self.set_optimizer(model, self.config['optimizer'])
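        # The loss criterion masks padding tokens (see the NLLLoss sketch after this example)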
        self.set_train_criterion(len(id2word), PAD_ID)
        # Moving the model to GPU, if available
        if self.use_cuda:
            model = model.cuda()

        for epoch_idx in range(1, self.n_epochs + 1):

            epoch_start = time.time()
            pred_fn = os.path.join(self.model_dir, 'predictions.epoch%d' % epoch_idx)

            train_loss = self.train_epoch(epoch_idx, model, train_batches)
            dev_loss = self.compute_val_loss(model, dev_batches)

            predicted_ids, attention_weights = evaluator.evaluate_model(model, data.dev[0], data.uni_mr['dev'])
            predicted_tokens = evaluator.lexicalize_predictions(predicted_ids,
                                                                dev_lexicalizations,
                                                                id2word)

            save_predictions_txt(predicted_tokens, pred_fn)
            self.record_loss(train_loss, dev_loss)

            if self.evaluate_prediction:
                self.run_external_eval(dev_multi_ref_fn, pred_fn)

            if self.save_model:
                save_model(model, os.path.join(self.model_dir, 'weights.epoch%d' % epoch_idx))

            logger.info('Epoch %d/%d: time=%s', epoch_idx, self.n_epochs, asMinutes(time.time() - epoch_start))

        self.plot_lcurve()

        if self.evaluate_prediction:
            score_fname = os.path.join(self.model_dir, 'scores.csv')
            scores = self.get_scores_to_save()
            save_scores(scores, self.score_file_header, score_fname)
            self.plot_training_results()

        logger.info('End of training: time=%s', asMinutes(time.time() - training_start_time))
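
# `set_train_criterion` is not shown in this snippet. A minimal sketch of what
# it plausibly does, assuming the model's LogSoftmax outputs are paired with
# NLLLoss and padding is masked via `ignore_index` (the actual implementation
# may differ):
import torch.nn as nn

def set_train_criterion(self, vocab_size, pad_id):
    # NLLLoss expects log-probabilities, matching a LogSoftmax output layer;
    # `ignore_index` excludes padding tokens from the loss. `vocab_size` could
    # be used to build per-class weights, but is unused in this sketch.
    self.criterion = nn.NLLLoss(ignore_index=pad_id)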
Example #2
    def setup(self):
        self.set_flags()
        self.set_embeddings()
        self.set_encoder()
        self.set_decoder()
        self.set_output_layer()

        # Print a model summary to make sure everything is as planned
        model_summary = torch_summarize(self)
        logger.debug('Model summary:\n %s', model_summary)
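
# `torch_summarize` is a repo-local helper, not part of the PyTorch API. A
# minimal sketch of such a helper built from standard PyTorch calls (the
# repo's actual version may report more detail, e.g. per-module output shapes):
def torch_summarize(model):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # repr(model) renders the module hierarchy that PyTorch builds automatically
    return '%s\nTotal params: %d (trainable: %d)' % (repr(model), total, trainable)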
Example #3
    def setup(self):

        assert self.src_vocabsize is not None, \
            'Set data-dependent params first!'

        self.embedding_dim = self.config["embedding_dim"]
        self.embedding_mat = get_embed_matrix(self.src_vocabsize,
                                              self.embedding_dim)

        # At runtime, looking up token ids in the embedding matrix yields a 3D
        # tensor, which is reshaped into a 2D matrix of shape
        # (-1, self.embedding_dim * self.num_embedding_feats). The reshaped
        # tensor is then fed to a projection layer that reduces the embedding
        # dimensionality (see the forward() sketch after this example).
        self.embedding_projection_dim = self.config.get(
            'embedding_proj_dim', self.embedding_dim // 4)
        self.embedding_projection_layer = nn.Linear(
            self.num_embedding_feats * self.embedding_dim,
            self.num_embedding_feats * self.embedding_projection_dim)

        # After embeddings are projected onto a lower dimensional space,
        # we can squeeze them further by feeding to dense layers
        self.dense1_dim = self.config["dense1_dim"]
        self.dense1_layer = nn.Linear(
            self.num_embedding_feats * self.embedding_projection_dim,
            self.dense1_dim)

        # activation function for dense1_layer
        self.lrelu1 = nn.LeakyReLU()

        # Output layer.
        # Binary prediction is modelled here as a softmax over `output_size`
        # classes (the LogSoftmax below pairs with NLLLoss), rather than a
        # single sigmoid unit.
        self.out_layer = nn.Linear(self.dense1_dim, self.output_size)
        self.softmax = nn.LogSoftmax(dim=1)

        # Log a model summary to verify the architecture is as intended
        model_summary = torch_summarize(self)
        logger.debug('Model summary:\n %s', model_summary)
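
# A hedged sketch of the forward pass implied by the layer definitions and
# comments above (assumptions: `self.embedding_mat` behaves like an
# `nn.Embedding`, and the class defines its real forward() elsewhere):
def forward(self, inputs):
    # inputs: LongTensor of shape (batch_size, num_embedding_feats)
    embedded = self.embedding_mat(inputs)
    # 3D -> 2D: (batch, feats, emb_dim) -> (batch, feats * emb_dim)
    flat = embedded.view(-1, self.num_embedding_feats * self.embedding_dim)
    projected = self.embedding_projection_layer(flat)
    hidden = self.lrelu1(self.dense1_layer(projected))
    # Log-probabilities over the output classes, ready for NLLLoss
    return self.softmax(self.out_layer(hidden))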