def run_epoch(self, session, train_set, train_raw, epoch):
     """Run one training pass over ``train_set`` and return its F1 score.

     Every full-size minibatch is passed to ``self.train_single_batch``; the
     returned summary is written to ``self.train_writer`` and the per-batch
     labels and predictions are collected for the final report.

     Args:
         session: Session object used to run ``self.model.inc_step`` and
             forwarded to ``self.train_single_batch``.
         train_set: Sized training data handed to ``minibatches``.
         train_raw: Unused here; kept for interface compatibility.
         epoch: Unused here; kept for interface compatibility.

     Returns:
         The value of ``f1_score`` over all collected labels/predictions, or
         ``0.0`` when no full-size batch was processed.
     """
     batch_size = self.config.batch_size
     total_batches = int(len(train_set) / batch_size)
     train_minibatches = minibatches(train_set, batch_size,
                                     self.config.dataset)
     training_loss = 0.0
     processed_batches = 0
     infer_label = []
     prediction_all = []
     for batch in tqdm(train_minibatches,
                       desc="Trainings",
                       total=total_batches):
         # Only full-size batches are trained on; a short batch is skipped.
         if len(batch[0]) != batch_size:
             continue
         session.run(self.model.inc_step)
         (loss, _accuracy, summary, global_step, infer_label_batch,
          prediction) = self.train_single_batch(session, *batch)
         infer_label.extend(infer_label_batch)
         prediction_all.extend(prediction)
         self.train_writer.add_summary(summary, global_step)
         # NOTE(review): the embedding checkpoint is written after every
         # batch, always with 1 as the third argument -- presumably each save
         # replaces the previous one; confirm whether once per epoch suffices.
         self.saver_embed.save(session, './temp/embedding_test.ckpt', 1)
         training_loss += loss
         processed_batches += 1
     if processed_batches == 0:
         # Nothing was trained (e.g. fewer examples than one batch): avoid a
         # division by zero and a report over empty label lists.
         print("No full batch processed; skipping training report")
         return 0.0
     # Average over the batches that actually contributed to the sum.
     training_loss = training_loss / processed_batches
     print(
         classification_report(infer_label,
                               prediction_all,
                               target_names=['can\'t', 'can']))
     score = f1_score(y_true=infer_label, y_pred=prediction_all)
     print("Loss", training_loss)
     print("F1_score", score)
     return score
 def validate(self, session, validation_set, validation_raw, epoch):
     """Evaluate the model on ``validation_set`` and return a weighted F1.

     Every full-size minibatch is passed to ``self.validate_single_batch``;
     the returned summary is written to ``self.valid_writer`` and the
     per-batch labels and predictions are collected for the final report.

     Args:
         session: Session object forwarded to ``self.validate_single_batch``.
         validation_set: Sized validation data handed to ``minibatches``.
         validation_raw: Unused here; kept for interface compatibility.
         epoch: Unused here; kept for interface compatibility.

     Returns:
         The value of ``f1_score(..., average='weighted')`` over all
         collected labels/predictions, or ``0.0`` when no full-size batch
         was processed.
     """
     batch_size = self.config.batch_size
     total_batches = int(len(validation_set) / batch_size)
     validation_loss = 0.0
     processed_batches = 0
     infer_label = []
     prediction_all = []
     validate_minibatches = minibatches(validation_set, batch_size,
                                        self.config.dataset)
     for batch in tqdm(validate_minibatches,
                       total=total_batches,
                       desc="Validate"):
         # Only full-size batches are evaluated; a short batch is skipped.
         if len(batch[0]) != batch_size:
             continue
         (loss, _accuracy, summary, global_step, infer_label_batch,
          prediction) = self.validate_single_batch(session, *batch)
         self.valid_writer.add_summary(summary, global_step)
         validation_loss += loss
         processed_batches += 1
         infer_label.extend(infer_label_batch)
         prediction_all.extend(prediction)
     if processed_batches == 0:
         # Nothing was evaluated (e.g. fewer examples than one batch): avoid
         # a division by zero and a report over empty label lists.
         print("No full batch processed; skipping validation report")
         return 0.0
     # Average over the batches that actually contributed to the sum.
     validation_loss = validation_loss / processed_batches
     print(
         classification_report(infer_label,
                               prediction_all,
                               target_names=['can\'t', 'can']))
     score = f1_score(y_true=infer_label,
                      y_pred=prediction_all,
                      average='weighted')
     print("Loss", validation_loss)
     print("F1_score", score)
     return score
 def validate(self, session, dataset):
     """Return the mean per-batch loss and accuracy over ``dataset``.

     Each minibatch is passed to ``self.test``; the returned loss and
     accuracy are summed and then averaged over the number of batches that
     were evaluated. (The sums were previously divided by the batch *size*,
     which is only correct when the batch count happens to equal it.)

     Args:
         session: Session object forwarded to ``self.test``.
         dataset: Data handed to ``minibatches``.

     Returns:
         A ``(loss, accuracy)`` tuple of per-batch means; ``(0.0, 0.0)``
         when ``minibatches`` yields nothing.
     """
     valid_loss = 0
     valid_accuracy = 0
     num_batches = 0
     for batch in minibatches(dataset, self.config.batch_size):
         # The third value returned by self.test is not needed here.
         loss, accuracy, _prediction = self.test(session, batch)
         valid_loss += loss
         valid_accuracy += accuracy
         num_batches += 1
     if num_batches == 0:
         return 0.0, 0.0
     return valid_loss / num_batches, valid_accuracy / num_batches
Exemple #4
0
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw):
        """Train on every minibatch of ``train_examples``, then score the dev set.

        Each batch goes through ``self.train_on_batch``; its loss is shown on a
        progress bar and, when ``self.report`` is truthy, passed to
        ``self.report.log_train_loss``. Afterwards ``self.evaluate`` is run on
        the development data and its results are logged.

        ``train_examples_raw`` is accepted but not used by this method.

        Returns:
            The last element of the entity-level scores returned by
            ``self.evaluate`` (logged as F1).
        """
        batch_size = self.config.batch_size
        progress = Progbar(target=int(len(train_examples) / batch_size) + 1)
        batches = minibatches(train_examples, batch_size)
        for step, batch in enumerate(batches, start=1):
            loss = self.train_on_batch(sess, *batch)
            progress.update(step, [("train loss", loss)])
            if self.report:
                self.report.log_train_loss(loss)
        print("")

        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        return entity_scores[-1]
Exemple #5
0
    def output(self, sess, inputs_raw, inputs=None):
        """
        Run the model over the given examples and return consolidated predictions.

        When ``inputs`` is None it is built from ``inputs_raw`` via
        ``self.helper.vectorize`` followed by ``self.preprocess_sequence_data``.
        Batches are taken in order (``shuffle=False``) and the per-batch
        predictions are handed to ``self.consolidate_predictions``.
        """
        if inputs is None:
            vectorized = self.helper.vectorize(inputs_raw)
            inputs = self.preprocess_sequence_data(vectorized)

        batch_size = self.config.batch_size
        progress = Progbar(target=1 + int(len(inputs) / batch_size))
        predictions = []
        batches = minibatches(inputs, batch_size, shuffle=False)
        for step, batch in enumerate(batches, start=1):
            # Drop the element at index 1 before predicting
            # (presumably the labels -- confirm against minibatches' layout).
            model_inputs = batch[:1] + batch[2:]
            predictions.extend(self.predict_on_batch(sess, *model_inputs))
            progress.update(step, [])
        return self.consolidate_predictions(inputs_raw, inputs, predictions)
 def run_epoch(self, sess, train_set, valid_set, train_raw, valid_raw,
               epoch):
     """Train on every minibatch of ``train_set`` for one epoch.

     Each batch goes through ``self.train_on_batch``; the returned summary
     is written to ``self.writer`` at step ``epoch * batch_count + i`` and
     the loss is printed and accumulated. After every batch
     ``self.evaluate_answer`` is called on the training data and on the
     validation data.

     Args:
         sess: Session object forwarded to the training/evaluation calls.
         train_set: Sized training data handed to ``minibatches``.
         valid_set: Validation data passed to ``self.evaluate_answer``.
         train_raw: Raw training data paired with ``train_set``.
         valid_raw: Raw validation data paired with ``valid_set``.
         epoch: Epoch index used to compute the summary step.

     Returns:
         A ``(global_loss, summary)`` tuple: the sum of all batch losses and
         the summary of the last batch (``None`` if there were no batches).
     """
     batch_size = self.config.batch_size
     batch_count = int(np.ceil(len(train_set) * 1.0 / batch_size))
     global_loss = 0
     # Stays None when there are no batches, so the return cannot hit an
     # unbound local.
     summary = None
     for i, batch in enumerate(minibatches(train_set, batch_size)):
         loss, summary = self.train_on_batch(sess, *batch)
         self.writer.add_summary(summary, epoch * batch_count + i)
         print("Loss-", loss)
         # NOTE(review): both evaluations run after every batch and their
         # results are discarded; presumably they were meant to run once per
         # epoch -- confirm before moving them out of the loop.
         train_dataset = [train_set, train_raw]
         self.evaluate_answer(sess, train_dataset)
         # Evaluate on the validation data (this previously re-used the
         # training data by mistake).
         valid_dataset = [valid_set, valid_raw]
         self.evaluate_answer(sess, valid_dataset)
         global_loss += loss
     return global_loss, summary
 def predict_on_batch(self, session, dataset):
     """Return the result of ``self.answer`` for each minibatch of ``dataset``.

     The dataset is split with ``minibatches`` using
     ``self.config.batch_size``; the returned list has one entry per batch,
     in iteration order.
     """
     return [
         self.answer(session, batch)
         for batch in minibatches(dataset, self.config.batch_size)
     ]