Example #1
    def run_epoch(self, sess, train_examples, dev_set, epoch_num):
        """ Run a single epoch on the given train_examples. 
            Then evaluate on the dev_set.
            Both train_examples and dev_set should be of format:
            [headlines, articles, labels]
        """
        # Train the Epoch
        prog = Progbar(target=1 +
                       len(train_examples[0]) / self.config.batch_size)
        for i, (headlines_batch, articles_batch, h_seq_lengths, a_seq_lengths,
                labels_batch) in enumerate(
                    minibatches(train_examples, self.config.batch_size)):
            loss, summary_str = self.train_on_batch(sess, headlines_batch,
                                                    articles_batch,
                                                    h_seq_lengths,
                                                    a_seq_lengths,
                                                    labels_batch)
        self.summary_writer.add_summary(summary_str, (epoch_num + 1) *
                                        len(train_examples[0]) /
                                        self.config.batch_size)
            prog.update(i + 1, [("train loss", loss)])

        # Evaluate on the Dev Set
        print "\nEvaluating on dev set"
        dev_score, _, dev_confusion_matrix_str = self.predict(sess, dev_set)
        print "- dev Score: {:.2f}".format(dev_score)
        self.dev_scores.append(dev_score)

        print "\nEvaluating on train set"
        train_score, _, train_confusion_matrix_str = self.predict(
            sess, train_examples)
        print "- train Score: {:.2f}".format(train_score)
        self.train_scores.append(train_score)
        return dev_score, train_score, dev_confusion_matrix_str, train_confusion_matrix_str
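All of the snippets on this page assume a Keras-style Progbar and a minibatches helper supplied by the surrounding project; neither is shown here. As a rough sketch of the interface these calls rely on (real implementations vary between repositories, and some, as in Example #1, also yield per-batch sequence lengths):

import sys

import numpy as np


class Progbar(object):
    """Minimal stand-in matching the prog.update(current, values) calls above."""

    def __init__(self, target):
        self.target = target

    def update(self, current, values=None, exact=None):
        # Render "current/target" plus any named metrics on a single line.
        metrics = (values or []) + (exact or [])
        stats = " - ".join("%s: %.4f" % (name, val) for name, val in metrics)
        sys.stdout.write("\r%d/%d %s" % (current, self.target, stats))
        sys.stdout.flush()


def minibatches(data, batch_size, shuffle=True):
    """Yield batches from data = [array_1, ..., array_k], sliced in parallel."""
    n = len(data[0])
    indices = np.arange(n)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, n, batch_size):
        idx = indices[start:start + batch_size]
        yield [np.asarray(d)[idx] for d in data]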
Example #2
    def predict(self, sess, data_set, save_preds=False, UseShuffle=True):
        """ Compute predictions on a given data set.
            data_set = [headlines, articles, labels]
            Returns (score, predictions, confusion_matrix_str)
        """
        # Compute Predictions
        if data_set[0].size == 0:
            # Match the normal return signature: (score, preds, confusion_matrix_str)
            return 0, [], ""

        prog = Progbar(target=1 + len(data_set[0]) / self.config.batch_size)
        actual = []
        preds = []
        for i, (headlines_batch, articles_batch, h_seq_lengths, a_seq_lengths,
                labels_batch) in enumerate(
                    minibatches(data_set, self.config.batch_size, UseShuffle)):
            predictions_batch = list(
                self.predict_on_batch(sess, headlines_batch, articles_batch,
                                      h_seq_lengths, a_seq_lengths))
            preds.extend(predictions_batch)
            actual.extend(vectorize_stances(labels_batch))
            prog.update(i + 1)

        if save_preds:
            print "Writing predictions to {}".format(self.preds_fn)
            with open(self.preds_fn, 'w') as f:
                cPickle.dump(preds, f, -1)

        # Compute Score
        score, confusion_matrix_str = self.scoring_function(actual, preds)
        return score, preds, confusion_matrix_str
Example #3
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw,
                  dev_set_raw, epoch, last_epoch):

        prog = Progbar(target=1 +
                       int(len(train_examples) / self.config.batch_size))
        curr_loss = 0.
        num_encountered = 0
        for i, batch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            curr_loss += loss
            num_encountered += 1
            prog.update(i + 1, [("train loss", loss)])
            if self.report: self.report.log_train_loss(loss)
        train_loss.append(curr_loss / num_encountered)
        # print curr_loss/num_encountered
        # epochs.append(epoch+1)
        print("")

        logger.info("Evaluating on development data")
        divergence = self.evaluate(sess, dev_set, dev_set_raw, last_epoch)
        logger.info("KL divergence: %.2f", divergence)
        dev_loss.append(divergence)

        return divergence
Example #4
    def evaluate(self, session, dataset):
        input_feed = {self.train_phase: False}
        output_feed = [self.loss, self.psnr, self.out]

        test, loader = dataset

        total_loss = 0.
        metrics = []

        prog = Progbar(target=(len(test) - 1) // self.flags.batch_size + 1)
        for i, batch in enumerate(
                get_minibatches(test, self.flags.batch_size, shuffle=False)):
            input_feed[self.im_placeholder] = [loader(b[0]) for b in batch]
            input_feed[self.gt_placeholder] = [loader(b[1]) for b in batch]

            loss, psnr, out = session.run(output_feed, input_feed)
            total_loss += loss * len(batch)
            all_ssim = [
                ssim(im - resid, gt, multichannel=True)
                for resid, im, gt in zip(out, input_feed[self.im_placeholder],
                                         input_feed[self.gt_placeholder])
            ]
            metrics.extend(zip([b[0] for b in batch], psnr, all_ssim))
            prog.update(i + 1, exact=[("total loss", total_loss)])

        return total_loss, metrics
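Example #4 assumes an ssim function; in scikit-image this metric is structural_similarity, and its import path and channel handling depend on the library version. A plausible import matching the multichannel=True call above:

# scikit-image 0.16-0.18:
from skimage.metrics import structural_similarity as ssim
# Older releases exposed the same metric as:
# from skimage.measure import compare_ssim as ssim
# From 0.19 on, multichannel=True is deprecated in favor of channel_axis=-1.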
Example #5
    def test_time_predict(self, sess, test_examples_raw):
        test_examples = self.preprocess_sequence_data(test_examples_raw)

        num_examples = len(test_examples[0])
        num_batches = int(np.ceil(num_examples * 1.0 / self.config.batch_size))
        prog = Progbar(target=num_batches)

        preds = []

        for i, batch in enumerate(self.minibatch(test_examples,
                                                 shuffle=False)):
            # Ignore labels
            sentence1_batch, sentence2_batch, labels_batch = batch
            preds_ = self.test_time_predict_on_batch(sess, sentence1_batch,
                                                     sentence2_batch)
            preds += list(preds_)

            prog.update(i + 1)

        # here we have a list of predictions
        with open('../../final.csv', 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['test_id', 'is_duplicate'])
            for i in range(len(preds)):
                writer.writerow([str(i), preds[i]])
            print("Generated ../../final.csv")
Example #6
def gather_observations(learning_model, expert_model, envname, obs_mean,
                        obs_sdev):

    env = gym.make(envname)
    max_steps = env.spec.timestep_limit
    target_size = 5000
    observations = []
    expert_actions = []
    all_steps = []
    returns = []
    prog = Progbar(target=1 + target_size)
    while len(observations) < target_size:
        resets = 0
        obs = env.reset()
        obsize = obs.shape
        done = False
        totalr = 0.
        steps = 0
        while not done:
            expert_action = expert_model(obs[None, :])
            observations.append(obs)
            std_obs = normalize(obs, obs_mean, obs_sdev)
            expert_actions.append(expert_action)
            action = learning_model.predict(std_obs, batch_size=1)
            obs, r, done, _ = env.step(action)
            steps += 1
            totalr += r
            env.render()
            if steps >= max_steps:
                break
        prog.update(len(observations), [("in the gym", resets)])
        returns.append(totalr)

    return returns, np.array(observations), np.array(expert_actions)
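Example #6 calls a normalize helper that is not shown. A minimal sketch consistent with the obs_mean/obs_sdev arguments would be ordinary z-scoring (the epsilon guard is an assumption):

import numpy as np


def normalize(obs, obs_mean, obs_sdev, eps=1e-6):
    # Z-score an observation; eps guards against zero-variance features.
    return (np.asarray(obs) - obs_mean) / (obs_sdev + eps)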
Example #7
    def run_epoch(self, sess, train_examples, dev_set, test_set):
        """
        Args:
            sess: TFSession
            train_examples: [ numpy array (num_examples, max_length) of all sentence 1,
                        numpy array (num_examples, max_length) of all sentence 2,
                        numpy array (num_examples, ) of all labels ]
            dev_set: same as train_examples, except for the dev set
            test_set: same format; unused in this method
        Returns:
            avg loss across all minibatches
        """
        num_examples = len(train_examples[0])
        num_batches = int(np.ceil(num_examples * 1.0 / self.config.batch_size))
        prog = Progbar(target=num_batches)

        total_loss = 0.0

        for i, batch in enumerate(self.minibatch(train_examples,
                                                 shuffle=True)):
            sentence1_batch, sentence2_batch, labels_batch = batch
            feed = self.create_feed_dict(sentence1_batch,
                                         sentence2_batch,
                                         labels_batch,
                                         dropout=self.config.dropout)
            _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
            total_loss += loss
            prog.update(i + 1, [("train loss", loss)])
        print("")
        return total_loss / num_batches
Example #8
    def run_epoch(self, sess, train_examples, dev_examples):
        def eval_helper(sess, examples, msg_prefix):
            token_cm, entity_scores, ratios = self.evaluate(sess, examples)
            logger.debug("Token-level confusion matrix:\n" +
                         token_cm.as_table())
            logger.debug("Token-level scores:\n" + token_cm.summary())
            logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
            logger.info("%s FNC Score: %.2f", msg_prefix, ratios[1])
            logger.info("Unrelated Score: %.2f", ratios[0])
            fnc_score = ratios[1]
            return fnc_score

        prog = Progbar(target=1 +
                       int(len(train_examples) / self.config.batch_size))
        for i, batch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            if self.config.similarity_metric_feature:
                loss = self.train_on_batch(sess,
                                           batch[0],
                                           batch[1],
                                           batch[2],
                                           sim_scores_batch=batch[3])
            else:
                loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
        print("")

        train_fnc_score = None
        if self.verbose:
            logger.info("Evaluating on training data")
            train_fnc_score = eval_helper(sess, train_examples, "Train")

        logger.info("Evaluating on development data")
        fnc_score = eval_helper(sess, dev_examples, "Dev")
        return train_fnc_score, fnc_score
Example #9
    def _run_epoch(self, sess, train, train_labels, dev, dev_labels):
        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            feed = self.create_feed_dict(*batch, dropout=self.config.dropout)
            _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
            prog.update(i + 1, [("train loss", loss)])

            if self.report: self.report.log_train_loss(loss)
        print("")

        logger.info("Evaluating on training data: 10k sample")
        n_train_evaluate = 10000
        train_entity_scores = self._evaluate(sess, train[:n_train_evaluate], train_labels[:n_train_evaluate])
        train_entity_scores = train_entity_scores[:5]
        logger.info("acc/P/R/F1/loss: %.3f/%.3f/%.3f/%.3f/%.4f", *train_entity_scores)

        logger.info("Evaluating on development data")
        entity_scores = self._evaluate(sess, dev, dev_labels, isDev=True)
        entity_scores = entity_scores[:5]
        logger.info("acc/P/R/F1/loss: %.3f/%.3f/%.3f/%.3f/%.4f", *entity_scores)

        with open(self.config.eval_output, 'a') as f:
            f.write('%.4f %.4f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n' %
                    (train_entity_scores[4], entity_scores[4],
                     train_entity_scores[0], entity_scores[0],
                     train_entity_scores[3], entity_scores[3],
                     entity_scores[0], entity_scores[1], entity_scores[2]))

        f1 = entity_scores[-2]
        return f1
Example #10
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw,
                  dev_set_raw):
        prog = Progbar(target=1 +
                       int(len(train_examples) / self.config.batch_size))
        for i, batch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
            if self.report:
                self.report.log_train_loss(loss)
        print("")

        #logger.info("Evaluating on training data")
        #token_cm, entity_scores = self.evaluate(sess, train_examples, train_examples_raw)
        #logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        #logger.debug("Token-level scores:\n" + token_cm.summary())
        #logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        f1 = entity_scores[-1]
        return f1
Example #11
    def run_epoch(self, sess, train_data, dev_data):
        train_input_batches, train_truth_batches, train_mask_batches, train_input_sequence = train_data
        dev_input_batches, dev_truth_batches, dev_mask_batches, dev_input_sequence = dev_data

        logger.info("number of train input batches: %d",
                    int(len(train_input_batches)))
        prog = Progbar(target=1 + len(train_input_batches))

        loss = 0
        for i, input_batch in enumerate(train_input_batches):
            loss = self.train_on_batch(sess, input_batch,
                                       train_truth_batches[i],
                                       train_mask_batches[i],
                                       train_input_sequence[i])
            prog.update(i + 1, [("train loss", loss)])
        logger.info("\nTrain loss (last batch): " + str(loss))

        #	if self.report: self.report.log_train_loss(loss)
        #print("")

        dev_loss = self.compute_dev_loss(
            sess, dev_input_batches, dev_truth_batches,
            dev_mask_batches)  # print loss on dev set

        return dev_loss  # TODO: check where the return value is used
Example #12
    def run_epoch(self, optimizer, train):
        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, [x, y] in enumerate(minibatches(train, self.config.batch_size)):
            optimizer.zero_grad()
            train_x = torch.from_numpy(x)
            train_y = torch.from_numpy(y)
            pred = self(train_x)
            loss = self.take_loss(train_y, pred)

            loss.backward()

            if self.config.clip_gradients:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    self.parameters(recurse=True), self.config.max_grad_norm)
            else:
                grad_norm = []
                for param in self.parameters(recurse=True):
                    if param.grad is not None:
                        grad_norm.append(param.grad.norm().item())
                grad_norm = np.sum(grad_norm)
            optimizer.step()
            loss = loss.detach().numpy()

            losses.append(loss)
            grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss),
                                ("grad norm", grad_norm)])

        return losses, grad_norms
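The manual branch in Example #12 sums per-parameter gradient norms, whereas clip_grad_norm_ clips by the global L2 norm over all gradients. For comparison, a standalone sketch of that global norm (assuming PyTorch is available):

import torch


def global_grad_norm(parameters):
    # Global L2 norm across all gradients; the quantity clip_grad_norm_ clips by.
    norms = [p.grad.norm(2) for p in parameters if p.grad is not None]
    return torch.norm(torch.stack(norms), 2).item()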
Example #13
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw, dev_raw):
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        for i, batch in enumerate(minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
            if self.report: self.report.log_train_loss(loss)
        print("")

        #logger.info("Evaluating on training data")
        #token_cm, entity_scores = self.evaluate(sess, train_examples, train_examples_raw)
        #logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        #logger.debug("Token-level scores:\n" + token_cm.summary())
        #logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        logger.info("Evaluating on development data")
        
        sentence, labels, labels_ = zip(*self.output(sess, dev_raw))
        predictions = [[LBLS[l] for l in preds] for preds in labels_]
        sentences = [[s[0] for s in sent] for sent in sentence]
        output = zip(sentences, labels, predictions)
        eval_lines = test_ner(output, self.config.output_path)
        for line in eval_lines:
            logger.info(line)
        test_f1 = float(eval_lines[1].strip().split()[-1])   
        return test_f1
Example #14
    def run_epoch(self, session, dataset, valset, epoch):
        total_loss = 0.0
        prog = Progbar(target=1 + int(Config.num_batches))
        minibatchLoss = "./results/minibatch_loss.txt"
        valScore = "./results/val_score.txt"

        numPoints = range(int(Config.num_batches))
        randomSampling = random.sample(numPoints, len(numPoints))

        print("number of sampling points is: ", len(randomSampling))
        for i in range(int(Config.num_batches)):
            #        for i in range(5):
            currIndex = randomSampling[i]
            #train_x, train_y = self.get_ith_minibatch(dataset, 0)
            train_x, train_y = self.get_ith_minibatch(dataset, currIndex)
            #train_x, train_y = self.get_debug_minibatch(dataset)
            loss = self.optimize(session, train_x, train_y)
            print("loss on this batch: ", loss, " \n")
            prog.update(i + 1, [("avg batch loss", loss)])
            total_loss += loss

            if i % 20 == 0:
                #f1, em = self.evaluate_answer(session, self.get_debug_minibatch_val(dataset), sample=32)
                f1, em = self.evaluate_answer(session, valset, training=True)
                with open(minibatchLoss, "a") as f:
                    f.write(str((epoch, i, loss)) + "\n")
                    #f.write("epoch number is: " + str(self.epoch) + "minibatch number is: " + str(i) + " and loss is: " + str(loss) + "\n")
                    f.flush()
                with open(valScore, "a") as f:
                    f.write(str((f1, em)) + "\n")
                    #f.write(str(f1) + " , " + str(em) + "\n") #making this almost a CSV
                    f.flush()
                print("iteration number is: ", i, "f1 is: ", f1, "and em is: ",
                      em)
        return total_loss
Example #15
    def run_epoch(self, sess, saver, train, dev):
        prog = Progbar(target=int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            loss, grad_norm, summ = self.train_on_batch(sess, *batch)
            losses.append(loss)
            grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss)])

        print("\nEvaluating on dev set...")
        predictions = []
        references = []
        for batch in minibatches(dev, self.config.batch_size):
            inputs_batch, targets_batch = batch
            prediction = list(self.predict_on_batch(sess, inputs_batch))
            predictions += prediction
            references += list(targets_batch)

        predictions = [
            tokens_to_sentences(pred, self.config.idx2word)
            for pred in predictions
        ]
        references = [
            tokens_to_sentences(ref, self.config.idx2word)
            for ref in references
        ]

        f1, _, _ = rouge_n(predictions, references)
        print("- dev rouge f1: {}".format(f1))
        return losses, grad_norms, summ, predictions, f1
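Example #15 relies on tokens_to_sentences and rouge_n helpers that are not shown. A plausible sketch of tokens_to_sentences, assuming idx2word is a dict and that sequences carry an end-of-sequence marker (both assumptions):

def tokens_to_sentences(token_ids, idx2word, eos="</s>"):
    # Map ids back to words, truncating at an assumed end-of-sequence token.
    words = []
    for t in token_ids:
        word = idx2word.get(int(t), "<unk>")
        if word == eos:
            break
        words.append(word)
    return " ".join(words)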
Example #16
    def preds_and_loss(
            self, sess,
            saver):  # not sure which of these params we actually need
        # TODO: make sure what we're working with is actually 'test.ids.article'
        test_input, _, test_input_len = tokenize_data(
            'test.ids.article', self.config.max_sentence_len, False)
        test_truth, test_truth_mask, test_truth_len = tokenize_data(
            'test.ids.title', self.config.max_sentence_len, True)

        test_input_batches = get_stacked_minibatches(test_input,
                                                     self.config.batch_size)
        test_truth_batches = get_stacked_minibatches(test_truth,
                                                     self.config.batch_size)
        test_mask_batches = get_reg_minibatches(test_truth_mask,
                                                self.config.batch_size)

        # run through once (don't need multiple epochs)

        # test_input_batches is already batched, so the step count is its length
        prog = Progbar(target=len(test_input_batches))

        total_test_loss = 0
        self.save_predictions = True
        for i, input_batch in enumerate(test_input_batches):
            loss = self.predict_on_batch(sess,
                                         input_batch,
                                         test_truth_batches[i],
                                         test_mask_batches[i],
                                         num_of_batch=i,
                                         using_dev=False)
            total_test_loss += loss
            prog.update(i + 1, [("test loss on batch", loss)])

        return total_test_loss
Example #17
    def output(self, sess, inputs_raw, inputs=None, summarize=False):
        """
        Reports the output of the model on examples (uses helper to featurize each example).
        """
        if inputs is None:
            inputs = self.preprocess_sequence_data(
                self.helper.vectorize(inputs_raw))

        preds = []
        probs = [] if summarize else None
        prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
        for i, batch in enumerate(
                minibatches(inputs, self.config.batch_size, shuffle=False)):
            # Ignore predict
            batch = batch[:1] + batch[2:]
            if summarize:
                preds_, probs_ = self.predict_on_batch(sess,
                                                       *batch,
                                                       summarize=True)
                preds += list(preds_)
                probs += list(probs_)
            else:
                preds_ = self.predict_on_batch(sess, *batch)
                preds += list(preds_)
            prog.update(i + 1, [])

        return self.consolidate_predictions(inputs_raw, inputs, preds, probs)
Example #18
    def run_epoch(self, sess, train):
        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        losses = []
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            losses.append(loss)
            # grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss)])
        return losses
Example #19
    def run_epoch(self, sess, train):
        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            loss, grad_norm = self.train_on_batch(sess, *batch)
            losses.append(loss)
            grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss)])

        return losses, grad_norms
Example #20
    def evaluate(self, sess, examples):
        """
        Args:
            sess: a TFSession
            examples: [ numpy array (num_examples, max_length) of all sentence 1,
                        numpy array (num_examples, max_length) of all sentence 2,
                        numpy array (num_examples, ) of all labels ]
        Returns:
            (preds, accuracy, precision, recall, f1)
        """
        preds = []
        confusion_matrix = np.zeros((2, 2), dtype=np.float64)

        num_examples = len(examples[0])
        num_batches = int(np.ceil(num_examples * 1.0 / self.config.batch_size))
        prog = Progbar(target=num_batches)

        for i, batch in enumerate(self.minibatch(examples, shuffle=False)):
            # Ignore labels
            sentence1_batch, sentence2_batch, labels_batch = batch
            preds_ = self.predict_on_batch(sess, sentence1_batch,
                                           sentence2_batch)
            preds += list(preds_)
            labels_batch = np.array(labels_batch)

            for j in range(preds_.shape[0]):
                confusion_matrix[labels_batch[j], preds_[j]] += 1

            prog.update(i + 1)

        # Confusion matrix layout:
        #            pred -   pred +
        # label -  |   tn   |   fp   |
        # label +  |   fn   |   tp   |
        tn = confusion_matrix[0, 0]
        fp = confusion_matrix[0, 1]
        fn = confusion_matrix[1, 0]
        tp = confusion_matrix[1, 1]
        correct_preds = tp + tn
        accuracy = correct_preds / num_examples
        precision = (tp) / (tp + fp) if tp > 0 else 0
        recall = (tp) / (tp + fn) if tp > 0 else 0

        print("\ntp: %f, fp: %f, fn: %f" % (tp, fp, fn))
        f1 = 2 * precision * recall / (precision + recall) if tp > 0 else 0

        return (preds, accuracy, precision, recall, f1)
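The metric arithmetic in Example #20 is easy to sanity-check in isolation; a worked example with a hypothetical 2x2 confusion matrix:

import numpy as np

# Hypothetical counts: rows are true labels (-, +), columns are predictions (-, +).
cm = np.array([[50., 10.],
               [5., 35.]])
tn, fp, fn, tp = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]

accuracy = (tp + tn) / cm.sum()               # 85 / 100 = 0.85
precision = tp / (tp + fp) if tp > 0 else 0   # 35 / 45 ~= 0.778
recall = tp / (tp + fn) if tp > 0 else 0      # 35 / 40 = 0.875
f1 = 2 * precision * recall / (precision + recall) if tp > 0 else 0  # ~= 0.824

print(accuracy, precision, recall, f1)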
Example #21
    def fit(self, sess, saver, train_examples_raw, dev_set_raw):
        best_score = 0.

        train_examples = self.preprocess_sequence_data(train_examples_raw)
        dev_set = self.preprocess_sequence_data(dev_set_raw)

        for epoch in range(self.config.n_epochs):
            logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
            # You may use the progress bar to monitor the training progress
            # Addition of progress bar will not be graded, but may help when debugging
            prog = Progbar(target=1 +
                           int(len(train_examples) / self.config.batch_size))

            # The general idea is to loop over minibatches from train_examples, and run train_on_batch inside the loop
            # Hint: train_examples could be a list containing the feature data and label data
            # Read the doc for utils.get_minibatches to find out how to use it.
            # Note that get_minibatches could either return a list, or a list of list
            # [features, labels]. This makes expanding tuples into arguments (* operator) handy

            ### YOUR CODE HERE (2-3 lines)
            for i, batch in enumerate(
                    minibatches(train_examples,
                                self.config.batch_size,
                                shuffle=True)):
                loss = self.train_on_batch(sess, *batch)
                prog.update(i + 1, [('train loss', loss)])
            # token_cm, entity_scores = self.evaluate(sess, train_examples, train_examples_raw)
            # print 'Training time -- Entity level P/R/F1: %.2f/%.2f/%.2f' % entity_scores

            ### END YOUR CODE

            logger.info("Evaluating on development data")
            token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
            logger.debug("Token-level confusion matrix:\n" +
                         token_cm.as_table())
            logger.debug("Token-level scores:\n" + token_cm.summary())
            logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

            score = entity_scores[-1]

            if score > best_score:
                best_score = score
                if saver:
                    logger.info("New best score! Saving model in %s",
                                self.config.model_output)
                    saver.save(sess, self.config.model_output)
            print("")
            if self.report:
                self.report.log_epoch()
                self.report.save()
        return best_score
Example #22
    def run_epoch(self, sess, train, label):
        """Run one training epoch; returns per-batch losses and gradient norms."""

        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, (inputs_minibatch, labels_minibatch) in enumerate(
                get_minibatches([train, label], self.config.batch_size)):
            loss, grad_norm = self.train_on_batch(sess, inputs_minibatch,
                                                  labels_minibatch)
            losses.append(loss)
            grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss)])

        return losses, grad_norms
Example #23
    def run_epoch(self, epoch, frameTrain, frameVal):
        """
        Run 1 epoch. Train on training examples, evaluate on validation set.
        """
        path = self.options['path']
        train_losses = []
        numTrain = frameTrain.shape[0]
        prog = Progbar(target=1 + int(numTrain / self.options["batch_size"]))
        for i, frameBatch in enumerate(
                get_minibatches(frameTrain, self.options["batch_size"])):
            batch = loadData(frameBatch, **(self.options))
            loss, lr, gs = self.optimize(*batch)
            train_losses.append(loss)
            if (self.global_step % self.options["print_every"]) == 0:
                logging.info(
                    "Iteration {0}: with minibatch training l2_loss = {1:.3g} and mse of {2:.2g}"
                    .format(self.global_step, loss,
                            loss / self.options["batch_size"]))
            prog.update(i + 1, [("train loss", loss)], [("learning rate", lr),
                                                        ("global step", gs)])
        total_train_mse = np.sum(train_losses) / numTrain

        val_losses = []
        if epoch >= 11:
            numVal = frameVal.shape[0]
            prog = Progbar(target=1 + int(numVal / self.options["batch_size"]))
            for i, frameBatch in enumerate(
                    get_minibatches(frameVal, self.options["batch_size"])):
                batch = loadData(frameBatch, **(self.options))
                loss = self.validate(*batch)
                val_losses.append(loss)
                prog.update(i + 1, [("validation loss", loss)])
            total_val_mse = np.sum(val_losses) / numVal
        else:
            total_val_mse = -1

        return total_train_mse, train_losses, total_val_mse, val_losses
Example #24
    def run_epoch(self, sess, train):
        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            if batch[0].shape[0] != 100:
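                # Skip incomplete batches; the snippet appears to assume a fixed batch size of 100.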
                continue
            pred = self.predict_on_batch(sess, batch[0])
            loss, grad_norm = self.train_on_batch(sess, *batch)
            losses.append(loss)
            grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss),
                                ("train grad", grad_norm)])

        return losses, grad_norms
Example #25
    def run_epoch(self, sess, train_set, val_set, context):
        prog_train = Progbar(target=1 +
                             int(len(train_set) / self.flags.batch_size))
        for i, batch in enumerate(minibatches(train_set,
                                              self.flags.batch_size)):
            loss = self.optimize(sess, *batch)
            prog_train.update(i + 1, [("train loss", loss)])
        print("")

        if self.flags.debug == 0 or self.flags.debug == 1:
            prog_val = Progbar(target=1 +
                               int(len(val_set) / self.flags.batch_size))
            for i, batch in enumerate(
                    minibatches(val_set, self.flags.batch_size)):
                val_loss = self.validate(sess, *batch)
                prog_val.update(i + 1, [("val loss", val_loss)])
            print("")
            train_f1, train_em = self.evaluate_answer(sess,
                                                      train_set,
                                                      context=context[0],
                                                      sample=100,
                                                      log=True,
                                                      eval_set="-Epoch TRAIN-")
            val_f1, val_em = self.evaluate_answer(sess,
                                                  val_set,
                                                  context=context[1],
                                                  sample=100,
                                                  log=True,
                                                  eval_set="-Epoch VAL-")
Example #26
    def run_epoch(self, sess, train_set, val_set):
        prog_train = Progbar(target=1 +
                             int(len(train_set[0]) / self.flags.batch_size))
        for i, batch in enumerate(
                self.minibatches(train_set, self.flags.batch_size)):
            loss = self.optimize(sess, *batch)
            prog_train.update(i + 1, [("train loss", loss)])
        print("")

        #if self.flags.debug == 0:
        prog_val = Progbar(target=1 +
                           int(len(val_set[0]) / self.flags.batch_size))
        for i, batch in enumerate(
                self.minibatches(val_set, self.flags.batch_size)):
            val_loss = self.validate(sess, *batch)
            prog_val.update(i + 1, [("val loss", val_loss)])
        print("")

        self.evaluate_answer(session=sess,
                             dataset=train_set,
                             sample=len(val_set[0]),
                             log=True,
                             eval_set="-Epoch TRAIN-")

        self.evaluate_answer(session=sess,
                             dataset=val_set,
                             sample=None,
                             log=True,
                             eval_set="-Epoch VAL-")
Example #27
    def answer(self, session, data):

        scores = []
        prog_train = Progbar(target=1 +
                             int(len(data[0]) / self.flags.batch_size))
        for i, batch in enumerate(
                self.minibatches(data, self.flags.batch_size, shuffle=False)):
            score = self.forward_pass(session, *batch)
            scores.append(score)
            prog_train.update(i + 1, [("Predicting Images....", 0.0)])
        print("")
        scores = np.vstack(scores)
        predictions = np.argmax(scores, axis=-1)
        return predictions
Example #28
def summary_success_epoch(train_data, model, session):
    num_train_batches = int(len(train_data['q']) / FLAGS.batch_size)
    prog = Progbar(target=num_train_batches)
    permutation = np.random.permutation(num_train_batches*FLAGS.batch_size)
    successes = []
    for i in range(num_train_batches):
        if i >= FLAGS.train_batch >= 0:
            break
        data_batch = get_batch(train_data, i, permutation=permutation)
        successes.append(model.summary_success(sess=session, data_batch=data_batch))
        prog.update(i+1, [("retained", sum(successes))])

    logger.debug("Summarization: %d out of %d answers are retained", sum(successes), int(len(train_data['q'])))
    logger.debug("Retain rate: %.2f%%", 100. * sum(successes) / len(train_data['q']))
    return sum(successes)
Example #29
    def run_epoch(self, sess, batch_gen, info):
        # use 3301 for 24 batch size
        # use 2476 for 32 batch size
        prog = Progbar(target=4952)
        (i1, i2, i3, i4, i5, i6) = info
        batch_epoch = batch_gen(i1, i2, i3, i4, i5, i6)
        for i in range(4952):
            batch = next(batch_epoch)
            loss, grad_norm, EM = self.train_on_batch(sess, batch)
            logging.info("loss is %f, grad_norm is %f" % (loss, grad_norm))
            prog.update(i + 1, [("train loss", loss), ("grad_norm", grad_norm),
                                ("EM", EM)])
            if math.isnan(loss):
                logging.info("loss nan")
                assert False
Example #30
    def fit(self, sess, saver, train_examples_raw, dev_set_raw):
        best_score = 0.

        train_examples = self.preprocess_sequence_data(train_examples_raw)
        dev_set = self.preprocess_sequence_data(dev_set_raw)

        for epoch in range(self.config.n_epochs):
            logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
            # You may use the progress bar to monitor the training progress
            # Addition of progress bar will not be graded, but may help when debugging
            prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))

            # The general idea is to loop over minibatches from train_examples, and run train_on_batch inside the loop
            # Hint: train_examples could be a list containing the feature data and label data
            # Read the doc for utils.get_minibatches to find out how to use it.
            # Note that get_minibatches could either return a list, or a list of list
            # [features, labels]. This makes expanding tuples into arguments (* operator) handy

            ### YOUR CODE HERE (2-3 lines)
            # get_minibatches actually returns a generator, which can be considered an iterator;
            # the difference is that it generates results on request rather than storing them in memory

            for i, batch in enumerate(minibatches(train_examples, self.config.batch_size)):
                # q1_window.py and q2_rnn.py have different train_on_batch functions that accept
                # different numbers of arguments. It might be clearer to leave the implementation to the subclass
                loss = self.train_on_batch(sess, *batch)
                prog.update(current=i + 1, values=[('loss', loss)])

            ### END YOUR CODE

            logger.info("Evaluating on development data")
            token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
            logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
            logger.debug("Token-level scores:\n" + token_cm.summary())
            logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

            score = entity_scores[-1]
            
            if score > best_score:
                best_score = score
                if saver:
                    logger.info("New best score! Saving model in %s", self.config.model_output)
                    saver.save(sess, self.config.model_output)
            print("")
            if self.report:
                self.report.log_epoch()
                self.report.save()
        return best_score
Example #31
    def output(self, sess, inputs_raw, inputs=None):
        """
        Reports the output of the model on examples (uses helper to featurize each example).
        """
        if inputs is None:
            inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))

        preds = []
        prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
        for i, batch in enumerate(minibatches(inputs, self.config.batch_size, shuffle=False)):
            # Ignore predict
            batch = batch[:1] + batch[2:]
            preds_ = self.predict_on_batch(sess, *batch)
            preds += list(preds_)
            prog.update(i + 1, [])
        return self.consolidate_predictions(inputs_raw, inputs, preds)
Example #32
    def run_epoch(self, sess, train):
        batches = self.build_batches(self.train_qas)
        if not FLAGS.is_prod: batches = batches[:5]
        prog = Progbar(target=len(batches))
        losses = []
        for i, batch in enumerate(batches):
            loss = self.train_on_batch(sess, zip(*batch))
            losses.append(loss)
            prog.update(i + 1, [("train loss", loss)])

        logging.info("Evaluation on training data")
        self.evaluate_answer(sess, self.train_qas, log=True)
        logging.info("Evaluation on dev data")
        f1, em = self.evaluate_answer(sess, self.dev_qas, log=True)

        return f1
Example #33
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw):
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        for i, batch in enumerate(minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
            if self.report: self.report.log_train_loss(loss)
        print("")

        #logger.info("Evaluating on training data")
        #token_cm, entity_scores = self.evaluate(sess, train_examples, train_examples_raw)
        #logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        #logger.debug("Token-level scores:\n" + token_cm.summary())
        #logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        f1 = entity_scores[-1]
        return f1