コード例 #1
0
 def fit(self, sess, train, dev=None):
     """Train on ``train`` and track the best exact accuracy seen on ``dev``.

     Batches come from ``data_utils.batch_iter`` for ``self.num_epochs``
     epochs. Whenever the global step is a multiple of the number of batches
     per epoch and ``dev`` is given, the model is evaluated with
     ``self.evaluation_on_dev``. Training stops early once the current epoch
     index exceeds the best epoch index by more than 3.

     Returns:
         Tuple ``(best_dev_epoch, best_dev_loss, best_dev_pacc,
         best_dev_eacc)``; the initial values ``(0, 1e10, 0.0, 0.0)`` are
         returned unchanged when no evaluation ever improves on them.
     """
     batch_stream = data_utils.batch_iter(train, self.batch_size, self.num_epochs)
     steps_per_epoch = int((len(train) - 1) / self.batch_size) + 1
     best_dev_pacc, best_dev_eacc = 0.0, 0.0
     best_dev_loss = 1e10
     best_dev_epoch = 0
     for batch in batch_stream:
         (words, textlen, mentions, mentionlen, positions, labels) = zip(*batch)
         self.train_on_batch(sess, words, textlen, mentions, mentionlen, positions, labels)
         current_step = tf.train.global_step(sess, self.global_step)
         # Only evaluate at epoch boundaries, and only when a dev set exists.
         if current_step % steps_per_epoch != 0 or dev is None:
             continue
         epoch = current_step // steps_per_epoch
         print("\nEvaluation:")
         print("previous best dev epoch {}, best exact acc {:g} with partial acc {:g}".format(best_dev_epoch, best_dev_eacc, best_dev_pacc))
         loss, pacc, eacc = self.evaluation_on_dev(sess, dev)
         print("")
         if eacc > best_dev_eacc:
             best_dev_loss, best_dev_pacc, best_dev_eacc = loss, pacc, eacc
             best_dev_epoch = epoch
         if epoch - best_dev_epoch > 3:
             break
     return best_dev_epoch, best_dev_loss, best_dev_pacc, best_dev_eacc
コード例 #2
0
 def predict(self, sess, test):
     """Run the model over ``test`` and return flattened labels and scores.

     For every batch the probabilities of all classes except column 0 are
     kept, and each label is turned into a row of length
     ``self.num_classes - 1`` with a 1.0 at index ``label - 1`` (labels that
     are not greater than 0 give an all-zero row).

     Returns:
         Tuple ``(all_labels, all_probs, accuracy)`` where the first two are
         1-D arrays and ``accuracy`` is the summed ``self.correct_num``
         divided by the summed ``self.valid_size``.
     """
     batches = data_utils.batch_iter(test,
                                     self.batch_size,
                                     1,
                                     shuffle=False)
     num_positive = self.num_classes - 1
     # Seed with an empty float64 block so the result keeps the same dtype
     # and shape as incremental concatenation would give.
     prob_chunks = [np.zeros((0, num_positive))]
     label_rows = []
     total_cnt = 0
     total_size = 0
     for batch in batches:
         (words, textlen, positions, heads, tails, labels) = zip(*batch)
         feed = self.create_feed_dict(words, textlen, positions, heads,
                                      tails, labels)
         fetches = [self.loss, self.probs, self.valid_size, self.correct_num]
         _loss, probs, size, cnt = sess.run(fetches, feed_dict=feed)
         total_cnt += cnt
         total_size += size
         prob_chunks.append(probs[:, 1:])
         for label in labels:
             row = np.zeros(num_positive)
             if label > 0:
                 row[label - 1] = 1.0
             label_rows.append(row)
     all_probs = np.reshape(np.concatenate(prob_chunks), (-1))
     all_labels = np.reshape(np.array(label_rows), (-1))
     return all_labels, all_probs, total_cnt / total_size
コード例 #3
0
ファイル: model.py プロジェクト: billy-inn/refe
 def fit(self, sess, train_triples, valid_triples=None):
     """Train on ``train_triples`` with optional validation-based stopping.

     After every step whose global-step value is a multiple of the number of
     batches per epoch, and only when ``valid_triples`` is given, the model
     is scored with ``self.validation``. The best accuracy, its loss and its
     epoch index are remembered; training stops once the current epoch index
     is at least 3 ahead of the best one.

     Returns:
         Tuple ``(best_valid_epoch, best_valid_loss, best_valid_acc)``.
     """
     batch_stream = data_utils.batch_iter(train_triples, self.batch_size,
                                          self.num_epochs)
     steps_per_epoch = int((len(train_triples) - 1) / self.batch_size) + 1
     best_valid_acc = 0.0
     best_valid_loss = 1e10
     best_valid_epoch = 0
     for batch in batch_stream:
         self.train_on_batch(sess, batch)
         current_step = tf.train.global_step(sess, self.global_step)
         # Skip everything below unless we are at an epoch boundary and
         # have validation data.
         if current_step % steps_per_epoch != 0 or valid_triples is None:
             continue
         epoch = current_step // steps_per_epoch
         print("\nValidation:")
         print(
             "previous best valid epoch %d, best valid acc %.3f with loss %.3f"
             % (best_valid_epoch, best_valid_acc, best_valid_loss))
         loss, acc = self.validation(sess, valid_triples)
         print("")
         if acc > best_valid_acc:
             best_valid_loss, best_valid_acc = loss, acc
             best_valid_epoch = epoch
         if epoch - best_valid_epoch >= 3:
             break
     return best_valid_epoch, best_valid_loss, best_valid_acc
コード例 #4
0
 def fit(self, sess, train, valid=None):
     """Train on ``train`` and keep the best average precision on ``valid``.

     At every global step that is a multiple of the number of batches per
     epoch (and only if ``valid`` is given) ``self.validation`` is run. The
     loss, accuracy and average precision of the best-AP evaluation are
     remembered, and training stops once the current epoch index exceeds the
     best epoch index by more than 3.

     Returns:
         Tuple ``(best_valid_epoch, best_valid_loss, best_valid_acc,
         best_valid_ap)``.
     """
     batch_stream = data_utils.batch_iter(train, self.batch_size,
                                          self.num_epochs)
     steps_per_epoch = int((len(train) - 1) / self.batch_size) + 1
     best_valid_acc = 0.0
     best_valid_loss = 1e10
     best_valid_ap = 0.0
     best_valid_epoch = 0
     for batch in batch_stream:
         (words, textlen, positions, heads, tails, labels) = zip(*batch)
         self.train_on_batch(sess, words, textlen, positions, heads, tails,
                             labels)
         current_step = tf.train.global_step(sess, self.global_step)
         # Evaluate only at epoch boundaries and only with validation data.
         if current_step % steps_per_epoch != 0 or valid is None:
             continue
         epoch = current_step // steps_per_epoch
         print("\nEvaluation:")
         print(
             "previous best valid epoch %d, best valid ap %.3f with loss %.3f acc %.3f"
             % (best_valid_epoch, best_valid_ap, best_valid_loss,
                best_valid_acc))
         loss, acc, ap = self.validation(sess, valid)
         print("")
         if ap > best_valid_ap:
             best_valid_loss, best_valid_acc, best_valid_ap = loss, acc, ap
             best_valid_epoch = epoch
         if epoch - best_valid_epoch > 3:
             break
     return best_valid_epoch, best_valid_loss, best_valid_acc, best_valid_ap
コード例 #5
0
def visdial_evaluate(dataloader, params, eval_batch_size):
    """Score one pass over ``dataloader`` and return sparse and NDCG metrics.

    The module-level ``dialog_encoder`` is switched to eval mode for the
    duration of the call and back to train mode afterwards. Each incoming
    batch is flattened to ``eval_batch_size * num_rounds * num_options`` rows
    and pushed through ``forward`` in chunks of an inference batch size chosen
    from a fixed candidate list (largest candidate not exceeding
    ``500 * n_gpus / 8``; forced to 100 when ``params['overfit']`` is truthy).

    Returns:
        dict: the union of ``SparseGTMetrics.retrieve`` and ``NDCG.retrieve``.
    """
    sparse_metrics = SparseGTMetrics()
    ndcg = NDCG()
    dialog_encoder.eval()
    batch_idx = 0
    candidate_sizes = [1, 2, 4, 5, 100, 1000, 200, 8, 10, 40, 50, 500, 20, 25, 250, 125]
    with torch.no_grad():
        target_size = 500 * (params['n_gpus'] / 8)

        def _distance(size):
            # Candidates above the target are never preferred.
            return abs(size - target_size) if size <= target_size else float("inf")

        batch_size = min(candidate_sizes, key=_distance)
        if params['overfit']:
            batch_size = 100
        for epoch_id, _, batch in batch_iter(dataloader, params):
            # Only the first epoch produced by the iterator is evaluated.
            if epoch_id == 1:
                break
            tokens = batch['tokens']
            num_rounds, num_options = tokens.shape[1], tokens.shape[2]
            total_rows = eval_batch_size * num_rounds * num_options

            tokens = tokens.view(-1, tokens.shape[-1])
            segments = batch['segments']
            segments = segments.view(-1, segments.shape[-1])
            sep_indices = batch['sep_indices']
            sep_indices = sep_indices.view(-1, sep_indices.shape[-1])
            mask = batch['mask']
            mask = mask.view(-1, mask.shape[-1])
            hist_len = batch['hist_len'].view(-1)
            gt_option_inds = batch['gt_option_inds']
            gt_relevance = batch['gt_relevance']
            gt_relevance_round_id = batch['round_id'].squeeze(1)

            flattened = (tokens, segments, sep_indices, mask, hist_len)
            assert all(t.shape[0] == total_rows for t in flattened)
            # The flattened batch must split evenly into inference chunks.
            assert total_rows // batch_size == total_rows / batch_size

            output = []
            for j in range(total_rows // batch_size):
                rows = slice(j * batch_size, (j + 1) * batch_size)
                item = {
                    'tokens': tokens[rows, :],
                    'segments': segments[rows, :],
                    'sep_indices': sep_indices[rows, :],
                    'mask': mask[rows, :],
                    'hist_len': hist_len[rows],
                }
                _, _, _, nsp_scores = forward(dialog_encoder, item, params, output_nsp_scores=True, evaluation=True)
                # Keep column 0 of the softmax-normalised NSP scores.
                output.append(F.softmax(nsp_scores, dim=1)[:, 0])

            output = torch.cat(output, 0).view(eval_batch_size, num_rounds, num_options)
            sparse_metrics.observe(output, gt_option_inds)
            # round_id appears to be 1-based, hence the "- 1" -- TODO confirm.
            output = output[torch.arange(output.size(0)), gt_relevance_round_id - 1, :]
            ndcg.observe(output, gt_relevance)
            batch_idx += 1

    dialog_encoder.train()
    all_metrics = {}
    all_metrics.update(sparse_metrics.retrieve(reset=True))
    all_metrics.update(ndcg.retrieve(reset=True))

    return all_metrics
コード例 #6
0
    def get_rank_loss(self, test):
        """Return the mean absolute score difference over ``test``.

        For every example the model output and the gold label are each mapped
        through ``self.get_score``; the absolute differences are summed and
        divided by the number of examples seen.
        """
        batches = data_utils.batch_iter(test,
                                        self.batch_size,
                                        1,
                                        shuffle=False)
        num_examples = 0.0
        abs_error_sum = 0.0
        for batch in batches:
            words_batch, labels_batch = zip(*batch)
            batch_size = len(words_batch)

            probs = self.model(self._variable(words_batch), batch_size)
            if self.use_cuda:
                # .numpy() below needs the tensor on the CPU.
                probs = probs.cpu()
            probs = probs.data.numpy()

            num_examples += batch_size
            for i, gold in enumerate(labels_batch):
                predicted_score = self.get_score(probs[i])
                gold_score = self.get_score(gold)
                abs_error_sum += abs(predicted_score - gold_score)
        return abs_error_sum / num_examples
コード例 #7
0
    def predict(self, sess, test):
        """Return the concatenated ``self.predictions`` for every batch of ``test``.

        Batches are taken in order (no shuffling) for a single pass. The
        labels column of each batch is unpacked but not fed to the model.
        If ``test`` yields no batches, an empty list is returned.
        """
        all_predictions = []
        for batch in data_utils.batch_iter(test, self.batch_size, 1, shuffle=False):
            (words, textlen, mentions, mentionlen, positions, _labels) = zip(*batch)
            feed = self.create_feed_dict(words, textlen, mentions, mentionlen, positions)
            all_predictions = np.concatenate(
                [all_predictions, sess.run(self.predictions, feed_dict=feed)])

        return all_predictions
コード例 #8
0
 def evaluate(self, sess, train, test):
     """Generator: train on ``train`` and yield test predictions per epoch.

     After each training step the global step is read; whenever it is a
     multiple of the number of batches per epoch, the result of
     ``self.predict(sess, test)`` is yielded.
     """
     batch_stream = data_utils.batch_iter(train, self.batch_size, self.num_epochs)
     steps_per_epoch = int((len(train) - 1) / self.batch_size) + 1
     for batch in batch_stream:
         (words, textlen, mentions, mentionlen, positions, labels) = zip(*batch)
         self.train_on_batch(sess, words, textlen, mentions, mentionlen, positions, labels)
         current_step = tf.train.global_step(sess, self.global_step)
         if current_step % steps_per_epoch != 0:
             continue
         yield self.predict(sess, test)
コード例 #9
0
        def test_accuracy(test_x, test_y):
            """Evaluate the model on a test set.

            :param test_x: testing dataset
            :param test_y: testing labels
            :return:
                eval_loss: loss averaged over the evaluated batches
                accuracy: ``accuracy_score`` of the top-``NUM_LABEL`` one-hot
                    predictions against the true labels
                ave_precision_score: micro-averaged average precision of the
                    raw scores
            """
            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_t2 = []

            loss_sum, num_batches = 0., 0
            for batch_x, batch_y in batch_iter(test_x, test_y, BATCH_SIZE, 1):
                feed = {
                    model.x: batch_x,
                    model.y: batch_y,
                    # keep_prob is fed as 1.0 for evaluation.
                    model.keep_prob: 1.0,
                }
                scores, cur_loss = sess.run([model.scores, model.loss],
                                            feed_dict=feed)

                true_onehot_labels.extend(batch_y)
                predicted_onehot_scores.extend(scores)
                predicted_onehot_labels_t2.extend(
                    get_onehot_label_topk(scores=scores, top_num=NUM_LABEL))

                loss_sum += cur_loss
                num_batches += 1

            # Metrics over everything collected above.
            eval_loss = float(loss_sum / num_batches)
            y_true = np.array(true_onehot_labels)

            ave_precision_score = average_precision_score(
                y_true=y_true,
                y_score=np.array(predicted_onehot_scores),
                average='micro')

            accuracy = accuracy_score(y_true,
                                      np.array(predicted_onehot_labels_t2))

            return eval_loss, accuracy, ave_precision_score
コード例 #10
0
ファイル: model.py プロジェクト: billy-inn/refe
 def predict(self, sess, test_triples):
     """Return the concatenated ``self.preds`` for every batch of ``test_triples``.

     Each batch is expanded as keyword arguments into
     ``self.create_feed_dict``. Batches are visited once, unshuffled. If
     there are no batches, an empty list is returned.
     """
     collected = []
     for batch in data_utils.batch_iter(test_triples,
                                        self.batch_size,
                                        1,
                                        shuffle=False):
         feed = self.create_feed_dict(**batch)
         collected = np.concatenate(
             [collected, sess.run(self.preds, feed_dict=feed)])
     return collected
コード例 #11
0
 def evaluation_on_dev(self, sess, dev):
     """Compute example-weighted mean loss and accuracies over ``dev``.

     Each batch's loss, partial accuracy and exact accuracy are weighted by
     the batch's number of labels, summed, and divided by the total number
     of labels. The averages are printed with a timestamp and returned.

     Returns:
         Tuple ``(mean_loss, mean_partial_acc, mean_exact_acc)``.
     """
     total_loss = 0.0
     total_pacc = 0.0
     total_eacc = 0.0
     total_len = 0
     for batch in data_utils.batch_iter(dev, self.batch_size, 1, shuffle=False):
         (words, textlen, mentions, mentionlen, positions, labels) = zip(*batch)
         feed = self.create_feed_dict(words, textlen, mentions, mentionlen, positions, labels)
         fetches = [self.loss, self.partial_accuracy, self.exact_accuracy]
         loss, pacc, eacc = sess.run(fetches, feed_dict=feed)
         batch_len = len(labels)
         total_loss += loss * batch_len
         total_pacc += pacc * batch_len
         total_eacc += eacc * batch_len
         total_len += batch_len
     time_str = datetime.datetime.now().isoformat()
     mean_loss = total_loss / total_len
     mean_pacc = total_pacc / total_len
     mean_eacc = total_eacc / total_len
     print("{}: loss {:g} partial acc {:g} exact acc {:g}".format(time_str, mean_loss, mean_pacc, mean_eacc))
     return mean_loss, mean_pacc, mean_eacc
コード例 #12
0
def eval_test(sess,
              model,
              dataset,
              video_features,
              configs,
              epoch=None,
              global_step=None,
              name="test"):
    """Evaluate predicted spans on ``dataset`` and summarise IoU metrics.

    For every record the predicted start/end indexes are converted to times
    with ``convert_to_time`` and compared with the record's
    ``start_time``/``end_time`` via ``calculate_iou``.

    Returns:
        Tuple ``(r1i3, r1i5, r1i7, mi, value_pairs, score_str)``: the
        ``calculate_iou_accuracy`` results at thresholds 0.3, 0.5 and 0.7,
        the mean IoU times 100, a list of ``(tag, value)`` pairs prefixed
        with ``name``, and a formatted text summary.
    """
    num_test_batches = math.ceil(len(dataset) / configs.batch_size)
    batches = batch_iter(dataset, video_features, configs.batch_size,
                         configs.extend, False)
    progress = tqdm(batches,
                    total=num_test_batches,
                    desc="evaluate {}".format(name))

    ious = []
    for data in progress:
        raw_data, feed_dict = get_feed_dict(data, model, mode=name)
        fetches = [model.start_index, model.end_index]
        start_indexes, end_indexes = sess.run(fetches, feed_dict=feed_dict)

        for record, start_index, end_index in zip(raw_data, start_indexes,
                                                  end_indexes):
            pred_start, pred_end = convert_to_time(start_index, end_index,
                                                   record["feature_shape"],
                                                   record["duration"])
            ious.append(
                calculate_iou(i0=[pred_start, pred_end],
                              i1=[record["start_time"], record["end_time"]]))

    r1i3 = calculate_iou_accuracy(ious, threshold=0.3)
    r1i5 = calculate_iou_accuracy(ious, threshold=0.5)
    r1i7 = calculate_iou_accuracy(ious, threshold=0.7)
    mi = np.mean(ious) * 100.0

    value_pairs = [
        ("{}/Rank@1, IoU=0.3".format(name), r1i3),
        ("{}/Rank@1, IoU=0.5".format(name), r1i5),
        ("{}/Rank@1, IoU=0.7".format(name), r1i7),
        ("{}/mean IoU".format(name), mi),
    ]

    # Human-readable summary of the scores.
    score_str = "".join([
        "Epoch {}, Step {}:\n".format(epoch, global_step),
        "Rank@1, IoU=0.3: {:.2f}\t".format(r1i3),
        "Rank@1, IoU=0.5: {:.2f}\t".format(r1i5),
        "Rank@1, IoU=0.7: {:.2f}\t".format(r1i7),
        "mean IoU: {:.2f}\n".format(mi),
    ])
    return r1i3, r1i5, r1i7, mi, value_pairs, score_str
コード例 #13
0
ファイル: model.py プロジェクト: billy-inn/refe
 def validation(self, sess, valid_triples):
     """Compute example-weighted mean loss and accuracy on ``valid_triples``.

     Each batch contributes its loss and accuracy weighted by
     ``len(batch["heads"])``. The averages are printed with a timestamp and
     returned as ``(mean_loss, mean_acc)``.
     """
     total_loss = 0.0
     total_acc = 0.0
     total_len = 0
     for batch in data_utils.batch_iter(valid_triples,
                                        self.batch_size,
                                        1,
                                        shuffle=False):
         feed = self.create_feed_dict(**batch)
         loss, acc = sess.run([self.loss, self.accuracy], feed_dict=feed)
         batch_len = len(batch["heads"])
         total_loss += loss * batch_len
         total_acc += acc * batch_len
         total_len += batch_len
     time_str = datetime.datetime.now().isoformat()
     mean_loss = total_loss / total_len
     mean_acc = total_acc / total_len
     print("{}: loss {:g} acc {:g}".format(time_str, mean_loss, mean_acc))
     return mean_loss, mean_acc
コード例 #14
0
    def fit(self, train, valid=None):
        """Train ``self.model`` on ``train`` and track the best rank loss.

        Every step zeroes the gradients, runs the model, computes
        ``self.loss_fn``, logs the loss, back-propagates and steps the
        optimizer. At each epoch boundary (step count divisible by the
        number of batches per epoch) with ``valid`` given, the rank loss from
        ``self.get_rank_loss`` is printed and the lowest value is remembered.
        No early stopping is applied.

        Returns:
            Tuple ``(best_valid_epoch, best_valid_loss)``.
        """
        batches = data_utils.batch_iter(train, self.batch_size,
                                        self.num_epochs)
        steps_per_epoch = int((len(train) - 1) / self.batch_size) + 1
        best_valid_loss = 1e5
        best_valid_epoch = 0
        for step, batch in enumerate(batches, 1):
            words_batch, labels_batch = zip(*batch)
            batch_size = len(words_batch)

            self.model.zero_grad()

            words = self._variable(words_batch)
            labels = self._variable(labels_batch)

            loss = self.loss_fn(self.model(words, batch_size), labels)
            time_str = datetime.now().isoformat()
            print("{}: step {}, loss {:g}".format(time_str, step,
                                                  loss.data[0]))

            loss.backward()
            self.optimizer.step()

            # Validate only at epoch boundaries and only with validation data.
            if step % steps_per_epoch != 0 or valid is None:
                continue
            epoch = step // steps_per_epoch
            print("\nValidation:")
            print("previous best valid loss {:g} at epoch {}".format(
                best_valid_loss, best_valid_epoch))
            rloss = self.get_rank_loss(valid)
            print("epoch: {}, loss {:g}".format(epoch, rloss))
            print("")
            if rloss < best_valid_loss:
                best_valid_loss = rloss
                best_valid_epoch = epoch
        return best_valid_epoch, best_valid_loss
コード例 #15
0
 def save_preds(self, sess, test):
     """Write ``prediction label`` pairs for ``test`` to ``preds.txt``.

     The model is run over ``test`` in order for a single pass. One line
     ``"<pred> <label>"`` is written per example, skipping every example
     whose label equals 0. The file is written to the current working
     directory and overwritten if it exists.
     """
     batches = data_utils.batch_iter(test,
                                     self.batch_size,
                                     1,
                                     shuffle=False)
     all_labels = []
     all_preds = []
     for batch in batches:
         words_batch, textlen_batch, positions_batch, labels_batch = zip(
             *batch)
         feed = self.create_feed_dict(words_batch, textlen_batch,
                                      positions_batch, labels_batch)
         preds = sess.run(self.predictions, feed_dict=feed)
         all_labels = np.concatenate((all_labels, labels_batch))
         all_preds = np.concatenate((all_preds, preds))
     # The context manager closes the file even if a write raises, which the
     # previous open()/close() pair did not guarantee.
     with open("preds.txt", "w") as outfile:
         for pred, label in zip(all_preds, all_labels):
             if label == 0:
                 continue
             outfile.write("%d %d\n" % (pred, label))
コード例 #16
0
 def validation(self, sess, valid):
     """Evaluate on ``valid`` and return loss, accuracy and average precision.

     The loss is averaged weighted by each batch's number of labels; the
     accuracy is summed ``self.correct_num`` over summed
     ``self.valid_size``. Average precision is computed by
     ``average_precision_score`` over the flattened probabilities of all
     classes except column 0 against one-hot rows of width
     ``self.num_classes - 1`` (labels not greater than 0 give an all-zero
     row). The three values are printed with a timestamp.

     Returns:
         Tuple ``(mean_loss, accuracy, average_precision)``.
     """
     batches = data_utils.batch_iter(valid,
                                     self.batch_size,
                                     1,
                                     shuffle=False)
     num_positive = self.num_classes - 1
     total_loss = 0.0
     total_len = 0
     total_cnt = 0
     total_size = 0
     # Seed with an empty float64 block so the stacked result matches what
     # incremental concatenation would produce.
     prob_chunks = [np.zeros((0, num_positive))]
     label_rows = []
     for batch in batches:
         (words, textlen, positions, heads, tails, labels) = zip(*batch)
         feed = self.create_feed_dict(words, textlen, positions, heads,
                                      tails, labels)
         fetches = [self.loss, self.valid_size, self.correct_num, self.probs]
         loss, size, cnt, probs = sess.run(fetches, feed_dict=feed)
         batch_len = len(labels)
         total_loss += loss * batch_len
         total_len += batch_len
         total_cnt += cnt
         total_size += size
         prob_chunks.append(probs[:, 1:])
         for label in labels:
             row = np.zeros(num_positive)
             if label > 0:
                 row[label - 1] = 1.0
             label_rows.append(row)
     all_probs = np.reshape(np.concatenate(prob_chunks), (-1))
     all_labels = np.reshape(np.array(label_rows), (-1))
     average_precision = average_precision_score(all_labels, all_probs)
     time_str = datetime.datetime.now().isoformat()
     mean_loss = total_loss / total_len
     accuracy = total_cnt / total_size
     print("{}: loss {:g} acc {:g} ap {:g}".format(time_str, mean_loss,
                                                   accuracy,
                                                   average_precision))
     return mean_loss, accuracy, average_precision
コード例 #17
0
def _collect_logits(sess, logit_op, placeholders, dataset, batch_size=512):
    """Run ``logit_op`` over ``dataset`` in order and stack the batch outputs.

    Args:
        sess: session holding the restored graph.
        logit_op: tensor to fetch for every batch.
        placeholders: 8-tuple ``(input_words, input_textlen, input_mentions,
            input_mentionlen, input_positions, phase, dense_dropout,
            rnn_dropout)`` of graph inputs.
        dataset: examples accepted by ``data_utils.batch_iter``; each example
            unpacks into six columns, the last of which (labels) is not fed.
        batch_size: number of examples per inference batch.

    Returns:
        The batch outputs concatenated along axis 0, or an empty list when
        ``dataset`` yields no batches.
    """
    (input_words, input_textlen, input_mentions, input_mentionlen,
     input_positions, phase, dense_dropout, rnn_dropout) = placeholders
    chunks = []
    for batch in data_utils.batch_iter(dataset, batch_size, 1, shuffle=False):
        (words_batch, textlen_batch, mentions_batch, mentionlen_batch,
         positions_batch, _labels_batch) = zip(*batch)
        feed = {
            input_words: words_batch,
            input_textlen: textlen_batch,
            input_mentions: mentions_batch,
            input_mentionlen: mentionlen_batch,
            input_positions: positions_batch,
            # Inference settings: phase off, dropout inputs fed as 1.0.
            phase: False,
            dense_dropout: 1.0,
            rnn_dropout: 1.0
        }
        chunks.append(sess.run(logit_op, feed_dict=feed))
    # Collecting chunks and concatenating once avoids the fragile
    # ``ndarray == []`` emptiness test, which newer NumPy deprecates/rejects.
    if not chunks:
        return []
    return np.concatenate(chunks)


def get_types(model_name, input_file, dev_file, output_file, options):
    """Score the test and dev sets with a restored model and pickle the logits.

    The checkpoint ``config.CHECKPOINT_DIR/model_name`` is restored into a
    fresh graph, the ``output/scores`` tensor is evaluated over both data
    files, and the results are registered in a ``StructuredLogitsStore``
    under the names ``"test"`` and ``"dev"`` (each followed by
    ``score_set``). The store is finally pickled to ``logits.pickle`` in the
    checkpoint's directory.

    Args:
        model_name: checkpoint name; the store is created with
            ``hierarchical=True`` when it contains ``"hier"``.
        input_file: path of the test data, read via ``data_utils.load``.
        dev_file: path of the dev data, read via ``data_utils.load``.
        output_file: accepted for interface compatibility; not used here.
        options: accepted for interface compatibility; not used here.
    """
    checkpoint_file = os.path.join(config.CHECKPOINT_DIR, model_name)
    type2id, typeDict = pkl_utils._load(config.WIKI_TYPE)
    id2type = {idx: name for name, idx in type2id.items()}

    embedding = embedding_utils.Embedding.restore(checkpoint_file)

    test_set, test_labels, test_tokenized = create_labelset_input(
        *data_utils.load(input_file), embedding)
    dev_set, dev_labels, dev_tokenized = create_labelset_input(
        *data_utils.load(dev_file), embedding)

    store = StructuredLogitsStore(
        model_name,
        idx2label=id2type,
        hierarchical="hier" in model_name,
        nested=False)

    graph = tf.Graph()
    with graph.as_default():
        # The context manager releases the session when inference is done.
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Look up the graph inputs by the names they were saved under.
            placeholders = tuple(
                graph.get_operation_by_name(op_name).outputs[0]
                for op_name in ("input_words", "input_textlen",
                                "input_mentions", "input_mentionlen",
                                "input_positions", "phase", "dense_dropout",
                                "rnn_dropout"))
            logit_op = graph.get_operation_by_name(
                "output/scores").outputs[0]

            # Same procedure for the test split and then the dev split.
            splits = (("test", test_set, test_labels, test_tokenized),
                      ("dev", dev_set, dev_labels, dev_tokenized))
            for split_name, split_set, split_labels, split_tokens in splits:
                all_logits = _collect_logits(sess, logit_op, placeholders,
                                             split_set)
                store.create_labelset(
                    StructuredLogits(f_x=all_logits,
                                     y_true=split_labels,
                                     tokenized=split_tokens,
                                     y_hat=None,
                                     probas=None,
                                     c=None,
                                     document_masks=None,
                                     idx2label=id2type), split_name)
                store.score_set(split_name)

    # save as pickle
    with open(os.path.join(os.path.dirname(checkpoint_file), "logits.pickle"),
              "wb") as f:
        pickle.dump(store, f)
    """     
コード例 #18
0
def visdial_evaluate(dataloader, params, eval_batch_size, dialog_encoder):
    """Score one pass over ``dataloader`` and return sparse and NDCG metrics.

    ``dialog_encoder`` is put in eval mode for the duration of the call and
    back in train mode afterwards. Every incoming batch (text tensors plus
    image features expanded to one copy per round and option) is flattened to
    ``eval_batch_size * num_rounds * num_options`` rows and pushed through
    ``forward`` in chunks of an inference batch size picked from a fixed
    candidate list (largest candidate not exceeding ``500 * n_gpus / 8``).

    Returns:
        dict: the union of ``SparseGTMetrics.retrieve`` and ``NDCG.retrieve``.
    """
    sparse_metrics = SparseGTMetrics()
    ndcg = NDCG()
    dialog_encoder.eval()
    batch_idx = 0
    candidate_sizes = [1, 2, 4, 5, 100, 1000, 200, 8, 10, 40, 50, 500, 20, 25, 250, 125]
    with torch.no_grad():
        # we can fit approximately 500 sequences of length 256 in 8 gpus with 12 GB of memory during inference.
        target_size = 500 * (params['n_gpus'] / 8)

        def _distance(size):
            # Candidates above the target are never preferred.
            return abs(size - target_size) if size <= target_size else float("inf")

        batch_size = min(candidate_sizes, key=_distance)
        print("batch size for evaluation", batch_size)
        for epoch_id, _, batch in batch_iter(dataloader, params):
            # Only the first epoch produced by the iterator is evaluated.
            if epoch_id == 1:
                break
            tokens = batch['tokens']
            num_rounds, num_options = tokens.shape[1], tokens.shape[2]
            total_rows = eval_batch_size * num_rounds * num_options

            tokens = tokens.view(-1, tokens.shape[-1])
            segments = batch['segments']
            segments = segments.view(-1, segments.shape[-1])
            sep_indices = batch['sep_indices']
            sep_indices = sep_indices.view(-1, sep_indices.shape[-1])
            mask = batch['mask']
            mask = mask.view(-1, mask.shape[-1])
            hist_len = batch['hist_len'].view(-1)
            gt_option_inds = batch['gt_option_inds']
            gt_relevance = batch['gt_relevance']
            gt_relevance_round_id = batch['round_id'].squeeze(1)

            # Image features: replicate per (round, option), then flatten.
            features = batch['image_feat']
            spatials = batch['image_loc']
            image_mask = batch['image_mask']
            max_num_regions = features.shape[-2]
            per_option = (eval_batch_size, num_rounds, num_options,
                          max_num_regions)
            features = features.unsqueeze(1).unsqueeze(1).expand(
                *per_option, 2048).contiguous().view(-1, max_num_regions, 2048)
            spatials = spatials.unsqueeze(1).unsqueeze(1).expand(
                *per_option, 5).contiguous().view(-1, max_num_regions, 5)
            image_mask = image_mask.unsqueeze(1).unsqueeze(1).expand(
                *per_option).contiguous().view(-1, max_num_regions)

            flattened = (tokens, segments, sep_indices, mask, hist_len,
                         features, spatials, image_mask)
            assert all(t.shape[0] == total_rows for t in flattened)
            # The flattened batch must split evenly into inference chunks.
            assert total_rows // batch_size == total_rows / batch_size

            output = []
            for j in range(total_rows // batch_size):
                rows = slice(j * batch_size, (j + 1) * batch_size)
                item = {
                    'tokens': tokens[rows, :],
                    'segments': segments[rows, :],
                    'sep_indices': sep_indices[rows, :],
                    'mask': mask[rows, :],
                    'hist_len': hist_len[rows],
                    'image_feat': features[rows, :, :],
                    'image_loc': spatials[rows, :, :],
                    'image_mask': image_mask[rows, :],
                }

                _, _, _, _, nsp_scores = forward(dialog_encoder,
                                                 item,
                                                 params,
                                                 output_nsp_scores=True,
                                                 evaluation=True)
                # Keep column 0 of the softmax-normalised NSP scores.
                nsp_probs = F.softmax(nsp_scores, dim=1)
                assert nsp_probs.shape[-1] == 2
                output.append(nsp_probs[:, 0])

            output = torch.cat(output, 0).view(eval_batch_size, num_rounds,
                                               num_options)
            sparse_metrics.observe(output, gt_option_inds)
            # round_id appears to be 1-based, hence the "- 1" -- TODO confirm.
            output = output[torch.arange(output.size(0)),
                            gt_relevance_round_id - 1, :]
            ndcg.observe(output, gt_relevance)
            batch_idx += 1

    dialog_encoder.train()
    print("tot eval batches", batch_idx)
    all_metrics = {}
    all_metrics.update(sparse_metrics.retrieve(reset=True))
    all_metrics.update(ndcg.retrieve(reset=True))

    return all_metrics
コード例 #19
0
            del pretrained_dict, pretrained_dict_model, pretrained_dict_optimizer, pretrained_dict_scheduler, \
                model_dict, optimizer_dict
            torch.cuda.empty_cache()

    num_iter_epoch = dataset.numDataPoints['train'] // (params['batch_size'] // params['sequences_per_image'] if (params['batch_size'] // params['sequences_per_image']) \
         else 1 if not params['overfit'] else 5 )
    print('\n%d iter per epoch.' % num_iter_epoch)

    dialog_encoder = nn.DataParallel(dialog_encoder)
    dialog_encoder.to(device)

    start_t = timer()
    optimizer.zero_grad()

    for epoch_id, idx, batch in batch_iter(dataloader, params):

        iter_id = start_iter_id + idx + (epoch_id * num_iter_epoch)
        dialog_encoder.train()
        # expand image features,
        orig_features = batch['image_feat']
        orig_spatials = batch['image_loc']
        orig_image_mask = batch['image_mask']
        orig_image_target = batch['image_target']
        orig_image_label = batch['image_label']

        num_rounds = batch["tokens"].shape[1]
        num_samples = batch["tokens"].shape[2]

        features = orig_features.unsqueeze(1).unsqueeze(1).expand(
            orig_features.shape[0], num_rounds, num_samples,
コード例 #20
0
ファイル: predict.py プロジェクト: julietang1123/NFETC-CLSC
def get_types(model_name, input_file, output_file):
	"""Predict a type for both mentions of every input row and save the result.

	The input is read as a headerless tab-separated file with the columns
	``r, e1, x1, y1, e2, x2, y2, s``. Every row yields two examples that share
	the sentence ``s``: mention ``e1`` with position ``(x1, y1 + 1)`` and
	mention ``e2`` with position ``(x2, y2 + 1)``. Predictions are mapped to
	type names through the inverse of the ``type2id`` table and appended to
	the rows as two extra columns before writing ``output_file``.

	Args:
		model_name: Checkpoint name, joined onto ``config.CHECKPOINT_DIR``.
		input_file: Path of the tab-separated file to read.
		output_file: Path of the tab-separated file to write (no header, no
			index column).
	"""
	checkpoint_file = os.path.join(config.CHECKPOINT_DIR, model_name)
	type2id, _ = pkl_utils._load(config.WIKI_TYPE)
	id2type = {type_id: type_name for type_name, type_id in type2id.items()}

	df = pd.read_csv(input_file, sep="\t", names=["r", "e1", "x1", "y1", "e2", "x2", "y2", "s"])
	n = df.shape[0]

	# Examples [0, n) describe the first mention of each row, examples
	# [n, 2n) the second mention; both halves reuse the same sentences.
	words = np.concatenate([np.array(df.s), np.array(df.s)])
	mentions = np.concatenate([np.array(df.e1), np.array(df.e2)])
	# The end offset is shifted by one before being transformed.
	# NOTE(review): presumably inclusive -> exclusive end; confirm against
	# Embedding.position_transform.
	positions = np.concatenate([
		np.array([[x, y] for x, y in zip(df.x1, df.y1 + 1)]),
		np.array([[x, y] for x, y in zip(df.x2, df.y2 + 1)]),
	])

	embedding = embedding_utils.Embedding.restore(checkpoint_file)

	textlen = np.array([embedding.len_transform1(x) for x in words])
	words = np.array([embedding.text_transform1(x) for x in words])
	mentionlen = np.array([embedding.len_transform2(x) for x in mentions])
	mentions = np.array([embedding.text_transform2(x) for x in mentions])
	positions = np.array([embedding.position_transform(x) for x in positions])
	# Placeholder labels: only needed so the examples have the tuple layout
	# that is unpacked per batch below.
	labels = np.zeros(2 * n)
	test_set = list(zip(words, textlen, mentions, mentionlen, positions, labels))

	batch_outputs = []
	graph = tf.Graph()
	with graph.as_default():
		# Using the session as a context manager closes it on exit, even when
		# restoring the checkpoint or running inference raises.
		with tf.Session() as sess:
			saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
			saver.restore(sess, checkpoint_file)

			def tensor(op_name):
				"""Return the first output tensor of the named graph operation."""
				return graph.get_operation_by_name(op_name).outputs[0]

			input_words = tensor("input_words")
			input_textlen = tensor("input_textlen")
			input_mentions = tensor("input_mentions")
			input_mentionlen = tensor("input_mentionlen")
			input_positions = tensor("input_positions")
			phase = tensor("phase")
			dense_dropout = tensor("dense_dropout")
			rnn_dropout = tensor("rnn_dropout")
			pred_op = tensor("output/predictions")

			batches = data_utils.batch_iter(test_set, 512, 1, shuffle=False)
			for batch in batches:
				words_batch, textlen_batch, mentions_batch, mentionlen_batch, positions_batch, _ = zip(*batch)
				feed = {
					input_words: words_batch,
					input_textlen: textlen_batch,
					input_mentions: mentions_batch,
					input_mentionlen: mentionlen_batch,
					input_positions: positions_batch,
					phase: False,
					dense_dropout: 1.0,
					rnn_dropout: 1.0
				}
				batch_outputs.append(sess.run(pred_op, feed_dict=feed))

	# Join all batches once instead of re-copying the growing array per batch.
	# The leading empty list keeps the result defined when there are no
	# batches and yields the same dtype promotion as accumulating onto [].
	all_predictions = np.concatenate([[]] + batch_outputs)

	df["t1"] = all_predictions[:n]
	df["t2"] = all_predictions[n:]
	df["t1"] = df["t1"].map(id2type)
	df["t2"] = df["t2"].map(id2type)
	df.to_csv(output_file, sep="\t", header=False, index=False)
コード例 #21
0
ファイル: test.py プロジェクト: aayux/quinn
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Get the placeholders from the graph by name
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        attention_map = graph.get_operation_by_name("attention_map").outputs[0]

        # Tensors we want to evaluate
        scores = graph.get_operation_by_name("output/scores").outputs[0]

        # Generate batches for one epoch
        batches = utils.batch_iter(list(zip(x_test, x_test_map)), batch_size,
                                   1)

        # Collect the prediction scores here
        pred_scores = []

        for batch in batches:
            x_test_batch, x_test_batch_map = zip(*batch)
            batch_scores = sess.run(scores, {
                input_x: x_test_batch,
                attention_map: x_test_batch_map
            })
            pred_scores = np.concatenate([pred_scores, batch_scores])

predictions = np.array([1 if score > 0.05 else 0 for score in pred_scores])

mae = mean_absolute_error(y_test_prob, pred_scores)
コード例 #22
0
ファイル: evaluate.py プロジェクト: hudaAlamri/visdial-bert
def eval_ai_generate(dataloader, params, eval_batch_size, split='test'):
    """Score all answer options with the NSP head and return sparse metrics.

    Each batch from ``batch_iter(dataloader, params)`` is flattened to one row
    per (dialog, round, option), pushed through ``forward`` in fixed-size
    chunks, and the first softmax column of the NSP scores is fed, reshaped
    to ``(eval_batch_size, num_rounds, num_options)``, to a
    ``SparseGTMetrics`` tracker together with ``gt_option_inds``.

    Args:
        dataloader: Passed through to ``batch_iter``.
        params: Configuration mapping; ``params['n_gpus']`` scales the chunk
            size, and the mapping is forwarded to ``batch_iter``/``forward``.
        eval_batch_size: Number of dialogs per batch, used for chunking and
            for the final reshape.
        split: Unused by this function.

    Returns:
        dict built from ``SparseGTMetrics.retrieve(reset=True)``.
    """
    metric_tracker = SparseGTMetrics()
    dialog_encoder.eval()

    with torch.no_grad():
        # Pick the largest candidate that does not exceed the GPU-scaled
        # target; when every candidate is too large, min() falls back to the
        # first entry of the list.
        target_size = 500 * (params['n_gpus'] / 8)
        candidate_sizes = [1, 2, 4, 5, 100, 1000, 200, 8, 10, 40, 50, 500, 20, 25, 250, 125]

        def _gap(size):
            if size <= target_size:
                return abs(size - target_size)
            return float("inf")

        batch_size = min(candidate_sizes, key=_gap)
        print("batch size for evaluation", batch_size)

        for epoch_id, _, batch in tqdm(batch_iter(dataloader, params)):
            # Only the first pass over the data is evaluated.
            if epoch_id == 1:
                break

            raw_tokens = batch['tokens']
            num_rounds = raw_tokens.shape[1]
            num_options = raw_tokens.shape[2]

            # Collapse the leading dimensions so every row is one candidate.
            flat = {}
            for name in ('tokens', 'segments', 'sep_indices', 'mask'):
                tensor = batch[name]
                flat[name] = tensor.view(-1, tensor.shape[-1])
            flat['hist_len'] = batch['hist_len'].view(-1)
            gt_option_inds = batch['gt_option_inds']

            # The rows must split evenly into chunks of ``batch_size``.
            total_rows = eval_batch_size * num_rounds * num_options
            num_chunks = total_rows // batch_size
            assert num_chunks == total_rows / batch_size

            chunk_scores = []
            for chunk_idx in range(num_chunks):
                rows = slice(chunk_idx * batch_size,
                             (chunk_idx + 1) * batch_size)
                item = {name: tensor[rows] for name, tensor in flat.items()}
                _, _, _, nsp_scores = forward(dialog_encoder,
                                              item,
                                              params,
                                              output_nsp_scores=True,
                                              evaluation=True)
                # Normalize the NSP scores and keep the first class column.
                nsp_probs = F.softmax(nsp_scores, dim=1)
                assert nsp_probs.shape[-1] == 2
                chunk_scores.append(nsp_probs[:, 0])

            output = torch.cat(chunk_scores, 0).view(eval_batch_size,
                                                     num_rounds, num_options)
            metric_tracker.observe(output, gt_option_inds)

    return dict(metric_tracker.retrieve(reset=True))
コード例 #23
0
ファイル: evaluate.py プロジェクト: vmurahari3/visdial-bert
def eval_ai_generate(dataloader, params, eval_batch_size, split='test'):
    """Rank all answer options per dialog round and collect the ranks.

    Each batch from ``batch_iter(dataloader, params)`` is flattened to one row
    per (dialog, round, option); the image tensors are tiled to match. The
    rows go through ``forward`` in fixed-size chunks, the first softmax column
    of the NSP scores is reshaped to
    ``(eval_batch_size, num_rounds, num_options)`` and converted to ranks with
    ``scores_to_ranks``.

    Args:
        dataloader: Passed through to ``batch_iter``.
        params: Configuration mapping; ``params['n_gpus']`` scales the chunk
            size, and the mapping is forwarded to ``batch_iter``/``forward``.
        eval_batch_size: Number of dialogs per batch, used for tiling,
            chunking and the final reshape.
        split: Unused by this function.

    Returns:
        list of dicts with the keys ``image_id``, ``round_id`` and ``ranks``,
        one per dialog of every evaluated batch.
    """

    def _tile_per_option(tensor, lead, tail):
        # Repeat a per-image tensor for every (round, option) pair and
        # flatten the three leading dimensions into one.
        tiled = tensor.unsqueeze(1).unsqueeze(1).expand(*(lead + tail))
        return tiled.contiguous().view(-1, *tail)

    ranks_json = []
    dialog_encoder.eval()

    with torch.no_grad():
        # Pick the largest candidate that does not exceed the GPU-scaled
        # target; when every candidate is too large, min() falls back to the
        # first entry of the list.
        target_size = 500 * (params['n_gpus'] / 8)
        candidate_sizes = [1, 2, 4, 5, 100, 1000, 200, 8, 10, 40, 50, 500, 20, 25, 250, 125]

        def _gap(size):
            if size <= target_size:
                return abs(size - target_size)
            return float("inf")

        batch_size = min(candidate_sizes, key=_gap)
        print("batch size for evaluation", batch_size)

        for epoch_id, _, batch in batch_iter(dataloader, params):
            # Only the first pass over the data is evaluated.
            if epoch_id == 1:
                break

            raw_tokens = batch['tokens']
            num_rounds = raw_tokens.shape[1]
            num_options = raw_tokens.shape[2]

            # Collapse the leading dimensions so every row is one candidate.
            flat = {}
            for name in ('tokens', 'segments', 'sep_indices', 'mask'):
                tensor = batch[name]
                flat[name] = tensor.view(-1, tensor.shape[-1])
            flat['hist_len'] = batch['hist_len'].view(-1)

            # Expand the image tensors to line up with the flattened rows.
            max_num_regions = batch['image_feat'].shape[-2]
            lead = (eval_batch_size, num_rounds, num_options)
            flat['image_feat'] = _tile_per_option(
                batch['image_feat'], lead, (max_num_regions, 2048))
            flat['image_loc'] = _tile_per_option(
                batch['image_loc'], lead, (max_num_regions, 5))
            flat['image_mask'] = _tile_per_option(
                batch['image_mask'], lead, (max_num_regions,))

            total_rows = num_rounds * num_options * eval_batch_size
            assert all(tensor.shape[0] == total_rows
                       for tensor in flat.values())

            # The rows must split evenly into chunks of ``batch_size``.
            num_chunks = total_rows // batch_size
            assert num_chunks == total_rows / batch_size

            chunk_scores = []
            for chunk_idx in range(num_chunks):
                rows = slice(chunk_idx * batch_size,
                             (chunk_idx + 1) * batch_size)
                item = {name: tensor[rows] for name, tensor in flat.items()}
                _, _, _, _, nsp_scores = forward(dialog_encoder,
                                                 item,
                                                 params,
                                                 output_nsp_scores=True,
                                                 evaluation=True)
                # Normalize the NSP scores and keep the first class column.
                nsp_probs = F.softmax(nsp_scores, dim=1)
                assert nsp_probs.shape[-1] == 2
                chunk_scores.append(nsp_probs[:, 0])

            output = torch.cat(chunk_scores, 0).view(eval_batch_size,
                                                     num_rounds, num_options)
            ranks = scores_to_ranks(output).squeeze(1)
            ranks_json.extend(
                {
                    "image_id": batch["image_id"][i].item(),
                    "round_id": int(batch["round_id"][i].item()),
                    "ranks": [rank.item() for rank in ranks[i][:]],
                }
                for i in range(eval_batch_size))

    return ranks_json
コード例 #24
0
def train_cnn():
    """Train a TextCNN on the loaded data and save its pooled features.

    All settings are read from the module-level ``FLAGS``. The function
    creates a timestamped run directory under ``./runs/textcnn``, builds the
    model, optionally restores a checkpoint from ``FLAGS.init_model_path``,
    runs ``cnn.train_op`` on every batch produced by
    ``data_utils.batch_iter`` and finally writes the collected
    ``cnn.pooled_concat_flat`` activations, reshaped to ``(20480, 192)``,
    to a text file.

    Raises:
        AssertionError: if ``FLAGS.init_model_path`` is set but is not a
            directory or contains no usable checkpoint.
    """
    # Data Preparation
    # ==================================================
    if FLAGS.init_embedding_path is not None:
        # NOTE: the matrix is only used to derive the vocabulary and embedding
        # sizes; this function does not assign it into the model.
        embedding = np.load(FLAGS.init_embedding_path)
        print("Using pre-trained word embedding which shape is {}\n".format(
            embedding.shape))
        FLAGS.vocab_size = embedding.shape[0]
        FLAGS.embedding_size = embedding.shape[1]
    if FLAGS.init_model_path is not None:
        assert os.path.isdir(
            FLAGS.init_model_path), "init_model_path must be a directory\n"
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path)
        assert ckpt, "No checkpoint found in {}\n".format(
            FLAGS.init_model_path)
        assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n"

    # Create root directory
    timestamp = str(int(time.time()))
    root_dir = os.path.join(os.path.curdir, 'runs', 'textcnn',
                            'trained_result_' + timestamp)
    os.makedirs(root_dir)

    # Load data
    print("Loading data...\n")
    # One reshape is enough: it already fails unless the file holds exactly
    # 20480 * 600 values.
    x_data = np.loadtxt(FLAGS.x_data_file).reshape(20480, 600)
    y_data = np.loadtxt(FLAGS.y_data_file)
    print("data load finished")

    # Training
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        # Entering the session itself installs it as the default session and
        # closes it on exit; ``as_default()`` alone would leave it open.
        with tf.Session(config=tf_config) as sess:
            cnn = TextCNN(vocab_size=FLAGS.vocab_size,
                          embedding_size=FLAGS.embedding_size,
                          sequence_length=FLAGS.sequence_length,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          num_classes=FLAGS.num_classes,
                          learning_rate=FLAGS.learning_rate,
                          grad_clip=FLAGS.grad_clip,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Output directory for models and summaries
            out_dir = os.path.abspath(root_dir)
            print("Writing to {}...\n".format(out_dir))

            # Summaries for loss and accuracy
            tf.summary.scalar("loss", cnn.loss)
            tf.summary.scalar("accuracy", cnn.accuracy)
            merged_summary = tf.summary.merge_all()

            # Summary writers (only the graph is recorded by this function)
            train_summary_writer = tf.summary.FileWriter(
                os.path.join(out_dir, 'summaries', 'train'), sess.graph)
            val_summary_writer = tf.summary.FileWriter(
                os.path.join(out_dir, 'summaries', 'val'), sess.graph)

            try:
                # Checkpoint directory, will not create itself
                checkpoint_dir = os.path.abspath(
                    os.path.join(out_dir, 'checkpoints'))
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                # Continue training from saved model
                if FLAGS.init_model_path is not None:
                    saver.restore(sess, ckpt.model_checkpoint_path)

                # Training start
                print("Start training...\n")
                train_batches = data_utils.batch_iter(
                    list(zip(x_data, y_data)), FLAGS.batch_size)

                fetches = [
                    cnn.pooled_concat_flat, cnn.train_op, cnn.global_step,
                    merged_summary, cnn.loss, cnn.accuracy
                ]
                cnn_feature_temp = []
                for batch in train_batches:
                    # Training model on x_batch and y_batch
                    x_batch, y_batch = zip(*batch)
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.keep_prob: FLAGS.dropout_keep_prob,
                        cnn.is_training: True
                    }
                    results = sess.run(fetches, feed_dict=feed_dict)
                    # Only the pooled features are kept; the other fetches
                    # are run for their effect (the training step).
                    cnn_feature_temp.append(results[0].tolist())

                np.savetxt(
                    "../data/char_data/char_dim/char_cnn_embeddings_20_30_dim256.txt",
                    np.array(cnn_feature_temp).reshape(20480, 192))
            finally:
                # Flush and release the event files even if training fails.
                train_summary_writer.close()
                val_summary_writer.close()

            print('finished training')
コード例 #25
0
ファイル: main.py プロジェクト: Hbenmazi/AL-ITS
def train(args: dict):
    """Train a DKT or DKTEnhanced model with patience-based early stopping.

    The data set is split randomly into a training and a validation part.
    After every epoch both parts are evaluated and the metrics printed. When
    the validation ``auc(n)`` beats every earlier epoch, the model and the
    optimizer state are saved. Otherwise a patience counter grows; once it
    reaches ``--max-patience`` a "trial" is counted, the best checkpoint is
    reloaded and the learning rate is multiplied by ``--lr-decay``. Training
    stops after ``--max-num-trial`` trials or ``--max-epoch`` epochs.

    Args:
        args: Option mapping. Keys read here: ``--model``, ``--batch-size``,
            ``--max-patience``, ``--max-num-trial``, ``--lr-decay``,
            ``--train-ratio``, ``--model-save-to``, ``--max-epoch``,
            ``--hidden-size``, ``--dropout``, ``--lr``, ``--gpu``,
            ``--clip-grad``, ``--lambda-r`` (DKTEnhanced only),
            ``model-path``, ``train-data-path``, ``pid2pidx-path``,
            ``cid2cidx-path``, ``pidx2cidx-path`` and ``cidx2cname-path``.

    Raises:
        ValueError: if ``--model`` is neither ``DKT`` nor ``DKTEnhanced``.
    """
    model_name = args['--model']
    batch_size = int(args['--batch-size'])
    max_patience = int(args['--max-patience'])
    max_num_trial = int(args['--max-num-trial'])
    lr_decay = float(args['--lr-decay'])
    train_ratio = float(args['--train-ratio'])
    model_save_to = args['--model-save-to']
    max_epoch = int(args['--max-epoch'])

    model_path = args['model-path']
    train_data_path = args['train-data-path']
    pid2pidx_path = args['pid2pidx-path']
    cid2cidx_path = args['cid2cidx-path']
    pidx2cidx_path = args['pidx2cidx-path']
    cidx2cname_path = args['cidx2cname-path']

    raw_data, problem_map = read_data_from_file(train_data_path, pid2pidx_path,
                                                cid2cidx_path, pidx2cidx_path,
                                                cidx2cname_path)

    num_problems = len(problem_map)
    num_concepts = problem_map.num_concepts

    train_size = int(train_ratio * len(raw_data))
    val_size = len(raw_data) - train_size

    train_dataset, val_dataset = random_split(raw_data, [train_size, val_size])
    unpacked_train_dataset = unpack_data(train_dataset)
    unpacked_val_dataset = unpack_data(val_dataset)

    if model_name == 'DKT':
        model = DKT(num_problems=num_problems,
                    hidden_size=int(args['--hidden-size']),
                    dropout_rate=float(args['--dropout']))
        criterion = torch.nn.BCELoss(reduction='mean')
    elif model_name == "DKTEnhanced":
        model = DKTEnhanced(problem_map=problem_map,
                            hidden_size=int(args['--hidden-size']),
                            dropout_rate=float(args['--dropout']))
        criterion = PredictionConsistentBCELoss(
            lambda_r=float(args['--lambda-r']))
    else:
        raise ValueError("wrong value of model_name[{}]".format(model_name))
    optimizer = optim.Adam(model.parameters(), lr=float(args['--lr']))

    if args['--gpu'] is not None:
        device = torch.device(
            "cuda:" + args['--gpu'] if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    logging.info("Using {}".format(device))
    model.to(device)

    report_formats = (
        "valid_auc(n) %.6f\ttrain_auc(n) %.6f\t",
        "valid_auc(c) %.6f\ttrain_auc(c) %.6f\t",
        "valid_acc    %.6f\ttrain_acc    %.6f\t",
        "valid_rmse   %.6f\ttrain_rmse   %.6f\t",
        "valid_mae    %.6f\ttrain_mae    %.6f\t",
    )

    def evaluate_and_report(header=None):
        """Evaluate both splits, print the metrics, return validation auc(n)."""
        with torch.no_grad():
            model.eval()
            t_auc_n, t_auc_c, t_acc, t_rmse, t_mae = evaluate(
                unpacked_train_dataset, num_problems, num_concepts, model)
            v_auc_n, v_auc_c, v_acc, v_rmse, v_mae = evaluate(
                unpacked_val_dataset, num_problems, num_concepts, model)
            if header is not None:
                print(header)
            valid_metrics = (v_auc_n, v_auc_c, v_acc, v_rmse, v_mae)
            train_metrics = (t_auc_n, t_auc_c, t_acc, t_rmse, t_mae)
            for fmt, valid, trained in zip(report_formats, valid_metrics,
                                           train_metrics):
                print(fmt % (valid, trained))
        return v_auc_n

    def flat_float_tensor(sequences):
        """Concatenate the sequences into one float tensor on the model device."""
        return torch.tensor(list(itertools.chain.from_iterable(sequences)),
                            dtype=torch.float,
                            device=model.device)

    patience = 0
    num_trial = 0
    hist_valid_scores = []

    # print statistic information of the untrained model
    evaluate_and_report(header="Init:")

    batch_num = math.ceil(len(train_dataset) / batch_size)
    # Parsed once here instead of on every optimisation step.
    clip_norm = float(args['--clip-grad'])

    for epoch in range(max_epoch):  # loop over the dataset multiple times
        # The end-of-epoch evaluation switches to eval mode; switch back.
        model.train()
        for batch_data in tqdm.tqdm(batch_iter(train_dataset,
                                               batch_size=batch_size),
                                    desc="Epoch[{}]".format(epoch + 1),
                                    total=batch_num):
            _, _, answer_seqs = batch_data

            # Targets for the next-step prediction: every answer sequence
            # without its first element.
            y_next_true = flat_float_tensor(seq[1:] for seq in answer_seqs)

            # process the input data
            response_data, concept_data, seq_lengths = process_data(
                batch_data, num_problems, num_concepts, device=model.device)

            # forward + backward + optimize
            y_next_pred, y_cur_pred, _, _ = model(response_data, concept_data,
                                                  seq_lengths)

            if model_name == 'DKT':
                loss = criterion(y_next_pred, y_next_true)
            else:
                # model_name was validated above, so this is 'DKTEnhanced',
                # which additionally scores the current-step predictions.
                y_cur_true = flat_float_tensor(answer_seqs)
                loss = criterion(y_next_pred, y_next_true, y_cur_pred,
                                 y_cur_true)

            loss.backward()

            # clip gradient
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_norm)
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

        # print statistic information
        v_auc_n = evaluate_and_report()

        # check for early stop
        is_better = not hist_valid_scores or v_auc_n > max(hist_valid_scores)
        hist_valid_scores.append(v_auc_n)

        if is_better:
            patience = 0
            if not os.path.exists(model_save_to):
                os.makedirs(model_save_to)
            model.save(model_path)

            # also save the optimizers' state
            torch.save(optimizer.state_dict(), model_path + '.optim')
            continue

        if patience >= max_patience:
            continue

        patience += 1
        print('hit patience %d' % patience)
        if patience != max_patience:
            continue

        num_trial += 1
        print('hit #%d trial' % num_trial)
        if num_trial == max_num_trial:
            print('early stop!')
            break

        # decay lr, and restore from previously best checkpoint
        lr = optimizer.param_groups[0]['lr'] * lr_decay

        # load model
        params = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
        model.load_state_dict(params['state_dict'])
        model = model.to(device)

        optimizer.load_state_dict(torch.load(model_path + '.optim'))

        # set new lr (loading the optimizer state restored the old one)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # reset patience
        patience = 0

    print('Finished Training')
def train_rnn():
    """Train a TextRNN on the loaded data and save its attention outputs.

    All settings are read from the module-level ``FLAGS``. The function
    creates a timestamped run directory under ``./runs/textrnn``, builds the
    model, optionally loads a pre-trained embedding matrix and/or restores a
    checkpoint, runs ``rnn.train_op`` on every batch produced by
    ``data_utils.batch_iter`` and finally writes the collected
    ``rnn.attention_output`` activations, reshaped to ``(20480, 200)``, to a
    text file.

    Raises:
        AssertionError: if ``FLAGS.init_model_path`` is set but is not a
            directory or contains no usable checkpoint.
    """
    # Data Preparation
    # ==================================================
    if FLAGS.init_embedding_path is not None:
        embedding = np.load(FLAGS.init_embedding_path)
        print("Using pre-trained word embedding which shape is {}\n".format(embedding.shape))
        FLAGS.vocab_size = embedding.shape[0]
        FLAGS.embedding_size = embedding.shape[1]

    if FLAGS.init_model_path is not None:
        assert os.path.isdir(FLAGS.init_model_path), "init_model_path must be a directory\n"
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path)
        assert ckpt, "No checkpoint found in {}\n".format(FLAGS.init_model_path)
        assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n"

    # Create root directory
    timestamp = str(int(time.time()))
    root_dir = os.path.join(os.path.curdir, 'runs', 'textrnn', 'trained_result_' + timestamp)
    os.makedirs(root_dir)

    # Load data
    print("Loading data...\n")
    x_data = np.loadtxt(FLAGS.x_data_file)
    y_data = np.loadtxt(FLAGS.y_data_file)
    print("data load finished")

    # Training
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        # Entering the session itself installs it as the default session and
        # closes it on exit; ``as_default()`` alone would leave it open.
        with tf.Session(config=tf_config) as sess:
            rnn = TextRNN(
                vocab_size=FLAGS.vocab_size,
                embedding_size=FLAGS.embedding_size,
                sequence_length=FLAGS.sequence_length,
                rnn_size=FLAGS.rnn_size,
                num_layers=FLAGS.num_layers,
                attention_size=FLAGS.attention_size,
                num_classes=FLAGS.num_classes,
                learning_rate=FLAGS.learning_rate,
                grad_clip=FLAGS.grad_clip)

            # Output directory for models and summaries
            out_dir = os.path.abspath(root_dir)
            print("Writing to {}...\n".format(out_dir))

            # Summaries for loss and accuracy
            tf.summary.scalar("loss", rnn.loss)
            tf.summary.scalar("accuracy", rnn.accuracy)
            merged_summary = tf.summary.merge_all()

            # Summary writers (only the graph is recorded by this function)
            train_summary_writer = tf.summary.FileWriter(
                os.path.join(out_dir, 'summaries', 'train'), sess.graph)
            val_summary_writer = tf.summary.FileWriter(
                os.path.join(out_dir, 'summaries', 'val'), sess.graph)

            try:
                # Checkpoint directory, will not create itself
                checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints'))
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                # Using pre-trained word embedding
                if FLAGS.init_embedding_path is not None:
                    sess.run(rnn.embedding.assign(embedding))
                    del embedding

                # Continue training from saved model
                if FLAGS.init_model_path is not None:
                    saver.restore(sess, ckpt.model_checkpoint_path)

                # Training start
                print("Start training...\n")
                train_batches = data_utils.batch_iter(list(zip(x_data, y_data)), FLAGS.batch_size)

                fetches = [rnn.attention_output, rnn.train_op, rnn.global_step,
                           merged_summary, rnn.loss, rnn.accuracy]
                rnn_feature_temp = []
                for batch in train_batches:
                    # Training model on x_batch and y_batch
                    x_batch, y_batch = zip(*batch)
                    seq_len_train = data_utils.real_len(x_batch)
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.seq_len: seq_len_train,
                        rnn.keep_prob: FLAGS.dropout_keep_prob,
                    }
                    results = sess.run(fetches, feed_dict=feed_dict)
                    # Only the attention outputs are kept; the other fetches
                    # are run for their effect (the training step).
                    rnn_feature_temp.append(results[0].tolist())

                print(rnn_feature_temp[0:2])
                print(len(rnn_feature_temp))
                np.savetxt("../data/word_data/word_dim/word_rnn_attention_embeddings_600_dim256.txt", np.array(rnn_feature_temp).reshape(20480,200))
            finally:
                # Flush and release the event files even if training fails.
                train_summary_writer.close()
                val_summary_writer.close()
コード例 #27
0
                                     log_device_placement=False)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            saver = tf.train.Saver(max_to_keep=5)
            writer = tf.summary.FileWriter(log_dir)
            sess.run(tf.global_variables_initializer())

            best_r1i7 = -1.0
            score_writer = open(os.path.join(model_dir, "eval_results.txt"),
                                mode="w",
                                encoding="utf-8")

            for epoch in range(configs.epochs):
                for data in tqdm(
                        batch_iter(train_set, video_features,
                                   configs.batch_size, configs.extend, True),
                        total=num_train_batches,
                        desc="Epoch %d / %d" % (epoch + 1, configs.epochs)):

                    # run the model
                    feed_dict = get_feed_dict(data, model, configs.drop_rate)
                    _, loss, h_loss, global_step = sess.run(
                        [
                            model.train_op, model.loss, model.highlight_loss,
                            model.global_step
                        ],
                        feed_dict=feed_dict)

                    if global_step % configs.period == 0:
                        write_tf_summary(writer,
                                         [("train/loss", loss),
コード例 #28
0
def train_rnn():
    """Train a TextRNN classifier, keep the best checkpoint and test it.

    All settings are read from the module-level ``FLAGS`` object. The
    function:

    1. Optionally loads a pre-trained embedding matrix
       (``FLAGS.init_embedding_path``) and locates a checkpoint to resume
       from (``FLAGS.init_model_path``).
    2. Loads features and labels with ``np.loadtxt`` and splits them into
       train / validation / test sets. The held-out part of the first split
       is divided 50/50 into validation and test.
    3. Trains for ``FLAGS.num_epochs`` epochs, evaluating on the whole
       validation set every ``FLAGS.evaluate_every`` global steps and saving
       a checkpoint whenever validation accuracy improves.
    4. Renames the best checkpoint to ``best_model.*``, restores it and
       prints accuracy and a classification report for the test set.

    If no checkpoint was ever written (validation accuracy never improved or
    no evaluation step was reached), the weights currently held by the
    session are tested instead of failing on missing checkpoint files.

    Returns:
        None. Results are printed and written under
        ``./runs/textrnn/trained_result_<timestamp>``.
    """
    # Data Preparation
    # ==================================================

    if FLAGS.init_embedding_path is not None:
        embedding = np.load(FLAGS.init_embedding_path)
        print("Using pre-trained word embedding which shape is {}\n".format(embedding.shape))
        # The embedding matrix dictates the vocabulary and embedding sizes.
        FLAGS.vocab_size = embedding.shape[0]
        FLAGS.embedding_size = embedding.shape[1]

    if FLAGS.init_model_path is not None:
        assert os.path.isdir(FLAGS.init_model_path), "init_model_path must be a directory\n"
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path)
        assert ckpt, "No checkpoint found in {}\n".format(FLAGS.init_model_path)
        assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n"

    # Create root directory
    timestamp = str(int(time.time()))
    root_dir = os.path.join(os.path.curdir, 'runs', 'textrnn', 'trained_result_' + timestamp)
    os.makedirs(root_dir)

    # Load data
    print("Loading data...\n")
    x_data = np.loadtxt(FLAGS.x_data_file)
    y_data = np.loadtxt(FLAGS.y_data_file)
    print("data load finished")

    # Split dataset: train vs. held-out (stratified), then held-out into
    # validation and test halves.
    x_train, x_test, y_train, y_test = train_test_split(
        x_data, y_data, test_size=FLAGS.test_size, stratify=y_data, random_state=0)
    x_val, x_test, y_val, y_test = train_test_split(
        x_test, y_test, test_size=0.5, random_state=0)

    # Training
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        # Entering the session itself (rather than ``as_default()``) makes it
        # the default session AND closes it when the block exits.
        with tf.Session(config=tf_config) as sess:
            rnn = TextRNN(
                vocab_size=FLAGS.vocab_size,
                embedding_size=FLAGS.embedding_size,
                sequence_length=FLAGS.sequence_length,
                rnn_size=FLAGS.rnn_size,
                num_layers=FLAGS.num_layers,
                attention_size=FLAGS.attention_size,
                num_classes=FLAGS.num_classes,
                learning_rate=FLAGS.learning_rate,
                grad_clip=FLAGS.grad_clip)

            # Output directory for models and summaries
            out_dir = os.path.abspath(root_dir)
            print("Writing to {}...\n".format(out_dir))

            # Summaries for loss and accuracy
            tf.summary.scalar("loss", rnn.loss)
            tf.summary.scalar("accuracy", rnn.accuracy)
            merged_summary = tf.summary.merge_all()

            # Summary writers for the train and validation curves
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            val_summary_dir = os.path.join(out_dir, 'summaries', 'val')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
            val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)

            # Checkpoint directory, will not create itself
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # max_to_keep=1: only the most recent (i.e. best so far) checkpoint
            # stays on disk.
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Using pre-trained word embedding
            if FLAGS.init_embedding_path is not None:
                sess.run(rnn.embedding.assign(embedding))
                del embedding

            # Continue training from saved model
            if FLAGS.init_model_path is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Training start
            print("Start training...\n")
            best_at_step = 0  # 0 means "no checkpoint has been saved yet"
            best_val_accuracy = 0
            for epoch in range(FLAGS.num_epochs):
                # Generate train batches
                train_batches = data_utils.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size)
                start = time.time()
                for batch in train_batches:
                    # Training model on x_batch and y_batch
                    x_batch, y_batch = zip(*batch)
                    seq_len_train = data_utils.real_len(x_batch)
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.seq_len: seq_len_train,
                        rnn.keep_prob: FLAGS.dropout_keep_prob,
                    }
                    _, global_step, train_summaries, train_loss, train_accuracy = sess.run(
                        [rnn.train_op, rnn.global_step, merged_summary, rnn.loss, rnn.accuracy],
                        feed_dict=feed_dict)

                    if global_step % FLAGS.evaluate_every != 0:
                        continue

                    # Evaluates model on val set (dropout disabled via keep_prob=1.0)
                    end = time.time()
                    train_summary_writer.add_summary(train_summaries, global_step)
                    seq_len_val = data_utils.real_len(x_val)
                    feed_dict = {
                        rnn.input_x: x_val,
                        rnn.input_y: y_val,
                        rnn.seq_len: seq_len_val,
                        rnn.keep_prob: 1.0,
                    }
                    val_summaries, val_loss, val_accuracy = sess.run(
                        [merged_summary, rnn.loss, rnn.accuracy], feed_dict=feed_dict)
                    val_summary_writer.add_summary(val_summaries, global_step)
                    print("Epoch: {}, global step: {}, training speed: {:.3f}sec/batch".format(epoch,
                        global_step, (end - start) / FLAGS.evaluate_every))
                    print("train loss: {:.3f}, train accuracy: {:.3f}, val loss: {:.3f}, val accuracy: {:.3f}\n".format(train_loss,
                        train_accuracy, val_loss, val_accuracy))
                    # If improved, save the model
                    if val_accuracy > best_val_accuracy:
                        print("Get a best val accuracy at step {}, model saving...\n".format(global_step))
                        saver.save(sess, checkpoint_prefix, global_step=global_step)
                        best_val_accuracy = val_accuracy
                        best_at_step = global_step
                    # Restart the timer so evaluation time is not counted as
                    # training time.
                    start = time.time()

            # Flush pending summary events and release the file handles.
            train_summary_writer.close()
            val_summary_writer.close()

            if best_at_step > 0:
                # Rename the checkpoint files of the best step to best_model.*
                best_model_prefix = checkpoint_prefix + '-' + str(best_at_step)
                for suffix in ('.index', '.meta', '.data-00000-of-00001'):
                    os.rename(best_model_prefix + suffix,
                              os.path.join(checkpoint_dir, 'best_model' + suffix))

                # Testing on test set
                print("\nTraining complete, testing the best model on test set...\n")
                saver.restore(sess, os.path.join(checkpoint_dir, 'best_model'))
            else:
                # Nothing was checkpointed, so there is nothing to rename or
                # restore; evaluate the weights the session currently holds.
                print("\nTraining complete, no checkpoint was saved; testing the current model on test set...\n")

            seq_len_test = data_utils.real_len(x_test)
            feed_dict = {
                rnn.input_x: x_test,
                rnn.input_y: y_test,
                rnn.seq_len: seq_len_test,
                rnn.keep_prob: 1.0,
            }
            y_pred, test_accuracy = sess.run([rnn.predictions, rnn.accuracy], feed_dict=feed_dict)
            print("Testing Accuracy: {:.3f}\n".format(test_accuracy))
            # Collapse each label row to its arg-max class index for the report.
            y_test_original = np.argmax(y_test, 1)
            print(y_test_original.shape)
            print("Precision, Recall and F1-Score:\n\n",
                  classification_report(y_test_original, y_pred, target_names=['体育', '军事', '医学', '文化', '汽车', '经济']))
コード例 #29
0
def train(train_x, train_y, test_x, test_y, vocab_size, embedding_size,
          pretrained_embedding, trainset_embedding_matrix, args):
    '''
    Training process + testing process.

    Builds a WordRNN model, trains it with Adam and global-norm gradient
    clipping, evaluates on the test set every 50 steps and once more after
    training has finished.

    :param train_x: training dataset
    :param train_y: training labels
    :param test_x: testing dataset
    :param test_y: testing labels
    :param vocab_size: number of words in the embedding matrix
    :param embedding_size: embedding size of each word
    :param pretrained_embedding: pretrained word embedding matrix, fed
        through ``model.x_init`` when variables are initialized
    :param trainset_embedding_matrix: OOV word (from train set) embedding matrix
    :param args: object whose ``summary_dir`` attribute is used as the prefix
        of the ``-accuracy.txt`` log file

    :return:
            None; testing results are printed at a fixed step interval and
            appended to the accuracy log file
    '''

    with tf.Session() as sess:

        model = WordRNN(MAX_DOCUMENT_LEN,
                        NUM_CLASS,
                        vocab_size=vocab_size,
                        embedding_size=embedding_size,
                        trainset_embedding=trainset_embedding_matrix)

        # Define training procedure: Adam on gradients clipped to a global
        # norm of 5.0.
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(model.lr)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Initialize all variables; the pretrained embedding is supplied to
        # the initializer through model.x_init.
        feed_dict_emb = {model.x_init: np.float32(pretrained_embedding)}
        sess.run(tf.global_variables_initializer(), feed_dict=feed_dict_emb)

        def train_step(batch_x, batch_y):
            '''
            Run one optimization step (keep_prob 0.8) and return its loss.
            '''
            feed_dict = {
                model.x: batch_x,
                model.y: batch_y,
                model.keep_prob: 0.8,
            }
            _, loss = sess.run([train_op, model.loss], feed_dict=feed_dict)

            return loss

        def test_accuracy(test_x, test_y):
            '''
            Evaluate the model on the given data, batch by batch.

            :param test_x: testing dataset
            :param test_y: testing labels
            :return:
                eval_loss: loss averaged over the evaluated batches
                accuracy: accuracy of the top-NUM_LABEL one-hot predictions
                ave_precision_score: micro-averaged average precision

            '''

            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_t2 = []

            test_batches = batch_iter(test_x, test_y, BATCH_SIZE, 1)
            eval_loss, eval_counter = 0., 0

            for test_batch_x, test_batch_y in test_batches:
                # keep_prob 1.0 disables dropout during evaluation.
                scores, cur_loss = sess.run(
                    [model.scores, model.loss],
                    feed_dict={
                        model.x: test_batch_x,
                        model.y: test_batch_y,
                        model.keep_prob: 1.0
                    })

                true_onehot_labels.extend(test_batch_y)
                predicted_onehot_scores.extend(scores)
                predicted_onehot_labels_t2.extend(
                    get_onehot_label_topk(scores=scores, top_num=NUM_LABEL))

                eval_loss += cur_loss
                eval_counter += 1

            # metrics
            eval_loss = float(eval_loss / eval_counter)

            ave_precision_score = average_precision_score(
                y_true=np.array(true_onehot_labels),
                y_score=np.array(predicted_onehot_scores),
                average='micro')

            accuracy = accuracy_score(np.array(true_onehot_labels),
                                      np.array(predicted_onehot_labels_t2))

            return eval_loss, accuracy, ave_precision_score

        # Training loop
        start = time.time()

        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)

        st = time.time()
        # NOTE(review): num_train is not defined in this function; it is
        # presumably a module-level count of training samples - confirm.
        # Clamp to 1 so a training set smaller than one batch does not cause
        # a division by zero when the epoch number is derived below.
        steps_per_epoch = max(1, int(num_train / BATCH_SIZE))
        for batch_x, batch_y in batches:
            # Read the step counter BEFORE training on this batch, so the
            # first batch is step 0 and triggers the first evaluation.
            step = tf.train.global_step(sess, global_step)
            num_epoch = step // steps_per_epoch

            train_step(batch_x, batch_y)

            if step % 50 == 0:

                eval_loss, acc, eval_prc = test_accuracy(test_x, test_y)

                # Start a fresh log file on the very first evaluation,
                # append afterwards.
                mode = "w" if step == 0 else "a"
                with open(args.summary_dir + "-accuracy.txt", mode) as f:
                    print("epo: {}, step: {}, loss: {}, accuracy: {}".format(
                        num_epoch, step, eval_loss, acc),
                          file=f)

                print(
                    "epoch: {}, step: {}, loss: {}, steps_per_epoch: {}, batch size: {}"
                    .format(num_epoch, step, eval_loss, steps_per_epoch,
                            BATCH_SIZE))

                print("Accuracy:{},Avg_Precision: {}, loss:{}".format(
                    acc, eval_prc, eval_loss))
                # st is reset at every evaluation, so despite the label this
                # is the time elapsed since the previous evaluation.
                print("time of one epoch: {}\n".format(time.time() - st))
                st = time.time()

        print('training time', time.time() - start)

        # Final evaluation after all training batches have been consumed.
        test_start_time = time.time()
        eval_loss, acc, eval_prc = test_accuracy(test_x, test_y)
        print('testing time', time.time() - test_start_time)
        print(eval_loss, acc)
コード例 #30
0
ファイル: train.py プロジェクト: aayux/quinn
            feed_dict = {
                quinn.input_x: x_batch,
                quinn.input_y: y_batch,
                quinn.attention_map: x_map
            }
            step, loss, mae, _update_op = sess.run(
                [global_step, quinn.loss, quinn.mae, quinn.update_op],
                feed_dict)

            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, mae {:g}".format(
                time_str, step, loss, mae))

        # Generate batches
        batches = utils.batch_iter(
            list(zip(x_train, x_train_map, y_train_prob)), batch_size,
            num_epochs)

        # Training loop. For each batch...
        for batch in batches:
            x_batch, x_map, y_batch = zip(*batch)
            train_step(x_batch, x_map, y_batch)
            current_step = tf.train.global_step(sess, global_step)

            epoch_step = (int((len(x_train[0]) - 1) / batch_size) + 1)

            if current_step % epoch_step == 0:
                print("\nValidation:")

                # Randomly draw a validation batch
                shuff_idx = np.random.permutation(np.arange(batch_size))