Example #1
    def eval(self, data_set, eval_tensor_names=(), eval_ph_names=(), num_batches=None):
        # TODO : eval_ph_names
        assert isinstance(data_set, DataSet)
        assert self.initialized, "Initialize tower before training."

        params = self.params
        sess = self.sess
        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        progress = params.progress
        num_batches = num_batches or data_set.get_num_batches(partial=True)
        num_iters = int(np.ceil(num_batches / self.num_towers))
        num_corrects, total, total_loss = 0, 0, 0.0
        eval_values = []
        idxs = []
        N = data_set.batch_size * num_batches
        if N > data_set.num_examples:
            N = data_set.num_examples
        eval_args = self._get_eval_args(epoch)
        string = "eval on %s, N=%d|" % (data_set.name, N)
        if progress:
            pbar = get_pbar(num_iters, prefix=string).start()
        for iter_idx in range(num_iters):
            batches = []
            for _ in range(self.num_towers):
                if data_set.has_next_batch(partial=True):
                    idxs.extend(data_set.get_batch_idxs(partial=True))
                    batches.append(data_set.get_next_labeled_batch(partial=True))
            (cur_num_corrects, cur_avg_loss, _, global_step), eval_value_batches = \
                self._eval_batches(batches, eval_tensor_names=eval_tensor_names, **eval_args)
            num_corrects += cur_num_corrects
            cur_num = sum(len(batch[0]) for batch in batches)
            total += cur_num
            for eval_value_batch in eval_value_batches:
                eval_values.append([x.tolist() for x in eval_value_batch])  # numpy.ndarray.tolist() yields JSON-serializable lists
            total_loss += cur_avg_loss * cur_num
            if progress:
                pbar.update(iter_idx)
        if progress:
            pbar.finish()
        loss = float(total_loss) / total
        data_set.reset()

        acc = float(num_corrects) / total
        print("%s at epoch %d: acc = %.2f%% = %d / %d, loss = %.4f" %
              (data_set.name, epoch, 100 * acc, num_corrects, total, loss))

        # For outputting eval json files
        if len(eval_tensor_names) > 0:
            ids = [data_set.idx2id[idx] for idx in idxs]
            zipped_eval_values = [list(itertools.chain(*each)) for each in zip(*eval_values)]
            values = {name: values for name, values in zip(eval_tensor_names, zipped_eval_values)}
            out = {'ids': ids, 'values': values}
            eval_path = os.path.join(params.eval_dir, "%s_%s.json" % (data_set.name, str(epoch).zfill(4)))
            with open(eval_path, 'w') as f:
                json.dump(out, f)
        return loss, acc
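A minimal usage sketch for the eval method above; `runner` (the trainer object exposing this method), `dev_set` (a DataSet instance), and the tensor name 'logits' are all hypothetical names chosen for illustration:

    # Hypothetical driver code following the interface used in Example #1.
    loss, acc = runner.eval(dev_set, eval_tensor_names=('logits',))
    # eval prints per-epoch accuracy and, because eval_tensor_names is
    # non-empty, also writes a <name>_<epoch>.json file under params.eval_dir.
    print("dev: loss=%.4f, acc=%.2f%%" % (loss, 100 * acc))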
Example #2
    def eval(self, data_set, eval_tensor_names=(), eval_ph_names=(), num_batches=None):
        # TODO : eval_ph_names
        assert isinstance(data_set, DataSet)
        assert self.initialized, "Initialize tower before training."

        params = self.params
        sess = self.sess
        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        progress = params.progress
        num_batches = num_batches or data_set.get_num_batches(partial=True)
        num_iters = int(np.ceil(num_batches / self.num_towers))
        num_corrects, total, total_loss = 0, 0, 0.0
        eval_values = []
        idxs = []
        N = data_set.batch_size * num_batches
        if N > data_set.num_examples:
            N = data_set.num_examples
        eval_args = self._get_eval_args(epoch)
        string = "eval on %s, N=%d|" % (data_set.name, N)
        if progress:
            pbar = get_pbar(num_iters, prefix=string).start()
        for iter_idx in range(num_iters):
            batches = []
            for _ in range(self.num_towers):
                if data_set.has_next_batch(partial=True):
                    idxs.extend(data_set.get_batch_idxs(partial=True))
                    batches.append(data_set.get_next_labeled_batch(partial=True))
            (cur_num_corrects, cur_avg_loss, _, global_step), eval_value_batches = \
                self._eval_batches(batches, eval_tensor_names=eval_tensor_names, **eval_args)
            num_corrects += cur_num_corrects
            cur_num = sum(len(batch[0]) for batch in batches)
            total += cur_num
            for eval_value_batch in eval_value_batches:
                eval_values.append([x.tolist() for x in eval_value_batch])  # numpy.ndarray.tolist() yields JSON-serializable lists
            total_loss += cur_avg_loss * cur_num
            if progress:
                pbar.update(iter_idx)
        if progress:
            pbar.finish()
        loss = float(total_loss) / total
        data_set.reset()

        acc = float(num_corrects) / total
        print("%s at epoch %d: acc = %.2f%% = %d / %d, loss = %.4f" %
              (data_set.name, epoch, 100 * acc, num_corrects, total, loss))

        # For outputting eval json files
        ids = [data_set.idx2id[idx] for idx in idxs]
        zipped_eval_values = [list(itertools.chain(*each)) for each in zip(*eval_values)]
        values = {name: values for name, values in zip(eval_tensor_names, zipped_eval_values)}
        out = {'ids': ids, 'values': values}
        eval_path = os.path.join(params.eval_dir, "%s_%s.json" % (data_set.name, str(epoch).zfill(4)))
        with open(eval_path, 'w') as f:
            json.dump(out, f)
        return loss, acc
Example #3
    def train(self, train_data_set, num_epochs, val_data_set=None, eval_ph_names=(),
              eval_tensor_names=(), num_batches=None, val_num_batches=None):
        assert isinstance(train_data_set, DataSet)
        assert self.initialized, "Initialize tower before training."

        sess = self.sess
        writer = self.writer
        params = self.params
        progress = params.progress
        val_loss, val_acc = None, None  # returned unchanged if validation never runs
        # if num_batches is specified, train only that many batches per epoch
        num_batches = num_batches or train_data_set.get_num_batches(partial=False)
        num_iters_per_epoch = int(num_batches / self.num_towers)
        num_digits = int(np.log10(num_batches))

        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        print("training %d epochs ... " % num_epochs)
        logging.info("num iters per epoch: %d" % num_iters_per_epoch)
        logging.info("starting from epoch %d." % (epoch+1))
        while epoch < num_epochs:
            train_args = self._get_train_args(epoch)
            if progress:
                pbar = get_pbar(num_iters_per_epoch, "epoch %s|" % str(epoch+1).zfill(num_digits)).start()
            for iter_idx in range(num_iters_per_epoch):
                batches = [train_data_set.get_next_labeled_batch() for _ in range(self.num_towers)]
                _, summary, global_step = self._train_batches(batches, **train_args)
                if self.write_log:
                    writer.add_summary(summary, global_step)
                if progress:
                    pbar.update(iter_idx)
            if progress:
                pbar.finish()
            train_data_set.complete_epoch()

            assign_op = epoch_op.assign_add(1)
            _, epoch = sess.run([assign_op, epoch_op])

            if val_data_set and epoch % params.val_period == 0:
                self.eval(train_data_set, eval_tensor_names=eval_tensor_names, num_batches=val_num_batches)
                val_loss, val_acc = self.eval(val_data_set, eval_tensor_names=eval_tensor_names, num_batches=val_num_batches)

            if epoch % params.save_period == 0:
                self.save()

        return val_loss, val_acc
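The loop above keeps the epoch counter in a TensorFlow variable (self.tensors['epoch']) so it is saved and restored with checkpoints, letting training resume where it left off. A minimal standalone sketch of that pattern, assuming the TF 1.x API:

    import tensorflow as tf

    # Non-trainable counter; it is stored in checkpoints alongside the weights.
    epoch_op = tf.get_variable('epoch', shape=[], dtype=tf.int32,
                               initializer=tf.zeros_initializer(), trainable=False)
    increment = epoch_op.assign_add(1)  # built once, outside the loop

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        epoch = sess.run(epoch_op)
        while epoch < 3:
            # ... one epoch of training would run here ...
            _, epoch = sess.run([increment, epoch_op])

Building assign_add once outside the loop avoids growing the graph on every epoch, which the in-loop epoch_op.assign_add(1) in these examples would do under TF 1.x.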
Example #4
    def train(self, train_data_set, num_epochs, val_data_set=None, eval_ph_names=(),
              eval_tensor_names=(), num_batches=None, val_num_batches=None):
        assert isinstance(train_data_set, DataSet)
        assert self.initialized, "Initialize tower before training."

        sess = self.sess
        writer = self.writer
        params = self.params
        progress = params.progress
        val_loss, val_acc = None, None  # returned unchanged if validation never runs
        # if num_batches is specified, train only that many batches per epoch
        num_batches = num_batches or train_data_set.get_num_batches(partial=False)
        num_iters_per_epoch = int(num_batches / self.num_towers)
        num_digits = int(np.log10(num_batches))

        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        print("training %d epochs ... " % num_epochs)
        logging.info("num iters per epoch: %d" % num_iters_per_epoch)
        logging.info("starting from epoch %d." % (epoch+1))
        while epoch < num_epochs:
            train_args = self._get_train_args(epoch)
            if progress:
                pbar = get_pbar(num_iters_per_epoch, "epoch %s|" % str(epoch+1).zfill(num_digits)).start()
            for iter_idx in range(num_iters_per_epoch):
                batches = [train_data_set.get_next_labeled_batch() for _ in range(self.num_towers)]
                _, summary, global_step = self._train_batches(batches, **train_args)
                writer.add_summary(summary, global_step)
                if progress:
                    pbar.update(iter_idx)
            if progress:
                pbar.finish()
            train_data_set.complete_epoch()

            assign_op = epoch_op.assign_add(1)
            _, epoch = sess.run([assign_op, epoch_op])

            if val_data_set and epoch % params.val_period == 0:
                self.eval(train_data_set, eval_tensor_names=eval_tensor_names, num_batches=val_num_batches)
                val_loss, val_acc = self.eval(val_data_set, eval_tensor_names=eval_tensor_names, num_batches=val_num_batches)

            if epoch % params.save_period == 0:
                self.save()

        return val_loss, val_acc
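Examples #3 and #4 differ only in whether writer.add_summary is guarded by a write_log flag. A minimal sketch of the TensorBoard summary pattern they rely on, assuming the TF 1.x API and a hypothetical log directory:

    import tensorflow as tf

    loss_ph = tf.placeholder(tf.float32, shape=[])
    tf.summary.scalar('loss', loss_ph)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        writer = tf.summary.FileWriter('/tmp/logs', sess.graph)
        for step in range(10):
            summary = sess.run(merged, feed_dict={loss_ph: 1.0 / (step + 1)})
            writer.add_summary(summary, step)  # step becomes the x-axis value
        writer.close()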
Example #5
    def train(self, train_data_set, val_data_set=None, eval_tensor_names=()):
        assert isinstance(train_data_set, DataSet)
        assert self.initialized, "Initialize tower before training."
        # TODO : allow partial batch

        sess = self.sess
        writer = self.writer
        params = self.params
        num_epochs = params.num_epochs
        num_batches = params.train_num_batches if params.train_num_batches >= 0 else train_data_set.get_num_batches(partial=False)
        num_iters_per_epoch = int(num_batches / self.num_towers)
        num_digits = int(np.log10(num_batches))

        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        print("training %d epochs ... " % num_epochs)
        print("num iters per epoch: %d" % num_iters_per_epoch)
        print("starting from epoch %d." % (epoch+1))
        while epoch < num_epochs:
            train_args = self._get_train_args(epoch)
            pbar = get_pbar(num_iters_per_epoch, "epoch %s|" % str(epoch+1).zfill(num_digits)).start()
            for iter_idx in range(num_iters_per_epoch):
                batches = [train_data_set.get_next_labeled_batch() for _ in range(self.num_towers)]
                _, summary, global_step = self._train_batches(batches, **train_args)
                writer.add_summary(summary, global_step)
                pbar.update(iter_idx)
            pbar.finish()
            train_data_set.complete_epoch()

            assign_op = epoch_op.assign_add(1)
            _, epoch = sess.run([assign_op, epoch_op])

            if val_data_set and epoch % params.val_period == 0:
                self.eval(train_data_set, is_val=True, eval_tensor_names=eval_tensor_names)
                self.eval(val_data_set, is_val=True, eval_tensor_names=eval_tensor_names)

            if epoch % params.save_period == 0:
                self.save()
Example #6
    def train(self,
              train_data_set,
              num_epochs,
              val_data_set=None,
              eval_ph_names=(),
              eval_tensor_names=(),
              num_batches=None,
              val_num_batches=None):
        assert isinstance(train_data_set, DataSet)
        assert self.initialized, "Initialize tower before training."

        sess = self.sess
        writer = self.writer
        params = self.params
        progress = params.progress
        val_acc = None
        # if num_batches is specified, train only that many batches per epoch
        num_batches = num_batches or train_data_set.get_num_batches(
            partial=False)
        num_iters_per_epoch = int(num_batches / self.num_towers)
        num_digits = int(np.log10(num_batches))

        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        print("training %d epochs ... " % num_epochs)
        logging.info("num iters per epoch: %d" % num_iters_per_epoch)
        logging.info("starting from epoch %d." % (epoch + 1))
        # run the tensor to get its current value; comparing the tensor object
        # itself against an int later would never match
        global_step = best_global_step = sess.run(self.tensors['global_step'])
        best_val_acc = 0.0
        best_val_loss = float('inf')
        count = 0  # epochs since the last improvement (early-stopping patience)

        while epoch < num_epochs:
            train_args = self._get_train_args(epoch)
            if progress:
                pbar = get_pbar(num_iters_per_epoch, "epoch %s|" %
                                str(epoch + 1).zfill(num_digits)).start()
            for iter_idx in range(num_iters_per_epoch):
                batches = [
                    train_data_set.get_next_labeled_batch()
                    for _ in range(self.num_towers)
                ]
                _, summary, global_step = self._train_batches(
                    batches, **train_args)
                if self.write_log:
                    writer.add_summary(summary, global_step)
                if progress:
                    pbar.update(iter_idx)
            if progress:
                pbar.finish()
            train_data_set.complete_epoch()

            assign_op = epoch_op.assign_add(1)
            _, epoch = sess.run([assign_op, epoch_op])

            global_step = sess.run(self.tensors['global_step'])
            if val_data_set and epoch % params.val_period == 0:
                self.eval(train_data_set,
                          eval_tensor_names=eval_tensor_names,
                          num_batches=val_num_batches)
                val_loss, val_acc = self.eval(
                    val_data_set,
                    eval_tensor_names=eval_tensor_names,
                    num_batches=val_num_batches)

                if val_acc > best_val_acc or (val_acc == best_val_acc
                                              and val_loss < best_val_loss):
                    count = 0
                    best_val_acc = val_acc
                    best_val_loss = val_loss
                    best_global_step = global_step
                    self.save()

                elif val_loss < best_val_loss:
                    count = 0
                else:
                    count += 1
                    if count >= 5:
                        break
        if best_global_step != global_step:
            save_dir = self.params.save_dir
            name = self.params.model_name
            save_path = os.path.join(save_dir, name)
            self.saver.restore(sess, '%s-%s' % (save_path, best_global_step))

        return best_val_loss, best_val_acc
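The patience rule in Examples #6 and #7 can be read in isolation. A minimal sketch of the same early-stopping logic, where evaluate() and save_checkpoint() are hypothetical stand-ins for self.eval(...) and self.save():

    def train_with_early_stopping(num_epochs, patience=5):
        best_acc, best_loss, count = 0.0, float('inf'), 0
        for epoch in range(num_epochs):
            val_loss, val_acc = evaluate()  # hypothetical validation pass
            if val_acc > best_acc or (val_acc == best_acc and val_loss < best_loss):
                best_acc, best_loss, count = val_acc, val_loss, 0
                save_checkpoint()  # keep the best model seen so far
            elif val_loss < best_loss:
                count = 0  # loss still improving: reset patience, as in the examples
            else:
                count += 1
                if count >= patience:
                    break  # no improvement for `patience` validations in a row
        return best_loss, best_acc

Note the rule mirrors the examples exactly: a lower loss alone resets the counter but does not update best_loss or trigger a save.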
Example #7
    def train(self, train_data_set, num_epochs, val_data_set=None, eval_ph_names=(),
              eval_tensor_names=(), num_batches=None, val_num_batches=None):
        assert isinstance(train_data_set, DataSet)
        assert self.initialized, "Initialize tower before training."

        sess = self.sess
        writer = self.writer
        params = self.params
        progress = params.progress
        val_acc = None
        # if num_batches is specified, train only that many batches per epoch
        num_batches = num_batches or train_data_set.get_num_batches(partial=False)
        num_iters_per_epoch = int(num_batches / self.num_towers)
        num_digits = int(np.log10(num_batches))

        epoch_op = self.tensors['epoch']
        epoch = sess.run(epoch_op)
        print("training %d epochs ... " % num_epochs)
        logging.info("num iters per epoch: %d" % num_iters_per_epoch)
        logging.info("starting from epoch %d." % (epoch+1))
        # run the tensor to get its current value; comparing the tensor object
        # itself against an int later would never match
        global_step = best_global_step = sess.run(self.tensors['global_step'])
        best_val_acc = 0.0
        best_val_loss = float('inf')
        count = 0  # epochs since the last improvement (early-stopping patience)

        while epoch < num_epochs:
            train_args = self._get_train_args(epoch)
            if progress:
                pbar = get_pbar(num_iters_per_epoch, "epoch %s|" % str(epoch+1).zfill(num_digits)).start()
            for iter_idx in range(num_iters_per_epoch):
                batches = [train_data_set.get_next_labeled_batch() for _ in range(self.num_towers)]
                _, summary, global_step = self._train_batches(batches, **train_args)
                if self.write_log:
                    writer.add_summary(summary, global_step)
                if progress:
                    pbar.update(iter_idx)
            if progress:
                pbar.finish()
            train_data_set.complete_epoch()

            assign_op = epoch_op.assign_add(1)
            _, epoch = sess.run([assign_op, epoch_op])

            global_step = sess.run(self.tensors['global_step'])
            if val_data_set and epoch % params.val_period == 0:
                self.eval(train_data_set, eval_tensor_names=eval_tensor_names, num_batches=val_num_batches)
                val_loss, val_acc = self.eval(val_data_set, eval_tensor_names=eval_tensor_names, num_batches=val_num_batches)

                if val_acc > best_val_acc or (val_acc == best_val_acc and val_loss < best_val_loss):
                    count = 0
                    best_val_acc = val_acc
                    best_val_loss = val_loss
                    best_global_step = global_step
                    self.save()

                elif val_loss < best_val_loss:
                    count = 0
                else:
                    count += 1
                    if count >= 5:
                        break
        if best_global_step != global_step:
            save_dir = self.params.save_dir
            name = self.params.model_name
            save_path = os.path.join(save_dir, name)
            self.saver.restore(sess, '%s-%s' % (save_path, best_global_step))

        return best_val_loss, best_val_acc