import os
import time

import numpy as np
import tensorflow as tf

# Note: sess_run_dict, flatten_all, learning_phase and batchnorm_learning_phase
# are helpers defined elsewhere in this repository.


def get_gradients_and_eval(sess,
                           model,
                           input_x,
                           input_y,
                           dim_sum,
                           batch_size,
                           get_eval=True,
                           get_grads=True):
    grad_sums = np.zeros(dim_sum)
    num_batches = int(input_y.shape[0] / batch_size)
    total_acc = 0
    total_loss = 0
    total_loss_no_reg = 0  # loss without counting l2 penalty

    for i in range(num_batches):
        # slice indices (should be large)
        s_start = batch_size * i
        s_end = s_start + batch_size

        fetch_dict = {}
        if get_eval:
            # fetch_dict['accuracy'] = model.accuracy
            # fetch_dict['loss'] = model.loss
            fetch_dict['loss_no_reg'] = model.loss_cross_ent
        if get_grads:
            fetch_dict['gradients'] = model.grads_to_compute

        result_dict = sess_run_dict(sess,
                                    fetch_dict,
                                    feed_dict={
                                        model.input_images:
                                        input_x[s_start:s_end],
                                        model.input_labels:
                                        input_y[s_start:s_end],
                                        learning_phase(): 0,
                                        batchnorm_learning_phase(): 1
                                    })

        if get_eval:
            # total_acc += result_dict['accuracy']
            # total_loss += result_dict['loss']
            total_loss_no_reg += result_dict['loss_no_reg']
        if get_grads:
            # grads is a list of np arrays, one per computed gradient
            grads = result_dict['gradients']
            flattened = np.concatenate([grad.flatten() for grad in grads])
            grad_sums += flattened

    acc = total_acc / num_batches
    loss = total_loss / num_batches
    loss_no_reg = total_loss_no_reg / num_batches

    return np.divide(grad_sums, num_batches), loss_no_reg
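

# --- Hedged sketch (assumption): sess_run_dict is a repository helper that is
# not shown in this listing. Based on how it is called above, it most likely
# runs the fetches in fetch_dict through sess.run and returns the results under
# the same keys, roughly like this:
def sess_run_dict(sess, fetch_dict, feed_dict=None):
    """Run a dict of fetches and return a dict of results under the same keys."""
    keys = list(fetch_dict.keys())
    values = sess.run([fetch_dict[k] for k in keys], feed_dict=feed_dict)
    return dict(zip(keys, values))
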
def eval_on_entire_dataset(sess, model, input_x, input_y, dim_sum, batch_size,
                           tb_prefix, tb_writer, iterations):
    grad_sums = np.zeros(dim_sum)
    num_batches = int(input_y.shape[0] / batch_size)
    total_acc = 0
    total_loss = 0
    total_loss_no_reg = 0  # loss without counting l2 penalty

    for i in range(num_batches):
        # slice indices (should be large)
        s_start = batch_size * i
        s_end = s_start + batch_size

        fetch_dict = {
            'accuracy': model.accuracy,
            'loss': model.loss,
            'loss_no_reg': model.loss_cross_ent
        }

        result_dict = sess_run_dict(sess,
                                    fetch_dict,
                                    feed_dict={
                                        model.input_images:
                                        input_x[s_start:s_end],
                                        model.input_labels:
                                        input_y[s_start:s_end],
                                        learning_phase(): 0,
                                        batchnorm_learning_phase(): 1
                                    })  # do not use nor update moving averages

        total_acc += result_dict['accuracy']
        total_loss += result_dict['loss']
        total_loss_no_reg += result_dict['loss_no_reg']

    acc = total_acc / num_batches
    loss = total_loss / num_batches
    loss_no_reg = total_loss_no_reg / num_batches

    # tensorboard
    if tb_writer:
        summary = tf.Summary()
        summary.value.add(tag='%s_acc' % tb_prefix, simple_value=acc)
        summary.value.add(tag='%s_loss' % tb_prefix, simple_value=loss)
        summary.value.add(tag='%s_loss_no_reg' % tb_prefix,
                          simple_value=loss_no_reg)
        tb_writer.add_summary(summary, iterations)

    return acc, loss_no_reg
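

# --- Hedged sketch (assumption): learning_phase() and batchnorm_learning_phase()
# are project helpers returning the tensors fed with 0/1 above to switch dropout
# and batch norm between inference and training behaviour (learning_phase() may
# simply wrap keras.backend.learning_phase()). A minimal stand-in could expose
# two scalar placeholders with inference (0) as the default:
_LEARNING_PHASE = None
_BN_LEARNING_PHASE = None


def learning_phase():
    global _LEARNING_PHASE
    if _LEARNING_PHASE is None:
        _LEARNING_PHASE = tf.placeholder_with_default(0, shape=(), name='learning_phase')
    return _LEARNING_PHASE


def batchnorm_learning_phase():
    global _BN_LEARNING_PHASE
    if _BN_LEARNING_PHASE is None:
        _BN_LEARNING_PHASE = tf.placeholder_with_default(0, shape=(), name='bn_learning_phase')
    return _BN_LEARNING_PHASE
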
def train_and_eval(sess, model, train_x, train_y, test_x, test_y, tb_writer, dsets, args):
# def train_and_eval(sess, model, train_y_shape, train_generator, val_generator, tb_writer, dsets, args):
    # constants
    # num_batches = int(train_y_shape[0] / args.train_batch_size)
    num_batches = int(train_y.shape[0] / args.train_batch_size)
    print('Training batch size {}, number of iterations: {} per epoch, {} total'.format(
        args.train_batch_size, num_batches, args.num_epochs*num_batches))
    dim_sum = sum([tf.size(var).eval() for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])

    # adaptive learning schedule
    curr_lr = args.lr
    decay_epochs = [int(ep) for ep in args.decay_schedule.split(',')]
    if decay_epochs[-1] > 0:
        decay_epochs.append(-1)  # sentinel epoch that never matches, so decay stops after the last entry
    decay_count = 0

    # initializations
    tb_summaries = tf.summary.merge(tf.get_collection('tb_train_step'))
    shuffled_indices = np.arange(train_y.shape[0])  # for no shuffling
    iterations = 0
    chunks_written = 0 # for args.save_every batches
    timerstart = time.time()

    for epoch in range(args.num_epochs):
        # print('-' * 100)
        # print('epoch {}  current lr {:.3g}'.format(epoch, curr_lr))
        if not args.no_shuffle:
            shuffled_indices = np.random.permutation(train_y.shape[0])  # for shuffled mini-batches

        if epoch == decay_epochs[decay_count]:
            curr_lr *= 0.1
            decay_count += 1

        for i in range(num_batches):
            # store current weights and gradients
            if args.save_weights and iterations % args.save_every == 0:
                dsets['all_weights'][chunks_written] = flatten_all(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
                chunks_written += 1

            # less frequent, larger evals
            if iterations % args.eval_every == 0:
                # eval on entire train set
                # cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_y_shape, train_generator,
                cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_x, train_y,
                        dim_sum, args.large_batch_size, 'eval_train', tb_writer, iterations)

                # eval on entire test/val set
                # cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, train_y_shape, val_generator,
                cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, test_x, test_y,
                        dim_sum, args.test_batch_size, 'eval_test', tb_writer, iterations)

            # print status update
            if iterations % args.print_every == 0:
                print(('{}: train acc = {:.4f}, test acc = {:.4f}, '
                    + 'train loss = {:.4f}, test loss = {:.4f}, lr = {:.4f} ({:.2f} s)').format(iterations,
                    cur_train_acc, cur_test_acc, cur_train_loss, cur_test_loss, curr_lr, time.time() - timerstart))

            # current slice for input data
            batch_indices = shuffled_indices[args.train_batch_size * i : args.train_batch_size * (i + 1)]

            # Generate batch of training data according to current slice:
            # train_x_single_b, train_y_single_b = train_generator[i]

            # training
            # fetch_dict = {'accuracy': model.accuracy, 'loss': model.loss} # no longer used
            if len(args.freeze_layers) > 0 and iterations >= args.freeze_starting:
                fetch_dict = {'train_step': model.train_step_freeze}
            elif len(args.opt2_layers) > 0:
                fetch_dict = {'train_step_1': model.train_step_1,
                    'train_step_2': model.train_step_2}
            else:
                fetch_dict = {'train_step': model.train_step}
            fetch_dict.update(model.update_dict())

            if iterations % args.log_every == 0:
                fetch_dict.update({'tb': tb_summaries})
            if args.save_training_grads:
                fetch_dict['gradients'] = model.grads_to_compute

            result_train = sess_run_dict(sess, fetch_dict, feed_dict={
                model.input_images: train_x[batch_indices],
                model.input_labels: train_y[batch_indices],
                model.input_lr: curr_lr,
                learning_phase(): 1,
                batchnorm_learning_phase(): 1})

            # log to tensorboard
            if tb_writer and iterations % args.log_every == 0:
                tb_writer.add_summary(result_train['tb'], iterations)

            if args.save_training_grads:
                dsets['training_grads'][iterations] = np.concatenate(
                    [grad.flatten() for grad in result_train['gradients']])

            iterations += 1

    # save final weight values
    if args.save_weights:
        dsets['all_weights'][chunks_written] = flatten_all(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

    # save the final model checkpoint
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(args.output_dir, 'model'))

    # save final evals
    if iterations % args.eval_every == 0:
        # on entire train set
        # cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_y_shape, train_generator,
        cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_x, train_y,
            dim_sum, args.large_batch_size, 'eval_train', tb_writer, iterations)

        # on entire test/val set
        # cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, train_y_shape, val_generator,
        cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, test_x, test_y,
            dim_sum, args.test_batch_size, 'eval_test', tb_writer, iterations)

    # print last status update
    print(('{}: train acc = {:.4f}, test acc = {:.4f}, '
        + 'train loss = {:.4f}, test loss = {:.4f} ({:.2f} s)').format(iterations,
        cur_train_acc, cur_test_acc, cur_train_loss, cur_test_loss, time.time() - timerstart))
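

# --- Hedged sketch (assumption): flatten_all is another repository helper used
# above to snapshot all trainable variables as a single flat vector (stored one
# row per checkpoint in dsets['all_weights']). A minimal version could be:
def flatten_all(tensors):
    """Evaluate a list of variables/tensors and concatenate them into one flat np array."""
    return np.concatenate([tf.reshape(t, [-1]).eval() for t in tensors])


# Hypothetical invocation (not from the original source), assuming a `model`
# object exposing the attributes used above (input_images, input_labels,
# input_lr, train_step, loss, accuracy, loss_cross_ent, grads_to_compute,
# update_dict) and an argparse-style `args` namespace with the flags that
# train_and_eval reads:
#
#   args = argparse.Namespace(
#       train_batch_size=128, large_batch_size=1000, test_batch_size=1000,
#       num_epochs=10, lr=0.1, decay_schedule='5,8', no_shuffle=False,
#       save_weights=False, save_every=100, eval_every=200, print_every=200,
#       log_every=100, save_training_grads=False, freeze_layers='',
#       freeze_starting=0, opt2_layers='', output_dir='./out')
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       train_and_eval(sess, model, train_x, train_y, test_x, test_y,
#                      tb_writer=None, dsets={}, args=args)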