Example #1
# Assumed imports for these snippets (TensorFlow 1.x API). Helper functions
# such as sess_run_dict, learning_phase, batchnorm_learning_phase, and
# flatten_all are assumed to come from the project's own utility modules
# (Example #3 notes sess_run_dict is from tfutil); they are not defined here.
import time

import numpy as np
import tensorflow as tf


def eval_on_entire_dataset(sess, model, input_x, input_y, batch_size,
                           tb_prefix_and_iter, tb_writer):
    #grad_sums = np.zeros(dim_sum)
    num_batches = int(input_y.shape[0] / batch_size)
    total_acc = 0
    total_loss = 0
    total_loss_no_reg = 0  # loss without counting l2 penalty

    for i in range(num_batches):
        # slice indices (should be large)
        s_start = batch_size * i
        s_end = s_start + batch_size

        fetch_dict = {
            'accuracy': model.accuracy,
            'loss': model.loss,
            'loss_no_reg': model.loss_cross_ent
        }

        # do not use nor update moving averages
        result_dict = sess_run_dict(sess, fetch_dict, feed_dict={
            model.input_images: input_x[s_start:s_end],
            model.input_labels: input_y[s_start:s_end],
            learning_phase(): 0,
            batchnorm_learning_phase(): 1
        })

        total_acc += result_dict['accuracy']
        total_loss += result_dict['loss']
        total_loss_no_reg += result_dict['loss_no_reg']

    acc = total_acc / num_batches
    loss = total_loss / num_batches
    loss_no_reg = total_loss_no_reg / num_batches

    # tensorboard
    if tb_writer:
        tb_prefix, iterations = tb_prefix_and_iter
        summary = tf.Summary()
        summary.value.add(tag='%s_acc' % tb_prefix, simple_value=acc)
        summary.value.add(tag='%s_loss' % tb_prefix, simple_value=loss)
        summary.value.add(tag='%s_loss_no_reg' % tb_prefix,
                          simple_value=loss_no_reg)
        tb_writer.add_summary(summary, iterations)

    return acc, loss_no_reg


def calc_one_iter_grads(sess, model, train_x, train_y, snip_batch_size, dsets):
    train_size = train_x.shape[0]
    batch_ind = np.random.choice(range(train_size),
                                 size=snip_batch_size,
                                 replace=False)
    fetch_dict = {}
    fetch_dict['gradients'] = model.grads_to_compute

    result_dict = sess_run_dict(sess, fetch_dict, feed_dict={
        model.input_images: train_x[batch_ind],
        model.input_labels: train_y[batch_ind],
        learning_phase(): 0,
        batchnorm_learning_phase(): 1
    })

    grads = result_dict['gradients']
    flattened = np.concatenate([grad.flatten() for grad in grads])

    return flattened
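
Both functions above funnel their session calls through sess_run_dict, which a comment in Example #3 says comes from the project's tfutil module and returns a dictionary. The helper itself is not shown in these examples; a minimal sketch of what it could look like, inferred only from how it is called here (a dict of named fetch tensors in, a dict of evaluated values out), not necessarily the project's actual implementation:

def sess_run_dict(sess, fetch_dict, feed_dict=None):
    # Evaluate all fetches in a single sess.run call and return them
    # keyed by the same names that were passed in.
    keys = list(fetch_dict.keys())
    values = sess.run([fetch_dict[k] for k in keys], feed_dict=feed_dict)
    return dict(zip(keys, values))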
Example #3
def eval_on_entire_dataset(sess, model, input_x, input_y, batch_size):
    num_batches = int(input_y.shape[0] / batch_size)
    total_acc = 0
    total_loss = 0
    total_loss_no_reg = 0  # loss without counting l2 penalty

    for i in range(num_batches):
        # slice indices (should be large)
        s_start = batch_size * i
        s_end = s_start + batch_size

        fetch_dict = {
            'accuracy': model.accuracy,
            'loss': model.loss,
            'loss_no_reg': model.loss_cross_ent
        }

        # sess_run_dict is from tfutil and returns a dictionary
        # do not use nor update moving averages
        result_dict = sess_run_dict(sess, fetch_dict, feed_dict={
            model.input_images: input_x[s_start:s_end],
            model.input_labels: input_y[s_start:s_end],
            learning_phase(): 0,
            batchnorm_learning_phase(): 1
        })

        total_acc += result_dict['accuracy']
        total_loss += result_dict['loss']
        total_loss_no_reg += result_dict['loss_no_reg']

    acc = total_acc / num_batches
    loss = total_loss / num_batches
    loss_no_reg = total_loss_no_reg / num_batches

    return acc, loss_no_reg


def train_and_eval(sess, model, snip_batch_size, train_x, train_y, val_x,
                   val_y, test_x, test_y, tb_writer, dsets, args):
    # constants
    num_batches = int(train_y.shape[0] / args.train_batch_size)
    # total dimension of all weight matrices
    dim_sum = sum(tf.size(var).eval() for var in model.trainable_weights)

    # adaptive learning schedule
    curr_lr = args.lr
    decay_schedule = [int(x) for x in args.decay_schedule.split(',')]
    print(decay_schedule)
    decay_count = 0

    # initializations
    tb_summaries = tf.summary.merge(tf.get_collection('train_step'))

    shuffled_indices = np.arange(train_y.shape[0])  # for no shuffling
    iterations = 0
    chunks_written = 0
    timerstart = time.time()
    iter_index = 0

    if args.save_weights:
        dsets['all_weights'][chunks_written] = flatten_all(
            model.trainable_weights)

    chunks_written += 1

    dsets['one_iter_grads'][0] = calc_one_iter_grads(sess, model, train_x,
                                                     train_y, snip_batch_size,
                                                     dsets)

    for epoch in range(args.num_epochs):
        if not args.no_shuffle:
            # for shuffled mini-batches
            shuffled_indices = np.random.permutation(train_y.shape[0])

        if args.decay_lr and epoch == decay_schedule[decay_count]:
            curr_lr *= 0.1
            decay_count += 1
            print('dropping learning rate to ' + str(curr_lr))

        for i in range(num_batches):

            # less frequent, larger evals
            if iterations % args.eval_every == 0:
                # eval on entire train set
                cur_train_acc, cur_train_loss = eval_on_entire_dataset(
                    sess, model, train_x, train_y, dim_sum,
                    args.large_batch_size, 'eval_train', tb_writer, iterations)

                # eval on entire test/val set
                cur_test_acc, cur_test_loss = eval_on_entire_dataset(
                    sess, model, test_x, test_y, dim_sum, args.test_batch_size,
                    'eval_test', tb_writer, iterations)

                cur_val_acc, cur_val_loss = eval_on_entire_dataset(
                    sess, model, val_x, val_y, dim_sum, args.val_batch_size,
                    'eval_val', tb_writer, iterations)

                if args.save_loss:
                    dsets['train_accuracy'][iter_index] = cur_train_acc
                    dsets['train_loss'][iter_index] = cur_train_loss
                    dsets['val_accuracy'][iter_index] = cur_val_acc
                    dsets['val_loss'][iter_index] = cur_val_loss
                    dsets['test_accuracy'][iter_index] = cur_test_acc
                    dsets['test_loss'][iter_index] = cur_test_loss
                    iter_index += 1

            # print status update
            if iterations % args.print_every == 0:
                print(('{}: train acc = {:.4f}, val acc = {:.4f}, test acc = {:.4f}, '
                       'train loss = {:.4f}, val loss = {:.4f}, test loss = {:.4f} ({:.2f} s)'
                       ).format(iterations, cur_train_acc, cur_val_acc, cur_test_acc,
                                cur_train_loss, cur_val_loss, cur_test_loss,
                                time.time() - timerstart))

            # current slice for input data
            batch_indices = shuffled_indices[args.train_batch_size *
                                             i:args.train_batch_size * (i + 1)]

            # training
            fetch_dict = {
                'train_step': model.train_step,
                'accuracy': model.accuracy,
                'loss': model.loss
            }
            fetch_dict.update(model.update_dict())
            if iterations % args.log_every == 0:
                fetch_dict.update({'tb': tb_summaries})
            result_train = sess_run_dict(sess, fetch_dict, feed_dict={
                model.input_images: train_x[batch_indices],
                model.input_labels: train_y[batch_indices],
                model.input_lr: curr_lr,
                learning_phase(): 1,
                batchnorm_learning_phase(): 1
            })

            # log to tensorboard
            if tb_writer and iterations % args.log_every == 0:
                tb_writer.add_summary(result_train['tb'], iterations)

            iterations += 1

            if iterations == 1:
                dsets['all_weights'][chunks_written] = flatten_all(
                    model.trainable_weights)
                chunks_written += 1

            # store current weights and gradients
            if args.mode == 'save_all' and args.save_weights and iterations % args.eval_every == 0:
                dsets['all_weights'][chunks_written] = flatten_all(
                    model.trainable_weights)
                chunks_written += 1

    # save final weight values
    if args.save_weights and iterations % args.eval_every != 0:
        dsets['all_weights'][chunks_written] = flatten_all(
            model.trainable_weights)

    # save final evals
    # on entire train set
    cur_train_acc, cur_train_loss = eval_on_entire_dataset(
        sess, model, train_x, train_y, dim_sum, args.large_batch_size,
        'eval_train', tb_writer, iterations)

    # on entire test/val set
    cur_test_acc, cur_test_loss = eval_on_entire_dataset(
        sess, model, test_x, test_y, dim_sum, args.test_batch_size,
        'eval_test', tb_writer, iterations)

    cur_val_acc, cur_val_loss = eval_on_entire_dataset(sess, model, val_x,
                                                       val_y, dim_sum,
                                                       args.val_batch_size,
                                                       'eval_val', tb_writer,
                                                       iterations)

    if args.save_loss and iterations % args.eval_every != 0:
        dsets['train_accuracy'][iter_index] = cur_train_acc
        dsets['train_loss'][iter_index] = cur_train_loss
        dsets['test_accuracy'][iter_index] = cur_test_acc
        dsets['test_loss'][iter_index] = cur_test_loss
        dsets['val_accuracy'][iter_index] = cur_val_acc
        dsets['val_loss'][iter_index] = cur_val_loss

    # print last status update
    print(('{}: train acc = {:.4f}, val acc = {:.4f}, test acc = {:.4f}, '
           'train loss = {:.4f}, val loss = {:.4f}, test loss = {:.4f} ({:.2f} s)'
           ).format(iterations, cur_train_acc, cur_val_acc, cur_test_acc,
                    cur_train_loss, cur_val_loss, cur_test_loss,
                    time.time() - timerstart))
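
train_and_eval snapshots weights with flatten_all(model.trainable_weights), another project helper that is not defined in these examples. Going by the flattening pattern used in calc_one_iter_grads above (per-variable arrays concatenated into one flat vector), a plausible sketch, assuming a default TF session is active as the surrounding code implies, would be:

import numpy as np

def flatten_all(variables):
    # Evaluate every variable (relies on a default TF session, since the
    # calls above pass no session) and concatenate into one flat vector.
    return np.concatenate([var.eval().flatten() for var in variables])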
Example #5
def train_and_eval(sess, model, train_x, train_y, test_x, test_y, tb_writer,
                   dsets, args):
    # constants
    num_batches = int(train_y.shape[0] / args.train_batch_size)
    print('Training batch size {}, number of iterations: {} per epoch, {} total'
          .format(args.train_batch_size, num_batches,
                  args.num_epochs * num_batches))
    # dim_sum = sum([tf.size(var).eval() for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])

    # adaptive learning schedule
    curr_lr = args.lr
    decay_epochs = [int(ep) for ep in args.decay_schedule.split(',')]
    if decay_epochs[-1] > 0:
        # need to end with something small to stop the decay
        decay_epochs.append(-1)
    decay_count = 0

    # initializations
    tb_summaries = tf.summary.merge(tf.get_collection('tb_train_step'))
    shuffled_indices = np.arange(train_y.shape[0])  # for no shuffling
    iterations = 0
    chunks_written = 0  # for args.save_every batches
    timerstart = time.time()

    for epoch in range(args.num_epochs):
        print('-' * 100)
        print('epoch {}  current lr {:.3g}'.format(epoch, curr_lr))
        if not args.no_shuffle:
            # for shuffled mini-batches
            shuffled_indices = np.random.permutation(train_y.shape[0])

        if epoch == decay_epochs[decay_count]:
            curr_lr *= 0.1
            decay_count += 1

        for i in range(num_batches):
            # store current weights and gradients
            if args.save_weights and iterations % args.save_every == 0:
                dsets['all_weights'][chunks_written] = flatten_all(
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
                chunks_written += 1

            # less frequent, larger evals
            if iterations % args.eval_every == 0:
                args.verbose = epoch < 3
                eval(sess, model, train_x, train_y, test_x, test_y, args,
                     tb_writer, iterations)

                if args.signed_constant and iterations < args.print_every * 3:  # validate 3 times
                    print('Sanity check: signed constant values')
                    if args.signed_constant_multiplier:
                        print('Note: signed constant multiplier is {}'.format(
                            args.signed_constant_multiplier))
                    if args.dynamic_scaling:
                        print('Note: dynamic signed constant multiplier')
                    for layer in list(model.layers):
                        if 'conv2D' in layer.name or 'fc' in layer.name:
                            #signed_kernel = layer.signed_kernel.eval()
                            signed_kernel = sess.run(
                                layer.kernel, feed_dict={learning_phase(): 0})
                            print(
                                'Layer {} signed kernel shape {}, has unique values {}'
                                .format(layer.name, signed_kernel.shape,
                                        np.unique(signed_kernel).tolist()))

            # current slice for input data
            batch_indices = shuffled_indices[args.train_batch_size *
                                             i:args.train_batch_size * (i + 1)]

            # training
            fetch_dict = {'train_step': model.train_step}
            fetch_dict.update(model.update_dict())
            if iterations % args.log_every == 0:
                fetch_dict.update({'tb': tb_summaries})

            result_train = sess_run_dict(sess, fetch_dict, feed_dict={
                model.input_images: train_x[batch_indices],
                model.input_labels: train_y[batch_indices],
                model.input_lr: curr_lr,
                learning_phase(): 1,
                batchnorm_learning_phase(): 1
            })

            # log to tensorboard
            if tb_writer and iterations % args.log_every == 0:
                tb_writer.add_summary(result_train['tb'], iterations)

            iterations += 1

    # save final weight values
    if args.save_weights:
        dsets['all_weights'][chunks_written] = flatten_all(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

    # save final evals
    if iterations % args.eval_every == 0:

        eval(sess, model, train_x, train_y, test_x, test_y, args, tb_writer,
             iterations)

    if 'mask' in args.arch:
        # for supermask: eval 10 times from different random bernoullies
        testaccs = []
        testlosses = []
        for sample in range(10):

            cur_test_acc, cur_test_loss = eval_on_entire_dataset(
                sess, model, test_x, test_y, args.test_batch_size,
                ('eval_test', iterations), tb_writer)

            testaccs.append(cur_test_acc)
            testlosses.append(cur_test_loss)
        print("all test accs:", testaccs)
        print("all test losses:", testlosses)
        print('final test acc = {:.5f}, test loss = {:.5f}'.format(
            np.mean(testaccs), np.mean(testlosses)))

        percs, ones_all, size_all = [], 0, 0
        for layer in model.trainable_weights:
            mprobs = tf.stop_gradient(tf.nn.sigmoid(layer)).eval()
            num_ones = mprobs.sum()  # expected value
            percs.append(num_ones / mprobs.size)
            ones_all += num_ones
            size_all += mprobs.size
            #nparr = layer.eval()
            #num_ones = (nparr > 0).sum() + 0.5 * (nparr == 0).sum() # expected value
            #percs.append(num_ones / nparr.size)
            #ones_all += num_ones
            #size_all += nparr.size
        print('[Est] percent of 1s in mask (per layer):', percs)
        print('[Est] percent of 1s in mask (total):', ones_all / size_all)

    if args.signed_constant:  # validate in the end
        print('Sanity check: signed constant values')
        if args.dynamic_scaling:
            print('Note: dynamic signed constant multiplier')
        elif args.signed_constant_multiplier:
            print('Note: signed constant multiplier is {}'.format(
                args.signed_constant_multiplier))
        for layer in list(model.layers):
            if 'conv2D' in layer.name or 'fc' in layer.name:
                #signed_kernel = layer.signed_kernel.eval()
                signed_kernel = sess.run(layer.kernel,
                                         feed_dict={learning_phase(): 0})
                print('Layer {} signed kernel shape {}, has unique values {}'.
                      format(layer.name, signed_kernel.shape,
                             np.unique(signed_kernel).tolist()))
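
For reference, the '[Est] percent of 1s' numbers printed above are expected values: each mask logit contributes sigmoid(logit) to the expected count of 1s, so the per-layer estimate is just the mean of the sigmoid-transformed logits. A tiny standalone numpy illustration (the logit values are made up):

import numpy as np

def expected_mask_density(logits):
    # Expected fraction of 1s when each mask bit is drawn as Bernoulli(sigmoid(logit)).
    probs = 1.0 / (1.0 + np.exp(-np.asarray(logits, dtype=np.float64)))
    return probs.mean()

print(expected_mask_density([-2.0, 0.0, 2.0]))  # 0.5, since sigmoid(-x) = 1 - sigmoid(x)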