def get_gradients_and_eval(sess, model, input_x, input_y, dim_sum, batch_size, get_eval=True, get_grads=True):
    grad_sums = np.zeros(dim_sum)
    num_batches = int(input_y.shape[0] / batch_size)
    total_acc = 0
    total_loss = 0
    total_loss_no_reg = 0  # loss without counting l2 penalty

    for i in range(num_batches):
        # slice indices (should be large)
        s_start = batch_size * i
        s_end = s_start + batch_size

        fetch_dict = {}
        if get_eval:
            # fetch_dict['accuracy'] = model.accuracy
            # fetch_dict['loss'] = model.loss
            fetch_dict['loss_no_reg'] = model.loss_cross_ent
        if get_grads:
            fetch_dict['gradients'] = model.grads_to_compute

        result_dict = sess_run_dict(sess, fetch_dict, feed_dict={
            model.input_images: input_x[s_start:s_end],
            model.input_labels: input_y[s_start:s_end],
            learning_phase(): 0,
            batchnorm_learning_phase(): 1})

        if get_eval:
            # total_acc += result_dict['accuracy']
            # total_loss += result_dict['loss']
            total_loss_no_reg += result_dict['loss_no_reg']

        if get_grads:
            grads = result_dict['gradients']  # grads should now be a list of np arrays
            flattened = np.concatenate([grad.flatten() for grad in grads])
            grad_sums += flattened

    acc = total_acc / num_batches
    loss = total_loss / num_batches
    loss_no_reg = total_loss_no_reg / num_batches

    return np.divide(grad_sums, num_batches), loss_no_reg
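
# NOTE: `sess_run_dict`, `learning_phase`, and `batchnorm_learning_phase` are helpers
# defined elsewhere in this repo. The function below is a hypothetical, minimal sketch
# of the contract `sess_run_dict` is assumed to satisfy (run a dict of fetches, return
# the results under the same keys); it uses an underscore-suffixed name so it does not
# shadow the real helper.
def _sess_run_dict_sketch(sess, fetch_dict, feed_dict=None):
    """Run sess.run on the dict's values and return results keyed like fetch_dict."""
    keys = list(fetch_dict.keys())
    results = sess.run([fetch_dict[k] for k in keys], feed_dict=feed_dict)
    return dict(zip(keys, results))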
def eval_on_entire_dataset(sess, model, input_x, input_y, dim_sum, batch_size, tb_prefix, tb_writer, iterations):
    grad_sums = np.zeros(dim_sum)
    num_batches = int(input_y.shape[0] / batch_size)
    total_acc = 0
    total_loss = 0
    total_loss_no_reg = 0  # loss without counting l2 penalty

    for i in range(num_batches):
        # slice indices (should be large)
        s_start = batch_size * i
        s_end = s_start + batch_size

        fetch_dict = {
            'accuracy': model.accuracy,
            'loss': model.loss,
            'loss_no_reg': model.loss_cross_ent}

        result_dict = sess_run_dict(sess, fetch_dict, feed_dict={
            model.input_images: input_x[s_start:s_end],
            model.input_labels: input_y[s_start:s_end],
            learning_phase(): 0,
            batchnorm_learning_phase(): 1})  # do not use nor update moving averages

        total_acc += result_dict['accuracy']
        total_loss += result_dict['loss']
        total_loss_no_reg += result_dict['loss_no_reg']

    acc = total_acc / num_batches
    loss = total_loss / num_batches
    loss_no_reg = total_loss_no_reg / num_batches

    # tensorboard
    if tb_writer:
        summary = tf.Summary()
        summary.value.add(tag='%s_acc' % tb_prefix, simple_value=acc)
        summary.value.add(tag='%s_loss' % tb_prefix, simple_value=loss)
        summary.value.add(tag='%s_loss_no_reg' % tb_prefix, simple_value=loss_no_reg)
        tb_writer.add_summary(summary, iterations)

    return acc, loss_no_reg
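
# NOTE: `flatten_all` (used by train_and_eval below) is also defined elsewhere in this
# repo. Judging from its use (storing all trainable variables into one dataset row), it
# is assumed to evaluate each variable and concatenate them into a single flat numpy
# vector. A hypothetical sketch, assuming a default session is active (as with the
# `tf.size(var).eval()` call below):
def _flatten_all_sketch(tensor_list):
    """Evaluate each TF variable/tensor and concatenate into one flat np array."""
    return np.concatenate([np.asarray(t.eval()).flatten() for t in tensor_list])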
def train_and_eval(sess, model, train_x, train_y, test_x, test_y, tb_writer, dsets, args):
# def train_and_eval(sess, model, train_y_shape, train_generator, val_generator, tb_writer, dsets, args):
    # constants
    # num_batches = int(train_y_shape[0] / args.train_batch_size)
    num_batches = int(train_y.shape[0] / args.train_batch_size)
    print('Training batch size {}, number of iterations: {} per epoch, {} total'.format(
        args.train_batch_size, num_batches, args.num_epochs * num_batches))
    dim_sum = sum([tf.size(var).eval() for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])

    # adaptive learning schedule
    curr_lr = args.lr
    decay_epochs = [int(ep) for ep in args.decay_schedule.split(',')]
    if decay_epochs[-1] > 0:
        decay_epochs.append(-1)  # end with something small to stop the decay
    decay_count = 0

    # initializations
    tb_summaries = tf.summary.merge(tf.get_collection('tb_train_step'))
    shuffled_indices = np.arange(train_y.shape[0])  # for no shuffling
    iterations = 0
    chunks_written = 0  # for args.save_every batches
    timerstart = time.time()

    for epoch in range(args.num_epochs):
        # print('-' * 100)
        # print('epoch {} current lr {:.3g}'.format(epoch, curr_lr))
        if not args.no_shuffle:
            shuffled_indices = np.random.permutation(train_y.shape[0])  # for shuffled mini-batches

        if epoch == decay_epochs[decay_count]:
            curr_lr *= 0.1
            decay_count += 1

        for i in range(num_batches):
            # store current weights and gradients
            if args.save_weights and iterations % args.save_every == 0:
                dsets['all_weights'][chunks_written] = flatten_all(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
                chunks_written += 1

            # less frequent, larger evals
            if iterations % args.eval_every == 0:
                # eval on entire train set
                # cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_y_shape, train_generator,
                cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_x, train_y,
                    dim_sum, args.large_batch_size, 'eval_train', tb_writer, iterations)

                # eval on entire test/val set
                # cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, train_y_shape, val_generator,
                cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, test_x, test_y,
                    dim_sum, args.test_batch_size, 'eval_test', tb_writer, iterations)

            # print status update
            if iterations % args.print_every == 0:
                print(('{}: train acc = {:.4f}, test acc = {:.4f}, '
                       + 'train loss = {:.4f}, test loss = {:.4f}, lr = {:.4f} ({:.2f} s)').format(
                    iterations, cur_train_acc, cur_test_acc, cur_train_loss, cur_test_loss,
                    curr_lr, time.time() - timerstart))

            # current slice for input data
            batch_indices = shuffled_indices[args.train_batch_size * i : args.train_batch_size * (i + 1)]

            # Generate batch of training data according to current slice:
            # train_x_single_b, train_y_single_b = train_generator[i]

            # training
            # fetch_dict = {'accuracy': model.accuracy, 'loss': model.loss}  # no longer used
            if len(args.freeze_layers) > 0 and iterations >= args.freeze_starting:
                fetch_dict = {'train_step': model.train_step_freeze}
            elif len(args.opt2_layers) > 0:
                fetch_dict = {'train_step_1': model.train_step_1, 'train_step_2': model.train_step_2}
            else:
                fetch_dict = {'train_step': model.train_step}
            fetch_dict.update(model.update_dict())

            if iterations % args.log_every == 0:
                fetch_dict.update({'tb': tb_summaries})
            if args.save_training_grads:
                fetch_dict['gradients'] = model.grads_to_compute

            result_train = sess_run_dict(sess, fetch_dict, feed_dict={
                model.input_images: train_x[batch_indices],
                model.input_labels: train_y[batch_indices],
                model.input_lr: curr_lr,
                learning_phase(): 1,
                batchnorm_learning_phase(): 1})

            # log to tensorboard
            if tb_writer and iterations % args.log_every == 0:
                tb_writer.add_summary(result_train['tb'], iterations)

            if args.save_training_grads:
                dsets['training_grads'][iterations] = np.concatenate(
                    [grad.flatten() for grad in result_train['gradients']])

            iterations += 1

    # save final weight values
    if args.save_weights:
        dsets['all_weights'][chunks_written] = flatten_all(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

    # save final model checkpoint
    saver = tf.train.Saver()
    # portable path join instead of the hard-coded '\\' separator; assumes `os` is imported at module top
    saver.save(sess, os.path.join(args.output_dir, 'model'))

    # save final evals
    if iterations % args.eval_every == 0:
        # on entire train set
        # cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_y_shape, train_generator,
        cur_train_acc, cur_train_loss = eval_on_entire_dataset(sess, model, train_x, train_y,
            dim_sum, args.large_batch_size, 'eval_train', tb_writer, iterations)

        # on entire test/val set
        # cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, train_y_shape, val_generator,
        cur_test_acc, cur_test_loss = eval_on_entire_dataset(sess, model, test_x, test_y,
            dim_sum, args.test_batch_size, 'eval_test', tb_writer, iterations)

    # print last status update
    print(('{}: train acc = {:.4f}, test acc = {:.4f}, '
           + 'train loss = {:.4f}, test loss = {:.4f} ({:.2f} s)').format(
        iterations, cur_train_acc, cur_test_acc, cur_train_loss, cur_test_loss,
        time.time() - timerstart))
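
# NOTE: hypothetical usage sketch, not called anywhere. It only documents the argparse
# fields that train_and_eval actually reads; the field values below are placeholders,
# and the real defaults live in this repo's argument parser.
def _example_train_and_eval_call(sess, model, train_x, train_y, test_x, test_y, tb_writer, dsets):
    import argparse
    args = argparse.Namespace(
        lr=0.1, decay_schedule='30,60', num_epochs=90,          # optimization schedule
        train_batch_size=128, large_batch_size=1000, test_batch_size=1000,
        eval_every=1000, print_every=100, log_every=100,        # logging cadence (iterations)
        save_every=1000, save_weights=False, save_training_grads=False,
        no_shuffle=False, freeze_layers='', freeze_starting=0, opt2_layers='',
        output_dir='./results')
    train_and_eval(sess, model, train_x, train_y, test_x, test_y, tb_writer, dsets, args)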