def compute(learning_rate, momentum, epochs=50):
    """Train a two-layer network on a CIFAR-10 subset and score it on test data.

    The whole training set is pushed through the model as one full batch on
    every iteration; the model is optimised with SGD (with momentum) against
    a cross-entropy loss.

    Args:
        learning_rate: ``lr`` passed to ``torch.optim.SGD``.
        momentum: ``momentum`` passed to ``torch.optim.SGD``.
        epochs: Number of full-batch optimisation steps to run.

    Returns:
        A tuple ``(accuracy_percent, correct)`` where ``correct`` is the
        number of test samples whose arg-max prediction equals the label.
    """
    # Loading CIFAR data. The validation split is returned but not used here.
    X_train, y_train, X_val, y_val, X_test, y_test = du.get_CIFAR10_data(
        num_training=10000, num_validation=10, num_test=1000)

    # Network dimensions. The unpacking requires X_train to be 2-D.
    _, D_in = X_train.shape
    H, D_out = 50, 10

    # Converting NumPy arrays to torch tensors.
    X_train = torch.from_numpy(X_train).float()
    y_train = torch.LongTensor(y_train)

    # Wrapping with Variable.
    x = Variable(X_train)
    y = Variable(y_train, requires_grad=False)

    # Initializing TwoLayerNeuralNet.
    model = TwoLayerNeuralNet(D_in, H, D_out)

    # Loss function and optimizer.
    critirion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=momentum)

    # Per-iteration loss history; recorded for inspection, not returned.
    loss_dict = {}

    for i in range(epochs):
        y_pred = model(x)
        loss = critirion(y_pred, y)
        # The loss is a 0-dim tensor: ``loss.data[0]`` raises on
        # PyTorch >= 0.5, ``item()`` is the supported scalar accessor.
        loss_dict[i] = loss.item()
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Converting test NumPy arrays to torch tensors.
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()
    X_test = Variable(X_test)

    # torch.max over dim 1 returns (values, indices); the indices are the
    # predicted class ids.
    y_test_pred = model(X_test)
    _, y_test_pred = torch.max(y_test_pred.data, 1)

    # Predictions are a long tensor, so cast to float before comparing with
    # the float labels.
    total_labels = y_test.size(0)
    correct = (y_test_pred.float() == y_test).sum()

    # Accuracy in percent plus the raw count of correct predictions.
    accuracy = (100.0 * correct / total_labels, correct)

    return accuracy
Esempio n. 2
0
def main(debug=True):
    """Fit a ``CIFAR_Model`` on the CIFAR-10 training split, pinned to the CPU.

    ``debug`` is accepted but not read anywhere in this function.
    """
    # The test split is loaded with the others but is not used below.
    X_tr, Y_tr, X_val, Y_val, X_te, Y_te = get_CIFAR10_data()
    train_pair = (X_tr, Y_tr)
    val_pair = (X_val, Y_val)

    tf.reset_default_graph()
    model = CIFAR_Model()

    # Session first, then the device scope, exactly as the nested form would.
    with tf.Session() as sess, tf.device("/cpu:0"):
        sess.run(tf.global_variables_initializer())
        model.fit(sess, 5, 32, train_pair, val_pair, .6)

    print("")
Esempio n. 3
0
def main():
  """Build the CIFAR-10 graph, train it, and log summaries/checkpoints.

  Hyper-parameters are read from ``parser.parse_known_args()``. The graph
  applies random flips/brightness/contrast to the input only when the
  ``is_training`` placeholder is true. The loop runs ``max_steps`` mini-batch
  updates and, per step, writes batch accuracy and loss summaries. Every 10
  steps it evaluates a random validation batch and (after step 0) logs a
  confusion matrix and an accuracy histogram. A checkpoint is saved every
  5000 steps and on the last step.

  Returns:
    0 after the training loop finishes.

  Raises:
    AssertionError: if the total loss of a training batch is NaN.
  """
  args, remaining = parser.parse_known_args()

  lr         = args.learning_rate#INITIAL_LEARNING_RATE
  reg_weight = args.regularization_weight
  kp         = args.keep_prob
  max_steps  = args.max_steps
  decay_rate = args.decay_rate
  lr_decay_time = args.lr_decay_time
  batch_size = args.batch_size

  optimizer, opt_string = get_optimizer(args, remaining)
  print(opt_string)
  #CURRENTLY NOT Used
  print("Arguments = ", args)

  print("Loading Data;")

  # Here the loader result is used as a dict of arrays keyed by split name.
  data = get_CIFAR10_data()
  train_size = len(data['y_train'])
  for k, v in data.items():
      print('%s: '%(k), v.shape)

  #PLACEHOLDER VARIABLES
  keep_prob = tf.placeholder(dtype=tf.float32, shape=())
  # NOTE(review): this placeholder is never fed or referenced below; train()
  # receives the Python value `lr` instead.
  learning_rate = tf.placeholder(dtype=tf.float32, shape=())
  regularizer_weight = tf.placeholder(dtype=tf.float32, shape=())
  is_training = tf.placeholder(dtype=tf.bool, shape=())

  X_image = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
  y_label = tf.placeholder(dtype=tf.int64, shape=[None])

  # test = tf.equal(True, is_training)
  #only do distortions on training data
  # Each tf.cond rebinds X_image, so the four augmentations are chained and
  # the original placeholder object is no longer reachable under this name.
  # NOTE(review): the feed_dicts below still use X_image as a key, i.e. they
  # feed the output of the last tf.cond - confirm this is intended.
  X_image = tf.cond(is_training, lambda: tf.map_fn(lambda img: tf.image.random_flip_left_right(img), X_image), lambda: X_image)
  X_image = tf.cond(is_training, lambda: tf.map_fn(lambda img: tf.image.random_flip_up_down(img), X_image), lambda: X_image)
  X_image = tf.cond(is_training, lambda: tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=60), X_image), lambda: X_image)
  X_image = tf.cond(is_training, lambda: tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.2, upper=1.8), X_image), lambda: X_image)

  # X_image = tf.map_fn(lambda img: tf.image.random_flip_left_right(img), X_image)
  # X_image = tf.map_fn(lambda img: tf.image.random_flip_up_down(img), X_image)
  # X_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=60), X_image)
  # X_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.2, upper=1.8), X_image)
  # def image_distortions(image, distortions):
  #     distort_left_right_random = distortions[0]
  #     mirror = tf.less(tf.pack([1.0, distort_left_right_random, 1.0]), 0.5)
  #     image = tf.reverse(image, mirror)
  #     distort_up_down_random = distortions[1]
  #     mirror = tf.less(tf.pack([distort_up_down_random, 1.0, 1.0]), 0.5)
  #     image = tf.reverse(image, mirror)
  #     return image
  # distortions = tf.random_uniform([2], 0, 1.0, dtype=tf.float32)
  # image = image_distortions(image, distortions)
  # tf.image.flip_up_down(image)
  # tf.image.flip_left_right(image)
  # tf.image.transpose_image(image)
  # tf.image.rot90(image, k=1, name=None)
  # tf.image.adjust_brightness
  # tf.image.adjust_contrast(images, contrast_factor)
  # tf.image.per_image_standardization(image)

  #MODEL related operations and values
  global_step = tf.Variable(0, trainable=False)
  use_batchnorm = args.batch_norm
  b_norm_images  = tf.contrib.layers.batch_norm(inputs=X_image, center=True, scale=True, decay=0.99, data_format="NHWC", is_training=is_training, scope="input", updates_collections=None)
  # NOTE(review): tf.select was renamed tf.where in TensorFlow 1.0, and the
  # condition here is a plain Python bool - confirm against the TF version
  # this file targets.
  images = tf.select( use_batchnorm is True, b_norm_images, X_image)

  #MODEL construction
  logits, grad_image, grad_image_placeholder, last_layer = inference(images, keep_prob=keep_prob, regularizer_weight=regularizer_weight, is_training=is_training)
  prediction = predict(logits)
  loss_op = loss(logits, y_label)

  reg_loss = tf.reduce_sum(tf.get_collection(LOSSES_COLLECTION))
  total_loss = loss_op + reg_loss

  accuracy_op = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits,1), y_label), tf.float32))
  #print('decay_steps = ', lr_decay_time * (train_size // batch_size + 1))
  print("Number of batch steps till lr_decay = ", lr_decay_time * ((train_size //  batch_size) + 1))
  train_op = train(total_loss, global_step, learning_rate=lr, lr_rate_decay_factor=decay_rate, decay_steps=lr_decay_time * ((train_size //  batch_size) + 1))

  saver = tf.train.Saver(tf.global_variables())

  #Summary operation
  tf.summary.image('images', X_image)
  # merge_all only collects summaries that exist at this point; the scalar,
  # histogram and image summaries created below are run individually.
  summary_op = tf.summary.merge_all()

  # Training and validation accuracy share accuracy_op; which one is logged
  # depends on the data fed when the summary is run.
  acc_summary        = tf.summary.scalar('Training_accuracy_batch', accuracy_op)
  validation_acc_summary = tf.summary.scalar('Validation_accuracy', accuracy_op)
  cross_entropy_loss = tf.summary.scalar('loss_raw', loss_op)
  reg_loss_summary   = tf.summary.scalar('regularization_loss', reg_loss)
  # NOTE(review): total_loss_summary is created but never run or written below.
  total_loss_summary = tf.summary.scalar('total_loss', total_loss)

  # Placeholders fed with Python-side values (accuracy lists, train-minus-
  # validation accuracy) so they can be written as summaries.
  accuracy_batch = tf.placeholder(shape=(None), dtype=tf.float32)
  overfit_estimate = tf.placeholder(shape=(None), dtype=tf.float32)

  accuracy_100 = tf.reduce_mean(accuracy_batch)
  mean_summary = tf.summary.scalar('Training_accuracy_mean', accuracy_100)
  validation_mean_summary = tf.summary.scalar('Validation_accuracy_mean', accuracy_100)

  acc_summary_histogram = tf.summary.histogram('Training_accuracy_histogram', accuracy_batch)
  overfit_summary = tf.summary.scalar('overfit_estimate', overfit_estimate)

  #SESSION Construction
  init = tf.global_variables_initializer()

  config = tf.ConfigProto()
  # config.gpu_options.allow_growth = True
  # config.gpu_options.per_process_gpu_memory_fraction = 0.5
  config.log_device_placement=False

  sess = tf.Session(config=config)
  sess.run(init)
  # input_grad_image = np.zeros((1,32,32,16), dtype=np.float)
  # input_grad_image[0,15,15,:] = 1000.
  # back_image = sess.run(grad_image[0], feed_dict={X_image : 128 * np.ones((1,32,32,3)), regularizer_weight : 0., keep_prob : 1.0, grad_image_placeholder : input_grad_image})
  # print(back_image, np.max(back_image))
  # plt.figure()
  # max_value = np.max(back_image)
  # min_value = np.min(back_image)
  # print(back_image.shape)
  # plt.imshow(back_image[:,:,0], cmap=plt.get_cmap("seismic"), vmin=-1,
  #        vmax=1, interpolation="nearest")
  # plt.show()
  # sys.exit(0)


  #today = date.today()
  current_time = datetime.now()
  # LR_%f, INITIAL_LEARNING_RATE
  # REG_%f, DEFAULT_REG_WEIGHT
  # add details, relating per epoch results (and mean filtered loss etc.)
  # Output directory encodes the batch-norm flag, learning rate,
  # regularization weight, keep probability and the start time.
  train_dir = "cifar10_results/l1_layer/bn_" + str(int(use_batchnorm)) + "/" +"LR_" + str(lr) + "/" + "REG_" + str(reg_weight) + "/" + "KP_" + str(kp) + "/" + current_time.strftime("%B") + "_" + str(current_time.day) + "_" + str(current_time.year) + "-h" + str(current_time.hour) + "m" + str(current_time.minute)
  print("Writing summary data to :  ", train_dir)
  #probably should write parameters used to train the model to this directory
  #also pickle the named tuple
  # with open('train_dir' + '/model_parameters.txt', 'w') as outfile:
  #   #
  #should write the checkpoint files


  # Sliding windows (at most 100 entries) of recent batch accuracies.
  acc_list = []
  valid_acc_list = []

  # The confusion-matrix image is rendered outside the graph and fed in
  # through this placeholder.
  cm_placeholder = tf.placeholder(shape=(1, None, None, 4), dtype=tf.uint8)
  confusion_summary = tf.summary.image('confusion_matrix', cm_placeholder)
  # NOTE(review): layer_summary is defined but never run below.
  layer_output_placeholder = tf.placeholder(shape=(3,None,None,1), dtype=tf.uint8)
  layer_summary = tf.summary.image('layer_summary', layer_output_placeholder)
  print(last_layer.get_shape())
  summary_writer = tf.summary.FileWriter(train_dir, sess.graph)

  print("Starting Training.")
  print("Training for %d batches (of size %d); initial learning rate %f" % (max_steps, batch_size, lr))
  # tqdm_format_str = ('{0}: step {1:>5d}, loss = {2:2.3f}, accuracy = {3:>3.2f}, accuracy (val) = {4:>3.2f}')
  # current_step = 0
  # tqdm_loss = np.inf
  # tqdm_acc  = 0.
  # tqdm_val  = 0.
  # t = tqdm(range(max_steps), desc="Epoch %d, step %d, loss %2.2f, acc %2.2f, acc (val) %2.2f"%(epoch, current_step, tqdm_loss, tqdm_acc, tqdm_val), leave=True)
  #t = trange(max_steps, desc="Epoch %d, step %d, loss %2.2f, acc %2.2f, acc (val) %2.2f"%(epoch, current_step, tqdm_loss, tqdm_acc, tqdm_val), leave=True)
  for step in range(max_steps):
    # current_step = step
    # t.set_description(desc="Epoch %d, step %d, loss %2.2f, acc %2.2f, acc (val) %2.2f"%(epoch, current_step, tqdm_loss, tqdm_acc, tqdm_val))
    # t.refresh()
    num_train = data['X_train'].shape[0]
    # An epoch boundary is crossed when the number of whole passes over the
    # training set (samples seen // num_train) grows between step-1 and step.
    if batch_size * (step - 1) // num_train < batch_size * (step) // num_train and step > 0:
      print("Completed Epoch: %d (step=%d, max_steps=%d, percentage complete= %f)" % ((batch_size * (step) // num_train ), step, max_steps, step/max_steps * 100))
      # `epoch` is only referenced by the commented-out tqdm code above.
      epoch = (batch_size * (step) // num_train )

    # Random mini-batch; np.random.choice samples with replacement by default.
    batch_mask = np.random.choice(num_train, batch_size)
    X_batch = data['X_train'][batch_mask]
    y_batch = data['y_train'][batch_mask]
    start_time = time.time()
    feed_dict = { X_image : X_batch, y_label : y_batch, keep_prob : kp, regularizer_weight : reg_weight, is_training : True }

    # Metrics are computed in one run and the parameter update in a second
    # run on the same feed, so the logged values precede this step's update.
    loss_value, accuracy, acc_str, xentropy_str, reg_loss_str, predicted_class = sess.run([total_loss, accuracy_op, acc_summary, cross_entropy_loss, reg_loss_summary, prediction], feed_dict=feed_dict)
    #print(sess.run(prediction, feed_dict=feed_dict))
    # tqdm_loss = loss_value
    # tqdm_acc = accuracy
    sess.run(train_op, feed_dict=feed_dict)

    # if step > 0 and step % 50 == 0:
    #   last_layer_out, logits_out = sess.run([last_layer, logits], feed_dict=feed_dict)
    #   #print(logits[0])
    #   logits_out = np.exp(logits_out) / np.sum(np.exp(logits_out), axis=0)

    #   #print(layer_out.shape, layer_out.mean())
    #   sliced_layer = last_layer_out[0:1,:,:,0:9]
    #   sliced_layer = np.transpose(sliced_layer, (3,1,2,0))
    #   #print(sliced_layer.shape)
    #   split_layer = np.vsplit(sliced_layer, sliced_layer.shape[0])
    #   squeezed_ = [np.squeeze(x, axis=(0,3)) for x in split_layer]
    #   vstacked = np.vstack(squeezed_)
    #   #print(vstacked.shape)
    #   plt.figure()
    #   plt.subplot(211)
    #   plt.imshow(vstacked, vmin=0, vmax=np.max(vstacked), cmap=plt.cm.Blues)
    #   plt.grid(b='off')
    #   plt.subplot(212)
    #   bar_width = 0.1
    #   print(logits_out.shape)
    #   index = np.arange(len(logits_out[0]))
    #   colors = ['blue' for x in logits_out[0]]
    #   colors[np.argmax(logits_out[0])] = 'green'
    #   sns.barplot(classes, logits_out[0], palette=colors)
    #   plt.grid(b='off')
    #   plt.show()


    # Keep only the 100 most recent batch accuracies for the running mean.
    acc_list.append(accuracy)
    acc_list = acc_list[-100:]
    accuracy_100_str = sess.run(mean_summary, feed_dict={accuracy_batch : np.array(acc_list)})
    #print(sess.run([accuracy_100], feed_dict={accuracy_batch : np.array(acc_list[-100:])}))
    summary_writer.add_summary(acc_str, step)
    summary_writer.add_summary(xentropy_str, step)
    summary_writer.add_summary(reg_loss_str, step)
    summary_writer.add_summary(accuracy_100_str, step)
    #image = sess.run(grad_image, feed_dict=feed_dict)
    #summary_writer.add_summary('Training_accuracy (Mean)', np.mean(acc_list[-100:]), step)
    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
    if step % 100 == 0:
      summary_str = sess.run(summary_op, feed_dict=feed_dict)
      summary_writer.add_summary(summary_str, step)
      # plt.figure()
      # plt.imshow(image[0])
      # plt.grid(b=False)
      # plt.show()
    if step % 10 == 0:
      #print("max = %f; mean = %f" %(np.max(image), np.mean(image)))
      if step > 0:
        #print('creating image;')
        # Confusion matrix of the current training batch, fed as an image.
        confusion_img = plot_confusion_matrix(confusion_matrix(y_batch, predicted_class),
                                              title='Confusion matrix',
                                              cmap=plt.cm.Blues,
                                              labels=classes)
        # print(img.get_shape())
        # print(img.dtype)

        summary_writer.add_summary(confusion_summary.eval(session=sess, feed_dict={cm_placeholder: confusion_img}), step)
        del confusion_img

        acc_summary_histogram_out = sess.run(acc_summary_histogram, feed_dict={accuracy_batch : np.array(acc_list[-100:])})
        summary_writer.add_summary(acc_summary_histogram_out, step)
        #print('done adding summary')
      # Validation uses a random batch of the same size as the training batch,
      # with dropout and regularization switched off via the fed values.
      num_valid = data['X_val'].shape[0]
      batch_valid_mask = np.random.choice(num_valid, batch_size)
      X_val_batch = data['X_val'][batch_valid_mask]
      y_val_batch = data['y_val'][batch_valid_mask]
      valid_dict = { X_image : X_val_batch, y_label : y_val_batch, keep_prob : 1.0, regularizer_weight : 0.00, is_training : False}
      format_str = ('{0}: step {1:>5d}, loss = {2:2.3f}, accuracy = {3:>3.2f}, accuracy (val) = {4:>3.2f}, loss = {5:2.3f}')
      valid_summary, valid_acc, valid_loss = sess.run([validation_acc_summary, accuracy_op, loss_op], feed_dict=valid_dict)
      valid_acc_list.append(valid_acc)
      #tqdm_val = valid_acc

      valid_acc_list = valid_acc_list[-100:]
      # Probably should change the slice size to be smaller (10 instead of 100)
      valid_accuracy_100_str = sess.run(validation_mean_summary, feed_dict={accuracy_batch : np.array(valid_acc_list)})
      print(format_str.format(datetime.now(), step, loss_value, 100*accuracy, 100*valid_acc, valid_loss))
      # Overfit estimate = training batch accuracy minus validation accuracy.
      overfit_summary_str = sess.run(overfit_summary, feed_dict = {overfit_estimate : accuracy - valid_acc})
      summary_writer.add_summary(overfit_summary_str, step)
      summary_writer.add_summary(valid_summary, step)
      summary_writer.add_summary(valid_accuracy_100_str, step)

    # Checkpoint every 5000 steps (not at step 0) and on the final step.
    if (step % 5000 == 0 and step > 0) or (step + 1) == max_steps:
      checkpoint_path = os.path.join(train_dir, current_time.strftime("%B") + "_" + str(current_time.day) + "_" + str(current_time.year) + "-h" + str(current_time.hour) + "m" + str(current_time.minute) + 'model.ckpt')
      print("Checkpoint path = ", checkpoint_path)
      saver.save(sess, checkpoint_path, global_step=step, write_meta_graph=True)

  return 0
Esempio n. 4
0
# Figure 1: training loss per iteration.
plt.figure(1)
plt.plot(stats['loss_history'])
plt.title('Training Loss history')
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.show()

# Load the data
# With a two-layer network that passes gradient checks and works on toy
# data, the next step is to load CIFAR-10 so a classifier can be trained on
# a real dataset. get_CIFAR10_data returns the six train/val/test arrays.

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for split_name, features, labels in (('Train', X_train, y_train),
                                     ('Validation', X_val, y_val),
                                     ('Test', X_test, y_test)):
    print('%s data shape: ' % split_name, features.shape)
    print('%s labels shape: ' % split_name, labels.shape)

# Figure 2: a grid of the first 100 training images to get a feel for the data.
plt.figure(2)
first_images = X_train[:100, :].reshape(100, 32, 32, 3)
image_grid = visualize_grid(first_images, padding=3)
plt.imshow(image_grid.astype('uint8'))
plt.gca().axis('off')
plt.show()
Esempio n. 5
0
import matplotlib.pyplot as plt
from cnn import *
from data_utils import get_CIFAR10_data
from solver import Solver

# Train a three-layer conv net on CIFAR-10 with SGD + momentum.
data = get_CIFAR10_data()
model = ThreeLayerConvNet(reg=0.9)
solver = Solver(
    model,
    data,
    lr_decay=0.95,
    print_every=10,
    num_epochs=5,
    batch_size=2,
    update_rule='sgd_momentum',
    optim_config=dict(learning_rate=5e-4, momentum=0.9),
)

solver.train()

# Top panel: loss at every iteration.
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.title('Training loss')
plt.xlabel('Iteration')

# Bottom panel: train/val accuracy histories plus a dashed 0.5 reference line.
plt.subplot(2, 1, 2)
plt.title('Accuracy')
for history, tag in ((solver.train_acc_history, 'train'),
                     (solver.val_acc_history, 'val')):
    plt.plot(history, '-o', label=tag)
plt.plot([0.5] * len(solver.val_acc_history), 'k--')
Esempio n. 6
0
    test_image = np.array(ndimage.imread(buf))
    plt.close()
    return test_image[np.newaxis, :]


#CUDA_VISIBLE_DEVICES=0,1
#CUDA_VISIBLE_DEVICES=0
#CUDA_VISIBLE_DEVICES=1
#CUDA_VISIBLE_DEVICES=""

config = tf.ConfigProto(device_count={'GPU': 0})

with tf.device('/cpu:0'):
    sess = tf.Session(config=config)
    data = get_CIFAR10_data(num_training=49000,
                            num_validation=1000,
                            num_test=5000)
    print(len(data['y_test']))

    X_image = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    y_label = tf.placeholder(dtype=tf.int64, shape=[None])

    logits, grad_image, grad_image_placeholder = inference(X_image)
    top_k_op = tf.nn.in_top_k(predictions=logits, targets=y_label, k=1)

    ckpt = tf.train.get_checkpoint_state(
        './cifar10_results/LR_0.03/REG_0.11/KP_0.9/January_14_2017-h17m55/')
    saver = tf.train.Saver()
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        array = sess.run(top_k_op,
Esempio n. 7
0
import data_utils as du
import matplotlib.pyplot as plt

# Load CIFAR-10 and report the training array's shape and its first dimension.
dataDict = du.get_CIFAR10_data()
xTrain = dataDict["X_train"]
train_shape = xTrain.shape
print(train_shape)
print(train_shape[0])
Esempio n. 8
0
import tensorflow as tf
import numpy as np
import math
import data_utils
import cv2
import matplotlib.pyplot as plt

# Load the CIFAR-10 dataset. If you use this function, you should specify
# the directory of your CIFAR-10 dataset.
data = data_utils.get_CIFAR10_data()
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']
print(X_train.shape)


def neural_net_image_input(image_shape):
    """
    Create the placeholder that receives a batch of input images.
    : image_shape: Shape of one image; its first three entries become the
                   placeholder's trailing dimensions.
    : return: float32 placeholder named 'x' whose leading dimension is None.
    """
    dim0, dim1, dim2 = image_shape[0], image_shape[1], image_shape[2]
    batch_shape = [None, dim0, dim1, dim2]
    return tf.placeholder(tf.float32, batch_shape, name='x')


def batch_norm(x_tensor, name=None):
    mean, variance = tf.nn.moments(x_tensor, axes=[0])
Esempio n. 9
0
def get_data(path='.//cifar-10-batches-py//'
             ):  # cifar10_dir = './/cifar-10-batches-py//'
    """Load CIFAR-10 through ``data_utils.get_CIFAR10_data``.

    path: forwarded unchanged as the ``cifar10_dir`` keyword argument.
    Returns whatever ``data_utils.get_CIFAR10_data`` returns.
    """
    return data_utils.get_CIFAR10_data(cifar10_dir=path)
Esempio n. 10
0
def main():
  """Train the CIFAR-10 classifier and log progress for TensorBoard.

  Builds the graph (placeholders, ``inference``/``loss``/``train`` ops and
  summaries), then runs ``MAX_STEPS`` mini-batch updates. Every step logs the
  batch accuracy and losses. Every 10 steps the full validation split is
  evaluated and (after step 0) a confusion matrix and an accuracy histogram
  are logged. A checkpoint is saved every 500 steps and on the final step.

  Returns:
    0 after the training loop finishes.

  Raises:
    AssertionError: if the total loss of a training batch is NaN.
  """
  print("Loading Data;")

  data = get_CIFAR10_data()
  for k, v in data.items():
      print('%s: '%(k), v.shape)

  #PLACEHOLDER VARIABLES
  keep_prob = tf.placeholder(dtype=tf.float32, shape=())
  learning_rate = tf.placeholder(dtype=tf.float32, shape=())
  regularizer_weight = tf.placeholder(dtype=tf.float32, shape=())
  #Not used --- ^ (currently)

  X_image = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
  y_label = tf.placeholder(dtype=tf.int64, shape=[None])

  #MODEL related operations and values
  global_step = tf.Variable(0, trainable=False)
  #MODEL construction
  logits = inference(X_image, keep_prob=keep_prob, regularizer_weight=regularizer_weight)
  prediction = predict(logits)
  loss_op = loss(logits, y_label)

  reg_loss = tf.reduce_sum(tf.get_collection(LOSSES_COLLECTION))
  total_loss = loss_op + reg_loss

  accuracy_op = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits,1), y_label), tf.float32))
  # NOTE(review): the optimizer is given loss_op, not total_loss, so reg_loss
  # is only reported here - confirm that excluding it from training is intended.
  train_op = train(loss_op, global_step, learning_rate=INITIAL_LEARNING_RATE)
  saver = tf.train.Saver(tf.all_variables())

  #Summary operation
  tf.image_summary('images', X_image)
  # merge_all only collects the summaries that exist at this point; every
  # summary created below is run individually.
  summary_op = tf.merge_all_summaries()

  # Training and validation accuracy share accuracy_op; which one is logged
  # depends on the data fed when the summary is run.
  acc_summary        = tf.scalar_summary('Training_accuracy (Batch)', accuracy_op)
  validation_acc_summary = tf.scalar_summary('Validation_accuracy', accuracy_op)
  cross_entropy_loss = tf.scalar_summary('loss_raw', loss_op)
  reg_loss_summary   = tf.scalar_summary('regularization_loss', reg_loss)
  total_loss_summary = tf.scalar_summary('total_loss', total_loss)

  accuracy_batch = tf.placeholder(shape=(None), dtype=tf.float32)
  accuracy_100 = tf.reduce_mean(accuracy_batch)
  mean_summary = tf.scalar_summary('Training_accuracy (Mean)', accuracy_100)
  validation_mean_summary = tf.scalar_summary('Validation_accuracy (Mean)', accuracy_100)

  acc_histogram_summary = tf.histogram_summary('Training_accuracy (Histogram)', accuracy_batch)

  # The confusion-matrix summary is built ONCE and fed the PNG bytes per step.
  # Creating decode_png/expand_dims/image_summary ops inside the training loop
  # would add new nodes to the graph on every call, so it would grow without
  # bound. It is defined after merge_all_summaries() on purpose, so that
  # summary_op does not depend on this placeholder.
  confusion_png = tf.placeholder(dtype=tf.string, shape=())
  confusion_img = tf.expand_dims(tf.image.decode_png(confusion_png, channels=4), 0)
  confusion_summary = tf.image_summary('confusion_matrix', confusion_img)

  #SESSION Construction
  init = tf.initialize_all_variables()
  sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=False))
  sess.run(init)

  current_time = datetime.now()
  # Output directory encodes the learning rate, regularization weight and
  # the start time of this run.
  train_dir = "cifar10_results/LR_" + str(INITIAL_LEARNING_RATE) + "/" + "REG_" + str(DEFAULT_REG_WEIGHT) + "/" + current_time.strftime("%B") + "_" + str(current_time.day) + "_" + str(current_time.year) + "-h" + str(current_time.hour) + "m" + str(current_time.minute)
  print("Writing summary data to :  ",train_dir)
  summary_writer = tf.train.SummaryWriter(train_dir, sess.graph)

  acc_list = []
  valid_acc_list = []

  # Loop-invariant: size of the training split.
  num_train = data['X_train'].shape[0]

  print("Starting Training.")
  print("Training for %d batches (of size %d); initial learning rate %f" % (MAX_STEPS, BATCH_SIZE, INITIAL_LEARNING_RATE))
  for step in range(MAX_STEPS):
    # An epoch boundary is crossed when the number of whole passes over the
    # training set (samples seen // num_train) grows between step-1 and step.
    if BATCH_SIZE * (step - 1) // num_train < BATCH_SIZE * (step) // num_train and step > 0:
      print("Completed Epoch: %d (step=%d, MAX_STEPS=%d, percentage complete= %f)" % ((BATCH_SIZE * (step) // num_train ), step, MAX_STEPS, step/MAX_STEPS * 100))

    # Random mini-batch; np.random.choice samples with replacement by default.
    batch_mask = np.random.choice(num_train, BATCH_SIZE)
    X_batch = data['X_train'][batch_mask]
    y_batch = data['y_train'][batch_mask]
    feed_dict = { X_image : X_batch, y_label : y_batch, keep_prob : 0.8, regularizer_weight : 0.01 }

    # Metrics are computed in one run and the parameter update in a second
    # run on the same feed, so the logged values precede this step's update.
    loss_value, accuracy, acc_str, xentropy_str, reg_loss_str, predicted_class = sess.run([total_loss, accuracy_op, acc_summary, cross_entropy_loss, reg_loss_summary, prediction], feed_dict=feed_dict)
    sess.run(train_op, feed_dict=feed_dict)

    # Running mean over the 100 most recent batch accuracies.
    acc_list.append(accuracy)
    accuracy_100_str = sess.run(mean_summary, feed_dict={accuracy_batch : np.array(acc_list[-100:])})
    summary_writer.add_summary(acc_str, step)
    summary_writer.add_summary(xentropy_str, step)
    summary_writer.add_summary(reg_loss_str, step)
    summary_writer.add_summary(accuracy_100_str, step)
    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
    if step % 100 == 0:
      summary_str = sess.run(summary_op, feed_dict=feed_dict)
      summary_writer.add_summary(summary_str, step)
    if step % 10 == 0:
      if step > 0:
        # Confusion matrix of the current training batch, rendered to a PNG
        # buffer and fed into the pre-built image summary.
        confusion_buf = plot_confusion_matrix(confusion_matrix(y_batch, predicted_class),
                                              title='Confusion matrix',
                                              cmap=plt.cm.Blues,
                                              labels=classes)
        confusion_str = sess.run(confusion_summary, feed_dict={confusion_png : confusion_buf.getvalue()})
        summary_writer.add_summary(confusion_str, step)
        plt.close()

        histogram_summary_out = sess.run(acc_histogram_summary, feed_dict={accuracy_batch : np.array(acc_list[-100:])})
        summary_writer.add_summary(histogram_summary_out, step)
      # Validation runs on the whole validation split, with dropout and
      # regularization switched off via the fed values.
      X_val_batch = data['X_val']
      y_val_batch = data['y_val']
      valid_dict = { X_image : X_val_batch, y_label : y_val_batch, keep_prob : 1.0, regularizer_weight : 0.00}
      format_str = ('{0}: step {1:>5d}, loss = {2:2.3f}, accuracy = {3:>3.2f}, accuracy (validation) = {4:>3.2f}')
      valid_summary, valid_acc = sess.run([validation_acc_summary, accuracy_op], feed_dict=valid_dict)
      valid_acc_list.append(valid_acc)
      # Validation mean is taken over the 10 most recent evaluations.
      valid_accuracy_100_str = sess.run(validation_mean_summary, feed_dict={accuracy_batch : np.array(valid_acc_list[-10:])})
      print(format_str.format(datetime.now(), step, loss_value, accuracy*100, 100*valid_acc))
      summary_writer.add_summary(valid_summary, step)
      summary_writer.add_summary(valid_accuracy_100_str, step)

    # Checkpoint every 500 steps (not at step 0) and on the final step.
    if (step % 500 == 0 and step > 0) or (step + 1) == MAX_STEPS:
      checkpoint_path = os.path.join(train_dir, current_time.strftime("%B") + "_" + str(current_time.day) + "_" + str(current_time.year) + "-h" + str(current_time.hour) + "m" + str(current_time.minute) + 'model.ckpt')
      print("Checkpoint path = ", checkpoint_path)
      saver.save(sess, checkpoint_path, global_step=step)

  return 0
Esempio n. 11
0
              .format(total_loss, total_correct, e+1))

    if plot_losses:
        plt.plot(losses)
        plt.grid(True)
        plt.title('training losses')
        plt.xlabel('iteration number')
        plt.ylabel('loss')
        plt.show()

    return total_loss, total_correct


if __name__ == '__main__':
    def _to_gray(images):
        # One cv2 colour conversion per image, stacked with a trailing
        # single-channel axis.
        converted = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in images]
        return np.array(converted).reshape([-1, 32, 32, 1])

    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(
        num_training=49000, num_validation=1000, num_test=1000)

    # Grayscale copies must be taken before the colour arrays are
    # mean-centred below, because those names are rebound.
    X_train_gray = _to_gray(X_train)
    X_val_gray = _to_gray(X_val)
    X_test_gray = _to_gray(X_test)

    # Centre the colour splits on the training-set mean image.
    mean_img = np.mean(X_train, axis=0).astype(np.float32)
    X_train, X_val, X_test = (
        split.astype(np.float32) - mean_img
        for split in (X_train, X_val, X_test)
    )

    # Same centring for the grayscale splits, using the grayscale training mean.
    mean_img_gray = np.mean(X_train_gray, axis=0).astype(np.float32)
    X_train_gray, X_val_gray, X_test_gray = (
        split.astype(np.float32) - mean_img_gray
        for split in (X_train_gray, X_val_gray, X_test_gray)
    )
Esempio n. 12
0
File: main.py Progetto: Haunis/ws_py
"""
Derivation of the softmax backpropagation gradient:
    https://blog.csdn.net/abc13526222160/article/details/84968161
"""

import matplotlib.pyplot as plt
from fc_net import *
from data_utils import get_CIFAR10_data
from solver import Solver

# Load a small CIFAR-10 subset from a single training batch file; the
# commented-out arguments are an alternative, larger configuration.
data = get_CIFAR10_data(
    # num_training=30000,
    # num_validation=5000,
    # num_test=5000,
    # train_files=['./data/data_batch_1', './data/data_batch_2', './data/data_batch_3', './data/data_batch_4'],
    # test_file='data/test_batch'
    num_training=500,  # number of training samples
    num_validation=50,  # number of validation samples taken from the training set
    num_test=50,  # number of test samples
    train_files=['./data/data_batch_1'],  # training data file(s)
    test_file='data/test_batch'  # test data file
)
model = TwoLayerNet(reg=0.9)  # regularization penalty strength 0.9

solver = Solver(
    model,
    data,
    # lr_decay=0.95,
    # print_every=100,
    # num_epochs=40,
    # batch_size=400,
    # update_rule='sgd_momentum',
Esempio n. 13
0
    return X, y.astype(int)


if __name__ == '__main__':
    # Builds a small convolutional net and a loss closure `f` over its
    # first-layer weights.
    # NOTE(review): input_size, hidden_size and num_input are only referenced
    # by the disabled code below; the ConvolutionalNet call passes its own
    # literal input_size.
    input_size = 4
    hidden_size = 10
    num_classes = 10
    num_input = 5

    # net = init_toy_model(input_size, hidden_size, num_classes)
    # net = FullConnectedNet(input_size, [100], num_classes)
    net = ConvolutionalNet(input_size=(32, 32, 3),
                           layer_size=[(5, 3, 3)],
                           output_size=num_classes)
    # X, y = init_toy_data(num_input, num_classes)
    # Only the first two returned arrays are kept.
    # NOTE(review): presumably (5, 5, 5) are the train/validation/test sample
    # counts - confirm against get_CIFAR10_data's signature.
    X, y, _, _, _, _ = get_CIFAR10_data(5, 5, 5)
    # The bare string below is disabled code kept as a string literal; it is
    # evaluated as an expression and discarded.
    '''matrix_y = np.zeros([num_input, num_classes])
    matrix_y[range(num_input), y] = 1
    net.forward(X, matrix_y)
    net.backward(1)
    grad = net.layers[-3].dW
    W = net.layers[-3].W
    f = lambda w: bn_test_net(net, w, X, matrix_y)
    gradient_check_sparse(f, W, grad)'''
    def f(W):
        # Loss as a function of the first layer's weights: install W, build
        # one-hot labels from y, run a forward pass and return net.loss.
        net.layers[0].W = W
        num_data = X.shape[0]
        label_mat = np.zeros([num_data, num_classes])
        label_mat[range(num_data), y] = 1
        net.forward(X, label_mat)
        return net.loss
Esempio n. 14
0
import numpy as np
import utils as ut
import nn
import layers
import loss as ls
import optim
import data_utils as dutil

# Global variables
# Shared fixtures for the tests below: the CIFAR-10 splits (including a small
# dev split), the feature count n, the class count c, and the encoded dev labels.
(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 X_dev, y_dev) = dutil.get_CIFAR10_data()
c = 10
n = X_train.shape[1]
Y_dev_enc = ut.encode_labels(y_dev)


def test_CrossEntropyLoss():
    """Check the cross-entropy loss of a near-zero linear layer on the dev set.

    With weights and biases scaled by 1e-4, the loss of a single linear layer
    is asserted to be within 1e-2 of -log(0.1).
    """
    np.random.seed(1)
    scale = 0.0001
    # Draw order matters for reproducibility: weights first, then biases.
    weights = np.random.randn(c, n) * scale
    bias = np.random.randn(c, 1) * scale
    linear = layers.Linear(n, c, init_vals=(weights.T, bias.ravel()))
    net = nn.Network([linear], ls.CrossEntropy(), optimizer=None)
    my_loss = net.loss(X_dev, Y_dev_enc)
    expected = -np.log(.1)
    assert np.isclose(my_loss, expected, atol=1e-2)


def test_CrossEntropy_Linear_Grad():
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001