def train_classifiers():
    genre_mapper = helper.get_genre_mapper()
    dataset_size = 100
    test_size = 20
    dataloader = Loader(['rock', 'classical', 'jazz', 'blues', 'disco', 'country', 'pop', 'metal'],
                        '30new.csv', '/media/files/musicsamples/genres')
    #dataloader = Loader(['rock', 'classical', 'jazz', 'blues', 'disco', 'country', 'pop', 'metal'],
    #                    '_mfcc_scaled.csv', '/media/files/musicsamples/genres')
    datasets = dataloader.get_dataset()
    dv_pairs = []
    for v in datasets.values():
        dv_pairs.append(helper.combine_to_data_value_pair(v[0], v[1]))
    training_set, test_set = create_sets(dataset_size - test_size, dv_pairs)
    training_set = map_to_numeric_dataset(training_set, genre_mapper)
    test_set = map_to_numeric_dataset(test_set, genre_mapper)
    N = len(genre_mapper)
    support_vector_machine = svm.SVC(C=10**5)
    kmeans = KMeans(n_clusters=N)
    bayes = GaussianNB()
    classifiers = [support_vector_machine, kmeans, bayes]
    training_data, training_values = helper.separate_input_and_check_values(training_set)
    for c in classifiers:
        fit_classifier(c, training_data, training_values)
        plot_confusion_matrix(c, test_set)
    save_classifiers(classifiers)
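# train_classifiers() relies on create_sets and map_to_numeric_dataset, which
# are defined elsewhere in this repo. The versions below are a minimal sketch
# of their assumed behavior, shown only for illustration - the signatures and
# the per-genre split logic are assumptions, not the project's code.
import random

def create_sets(train_size, dv_pairs):
    # Shuffle each genre's (features, label) pairs; the first train_size go
    # to the training set, the remainder to the test set.
    training_set, test_set = [], []
    for pairs in dv_pairs:
        shuffled = random.sample(pairs, len(pairs))
        training_set.extend(shuffled[:train_size])
        test_set.extend(shuffled[train_size:])
    return training_set, test_set

def map_to_numeric_dataset(dataset, genre_mapper):
    # Replace each string genre label with its integer id from genre_mapper.
    return [(features, genre_mapper[genre]) for features, genre in dataset]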
def test():
    # dataloader for test-accuracy computation - the dataloader for the training data is embedded in the model
    loader = Loader(FLAGS.batch_size, FLAGS.dataset)
    test_iterator = loader.get_dataset(train=False).get_next()

    # load model
    model = Model(FLAGS.batch_size)
    logits = model.logits
    labels = model.y_placeholder

    ## create testing op - add fake nodes to simulate quantization in inference
    # NOTE: keep the lines below commented out until quantization support is enabled for training
    #print('Quantization bits: %d delay: %d ' % (FLAGS.quant_bits, FLAGS.quant_delay))
    #tf.contrib.quantize.experimental_create_eval_graph(weight_bits=FLAGS.quant_bits, activation_bits=FLAGS.quant_bits)

    outputs = tf.nn.softmax(logits)
    test_op = tf.nn.in_top_k(outputs, labels, 1)
    acc_op = tf.reduce_mean(tf.cast(test_op, tf.float32))

    # set a saver for checkpointing
    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
        # set up the logfile for this testing session
        test_writer = tf.summary.FileWriter(logdir="./" + FLAGS.logdir, graph=sess.graph)

        assert tf.gfile.Exists(FLAGS.chpk_dir), \
            'Checkpoint directory doesn\'t contain a trained model/checkpoint ...'
        saver.restore(sess, tf.train.latest_checkpoint("./" + FLAGS.chpk_dir))

        num_batch_per_epoch_test = math.ceil(loader.num_testing_examples / FLAGS.batch_size)
        counter = 0
        true_count = 0
        while counter < num_batch_per_epoch_test:
            counter += 1
            # a batch of testing
            test_x, test_y = sess.run(test_iterator)
            test_equality, batch_accuracy = sess.run(
                [test_op, acc_op],
                feed_dict={model.x_placeholder: test_x,
                           model.y_placeholder: test_y,
                           model.training: False})
            true_count += np.sum(test_equality)
            # add batch accuracy to the summary
            batch_accuracy_summary = tf.Summary()
            batch_accuracy_summary.value.add(tag='Batch Accuracy', simple_value=batch_accuracy)
            test_writer.add_summary(batch_accuracy_summary, global_step=counter)

        test_accuracy = true_count / (FLAGS.batch_size * num_batch_per_epoch_test)
        print('Testing accuracy %.4f' % test_accuracy)
        # add test accuracy to the summary
        accuracy_summary = tf.Summary()
        accuracy_summary.value.add(tag='Testing Accuracy', simple_value=test_accuracy)
        test_writer.add_summary(accuracy_summary, global_step=counter)
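# test() reads its configuration from a TF 1.x flags object defined elsewhere
# in the repo. The block below is an assumed minimal set of those definitions,
# shown only so the function above is self-explanatory - flag names match the
# usages above, but the default values are illustrative, not the project's.
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 128, 'examples per test batch')
tf.app.flags.DEFINE_string('dataset', 'cifar100', 'dataset name passed to Loader')
tf.app.flags.DEFINE_string('logdir', 'logs', 'directory for TensorBoard summaries')
tf.app.flags.DEFINE_string('chpk_dir', 'checkpoints', 'directory holding the trained checkpoint')
tf.app.flags.DEFINE_integer('quant_bits', 8, 'bit width for fake-quantization nodes')
tf.app.flags.DEFINE_integer('quant_delay', 0, 'steps before quantization takes effect')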
def test(self, restore=True):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        if restore:
            saver.restore(sess, tf.train.latest_checkpoint('./saves'))
        else:
            sess.run(tf.global_variables_initializer())

        train_loader = Loader(batch_size=64)
        val_x, val_y = sess.run(train_loader.get_dataset(train=False).get_next())

        with open('fine_label_names.txt', 'r') as fp:
            lines = fp.readlines()
        lines = [line.rstrip('\n') for line in lines]

        acc, outputs = sess.run(
            [self.accuracy, self.outputs],
            feed_dict={
                self.x_placeholder: val_x,
                self.y_placeholder: val_y,
                self.training: False
            })

        for img in [
                0, 1, 2, 3, 4, 5, 12, 13, 21, 23, 24, 25, 26, 28, 29, 30, 31,
                32, 34, 37, 39, 40, 41, 42, 44, 45, 48, 50, 51, 52, 53, 54,
                55, 56, 57, 58, 61, 62
        ]:
            # top-5 predicted classes, most confident first
            choices = np.flip(np.argsort(outputs[img], axis=-1), axis=0)[0:5]
            names = [lines[x] for x in choices]
            fig = plt.figure(1)
            fig.add_subplot(121)
            # undo the loader's mean-subtraction and scaling before display
            image = (val_x[img] * 255) + 121.936059453125
            image = image.astype(np.uint8)
            plt.imshow(image)
            fig.add_subplot(122)
            y = outputs[img][choices]
            x = [0, 1, 2, 3, 4]
            plt.yticks(np.arange(5), names)
            plt.barh(x, y)
            plt.show()
            print(img)
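# The np.flip(np.argsort(...)) idiom above selects the five highest-probability
# classes. An equivalent, arguably clearer spelling in pure numpy (standalone
# toy values, no project code assumed):
import numpy as np

probs = np.array([0.05, 0.60, 0.10, 0.02, 0.15, 0.08])
top5 = np.argsort(probs)[::-1][:5]  # indices sorted by descending probability
print(top5)                         # -> [1 4 2 5 0]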
def __init__(self):
    loader = Loader()
    iterator = loader.get_dataset()

    def build_model():
        with tf.device("/device:GPU:0"):
            x_loaded, y_loaded = iterator.get_next()
            x = tf.placeholder_with_default(x_loaded, (None, 32, 32, 3), name="x_placeholder")
            y = tf.placeholder_with_default(y_loaded, (None,), name="y_placeholder")
            training = tf.placeholder_with_default(True, name="training_bool", shape=())

            # Layer 1 - 64 channels
            conv1 = tf.layers.conv2d(
                x, filters=64, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_1 = tf.contrib.layers.batch_norm(conv1, activation_fn=tf.nn.relu, is_training=training)

            # Layer 2 - 64 channels
            conv2 = tf.layers.conv2d(
                bn_1, filters=64, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_2 = tf.contrib.layers.batch_norm(conv2, activation_fn=tf.nn.relu, is_training=training)
            pool2 = tf.layers.max_pooling2d(bn_2, (2, 2), (2, 2), padding='SAME')
            dropout_2 = tf.layers.dropout(pool2, training=training, rate=0.5)

            # Layer 3 - 128 channels
            conv3 = tf.layers.conv2d(
                dropout_2, filters=128, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_3 = tf.contrib.layers.batch_norm(conv3, activation_fn=tf.nn.relu, is_training=training)

            # Layer 4 - 128 channels
            conv4 = tf.layers.conv2d(
                bn_3, filters=128, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_4 = tf.contrib.layers.batch_norm(conv4, activation_fn=tf.nn.relu, is_training=training)
            pool4 = tf.layers.max_pooling2d(bn_4, (2, 2), (2, 2), padding='SAME')
            dropout_4 = tf.layers.dropout(pool4, training=training, rate=0.5)

            # Layer 5 - 256 channels
            conv5 = tf.layers.conv2d(
                dropout_4, filters=256, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_5 = tf.contrib.layers.batch_norm(conv5, activation_fn=tf.nn.relu, is_training=training)

            # Layer 6 - 256 channels
            conv6 = tf.layers.conv2d(
                bn_5, filters=256, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_6 = tf.contrib.layers.batch_norm(conv6, activation_fn=tf.nn.relu, is_training=training)

            # Layer 7 - 256 channels
            conv7 = tf.layers.conv2d(
                bn_6, filters=256, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_7 = tf.contrib.layers.batch_norm(conv7, activation_fn=tf.nn.relu, is_training=training)
            pool7 = tf.layers.max_pooling2d(bn_7, (2, 2), (2, 2), padding='SAME')
            dropout_7 = tf.layers.dropout(pool7, training=training, rate=0.5)

            # Layer 8 - 512 channels
            conv8 = tf.layers.conv2d(
                dropout_7, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_8 = tf.contrib.layers.batch_norm(conv8, activation_fn=tf.nn.relu, is_training=training)

            # Layer 9 - 512 channels
            conv9 = tf.layers.conv2d(
                bn_8, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_9 = tf.contrib.layers.batch_norm(conv9, activation_fn=tf.nn.relu, is_training=training)

            # Layer 10 - 512 channels
            conv10 = tf.layers.conv2d(
                bn_9, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_10 = tf.contrib.layers.batch_norm(conv10, activation_fn=tf.nn.relu, is_training=training)
            pool10 = tf.layers.max_pooling2d(bn_10, (2, 2), (2, 2), padding='SAME')
            dropout_10 = tf.layers.dropout(pool10, training=training, rate=0.5)

            # Layer 11 - 512 channels
            conv11 = tf.layers.conv2d(
                dropout_10, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_11 = tf.contrib.layers.batch_norm(conv11, activation_fn=tf.nn.relu, is_training=training)

            # Layer 12 - 512 channels
            conv12 = tf.layers.conv2d(
                bn_11, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_12 = tf.contrib.layers.batch_norm(conv12, activation_fn=tf.nn.relu, is_training=training)

            # Layer 13 - 512 channels
            conv13 = tf.layers.conv2d(
                bn_12, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_13 = tf.contrib.layers.batch_norm(conv13, activation_fn=tf.nn.relu, is_training=training)
            pool13 = tf.layers.max_pooling2d(bn_13, (2, 2), (2, 2), padding='SAME')
            dropout_13 = tf.layers.dropout(pool13, training=training, rate=0.5)

            flattened = tf.contrib.layers.flatten(dropout_13)

            # Layer 14
            dense14 = tf.layers.dense(
                inputs=flattened, units=4096,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_14 = tf.contrib.layers.batch_norm(dense14, activation_fn=tf.nn.relu, is_training=training)
            dropout_14 = tf.layers.dropout(bn_14, training=training, rate=0.5)

            # Layer 15
            dense15 = tf.layers.dense(
                inputs=dropout_14, units=4096,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_15 = tf.contrib.layers.batch_norm(dense15, activation_fn=tf.nn.relu, is_training=training)

            # Layer 16
            dense16 = tf.layers.dense(
                inputs=bn_15, units=100, activation=None,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            tf.summary.scalar("dense16_mean", tf.reduce_mean(dense16))

            # Predict
            outputs = tf.nn.softmax(dense16)
            prediction = tf.argmax(outputs, axis=1)
            equality = tf.equal(prediction, y)
            accuracy = tf.reduce_mean(tf.cast(equality, tf.float32))

            # Train
            softmax = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y, logits=dense16, name="softmax")
            loss = tf.reduce_mean(softmax)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                step = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)

            self.loss = loss
            self.x_placeholder = x
            self.y_placeholder = y
            self.training = training
            self.accuracy = accuracy
            self.outputs = outputs
            self.prediction = prediction
            self.step = step
            tf.summary.scalar("Loss", loss)
            tf.summary.scalar("Train Accuracy", accuracy)

    build_model()
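# The thirteen conv blocks above differ only in their input tensor and filter
# count. A minimal sketch of how the same VGG-style graph could be built with
# two small helpers (same TF 1.x calls as above; this is a refactoring
# suggestion, not code from the repo):
import tensorflow as tf

def conv_bn(inputs, filters, training):
    # 3x3 same-padded convolution followed by batch norm + ReLU, as in each layer above
    conv = tf.layers.conv2d(
        inputs, filters=filters, kernel_size=(3, 3), padding='SAME',
        use_bias=True, kernel_initializer=tf.contrib.layers.xavier_initializer())
    return tf.contrib.layers.batch_norm(conv, activation_fn=tf.nn.relu, is_training=training)

def vgg_block(inputs, filters, repeats, training):
    # a stack of conv_bn layers followed by 2x2 max-pooling and dropout
    net = inputs
    for _ in range(repeats):
        net = conv_bn(net, filters, training)
    net = tf.layers.max_pooling2d(net, (2, 2), (2, 2), padding='SAME')
    return tf.layers.dropout(net, training=training, rate=0.5)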
def train(self, restore=False):
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        try:
            run_id = np.random.randint(0, 1e7)
            train_writer = tf.summary.FileWriter(logdir="logs/" + str(run_id), graph=sess.graph)
            if restore:
                saver.restore(sess, tf.train.latest_checkpoint('./saves'))
            else:
                sess.run(tf.global_variables_initializer())
            counter = 0
            train_loader = Loader(batch_size=256)
            val_x, val_y = sess.run(train_loader.get_dataset(train=False).get_next())
            merge = tf.summary.merge_all()
            while True:
                counter += 1
                _, summary = sess.run([self.step, merge], feed_dict={})
                train_writer.add_summary(summary, counter)
                if counter % 1000 == 0:
                    # check validation accuracy on the fixed held-out batch
                    acc = sess.run(
                        self.accuracy,
                        feed_dict={
                            self.x_placeholder: val_x,
                            self.y_placeholder: val_y,
                            self.training: False
                        })
                    accuracy_summary = tf.Summary(value=[
                        tf.Summary.Value(tag='Validation Accuracy', simple_value=acc)
                    ])
                    train_writer.add_summary(accuracy_summary, counter)
                if counter % 10000 == 0:
                    # save the model periodically
                    print("Periodically saving model...")
                    save_path = saver.save(sess, "./saves/model.ckpt")
        except KeyboardInterrupt:
            print("Interrupted... saving model.")
            save_path = saver.save(sess, "./saves/model.ckpt")
def __init__(self, batch_size):
    loader = Loader(batch_size, FLAGS.dataset)
    iterator = loader.get_dataset()

    def build_model():
        with tf.device("/device:GPU:0"):
            x_loaded, y_loaded = iterator.get_next()
            x = tf.placeholder_with_default(x_loaded, (None, 32, 32, 3), name="x_placeholder")
            y = tf.placeholder_with_default(y_loaded, (None,), name="y_placeholder")
            training = tf.placeholder_with_default(True, name="training_bool", shape=())

            # Layer 1 - 64 channels
            conv1 = tf.layers.conv2d(
                x, filters=64, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_1 = tf.contrib.layers.batch_norm(conv1, activation_fn=tf.nn.relu, is_training=training)

            # Layer 2 - 64 channels
            conv2 = tf.layers.conv2d(
                bn_1, filters=64, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_2 = tf.contrib.layers.batch_norm(conv2, activation_fn=tf.nn.relu, is_training=training)
            pool2 = tf.layers.max_pooling2d(bn_2, (2, 2), (2, 2), padding='SAME')
            dropout_2 = tf.layers.dropout(pool2, training=training, rate=0.5)

            # Layer 3 - 128 channels
            conv3 = tf.layers.conv2d(
                dropout_2, filters=128, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_3 = tf.contrib.layers.batch_norm(conv3, activation_fn=tf.nn.relu, is_training=training)

            # Layer 4 - 128 channels
            conv4 = tf.layers.conv2d(
                bn_3, filters=128, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_4 = tf.contrib.layers.batch_norm(conv4, activation_fn=tf.nn.relu, is_training=training)
            pool4 = tf.layers.max_pooling2d(bn_4, (2, 2), (2, 2), padding='SAME')
            dropout_4 = tf.layers.dropout(pool4, training=training, rate=0.5)

            # Layer 5 - 256 channels
            conv5 = tf.layers.conv2d(
                dropout_4, filters=256, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_5 = tf.contrib.layers.batch_norm(conv5, activation_fn=tf.nn.relu, is_training=training)

            # Layer 6 - 256 channels
            conv6 = tf.layers.conv2d(
                bn_5, filters=256, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_6 = tf.contrib.layers.batch_norm(conv6, activation_fn=tf.nn.relu, is_training=training)

            # Layer 7 - 256 channels
            conv7 = tf.layers.conv2d(
                bn_6, filters=256, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_7 = tf.contrib.layers.batch_norm(conv7, activation_fn=tf.nn.relu, is_training=training)
            pool7 = tf.layers.max_pooling2d(bn_7, (2, 2), (2, 2), padding='SAME')
            dropout_7 = tf.layers.dropout(pool7, training=training, rate=0.5)

            # Layer 8 - 512 channels
            conv8 = tf.layers.conv2d(
                dropout_7, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_8 = tf.contrib.layers.batch_norm(conv8, activation_fn=tf.nn.relu, is_training=training)

            # Layer 9 - 512 channels
            conv9 = tf.layers.conv2d(
                bn_8, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_9 = tf.contrib.layers.batch_norm(conv9, activation_fn=tf.nn.relu, is_training=training)

            # Layer 10 - 512 channels
            conv10 = tf.layers.conv2d(
                bn_9, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_10 = tf.contrib.layers.batch_norm(conv10, activation_fn=tf.nn.relu, is_training=training)
            pool10 = tf.layers.max_pooling2d(bn_10, (2, 2), (2, 2), padding='SAME')
            dropout_10 = tf.layers.dropout(pool10, training=training, rate=0.5)

            # Layer 11 - 512 channels
            conv11 = tf.layers.conv2d(
                dropout_10, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_11 = tf.contrib.layers.batch_norm(conv11, activation_fn=tf.nn.relu, is_training=training)

            # Layer 12 - 512 channels
            conv12 = tf.layers.conv2d(
                bn_11, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_12 = tf.contrib.layers.batch_norm(conv12, activation_fn=tf.nn.relu, is_training=training)

            # Layer 13 - 512 channels
            conv13 = tf.layers.conv2d(
                bn_12, filters=512, kernel_size=(3, 3), padding='SAME', use_bias=True,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_13 = tf.contrib.layers.batch_norm(conv13, activation_fn=tf.nn.relu, is_training=training)
            pool13 = tf.layers.max_pooling2d(bn_13, (2, 2), (2, 2), padding='SAME')
            dropout_13 = tf.layers.dropout(pool13, training=training, rate=0.5)

            flattened = tf.contrib.layers.flatten(dropout_13)

            # Layer 14
            dense14 = tf.layers.dense(
                inputs=flattened, units=4096,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_14 = tf.contrib.layers.batch_norm(dense14, activation_fn=tf.nn.relu, is_training=training)
            dropout_14 = tf.layers.dropout(bn_14, training=training, rate=0.5)

            # Layer 15
            dense15 = tf.layers.dense(
                inputs=dropout_14, units=4096,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn_15 = tf.contrib.layers.batch_norm(dense15, activation_fn=tf.nn.relu, is_training=training)

            ## Layer 16
            dense16 = tf.layers.dense(
                inputs=bn_15, units=100, activation=None,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            tf.summary.scalar("dense16_mean", tf.reduce_mean(dense16))

            # data used in the model_op
            self.x_placeholder = x
            self.y_placeholder = y
            self.training = training
            self.logits = dense16

    build_model()
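# placeholder_with_default is what lets this model pull training batches from
# the embedded iterator when nothing is fed, while still accepting explicit
# numpy arrays (e.g., a validation batch) through feed_dict. A standalone toy
# example of the mechanism (TF 1.x; the tensors and values are illustrative):
import numpy as np
import tensorflow as tf

default_in = tf.constant([[1.0, 2.0], [3.0, 4.0]])
x = tf.placeholder_with_default(default_in, shape=(None, 2))
doubled = x * 2.0

with tf.Session() as sess:
    print(sess.run(doubled))                                   # uses the default tensor
    print(sess.run(doubled, feed_dict={x: np.zeros((1, 2))}))  # overridden by the feed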
def train():
    # for PANTHER mode, print the slice precision
    if FLAGS.ifpanther:
        if FLAGS.ifmixed:
            assert len(FLAGS.slice_bits_list) == 8, "list length should be 8"
            print("PUMA sliced_bits_list", FLAGS.slice_bits_list)
        else:
            print("PUMA slice bits: ", FLAGS.slice_bits)

    # dataloader for validation-accuracy computation - the dataloader for the training data is embedded in the model
    loader = Loader(FLAGS.batch_size, FLAGS.dataset)
    val_iterator = loader.get_dataset(train=False).get_next()

    # load model
    model = Model(FLAGS.batch_size)
    logits = model.logits
    labels = model.y_placeholder

    # create training op - add fake nodes to simulate quantization in inference
    # note: a quant_delay of num_epochs+1 just adds the fake nodes, to be used later in inference
    softmax = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name="softmax_cross_entropy")
    loss = tf.reduce_mean(softmax)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    if FLAGS.ifpanther:
        ## NOTE: if not using the puma outer product, use the nonideality class directly without going through the outer_product class.
        # outer_product is built on example-wise gradients and is slow
        # [to try for a speedup, see Goodfeli's comment - https://github.com/tensorflow/tensorflow/issues/4897]
        #nonideality = puma.nonideality(sigma=FLAGS.puma_sigma, alpha=FLAGS.puma_alpha)
        puma_op = puma.outer_product(
            var_list=var_list, sigma=FLAGS.puma_sigma, alpha=FLAGS.puma_alpha,
            slice_bits=FLAGS.slice_bits, ifmixed=FLAGS.ifmixed,
            slice_bits_list=[int(x) for x in FLAGS.slice_bits_list])

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if FLAGS.optimizer == "vanilla":
            opt = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.lr)
        elif FLAGS.optimizer == "momentum":
            opt = tf.train.MomentumOptimizer(learning_rate=FLAGS.lr, momentum=0.9)
        elif FLAGS.optimizer == "nesterov":
            opt = tf.train.MomentumOptimizer(learning_rate=FLAGS.lr, momentum=0.9, use_nesterov=True)
        else:
            opt = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)

        # layer gradient computation
        grad = opt.compute_gradients(loss)

        # weight gradient computation and update
        if FLAGS.ifpanther:
            print('Adding PANTHER ops')
            ## NOTE: if not using the puma outer product, use the nonideality class directly without going through the outer_product class.
            #grad_n = nonideality.apply(grad)
            grad_n = puma_op.apply_batch(grad)
            train_op = opt.apply_gradients(zip(grad_n[0], var_list))
        else:
            grad_n = grad  # placeholder for grad_n, for consistency among different run types
            train_op = opt.apply_gradients(grad)

    # create ops for validation and training accuracy
    outputs = tf.nn.softmax(logits)
    equality = tf.nn.in_top_k(outputs, labels, 1)
    accuracy = tf.reduce_mean(tf.cast(equality, tf.float32))

    # create ops for top-5 accuracy
    equality5 = tf.nn.in_top_k(outputs, labels, 5)
    accuracy5 = tf.reduce_mean(tf.cast(equality5, tf.float32))

    tf.summary.scalar("Loss", loss)
    tf.summary.scalar("Training accuracy - Top-1", accuracy)
    tf.summary.scalar("Training accuracy - Top-5", accuracy5)

    # capture slice-wise saturation statistics
    if FLAGS.ifpanther:
        puma_sat_stats = grad_n[1]
        tf.summary.scalar("PUMA Parallel Write Saturation", puma_sat_stats[1])
        for i in range(puma_sat_stats[0].get_shape()[0]):
            tf.summary.scalar("PUMA Parallel Write Saturation-" + "Slice " + str(i),
                              puma_sat_stats[0][i])

    # puma crs sync
    if FLAGS.ifpanther:
        crs_op = puma_op.crs_sync(var_list)

    # set a saver for checkpointing
    saver = tf.train.Saver()

    # run training within a session
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        try:
            # set up the logfile for this training session
            train_writer = tf.summary.FileWriter(logdir="./" + FLAGS.logdir, graph=sess.graph)

            # set up the counter, summary writer, and model depending on whether a restore is required;
            # the counter tracks progress during training, for correct restoring from the last checkpoint
            if FLAGS.restore:
                assert tf.gfile.Exists(FLAGS.logdir), \
                    'Restore requires a log file from a previous run; set restore to False and run...'
                print('restoring train from: %s' % FLAGS.logdir)
                saver.restore(sess, tf.train.latest_checkpoint("./" + FLAGS.logdir))
                ckpt_name = tf.train.get_checkpoint_state(FLAGS.logdir).model_checkpoint_path  # the latest checkpoint
                ## patch for the case where the model was saved naturally, not because it stopped due to an interrupt
                temp_l = ckpt_name.split('-')
                if len(temp_l) > 1:
                    counter = int(temp_l[1])  # extract the counter from the checkpoint name
                else:
                    print("Restoring from counter:", FLAGS.start_counter)
                    counter = FLAGS.start_counter
            else:
                counter = 0
                sess.run(tf.global_variables_initializer())

            merge = tf.summary.merge_all()
            print('counter: ', counter)

            # NOTE: keep the line below commented out until quantization support is enabled for training
            #print('Quantization bits: %d delay: %d ' % (FLAGS.quant_bits, FLAGS.quant_delay))

            # train the network for a user-defined number of epochs
            num_batch_per_epoch_train = math.ceil(loader.num_training_examples / FLAGS.batch_size)
            print('number of batches per epoch: ', num_batch_per_epoch_train)

            while counter < FLAGS.epochs * num_batch_per_epoch_train:
                counter += 1

                # puma carry-resolution step
                if FLAGS.ifpanther and (counter % FLAGS.crs_freq == 0):
                    print("Performing puma crs.....")
                    sess.run(crs_op)

                # a batch of training
                start_time = time.time()
                ## Uncomment the lines below when running detailed traces - for runtime compute and memory data
                #run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                #run_metadata = tf.RunMetadata()
                #_, _, summary = sess.run([grad_n, train_op, merge], feed_dict={}, options=run_options, run_metadata=run_metadata)
                _, _, summary = sess.run([grad_n, train_op, merge], feed_dict={})
                duration = time.time() - start_time
                print("Step: %d \t Training time (1 batch): %0.4f" % (counter, duration))
                ## Uncomment the line below when running detailed traces - for runtime compute and memory data
                #train_writer.add_run_metadata(run_metadata, 'step%d' % counter)
                train_writer.add_summary(summary, global_step=counter)

                # compute validation accuracy every epoch
                if counter % num_batch_per_epoch_train == 0:
                    num_batch_per_epoch_val = math.ceil(loader.num_testing_examples / FLAGS.batch_size)
                    val_counter = 0
                    true_count = 0
                    true_count5 = 0
                    while val_counter < num_batch_per_epoch_val:
                        val_counter += 1
                        # a batch of validation
                        val_x, val_y = sess.run(val_iterator)
                        val_equality, val_equality5 = sess.run(
                            [equality, equality5],
                            feed_dict={
                                model.x_placeholder: val_x,
                                model.y_placeholder: val_y,
                                model.training: False
                            })
                        true_count += np.sum(val_equality)
                        true_count5 += np.sum(val_equality5)
                    val_accuracy = true_count / (FLAGS.batch_size * num_batch_per_epoch_val)
                    val_accuracy5 = true_count5 / (FLAGS.batch_size * num_batch_per_epoch_val)
                    accuracy_summary = tf.Summary()
                    accuracy_summary.value.add(tag='Validation Accuracy - Top-1',
                                               simple_value=val_accuracy)
                    accuracy_summary.value.add(tag='Validation Accuracy - Top-5',
                                               simple_value=val_accuracy5)
                    train_writer.add_summary(accuracy_summary, global_step=counter)
                    print('Validation accuracy: Top-1 %.4f \t Top-5 %.4f' %
                          (val_accuracy, val_accuracy5))

                if counter % (FLAGS.chpk_freq * num_batch_per_epoch_train) == 0:
                    # save the model periodically
                    print("Periodically saving model...")
                    save_path = saver.save(sess, "./" + FLAGS.logdir + "/model.ckpt")

        except KeyboardInterrupt:
            print("Interrupted... saving model.")
            save_path = saver.save(sess, "./" + FLAGS.logdir + "/model.ckpt-" + str(counter))
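# Note: dividing true_count by FLAGS.batch_size * num_batch_per_epoch_val (as
# above) slightly understates accuracy whenever the final validation batch is
# partial, since math.ceil rounds the denominator up past the real example
# count. A sketch of an exact alternative, keeping the same true_count
# bookkeeping (assumes loader.num_testing_examples is the true example count):
def exact_accuracy(true_count, num_examples):
    # true_count: correctly classified examples accumulated over all batches
    # num_examples: total evaluated examples, e.g. loader.num_testing_examples
    return true_count / num_examples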