def train(m):
    """Set up the training/test input pipelines and train model ``m``.

    When ``FLAGS.download_data`` is set, the ``img_align_celeba.zip`` archive
    is fetched into ``FLAGS.download_dir`` and extracted into the train/test
    directories first. The data directories are taken from the TFRecord flags
    when ``FLAGS.use_tfrecord`` is set, otherwise from the plain image flags.

    Args:
        m: object exposing a ``train(...)`` method; it receives both input
            pipelines together with the hyper-parameters read from ``FLAGS``.
    """
    if FLAGS.download_data:
        archive = os.path.join(FLAGS.download_dir, 'img_align_celeba.zip')
        download.maybe_download_from_google_drive(
            '0B7EVK8r0v71pZjFTYXZWM3FlRnM', archive)
        download.maybe_extract(archive, FLAGS.train_dir, FLAGS.test_dir)

    if FLAGS.use_tfrecord:
        train_dir, test_dir = FLAGS.tftrain_dir, FLAGS.tftest_dir
    else:
        train_dir, test_dir = FLAGS.train_dir, FLAGS.test_dir

    def _pipeline(directory, min_queue_examples):
        # Both pipelines share every argument except the directory and the
        # minimum number of queued examples.
        return input.inputs(directory, FLAGS.batch_size, min_queue_examples,
                            FLAGS.input_height, FLAGS.input_width,
                            FLAGS.use_tfrecord, FLAGS.epoch,
                            FLAGS.shuffle_size)

    # The second element returned by input.inputs is not used here.
    training_inputs, _ = _pipeline(train_dir, FLAGS.min_queue_examples)
    steps_per_epoch = int(FLAGS.train_data_size / FLAGS.batch_size)
    test_inputs, _ = _pipeline(test_dir, 0)

    m.train(training_inputs, test_inputs, FLAGS.epoch, steps_per_epoch,
            FLAGS.batch_size, FLAGS.learning_rate, FLAGS.l1_weight,
            FLAGS.beta1, FLAGS.load_ckpt)
def inputs(eval_data):
    """Construct input for CIFAR evaluation using the Reader ops.

    Parameters:
        eval_data: bool, indicating if one should use the train or eval data set.

    Returns:
        images: Images. 4-D Tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1-D Tensor of [batch_size] size.

    Raises:
        ValueError: If no data_dir.
    """
    root = FLAGS.data_dir
    if not root:
        raise ValueError('Please supply a data_dir')

    # Drop a leading './' before appending the dataset sub-directory.
    if root.startswith('./'):
        root = root[2:]

    images, labels = input.inputs(
        eval_data=eval_data,
        data_dir=os.path.join(root, 'cifar-10-batches-bin'),
        batch_size=FLAGS.batch_size)

    if not FLAGS.use_fp16:
        return images, labels
    # Half-precision mode: both tensors are cast to float16.
    return tf.cast(images, tf.float16), tf.cast(labels, tf.float16)
def inputs(eval_data):
    """Build the evaluation input tensors from the CIFAR-10 binary batches.

    Args:
        eval_data: forwarded unchanged to ``input.inputs``; selects which
            data set is read.

    Returns:
        The ``(images, labels)`` pair produced by ``input.inputs``, each cast
        to ``tf.float16`` when ``FLAGS.use_fp16`` is set.

    Raises:
        ValueError: if ``FLAGS.data_dir`` is empty.
    """
    if not FLAGS.data_dir:
        # The message names the flag the user has to set (was misspelled
        # 'data_dia').
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images, labels = input.inputs(eval_data, data_dir, FLAGS.batch_size)
    if FLAGS.use_fp16:
        images = tf.cast(images, tf.float16)
        labels = tf.cast(labels, tf.float16)
    return images, labels
def inputs(eval_data):
    """Return the ``(images, labels)`` pair read from ``FLAGS.data_dir``.

    Args:
        eval_data: forwarded to ``input.inputs`` as ``eval_data``.

    Returns:
        Whatever ``input.inputs`` yields, unpacked as ``(images, labels)``.

    Raises:
        ValueError: if ``FLAGS.data_dir`` is empty.
    """
    source_dir = FLAGS.data_dir
    if source_dir:
        images, labels = input.inputs(eval_data=eval_data,
                                      data_dir=source_dir,
                                      batch_size=FLAGS.batch_size)
        return images, labels
    raise ValueError('Please supply a data_dir')
def main(_):
    """Count the evaluation examples, build the inference graph and evaluate.

    Sets the module-level ``NUM_EXAMPLES`` to the number of lines in
    ``FLAGS.eval_lst`` before the graph is built.

    Args:
        _: unused positional argument (presumably the argv list handed over by
            the application runner -- confirm against the caller).
    """
    global NUM_EXAMPLES
    # Use a context manager so the list file is closed deterministically
    # instead of being left to the garbage collector.
    with open(FLAGS.eval_lst, 'r') as lst_file:
        NUM_EXAMPLES = len(lst_file.read().splitlines())

    images, labels = inputs(FLAGS.data_dir, FLAGS.eval_lst)

    # Scalar boolean placeholder handed to both inference() and evaluate().
    is_training = tf.placeholder('bool', [], name='is_training')

    logits = inference(images,
                       num_classes=FLAGS.num_classes,
                       is_training=is_training,
                       num_blocks=[3, 4, 6, 3])
    evaluate(is_training, logits, images, labels)
def main():
    """Run the selected model on the "contrast" inputs and export predictions.

    Loads ``num_data`` samples via ``inputs``, evaluates ``models[args.model]``
    inside a ``tf.train.Supervisor`` managed session whose logdir is
    ``log/<model>/train``, and hands the results to ``export_result``.
    """
    with tf.Graph().as_default():
        # pass
        num_data = 5000
        image, label = inputs(args.train_dir, "contrast", num_data, None, one_hot_labels=False)
        images, labels = convert_data_to_tensors(image, label)
        ## contrast version
        # c_image, c_label = inputs(args.train_dir, "contrast", num_data, None, one_hot_labels=False)
        # print(c_image)
        # c_images, c_labels = convert_data_to_tensors(c_image, c_label)
        ###
        predictions = models[args.model](images)
        ckpt_dir = 'log/%s/train' % args.model
        sv = tf.train.Supervisor(logdir=ckpt_dir)
        with sv.managed_session() as sess:
            # From here on `labels` holds the evaluated values, not the tensor.
            preds, labels = sess.run([predictions, labels])
        # c_predictions = models[args.model](c_images)
        # with sv.managed_session() as sess:
        #     c_preds, c_labels = sess.run([c_predictions, c_labels])
        #
        # print("augmented data pure mse: ", get_mse(preds, labels))
        # print("augmented data avg mse: ", get_mse((preds + c_labels) / 2, labels))
        #
        predictions = np.array(preds)
        # The label values are exported as the per-row names (stringified).
        img_name = np.array(labels)
        export_result(
            [str(img_name) for img_name in list(np.concatenate(img_name))],
            list(np.concatenate(predictions)), "original_contrast")
def inputs(eval_data):
    """Construct input for model evaluation using the Reader ops.

    Args:
        eval_data: bool, indicating if one should use the train or eval data set.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size.

    Raises:
        ValueError: If no data_dir
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')

    # The records are read from the 'binary' sub-directory of data_dir.
    binary_dir = os.path.join(FLAGS.data_dir, 'binary')
    batch = input.inputs(eval_data=eval_data,
                         data_dir=binary_dir,
                         batch_size=FLAGS.batch_size)
    images, labels = batch

    if not FLAGS.use_fp16:
        return images, labels
    return tf.cast(images, tf.float16), tf.cast(labels, tf.float16)
def inputs(eval_data):
    """Construct the evaluation input tensors from ``FLAGS.data_dir``.

    Args:
        eval_data: bool, indicating if one should use the train or eval data set.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size.

    Raises:
        ValueError: If no data_dir
    """
    source_dir = FLAGS.data_dir
    if not source_dir:
        raise ValueError('Please supply a data_dir')

    images, labels = input.inputs(eval_data=eval_data,
                                  data_dir=source_dir,
                                  batch_size=FLAGS.batch_size)

    if not FLAGS.use_fp16:
        return images, labels
    # Half-precision mode: both tensors are cast to float16.
    return tf.cast(images, tf.float16), tf.cast(labels, tf.float16)
def train():
    """Train the generator, discriminator and z-predictor networks.

    Builds the losses and one train op per network, then alternates between
    ``FLAGS.discriminator_steps`` discriminator updates and
    ``FLAGS.generator_steps`` generator updates for ``FLAGS.max_steps`` steps,
    logging throughput every 10 steps, writing summaries every 100 steps and
    saving a checkpoint every 1000 steps and on the final step.
    """
    input.init_dataset_constants()
    # build model into default graph
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        # get images
        with tf.variable_scope('input_pipeline') as scope:
            images = input.inputs()
        # NOTE(review): train_ops is never used below.
        train_ops = []
        # calculate loss
        gen_loss, discrim_loss, z_prediction_loss = model.losses(images)
        # loss summaries
        for l in tf.get_collection('losses'):
            tf.scalar_summary(l.op.name, l)
        # build graph that trains the model with one batch of examples
        gen_train_op = model.train(gen_loss, global_step, net='generator')
        discrim_train_op = model.train(discrim_loss, global_step, net='discriminator')
        z_predictor_train_op = model.train(z_prediction_loss, global_step, net='z_predictor')
        # create a saver: generator variables always, discriminator variables
        # only when FLAGS.save_discriminator is set
        gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
        discrim_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
        # variables_to_save aliases gen_vars, so the += below extends that
        # same list object in place.
        variables_to_save = gen_vars
        if FLAGS.save_discriminator:
            variables_to_save += discrim_vars
        saver = tf.train.Saver(
            variables_to_save, max_to_keep=1,
            keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)
        # build the summary operation based on the TF collection of summaries
        summary_op = tf.merge_all_summaries()
        # start running operations on the graph
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        # initialize
        init_variables(sess)
        # start the queue runners
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
        # `net` names the network to update; `count` counts the consecutive
        # steps already spent on it.
        net = 'discriminator'
        count = 0
        for step in range(FLAGS.max_steps):
            # Pick this step's op/loss, then switch `net` once the current
            # network has had its configured number of consecutive steps.
            if net == 'discriminator':
                op, loss = discrim_train_op, discrim_loss
                count += 1
                if count == FLAGS.discriminator_steps:
                    net = 'generator'
                    count = 0
            else:
                op, loss = gen_train_op, gen_loss
                count += 1
                if count == FLAGS.generator_steps:
                    net = 'discriminator'
                    count = 0
            start_time = time.time()
            _, loss_value = sess.run([op, loss])
            assert not np.isnan(
                loss_value), 'Model diverged with NaN loss value'
            # NOTE: `net` may already have been switched above, so this tests
            # the network scheduled for the next step, not necessarily the one
            # that was just trained.
            if net == 'generator':
                _, loss_value = sess.run(
                    [z_predictor_train_op, z_prediction_loss])
                assert not np.isnan(
                    loss_value), 'Model diverged with NaN loss value'
            duration = time.time() - start_time
            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = '{}: step {}, ({:.0f} examples/sec; {:.3f} sec/batch)'
                print(
                    format_str.format(datetime.now(), step, examples_per_sec,
                                      sec_per_batch))
            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)
            # save the model checkpoint periodically
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                # NOTE(review): FLAGS.train_dir itself is passed as the save
                # path -- confirm this is the intended checkpoint prefix.
                checkpoint_path = FLAGS.train_dir
                saver.save(sess, checkpoint_path, global_step=step)
        # ask threads to stop
        coord.request_stop()
        # wait for threads to finish
        coord.join(threads)
        sess.close()
def train():
    """Train an ``MRSmodel`` on the ``.npy`` files found in ``FLAGS.dataset``.

    The epoch size is truncated to a whole number of batches, the graph is
    exported to ``FLAGS.train_dir`` as text and binary meta-graphs, and
    training runs in a ``MonitoredTrainingSession`` until
    ``steps_per_epoch * FLAGS.num_epochs`` global steps are reached.
    Optionally writes Chrome-trace timelines and numbered model snapshots.
    """
    labels_file = os.path.join(FLAGS.dataset, 'labels/labels.npy')
    files = helper.listdir_files(FLAGS.dataset, recursive=False,
                                 filter_ext=['.npy'], encoding='utf-8')
    # A non-positive FLAGS.epoch_size means "use every file found".
    epoch_size = FLAGS.epoch_size if FLAGS.epoch_size > 0 else len(files)
    steps_per_epoch = epoch_size // FLAGS.batch_size
    # Round the epoch down to a whole number of batches.
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch * FLAGS.num_epochs
    files = files[:epoch_size]
    print('epoch size: {}\n{} steps per epoch\n{} epochs\n{} steps'.format(
        epoch_size, steps_per_epoch, FLAGS.num_epochs, max_steps))
    with tf.Graph().as_default():
        # pre-processing for input
        with tf.device('/cpu:0'):
            spectrum, labels_ref = inputs(FLAGS, files, labels_file,
                                          epoch_size, is_training=True)
        # build model
        model = MRSmodel(FLAGS, data_format=FLAGS.data_format,
                         seq_size=FLAGS.seq_size, num_labels=FLAGS.num_labels)
        g_loss = model.build_train(spectrum, labels_ref)
        # training step and op
        global_step = tf.train.get_or_create_global_step()
        g_train_op = model.train(global_step)
        # profiler
        #profiler(train_op)
        # a saver object restricted to model.g_mvars; max_to_keep is set very
        # high (1 << 16)
        saver = tf.train.Saver(var_list=model.g_mvars, max_to_keep=1 << 16,
                               save_relative_paths=True)
        # save the graph, once as text and once as binary
        saver.export_meta_graph(os.path.join(FLAGS.train_dir, 'model.pbtxt'),
                                as_text=True, clear_devices=True,
                                clear_extraneous_savers=True)
        saver.export_meta_graph(os.path.join(FLAGS.train_dir, 'model.meta'),
                                as_text=False, clear_devices=True,
                                clear_extraneous_savers=True)
        # monitored session
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            log_device_placement=FLAGS.log_device_placement)
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=max_steps),
                    tf.train.NanTensorHook(g_loss),
                    LoggerHook(g_loss, steps_per_epoch)
                ],
                config=config,
                log_step_count_steps=FLAGS.log_frequency) as mon_sess:
            # options
            # The session obtained from the helper is used for restore/save
            # and for reading the global step; training steps go through
            # mon_sess itself.
            sess = helper.get_session(mon_sess)
            # run_options/run_metadata only exist when timelines are enabled;
            # the loop below guards their use with the same condition.
            if FLAGS.timeline_steps > 0:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
            # restore pre-trained model
            if FLAGS.pretrain_dir and not FLAGS.restore:
                saver.restore(sess, os.path.join(FLAGS.pretrain_dir, 'model'))

            # sessions
            def run_sess(options=None, run_metadata=None):
                mon_sess.run(g_train_op, options=options,
                             run_metadata=run_metadata)

            # run session
            while not mon_sess.should_stop():
                # The step is read before the train op runs.
                step = tf.train.global_step(sess, global_step)
                if FLAGS.timeline_steps > 0 and step % FLAGS.timeline_steps == 0:
                    run_sess(run_options, run_metadata)
                    # Create the Timeline object, and write it to a json
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    # NOTE(review): opened in append mode; an existing file
                    # for the same step would be extended, not replaced.
                    with open(
                            os.path.join(FLAGS.train_dir,
                                         'timeline_{:0>7}.json'.format(step)),
                            'a') as f:
                        f.write(ctf)
                else:
                    run_sess()
                # Numbered snapshot, written without meta graph or
                # checkpoint-state file.
                if FLAGS.save_steps > 0 and step % FLAGS.save_steps == 0:
                    saver.save(sess,
                               os.path.join(FLAGS.train_dir,
                                            'model_{:0>7}'.format(step)),
                               write_meta_graph=False, write_state=False)
import model
import experiments
import experiments.learn
from experiments.learn import memory_head

FLAGS = tf.app.flags.FLAGS
# NOTE(review): START_LEARNING_RATE is not referenced in the visible part of
# this script.
START_LEARNING_RATE = 0.005

# Inputs
# Collect the training files matching "train-otb*" and shuffle their order
# in place.
train_filenames = input.find_files(FLAGS.data_dir, "train-otb*")
print("Found", len(train_filenames), "train files.")
random.shuffle(train_filenames)
# The dataset is built on a string placeholder, so the actual file list has
# to be fed at run time -- presumably when training_init_op is run; confirm.
filenames = tf.placeholder(tf.string, shape=[None])
dataset = input.inputs(filenames)
iterator = tf.contrib.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
training_init_op = iterator.make_initializer(dataset)
examples, labels, results = iterator.get_next()
labels = tf.cast(labels, tf.int32)
#labels = tf.one_hot(labels, model.NUM_LABELS, dtype=tf.float32)

# Model
features = model.feature_extractor(examples)
# `layers` is passed to policy_model as an empty list; presumably the model
# fills it -- confirm against model.policy_model.
layers = []
nn_logits = model.policy_model(examples, features, layers)
def test():
    """Evaluate saved ``MRSmodel`` checkpoints on the ``FLAGS.dataset`` files.

    With ``FLAGS.progress`` set, every ``model_*`` checkpoint in
    ``FLAGS.train_dir`` is evaluated and the per-checkpoint statistics are
    saved and plotted; otherwise only the latest checkpoint is evaluated.
    Error histograms and a label log are written to ``FLAGS.test_dir``.
    """
    # label names
    if FLAGS.num_labels == 4:
        LABEL_NAMES = ['creatine', 'gaba', 'glutamate', 'glutamine']
    elif FLAGS.num_labels == 12:
        LABEL_NAMES = [
            'choline-truncated', 'creatine', 'gaba', 'glutamate', 'glutamine',
            'glycine', 'lactate', 'myo-inositol', 'NAAG-truncated',
            'n-acetylaspartate', 'phosphocreatine', 'taurine'
        ]
    elif FLAGS.num_labels == 16:
        LABEL_NAMES = [
            'acetate', 'aspartate', 'choline-truncated', 'creatine', 'gaba',
            'glutamate', 'glutamine', 'histamine', 'histidine', 'lactate',
            'myo-inositol', 'n-acetylaspartate', 'scyllo-inositol',
            'succinate', 'taurine', 'valine'
        ]
    else:
        # Unknown label count: fall back to numeric label names.
        LABEL_NAMES = list(range(FLAGS.num_labels))
    print(*LABEL_NAMES)
    print('No.{}'.format(FLAGS.postfix))
    # get dataset files
    labels_file = os.path.join(FLAGS.dataset, 'labels/labels.npy')
    files = helper.listdir_files(FLAGS.dataset, recursive=False,
                                 filter_ext=['.npy'], encoding=True)
    steps_per_epoch = len(files) // FLAGS.batch_size
    # Only whole batches are evaluated; leftover files are dropped.
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch
    files = files[:epoch_size]
    with tf.Graph().as_default():
        # pre-processing for input
        with tf.device('/cpu:0'):
            spectrum, labels_ref = inputs(FLAGS, files, labels_file,
                                          epoch_size, is_testing=True)
        # build model
        model = MRSmodel(FLAGS, data_format=FLAGS.data_format,
                         seq_size=FLAGS.seq_size, num_labels=FLAGS.num_labels)
        model.build_model(spectrum)
        # a saver object which will save all the variables
        saver = tf.train.Saver(var_list=model.g_mvars)
        # get output
        labels_pred = tf.get_default_graph().get_tensor_by_name('Output:0')
        # losses
        ret_loss = list(get_losses(labels_ref, labels_pred))
        ret_labels = [labels_ref, labels_pred]
        # Fetch list layout: [losses..., errors, labels_ref, labels_pred].
        # The last value from get_losses is treated as the "errors" array, so
        # ret_loss is shortened to the accumulated losses only.
        ret = ret_loss + ret_labels
        ret_loss = ret[:len(ret_loss) - 1]
        # model files
        if FLAGS.progress:
            mfiles = helper.listdir_files(FLAGS.train_dir, recursive=False,
                                          filter_ext=['.index'],
                                          encoding=None)
            # Drop the 6-character '.index' suffix to get checkpoint prefixes.
            mfiles = [f[:-6] for f in mfiles if 'model_' in f]
            mfiles.sort()
            stats = []
        else:
            mfiles = [tf.train.latest_checkpoint(FLAGS.train_dir)]
        for model_file in mfiles:
            with setup() as sess:
                # restore variables from checkpoint
                saver.restore(sess, model_file)
                # run session
                sum_loss = [0 for _ in range(len(ret_loss))]
                all_errors = []
                # These names are rebound from tensors to result lists; the
                # tensors stay reachable through `ret`.
                labels_ref = []
                labels_pred = []
                for i in range(max_steps):
                    cur_ret = sess.run(ret)
                    cur_loss = cur_ret[0:len(ret_loss)]
                    cur_errors = cur_ret[len(ret_loss)]
                    labels_ref.append(cur_ret[len(ret_loss) + 1])
                    labels_pred.append(cur_ret[len(ret_loss) + 2])
                    all_errors.append(cur_errors)
                    # monitor losses
                    for _ in range(len(ret_loss)):
                        sum_loss[_] += cur_loss[_]
                    #print('batch {}, MSE {}, MAD {}, MSE valid {}, MAD valid {}, False Positives {}, False Negatives {}'.format(i, *cur_loss))
            # summary
            mean_loss = [l / max_steps for l in sum_loss]
            # Entries 2-3 are divided by the batch size and entries 4-5 by
            # batch size * label count; the report below prints them as
            # accuracies and FP/FN rates.
            mean_loss[2] /= FLAGS.batch_size
            mean_loss[3] /= FLAGS.batch_size
            mean_loss[4] /= FLAGS.batch_size * FLAGS.num_labels
            mean_loss[5] /= FLAGS.batch_size * FLAGS.num_labels
            print('{} Metabolites'.format(FLAGS.num_labels))
            print('MSE threshold {}'.format(FLAGS.mse_thresh))
            print('MAD threshold {}'.format(FLAGS.mad_thresh))
            print(
                'Totally {} Samples, MSE {}, MAD {}, MSE accuracy {}, MAD accuracy {}, FP rate {}, FN rate {}'
                .format(epoch_size, *mean_loss))
            # save stats
            if FLAGS.progress:
                # Strip the 6-character 'model_' prefix; the remainder is
                # parsed as the step number.
                model_num = os.path.split(model_file)[1][6:]
                stats.append(np.array([float(model_num)] + mean_loss))
        # errors
        import matplotlib.pyplot as plt
        all_errors = np.concatenate(all_errors, axis=0)
        # One histogram per label column of the error array.
        for _ in range(FLAGS.num_labels):
            errors = all_errors[:, _]
            plt.figure()
            plt.title('Error Ratio Histogram - {}'.format(LABEL_NAMES[_]))
            plt.hist(errors, bins=100, range=(0, 1))
            plt.savefig(os.path.join(FLAGS.test_dir, 'hist_{}.png'.format(_)))
            plt.close()
        # labels
        labels_ref = np.concatenate(labels_ref, axis=0)
        labels_pred = np.concatenate(labels_pred, axis=0)
        with open(os.path.join(FLAGS.test_dir, 'labels.log'), mode='w') as file:
            file.write('Labels (Ground Truth)\nLabels (Predicted)\n\n')
            for _ in range(epoch_size):
                file.write('{}\n{}\n\n'.format(labels_ref[_], labels_pred[_]))
        # save stats
        if FLAGS.progress:
            stats = np.stack(stats)
            np.save(os.path.join(FLAGS.test_dir, 'stats.npy'), stats)
            fig, ax = plt.subplots()
            ax.set_title('Test Error with Training Progress')
            ax.set_xlabel('training steps')
            ax.set_ylabel('mean absolute difference')
            ax.set_xscale('linear')
            ax.set_yscale('log')
            # Column 0 is the step number; column 2 is the second mean loss
            # (reported as MAD above).
            ax.plot(stats[:, 0], stats[:, 2])
            ax.axis(ymin=0)
            plt.tight_layout()
            plt.savefig(os.path.join(FLAGS.test_dir, 'stats.png'))
            plt.close()
        print('')
def train():
    """Multi-GPU training loop with periodic validation and checkpointing.

    Builds one loss tower per GPU (``FLAGS.num_gpus``), averages the tower
    gradients, and applies them with Adam. Inside a ``tf.train.Supervisor``
    managed session it logs every 100 global steps, writes summaries every
    10, and once per ``int(FLAGS.num_train / FLAGS.batch_size)`` global steps
    runs validation (per-class AP / mAP) and saves a checkpoint.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): the global step is created with dtype float32.
        global_step = tf.Variable(initial_value=0,
                                  name="global_step",
                                  trainable=False,
                                  collections=[
                                      tf.GraphKeys.GLOBAL_STEP,
                                      tf.GraphKeys.GLOBAL_VARIABLES
                                  ],
                                  dtype=tf.float32)
        lr = FLAGS.learning_rate
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9,
                                     beta2=0.999, epsilon=1e-8)
        with tf.name_scope('train_images'):
            images, labels, boxes, num_objects = input.distorted_inputs(
                FLAGS.batch_size)
        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [images, labels, boxes, num_objects],
            capacity=2 * FLAGS.num_gpus)
        tower_grads = []
        tower_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in xrange(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        image_batch, label_batch, box_batch, num_objects_batch = batch_queue.dequeue(
                        )
                        loss_hm, loss_wh, loss_off, tmp_wh, tmp_pos = centernet.loss(
                            image_batch, label_batch, box_batch,
                            num_objects_batch)
                        # The width/height term is weighted by 0.1.
                        loss = loss_hm + loss_wh * 0.1 + loss_off
                        regularization_loss = tf.add_n(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))
                        loss = loss + regularization_loss
                        # Later towers reuse the variables created by the
                        # first one.
                        tf.get_variable_scope().reuse_variables()
                        # NOTE: summaries, update_ops, loss and the loss_*
                        # tensors are rebound on every iteration, so after the
                        # loop they refer to the last tower only.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)
                        update_ops = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, scope)
                        grads = opt.compute_gradients(loss)
                        tower_grads.append(grads)
                        tower_losses.append(loss)
        grads = average_gradients(tower_grads)
        #validation
        val_images, val_labels, val_boxes, val_num_objects = input.inputs(1)
        with tf.device('/gpu:0'):
            with tf.name_scope('eval_images'):
                hm_pred, wh_pred, offset_pred = centernet.inference(val_images)
                classes_pred, scores_pred, boxes_pred, heatmap_pred = validation.decode_(
                    hm_pred, wh_pred, offset_pred)
        summaries.extend(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'train_images'))
        summaries.extend(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'eval_images'))
        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))
        #
        # for grad, var in grads:
        #     if grad is not None:
        #         summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))
        #
        with tf.control_dependencies(update_ops):
            train_op = opt.apply_gradients(grads, global_step=global_step)
        # Print every trainable variable and count the total parameters.
        total_parameters = 0
        trainable_list = []
        for var in tf.trainable_variables():
            print(var.name)
            trainable_list.append(var.name)
            summaries.append(tf.summary.histogram(var.op.name, var))
            # shape is an array of tf.Dimension
            shape = var.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        print('total_parameters : ', total_parameters)
        saver = tf.train.Saver(max_to_keep=20)
        summary_op = tf.summary.merge(summaries)
        # NOTE(review): pretrained_ckpt_path is assigned but never used.
        pretrained_ckpt_path = FLAGS.pretrained_dir
        # init_fn is only set when there is no checkpoint in FLAGS.ckpt_dir
        # and a pretrained directory was given.
        if tf.train.latest_checkpoint(FLAGS.ckpt_dir):
            print('use latest trained check point')
            init_fn = None
        else:
            if FLAGS.pretrained_dir == "":
                print('use no ckpt')
                init_fn = None
            else:
                print('use pretrained check point')
                variables_to_restore = slim.get_variables_to_restore(
                    include=trainable_list, exclude=['global_step'])
                # for k in variables_to_restore:
                #     print(k.name)
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.pretrained_dir, variables_to_restore,
                    ignore_missing_vars=True)
        # summary_op=None and save_model_secs=0 are passed to the Supervisor;
        # summaries and checkpoints are written explicitly in the loop below.
        sv = tf.train.Supervisor(logdir=FLAGS.ckpt_dir, summary_op=None,
                                 saver=saver, save_model_secs=0,
                                 init_fn=init_fn)
        config_ = tf.ConfigProto(allow_soft_placement=True)
        config_.gpu_options.allow_growth = True
        # config_.gpu_options.per_process_gpu_memory_fraction = 1.0
        # sess=sv.managed_session(config=config_)
        with sv.managed_session(config=config_) as sess:
            # Start the queue runners.
            sv.start_queue_runners(sess=sess)
            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                sess.run(train_op)
                # NOTE(review): the losses are fetched in a separate run call
                # from train_op, so they are presumably computed on a freshly
                # dequeued batch rather than the one just trained on --
                # confirm.
                sv_global_step, loss_value, loss_hm_, loss_wh_, loss_off_ = sess.run(
                    [sv.global_step, loss, loss_hm, loss_wh, loss_off])
                duration = time.time() - start_time
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
                if sv_global_step % 100 == 0:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / FLAGS.num_gpus
                    epochs = sv_global_step * FLAGS.batch_size / FLAGS.num_train
                    format_str = (
                        'epochs %.2f, step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (epochs, step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    print('loss_hm : ', loss_hm_, 'loss_wh : ', loss_wh_,
                          'loss_off : ', loss_off_)
                if sv_global_step % 10 == 0:
                    summary_str = sess.run(summary_op)
                    sv.summary_computed(sess, summary_str)
                # Validate and checkpoint every int(num_train / batch_size)
                # global steps, skipping step 0.
                if sv_global_step % (int(FLAGS.num_train / FLAGS.batch_size) *
                                     1) == 0 and sv_global_step != 0:
                    print('start validation')
                    # Per-class accumulators, filled from the first validated
                    # image and extended afterwards.
                    entire_TF = []
                    entire_score = []
                    entire_numGT = []
                    for val_step in range(FLAGS.num_validation):
                        if val_step % 500 == 0:
                            print(val_step, ' / ', FLAGS.num_validation)
                        val_img, \
                        val_GT_boxes, \
                        val_GT_cls, \
                        val_loc_pred, \
                        val_cls_pred, \
                        val_score_pred, \
                        num_objects = sess.run([val_images, val_boxes, val_labels, boxes_pred, classes_pred, scores_pred, val_num_objects])
                        TF_array, TF_score, num_GT = validation.one_image_validation(
                            val_GT_boxes, val_GT_cls, val_loc_pred,
                            val_cls_pred, val_score_pred, num_objects)
                        if len(entire_TF) == 0:
                            entire_TF = TF_array
                            entire_score = TF_score
                            entire_numGT = num_GT
                        else:
                            # Only the first num_classes - 1 entries are
                            # accumulated.
                            for k_cls in range(FLAGS.num_classes - 1):
                                entire_TF[k_cls] = np.concatenate(
                                    [entire_TF[k_cls], TF_array[k_cls]],
                                    axis=0)
                                entire_score[k_cls] = np.concatenate(
                                    [entire_score[k_cls], TF_score[k_cls]],
                                    axis=0)
                                entire_numGT[k_cls] += num_GT[k_cls]
                    entire_AP_sum = validation.compute_AP(
                        entire_score, entire_TF, entire_numGT)
                    # mAP is averaged over the classes whose AP is non-zero.
                    mAP = np.sum(np.array(entire_AP_sum)) / np.sum(
                        np.array(entire_AP_sum) != 0)
                    print('class AP : ', entire_AP_sum)
                    print(len(entire_AP_sum))
                    print('mAP : ', mAP)
                    checkpoint_path = os.path.join(FLAGS.ckpt_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=sv.global_step)
def result_accuracy(result_predictions, results):
    """Count predictions and correct predictions per result class.

    Each row of ``result_predictions`` is reduced with ``argmax`` to an index
    0..2 and shifted by -1, so the predicted result is -1, 0 or +1; it is
    compared with the matching entry of ``results``.

    Args:
        result_predictions: iterable of per-example score vectors; only the
            position of the maximum is used.
        results: indexable collection of expected results (-1, 0 or +1),
            aligned with ``result_predictions``.

    Returns:
        ``(num_correct_preds, num_preds)``: two length-3 integer arrays,
        indexed by ``predicted_result + 1``, with the number of correct
        predictions and the total number of predictions for each class.
    """
    # The unused local `threshold` from the original was removed.
    num_correct_preds = np.array([0] * 3)
    num_preds = np.array([0] * 3)
    for i, pred_logits in enumerate(result_predictions):
        pred = np.argmax(pred_logits) - 1
        num_preds[pred + 1] += 1
        if results[i] == pred:
            num_correct_preds[pred + 1] += 1
    return num_correct_preds, num_preds


with tf.device(input.device):
    # NOTE(review): the placeholder is bound to `validationfilenames`, but the
    # dataset below is built from `validation_filenames` (with underscore).
    # Confirm whether that name is defined elsewhere or whether the
    # placeholder was meant to be passed here.
    validationfilenames = tf.placeholder(tf.string, shape=[None])
    validationset = input.inputs(validation_filenames, shuffle=False)
    iterator = tf.contrib.data.Iterator.from_structure(
        validationset.output_types, validationset.output_shapes)
    validation_init_op = iterator.make_initializer(validationset)
    examples, labels, results = iterator.get_next()
    features = model.feature_extractor(examples)
    logits = model.policy_model(examples, features)
    result_prediction = model.result_model(examples, features)
    prediction = tf.nn.softmax(logits)
    labels = tf.one_hot(labels, model.NUM_LABELS, dtype=tf.float32)
import cryptography
from cryptography.fernet import Fernet
from input import inputs

# Collect the user's answers; the script reads the 'message' and
# 'decryptToggle' entries from the returned mapping.
result = {}
result = inputs()
print('\n')

# Generate a fresh Fernet key and show it so the user can keep it.
key = Fernet.generate_key()
print('Key: ')
print(key)
print('Store Above Key For Future Use')

f = Fernet(key)
encrypted = f.encrypt(result['message'])
# The round-trip decryption is always performed, even if it is not printed.
decrypted = f.decrypt(encrypted)

decryptToggle = result['decryptToggle']
if decryptToggle in ('Y', 'N'):
    # Both valid answers show the ciphertext; only 'Y' also shows the
    # decrypted message.
    print('\n')
    print('Encrypted Message: ')
    print(encrypted)
    if decryptToggle == 'Y':
        print('\n')
        print('Decrypted Message: ')
        print(decrypted)
else:
    print('Invalid Y/N Input')
def test():
    """Evaluate the latest checkpoint and plot responses to one label.

    Restores the latest checkpoint from ``FLAGS.train_dir``, runs the model on
    every whole batch of ``FLAGS.dataset``, prints the averaged losses, and
    writes error histograms, a label log and a response plot for the label
    selected by ``FLAGS.var_index`` into ``FLAGS.test_dir``.
    """
    # label names
    if FLAGS.num_labels == 4:
        LABEL_NAMES = ['creatine', 'gaba', 'glutamate', 'glutamine']
    elif FLAGS.num_labels == 12:
        LABEL_NAMES = [
            'choline-truncated', 'creatine', 'gaba', 'glutamate', 'glutamine',
            'glycine', 'lactate', 'myo-inositol', 'NAAG-truncated',
            'n-acetylaspartate', 'phosphocreatine', 'taurine'
        ]
    elif FLAGS.num_labels == 16:
        LABEL_NAMES = [
            'acetate', 'aspartate', 'choline-truncated', 'creatine', 'gaba',
            'glutamate', 'glutamine', 'histamine', 'histidine', 'lactate',
            'myo-inositol', 'n-acetylaspartate', 'scyllo-inositol',
            'succinate', 'taurine', 'valine'
        ]
    else:
        # Unknown label count: fall back to numeric label names.
        LABEL_NAMES = list(range(FLAGS.num_labels))
    # get dataset files
    labels_file = os.path.join(FLAGS.dataset, 'labels/labels.npy')
    files = helper.listdir_files(FLAGS.dataset, recursive=False,
                                 filter_ext=['.npy'], encoding=True)
    steps_per_epoch = len(files) // FLAGS.batch_size
    # Only whole batches are evaluated; leftover files are dropped.
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch
    files = files[:epoch_size]
    with tf.Graph().as_default():
        # setup global tensorflow state
        sess, summary_writer = setup()
        # pre-processing for input
        with tf.device('/cpu:0'):
            spectrum, labels_gt = inputs(FLAGS, files, labels_file,
                                         epoch_size, is_testing=True)
        # model inference and losses
        labels_pd = model.inference(spectrum, is_training=False)
        ret_loss = list(get_losses(labels_gt, labels_pd))
        ret_labels = [labels_gt, labels_pd]
        # restore variables from checkpoint
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))
        # run session
        # Fetch list layout: [losses..., errors, labels_gt, labels_pd]. The
        # last value from get_losses is treated as the "errors" array, so
        # ret_loss is shortened to the accumulated losses only.
        ret = ret_loss + ret_labels
        ret_loss = ret[:len(ret_loss) - 1]
        sum_loss = [0 for _ in range(len(ret_loss))]
        all_errors = []
        # These names are rebound from tensors to result lists; the tensors
        # stay reachable through `ret`.
        labels_gt = []
        labels_pd = []
        for i in range(max_steps):
            cur_ret = sess.run(ret)
            cur_loss = cur_ret[0:len(ret_loss)]
            cur_errors = cur_ret[len(ret_loss)]
            labels_gt.append(cur_ret[len(ret_loss) + 1])
            labels_pd.append(cur_ret[len(ret_loss) + 2])
            all_errors.append(cur_errors)
            # monitor losses
            for _ in range(len(ret_loss)):
                sum_loss[_] += cur_loss[_]
            #print('batch {}, MSE {}, MAD {}, MSE valid {}, MAD valid {}, False Positives {}, False Negatives {}'.format(i, *cur_loss))
        # NOTE(review): the session is not closed if anything above raises.
        sess.close()
    # summary
    mean_loss = [l / max_steps for l in sum_loss]
    # Entries 2-3 are divided by the batch size and entries 4-5 by
    # batch size * label count; the report below prints them as accuracies
    # and FP/FN rates.
    mean_loss[2] /= FLAGS.batch_size
    mean_loss[3] /= FLAGS.batch_size
    mean_loss[4] /= FLAGS.batch_size * FLAGS.num_labels
    mean_loss[5] /= FLAGS.batch_size * FLAGS.num_labels
    print('{} Metabolites'.format(FLAGS.num_labels))
    print('MSE threshold {}'.format(FLAGS.mse_thresh))
    print('MAD threshold {}'.format(FLAGS.mad_thresh))
    print(
        'Totally {} Samples, MSE {}, MAD {}, MSE accuracy {}, MAD accuracy {}, FP rate {}, FN rate {}'
        .format(epoch_size, *mean_loss))
    # errors
    import matplotlib.pyplot as plt
    all_errors = np.concatenate(all_errors, axis=0)
    # One histogram per label column of the error array.
    for _ in range(FLAGS.num_labels):
        errors = all_errors[:, _]
        plt.figure()
        plt.title('Error Ratio Histogram - {}'.format(LABEL_NAMES[_]))
        plt.hist(errors, bins=100, range=(0, 1))
        plt.savefig(
            os.path.join(FLAGS.test_dir,
                         'hist{}_{}.png'.format(FLAGS.var_index, _)))
        plt.close()
    # labels
    labels_gt = np.concatenate(labels_gt, axis=0)
    labels_pd = np.concatenate(labels_pd, axis=0)
    with open(os.path.join(FLAGS.test_dir,
                           'labels{}.log'.format(FLAGS.var_index)),
              mode='w') as file:
        file.write('Labels (Ground Truth)\nLabels (Predicted)\n\n')
        for _ in range(epoch_size):
            file.write('{}\n{}\n\n'.format(labels_gt[_], labels_pd[_]))
    # draw plots
    # x axis: ground-truth values of the label chosen by FLAGS.var_index;
    # one curve per predicted label.
    plt.figure()
    plt.title('Predicted Responses to {}'.format(LABEL_NAMES[FLAGS.var_index]))
    x = labels_gt[:, FLAGS.var_index]
    for l in range(FLAGS.num_labels):
        y = labels_pd[:, l]
        plt.plot(x, y, label=LABEL_NAMES[l])
    plt.legend(loc=2)
    plt.savefig(
        os.path.join(FLAGS.test_dir, 'val{}.png'.format(FLAGS.var_index)))
    plt.close()
    print('')
'harrison_dir', '/home/ardiya/HARRISON', 'Directory containing Benchmark Dataset(img_placeholder, data_list, and tag_list.' ) flags.DEFINE_string('train_dir', '/home/ardiya/HashtagPrediction', 'Directory with the training data.') flags.DEFINE_string('train_file', 'harrison_train.tfrecords', 'Filename of the training data') flags.DEFINE_string('test_file', 'harrison_test.tfrecords', 'Filename of the test data') if __name__ == '__main__': with tf.Graph().as_default(): tf.logging.set_verbosity(tf.logging.INFO) batch_x, batch_labels, batch_y = input.inputs(filename=os.path.join( FLAGS.train_dir, FLAGS.test_file), num_epochs=1) logits = model.inference(batch_x, is_training=False) batch_y = tf.cast(batch_y, tf.int64) names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ 'accuracy': slim.metrics.streaming_accuracy(logits, batch_y), 'eval/precision/1': slim.metrics.streaming_sparse_precision_at_k( logits, batch_labels, 1), 'eval/precision/2': slim.metrics.streaming_sparse_precision_at_k( logits, batch_labels, 2), 'eval/precision/3':
def test():
    """Evaluate the super-resolution model on the images in FLAGS.dataset.

    Steps performed:
      1. List the image files, truncate the list to a whole number of
         batches and fetch every (LR, HR) batch once in a separate graph.
      2. Rebuild the model on placeholders, restore the latest checkpoint
         from FLAGS.train_dir, accumulate the losses over all batches and
         write LR / HR / SR / bicubic PNG files into FLAGS.test_dir.
      3. If FLAGS.progress is set, evaluate every ``model_*`` checkpoint
         found in FLAGS.train_dir on the same cached batches and store the
         statistics as ``stats.npy`` and ``stats.png`` in FLAGS.test_dir.

    The function takes no arguments and returns nothing; all configuration
    is read from FLAGS.
    """
    # get dataset; drop the tail so only whole batches remain
    files = helper.listdir_files(FLAGS.dataset,
                                 filter_ext=['.jpeg', '.jpg', '.png'])
    steps_per_epoch = len(files) // FLAGS.batch_size
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch
    files = files[:epoch_size]

    # test set: every batch is fetched once and cached, so the latest and
    # the progressive checkpoints are all fed the same arrays
    test_lr_batches = []
    test_hr_batches = []
    with tf.Graph().as_default():
        # dataset
        with tf.device('/cpu:0'):
            test_lr, test_hr = inputs(FLAGS, files, is_testing=True)
        with session() as sess:
            for _ in range(max_steps):
                _lr, _hr = sess.run((test_lr, test_hr))
                test_lr_batches.append(_lr)
                test_hr_batches.append(_hr)

    def _feed(step):
        # The placeholders are addressed by tensor name because the Python
        # variables may be rebound to NHWC-converted tensors below.
        return {
            'InputLR:0': test_lr_batches[step],
            'InputHR:0': test_hr_batches[step]
        }

    stats = []
    with tf.Graph().as_default():
        images_lr = tf.placeholder(tf.float32, name='InputLR')
        images_hr = tf.placeholder(tf.float32, name='InputHR')
        # build model
        model = SRmodel(FLAGS, data_format=FLAGS.data_format,
                        input_range=FLAGS.input_range,
                        output_range=FLAGS.output_range,
                        multiGPU=FLAGS.multiGPU, use_fp16=FLAGS.use_fp16,
                        scaling=FLAGS.scaling,
                        image_channels=FLAGS.image_channels)
        model.build_model(images_lr)
        # a Saver object to restore the variables with mappings
        saver = tf.train.Saver(var_list=model.g_rvars)
        # get output
        images_sr = tf.get_default_graph().get_tensor_by_name('Output:0')
        # losses
        ret_loss = list(get_losses(images_hr, images_sr))
        # post-processing for output
        with tf.device('/cpu:0'):
            # data format conversion
            if FLAGS.data_format == 'NCHW':
                images_lr = utils.image.NCHW2NHWC(images_lr)
                images_hr = utils.image.NCHW2NHWC(images_hr)
                images_sr = utils.image.NCHW2NHWC(images_sr)
            # Bicubic upscaling
            shape = tf.shape(images_lr)
            upsize = [shape[-3] * FLAGS.scaling, shape[-2] * FLAGS.scaling]
            images_bicubic = tf.image.resize_images(
                images_lr, upsize, tf.image.ResizeMethod.BICUBIC,
                align_corners=True)
            # PNGs output
            ret_pngs = []
            ret_pngs.extend(helper.BatchPNG(images_lr, FLAGS.batch_size))
            ret_pngs.extend(helper.BatchPNG(images_hr, FLAGS.batch_size))
            ret_pngs.extend(helper.BatchPNG(images_sr, FLAGS.batch_size))
            ret_pngs.extend(helper.BatchPNG(images_bicubic, FLAGS.batch_size))

        num_losses = len(ret_loss)
        # One file name pattern per image kind, in the same order as ret_pngs.
        # str.format ignores the unused postfix argument where a pattern has
        # only one field.
        name_patterns = ('{:0>5}.0.LR.png', '{:0>5}.1.HR.png',
                         '{:0>5}.2.SR{}.png', '{:0>5}.3.Bicubic.png')

        # The session is managed by a `with` block so it is released even if
        # restoring or evaluating raises.
        with session() as sess:
            # test latest checkpoint
            model_file = tf.train.latest_checkpoint(FLAGS.train_dir)
            saver.restore(sess, model_file)
            ret = ret_loss + ret_pngs
            sum_loss = [0] * num_losses
            for step in range(max_steps):
                cur_ret = sess.run(ret, feed_dict=_feed(step))
                cur_loss = cur_ret[:num_losses]
                cur_pngs = cur_ret[num_losses:]
                # monitor losses
                sum_loss = [s + c for s, c in zip(sum_loss, cur_loss)]
                # images output
                _start = step * FLAGS.batch_size
                _range = range(_start, _start + FLAGS.batch_size)
                ofiles = [
                    os.path.join(FLAGS.test_dir,
                                 pattern.format(i, FLAGS.postfix))
                    for pattern in name_patterns for i in _range
                ]
                helper.WriteFiles(cur_pngs, ofiles)

            # summary
            print('No.{}'.format(FLAGS.postfix))
            mean_loss = [l / max_steps for l in sum_loss]
            # the first loss is converted to PSNR; 100 is reported when it is
            # not positive to avoid dividing by zero
            psnr = 10 * np.log10(1 / mean_loss[0]) if mean_loss[0] > 0 else 100
            print('PSNR (RGB) {}, MAD (RGB) {}, '
                  'SS-SSIM(Y) {}, MS-SSIM (Y) {}, loss {}'.format(
                      psnr, *mean_loss[1:]))

            # test progressively saved models
            if FLAGS.progress:
                mfiles = helper.listdir_files(FLAGS.train_dir,
                                              recursive=False,
                                              filter_ext=['.index'],
                                              encoding=None)
                # strip the '.index' suffix to get the checkpoint prefix
                mfiles = [f[:-6] for f in mfiles if 'model_' in f]
                mfiles.sort()
            else:
                mfiles = []
            for model_file in mfiles:
                # restore variables from saved model
                saver.restore(sess, model_file)
                # run session
                sum_loss = [0] * num_losses
                for step in range(max_steps):
                    cur_loss = sess.run(ret_loss, feed_dict=_feed(step))
                    # monitor losses
                    sum_loss = [s + c for s, c in zip(sum_loss, cur_loss)]
                # summary
                mean_loss = [l / max_steps for l in sum_loss]
                # save stats: the part of the file name after 'model_' is
                # used as the x value of the plot
                model_num = os.path.split(model_file)[1][6:]
                stats.append(np.array([float(model_num)] + mean_loss))

    # save stats
    if FLAGS.progress:
        # imported only when a plot is actually produced
        import matplotlib.pyplot as plt
        stats = np.stack(stats)
        np.save(os.path.join(FLAGS.test_dir, 'stats.npy'), stats)
        fig, ax = plt.subplots()
        ax.set_title('Test Error with Training Progress')
        ax.set_xlabel('training steps')
        ax.set_ylabel('MAD (RGB)')
        ax.set_xscale('linear')
        ax.set_yscale('log')
        # the first checkpoint is left out of the plot (it is still in
        # stats.npy); column 0 is the step, column 2 the second loss
        stats = stats[1:]
        ax.plot(stats[:, 0], stats[:, 2])
        plt.tight_layout()
        plt.savefig(os.path.join(FLAGS.test_dir, 'stats.png'))
        plt.close()
    print('')
# Command-line flags: dataset location, working directory and the names of
# the TFRecord files inside that directory.
flags.DEFINE_string('harrison_dir', '../HARRISON', 'Directory containing Benchmark Dataset(img_placeholder, data_list, and tag_list.')
flags.DEFINE_string('train_dir', '../HashtagPrediction', 'Directory with the training data.')
flags.DEFINE_string('train_file', 'harrison_train.tfrecords', 'Filename of the training data')
flags.DEFINE_string('test_file', 'harrison_test.tfrecords', 'Filename of the test data')

# Script entry point: builds the training graph from the training TFRecord
# file, initialises the variables and prints the planned number of steps.
# NOTE(review): train_op and init_fn are created but not used in the visible
# part of this script - the training loop itself appears to be missing here.
if __name__ == '__main__':
    tf.reset_default_graph()
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)
        # the second value returned by input.inputs is not needed here
        batch_x, _, batch_y = input.inputs(
            filename=os.path.join(FLAGS.train_dir, FLAGS.train_file),
            num_epochs=FLAGS.num_epochs)
        logits = model.inference(batch_x)
        losses = model.loss(logits, batch_y)
        train_op = model.training(losses)
        init_fn = model.get_init_fn()
        with tf.Session() as sess:
            with slim.queues.QueueRunners(sess):
                # both global and local variables are initialised
                init_op = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables())
                sess.run(init_op)
                # NOTE(review): 57381 is hard-coded; presumably the number of
                # training examples - confirm against the dataset.
                number_of_steps = (57381*FLAGS.num_epochs)//FLAGS.batch_size
                print("Start training with total: %d steps from %d epoch and %d batch"%(number_of_steps, FLAGS.num_epochs, FLAGS.batch_size))
def test():
    """Evaluate the image-compression model on the images in FLAGS.dataset.

    The file list is truncated to a whole number of batches. The model is
    built on the input pipeline, the latest checkpoint in FLAGS.train_dir is
    restored, the losses are averaged over all batches and the source,
    decoded and encoded images are written as PNG files to FLAGS.test_dir.

    If FLAGS.progress is set, every ``model_*`` checkpoint in FLAGS.train_dir
    is evaluated as well, each in its own session, and the statistics are
    saved as ``stats.npy`` and ``stats.png`` in FLAGS.test_dir.

    The function takes no arguments and returns nothing; all configuration
    is read from FLAGS.
    """
    files = helper.listdir_files(FLAGS.dataset,
                                 filter_ext=['.jpeg', '.jpg', '.png'])
    steps_per_epoch = len(files) // FLAGS.batch_size
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch
    files = files[:epoch_size]

    stats = []
    with tf.Graph().as_default():
        # pre-processing for input
        with tf.device('/cpu:0'):
            images_src = inputs(FLAGS, files, is_testing=True)
        # build model
        model = ICmodel(FLAGS, data_format=FLAGS.data_format,
                        input_range=FLAGS.input_range,
                        output_range=FLAGS.output_range,
                        multiGPU=FLAGS.multiGPU, use_fp16=FLAGS.use_fp16,
                        image_channels=FLAGS.image_channels,
                        input_height=FLAGS.patch_height,
                        input_width=FLAGS.patch_width,
                        batch_size=FLAGS.batch_size)
        model.build_model(images_src, None)
        # a saver object used here to restore the model variables
        saver = tf.train.Saver(var_list=model.g_svars)
        # get output
        images_enc = tf.get_default_graph().get_tensor_by_name('Encoded:0')
        images_dec = tf.get_default_graph().get_tensor_by_name('Decoded:0')
        # losses
        ret_loss = list(get_losses(images_src, images_dec, images_enc))
        # post-processing for output
        with tf.device('/cpu:0'):
            # data format conversion
            if FLAGS.data_format == 'NCHW':
                images_src = utils.image.NCHW2NHWC(images_src)
                images_dec = utils.image.NCHW2NHWC(images_dec)
                images_enc = utils.image.NCHW2NHWC(images_enc)
            # PNGs output
            ret_pngs = []
            ret_pngs.extend(helper.BatchPNG(images_src, FLAGS.batch_size))
            ret_pngs.extend(helper.BatchPNG(images_dec, FLAGS.batch_size))
            ret_pngs.extend(helper.BatchPNG(images_enc, FLAGS.batch_size))

        num_losses = len(ret_loss)
        # One file name pattern per image kind, in the same order as ret_pngs.
        # str.format ignores the unused postfix argument for the first one.
        name_patterns = ('{:0>5}.0.src.png', '{:0>5}.1.dec{}.png',
                         '{:0>5}.2.enc{}.png')

        # test latest checkpoint
        with setup() as sess:
            # restore variables from latest checkpoint
            model_file = tf.train.latest_checkpoint(FLAGS.train_dir)
            saver.restore(sess, model_file)
            # run session
            ret = ret_loss + ret_pngs
            sum_loss = [0] * num_losses
            for step in range(max_steps):
                cur_ret = sess.run(ret)
                cur_loss = cur_ret[:num_losses]
                cur_pngs = cur_ret[num_losses:]
                # monitor losses
                sum_loss = [s + c for s, c in zip(sum_loss, cur_loss)]
                # images output
                _start = step * FLAGS.batch_size
                _range = range(_start, _start + FLAGS.batch_size)
                ofiles = [
                    os.path.join(FLAGS.test_dir,
                                 pattern.format(i, FLAGS.postfix))
                    for pattern in name_patterns for i in _range
                ]
                helper.WriteFiles(cur_pngs, ofiles)

        # summary
        print('No.{}'.format(FLAGS.postfix))
        mean_loss = [l / max_steps for l in sum_loss]
        # the first loss is converted to PSNR; 100 is reported when it is
        # not positive to avoid dividing by zero
        psnr = 10 * np.log10(1 / mean_loss[0]) if mean_loss[0] > 0 else 100
        print('PSNR (RGB) {}, MAD (RGB) {}, SS-SSIM(Y) {}, MS-SSIM (Y) {}, Entropy {}'.format(
            psnr, *mean_loss[1:]))

        # test progressively saved models
        if FLAGS.progress:
            mfiles = helper.listdir_files(FLAGS.train_dir, recursive=False,
                                          filter_ext=['.index'],
                                          encoding=None)
            # strip the '.index' suffix to get the checkpoint prefix
            mfiles = [f[:-6] for f in mfiles if 'model_' in f]
            mfiles.sort()
        else:
            mfiles = []
        for model_file in mfiles:
            with setup() as sess:
                # restore variables from saved model
                saver.restore(sess, model_file)
                # run session
                sum_loss = [0] * num_losses
                for step in range(max_steps):
                    cur_loss = sess.run(ret_loss)
                    # monitor losses
                    sum_loss = [s + c for s, c in zip(sum_loss, cur_loss)]
                # summary
                mean_loss = [l / max_steps for l in sum_loss]
                # save stats: the part of the file name after 'model_' is
                # used as the x value of the plot
                model_num = os.path.split(model_file)[1][6:]
                stats.append(np.array([float(model_num)] + mean_loss))

    # save stats
    if FLAGS.progress:
        # imported only when a plot is actually produced
        import matplotlib.pyplot as plt
        stats = np.stack(stats)
        np.save(os.path.join(FLAGS.test_dir, 'stats.npy'), stats)
        fig, ax = plt.subplots()
        ax.set_title('Test Error with Training Progress')
        ax.set_xlabel('training steps')
        ax.set_ylabel('MAD (RGB)')
        ax.set_xscale('linear')
        ax.set_yscale('log')
        # the first checkpoint is left out of the plot (it is still in
        # stats.npy); column 0 is the step, column 2 the second loss
        stats = stats[1:]
        ax.plot(stats[:, 0], stats[:, 2])
        # No lower y limit of 0 is requested: the axis is log-scaled, where
        # a non-positive limit is invalid.
        plt.tight_layout()
        plt.savefig(os.path.join(FLAGS.test_dir, 'stats.png'))
        plt.close()
    print('')
def train():
    """Train the SSD model on FLAGS.num_gpus towers with periodic validation.

    Graph construction:
      * one loss / gradient tower per GPU, fed from a shared prefetch queue;
        tower gradients are combined with ``average_gradients``;
      * an inference branch on ``/gpu:0`` that is fed by ``input.inputs(1)``
        and is used for validation.

    Training loop (FLAGS.max_steps iterations):
      * every step runs the train op and fetches the losses in one call;
      * every 100 steps the progress is printed and the merged summary is
        written through the Supervisor;
      * every ``int(FLAGS.num_train / FLAGS.batch_size) * 4`` steps (except
        step 0) FLAGS.num_validation validation samples are evaluated, the
        per-class AP and the mAP are printed and a checkpoint is saved to
        FLAGS.ckpt_save_path.

    If FLAGS.ckpt_save_path holds no checkpoint yet, variables (except
    ``global_step``) are initialised from FLAGS.pretrained_ckpt_path.

    Raises:
        AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        lr = FLAGS.learning_rate
        opt = tf.train.RMSPropOptimizer(lr, decay=0.9, momentum=0.9,
                                        epsilon=1)

        # Get images and labels
        # for train
        with tf.name_scope('train_images'):
            images, labels, boxes, num_objects = input.distorted_inputs(
                FLAGS.batch_size)
        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [images, labels, boxes, num_objects],
            capacity=2 * FLAGS.num_gpus)

        tower_grads = []
        tower_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in xrange(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        (image_batch, label_batch, box_batch,
                         num_objects_batch) = batch_queue.dequeue()
                        cls_loss, loc_loss = ssd.loss(image_batch,
                                                      label_batch,
                                                      box_batch,
                                                      num_objects_batch)
                        loss = cls_loss + loc_loss
                        regularization_loss = tf.add_n(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))
                        loss = loss + regularization_loss
                        # later towers share the variables of the first one
                        tf.get_variable_scope().reuse_variables()
                        # summaries / update ops of the last tower are the
                        # ones that survive the loop
                        summaries = tf.get_collection(
                            tf.GraphKeys.SUMMARIES, scope)
                        update_ops = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, scope)
                        grads = opt.compute_gradients(loss)
                        tower_grads.append(grads)
                        tower_losses.append(loss)
        grads = average_gradients(tower_grads)

        # validation
        val_images, val_labels, val_boxes, val_num_objects = input.inputs(1)
        with tf.device('/gpu:0'):
            with tf.name_scope('eval_images'):
                cls_pred, loc_pred = ssd.inference(val_images)

        summaries.extend(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'train_images'))
        summaries.extend(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'eval_images'))
        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        with tf.control_dependencies(update_ops):
            train_op = opt.apply_gradients(grads, global_step=global_step)

        for var in tf.trainable_variables():
            print(var.name)
            summaries.append(tf.summary.histogram(var.op.name, var))

        saver = tf.train.Saver(max_to_keep=20)
        summary_op = tf.summary.merge(summaries)

        pretrained_ckpt_path = FLAGS.pretrained_ckpt_path
        if not tf.train.latest_checkpoint(FLAGS.ckpt_save_path):
            # nothing to resume from: start from the pre-trained weights
            print('pretrained ckpt')
            exclude_layers = ['global_step']
            restore_variables = slim.get_variables_to_restore(
                exclude=exclude_layers)
            init_fn = slim.assign_from_checkpoint_fn(
                pretrained_ckpt_path, restore_variables,
                ignore_missing_vars=True)
        else:
            print('training ckpt')
            init_fn = None

        # summary_op=None and save_model_secs=0: summaries and checkpoints
        # are written explicitly in the loop below
        sv = tf.train.Supervisor(logdir=FLAGS.ckpt_save_path,
                                 summary_op=None,
                                 saver=saver,
                                 save_model_secs=0,
                                 init_fn=init_fn)
        config_ = tf.ConfigProto(allow_soft_placement=True)
        config_.gpu_options.per_process_gpu_memory_fraction = 0.4

        with sv.managed_session(config=config_) as sess:
            # Start the queue runners.
            sv.start_queue_runners(sess=sess)

            validation_period = int(FLAGS.num_train / FLAGS.batch_size) * 4

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                # Train and fetch the losses in ONE call. Separate calls
                # dequeue a second batch, so the reported loss would belong
                # to data the step was not trained on.
                _, loss_value, cls_loss_value, loc_loss_value = sess.run(
                    [train_op, loss, cls_loss, loc_loss])
                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                if step % 100 == 0:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / FLAGS.num_gpus
                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    print(cls_loss_value, loc_loss_value)

                    # hand the evaluated summary to the Supervisor so that
                    # it is actually written to the event file
                    summary_str = sess.run(summary_op)
                    sv.summary_computed(sess, summary_str, global_step=step)

                if step % validation_period == 0 and step != 0:
                    print('start validation')
                    entire_TF = []
                    entire_score = []
                    entire_numGT = []
                    for val_step in range(FLAGS.num_validation):
                        if val_step % 500 == 0:
                            print(val_step, ' / ', FLAGS.num_validation)
                        (val_GT_boxes, val_GT_cls, val_loc_pred,
                         val_cls_pred, val_num_GT_objects) = sess.run([
                             val_boxes, val_labels, loc_pred, cls_pred,
                             val_num_objects
                         ])
                        TF_array, TF_score, num_GT = \
                            validation.one_image_validation(
                                val_GT_boxes, val_GT_cls, val_loc_pred,
                                val_cls_pred, val_num_GT_objects)
                        if len(entire_TF) == 0:
                            # first sample: take its results as the
                            # accumulators
                            entire_TF = TF_array
                            entire_score = TF_score
                            entire_numGT = num_GT
                        else:
                            # later samples: append per class
                            for k_cls in range(FLAGS.num_classes - 1):
                                entire_TF[k_cls] = np.concatenate(
                                    [entire_TF[k_cls], TF_array[k_cls]],
                                    axis=0)
                                entire_score[k_cls] = np.concatenate(
                                    [entire_score[k_cls], TF_score[k_cls]],
                                    axis=0)
                                entire_numGT[k_cls] += num_GT[k_cls]

                    entire_AP_sum = validation.compute_AP(
                        entire_score, entire_TF, entire_numGT)
                    # classes whose AP is exactly 0 are excluded from the mean
                    mAP = np.sum(np.array(entire_AP_sum)) / np.sum(
                        np.array(entire_AP_sum) != 0)
                    print('class AP : ', entire_AP_sum)
                    print('mAP : ', mAP)

                    # NOTE(review): the checkpoint is written right after
                    # each validation pass - confirm this is the intended
                    # save schedule.
                    checkpoint_path = os.path.join(FLAGS.ckpt_save_path,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
def train(file_names):
    """Build the training graph for ``file_names`` and run the training loop.

    Parameters:
        file_names: passed unchanged to ``input.inputs`` together with
            FLAGS.batch_size to create the image / label batches.

    The loop runs FLAGS.max_steps steps. Progress is printed every 10 steps,
    summaries are written to FLAGS.train_dir every 100 steps, and a
    checkpoint is saved every 500 steps and after the last step.
    """
    with tf.Graph().as_default():
        images, labels = input.inputs(file_names, FLAGS.batch_size)
        global_step = tf.Variable(0, trainable=False)

        # forward pass, loss and optimisation op
        logits = model.inference(images)
        loss = model.loss(logits, labels)
        train_op = model.train(loss, global_step)

        # saver, summaries and initialiser, created in this order
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        # session set-up
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        final_step = FLAGS.max_steps - 1
        report_format = (
            '%s: step %d, loss = %.4f (%.1f examples/sec; %.3f '
            'sec/batch)')

        for step in xrange(FLAGS.max_steps):
            tic = time.time()
            _, loss_value = sess.run([train_op, loss])
            elapsed = time.time() - tic

            # progress report
            if step % 10 == 0:
                throughput = FLAGS.batch_size / elapsed
                print(report_format % (datetime.now(), step, loss_value,
                                       throughput, float(elapsed)))

            # summaries
            if step % 100 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # periodic checkpoint, plus one after the very last step
            if step % 500 == 0 or step == final_step:
                saver.save(sess, checkpoint_path, global_step=step)
def train():
    """Train the super-resolution model with separate ``d`` and ``g`` train ops.

    The shuffled file list from FLAGS.dataset is truncated to a whole number
    of batches. One call of the inner ``run_sess`` runs ``d_train_op``
    FLAGS.critic_iters times followed by one run of ``g_train_op``; the step
    counts are therefore divided by ``FLAGS.critic_iters + 1``.

    Inside a ``MonitoredTrainingSession`` (checkpoints in FLAGS.train_dir)
    the loop
      * optionally restores a pre-trained model from FLAGS.pretrain_dir,
      * writes a Chrome trace for the first steps that are multiples of
        FLAGS.timeline_steps (when that flag is positive),
      * saves ``model_<step>`` every FLAGS.save_steps steps (when positive),
    until one of the hooks requests a stop.

    The function takes no arguments and returns nothing.
    """
    import random

    files = helper.listdir_files(FLAGS.dataset,
                                 filter_ext=['.jpeg', '.jpg', '.png'],
                                 encoding=True)
    random.shuffle(files)
    steps_per_epoch = len(files) // FLAGS.batch_size
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch * FLAGS.num_epochs
    files = files[:epoch_size]
    # every call of run_sess() runs critic_iters + 1 training ops
    steps_per_epoch //= FLAGS.critic_iters + 1
    max_steps //= FLAGS.critic_iters + 1
    print('epoch size: {}\n{} steps per epoch\n{} epochs\n{} steps'.format(
        epoch_size, steps_per_epoch, FLAGS.num_epochs, max_steps))

    with tf.Graph().as_default():
        # pre-processing for input
        with tf.device('/cpu:0'):
            images_lr, images_hr = inputs(FLAGS, files, is_training=True)
        # build model
        model = SRmodel(FLAGS, data_format=FLAGS.data_format,
                        input_range=FLAGS.input_range,
                        output_range=FLAGS.output_range,
                        multiGPU=FLAGS.multiGPU, use_fp16=FLAGS.use_fp16,
                        scaling=FLAGS.scaling,
                        image_channels=FLAGS.image_channels,
                        input_height=FLAGS.patch_height // FLAGS.scaling,
                        input_width=FLAGS.patch_width // FLAGS.scaling)
        gd_loss = model.build_train(images_lr, images_hr)
        # training step and op
        global_step = tf.train.get_or_create_global_step()
        g_train_op, d_train_op = model.train(global_step)
        # a saver object which will save all the variables
        saver = tf.train.Saver(var_list=model.g_svars, max_to_keep=1 << 16,
                               save_relative_paths=True)
        # save the graph
        saver.export_meta_graph(os.path.join(FLAGS.train_dir, 'model.meta'),
                                as_text=False, clear_devices=True,
                                clear_extraneous_savers=True)
        # monitored session
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            log_device_placement=FLAGS.log_device_placement)
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=max_steps),
                    tf.train.NanTensorHook(gd_loss[0]),
                    tf.train.NanTensorHook(gd_loss[1]),
                    LoggerHook(gd_loss, steps_per_epoch)
                ],
                config=config,
                log_step_count_steps=FLAGS.log_frequency) as mon_sess:
            # options
            sess = helper.get_session(mon_sess)
            if FLAGS.timeline_steps > 0:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
            # restore pre-trained model
            if FLAGS.pretrain_dir and not FLAGS.restore:
                saver.restore(sess, os.path.join(FLAGS.pretrain_dir, 'model'))

            # sessions
            def run_sess(options=None, run_metadata=None):
                # d_train_op goes through the session returned by
                # helper.get_session; only the g_train_op run goes through
                # the monitored session
                for _ in range(FLAGS.critic_iters):
                    sess.run(d_train_op)
                mon_sess.run(g_train_op, options=options,
                             run_metadata=run_metadata)

            # run sessions
            while not mon_sess.should_stop():
                step = tf.train.global_step(sess, global_step)
                trace_this_step = (
                    FLAGS.timeline_steps > 0 and
                    step // FLAGS.timeline_steps < 10 and
                    step % FLAGS.timeline_steps == 0)
                if trace_this_step:
                    run_sess(run_options, run_metadata)
                    # Create the Timeline object, and write it to a json
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    trace_path = os.path.join(
                        FLAGS.train_dir,
                        'timeline_{:0>7}.json'.format(step))
                    # 'w', not 'a': appending to a trace left over from an
                    # earlier run would produce two concatenated JSON
                    # documents, which is not valid JSON
                    with open(trace_path, 'w') as f:
                        f.write(ctf)
                else:
                    run_sess()
                # `step` is the value read before this iteration's update
                if FLAGS.save_steps > 0 and step % FLAGS.save_steps == 0:
                    saver.save(sess,
                               os.path.join(FLAGS.train_dir,
                                            'model_{:0>7}'.format(step)),
                               write_meta_graph=False, write_state=False)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) grads = opt.compute_gradients(loss) tower_grads.append(grads) tower_losses.append(loss) total_loss = tf.reduce_mean(tower_losses) summaries.append(tf.summary.scalar('total_loss', total_loss)) grads = average_gradients(tower_grads) #validation with tf.name_scope('eval_images'): val_images, val_labels = input.inputs(1) with tf.device('/gpu:0'): pred_val, labels_val = dla_34.inference(val_images, val_labels) summaries.extend(tf.get_collection(tf.GraphKeys.SUMMARIES, 'train_images')) summaries.extend(tf.get_collection(tf.GraphKeys.SUMMARIES, 'eval_images')) # Add a summary to track the learning rate. summaries.append(tf.summary.scalar('learning_rate', lr)) for grad, var in grads: if grad is not None: summaries.append( tf.summary.histogram(var.op.name + '/gradients', grad)) with tf.control_dependencies(update_ops):
def train():
    """Train the image-compression model, optionally with validation-driven
    learning rate decay.

    The shuffled file list from FLAGS.dataset is truncated to a whole number
    of batches and the model is trained inside a ``MonitoredTrainingSession``
    (checkpoints in FLAGS.train_dir) until one of its hooks requests a stop.

    Validation-driven decay is active when ``FLAGS.lr_decay_steps < 0`` and
    ``FLAGS.lr_decay_factor != 0``. In that mode
      * a fixed set of validation batches is fetched once before training;
      * every ``abs(FLAGS.lr_decay_steps)`` global steps ``g_loss`` is
        averaged over those batches and appended to a history;
      * once enough new entries exist, the mean + median of the most recent
        window of losses is compared with that of the window before it; if
        it did not decrease, the learning rate decay op and the window
        update op are run.

    Other periodic actions: Chrome traces for the first steps that are
    multiples of FLAGS.timeline_steps (when positive) and ``model_<step>``
    snapshots every FLAGS.save_steps steps (when positive).

    The function takes no arguments and returns nothing.
    """
    import random

    files = helper.listdir_files(FLAGS.dataset,
                                 filter_ext=['.jpeg', '.jpg', '.png'])
    random.shuffle(files)
    steps_per_epoch = len(files) // FLAGS.batch_size
    epoch_size = steps_per_epoch * FLAGS.batch_size
    max_steps = steps_per_epoch * FLAGS.num_epochs
    files = files[:epoch_size]
    print('epoch size: {}\n{} steps per epoch\n{} epochs\n{} steps'.format(
        epoch_size, steps_per_epoch, FLAGS.num_epochs, max_steps))

    # a negative lr_decay_steps (with a non-zero factor) selects the
    # validation-driven learning rate decay
    use_val_decay = FLAGS.lr_decay_steps < 0 and FLAGS.lr_decay_factor != 0

    # validation set
    if use_val_decay:
        val_size = min(FLAGS.batch_size * 50,
                       epoch_size // (10 * FLAGS.batch_size) * FLAGS.batch_size)
        val_batches = val_size // FLAGS.batch_size
        # take evenly spaced files from the training list
        val_files = files[::(epoch_size + val_size - 1) // val_size]
        val_src_batches = []
        val_losses = []
        with tf.Graph().as_default():
            # dataset
            with tf.device('/cpu:0'):
                val_src = inputs(FLAGS, val_files, is_training=True)
            # session
            gpu_options = tf.GPUOptions(allow_growth=True)
            config = tf.ConfigProto(gpu_options=gpu_options)
            with tf.Session(config=config) as sess:
                for _ in range(val_batches):
                    _src = sess.run((val_src))
                    val_src_batches.append(_src)

    # main training graph
    with tf.Graph().as_default():
        # pre-processing for input
        with tf.device('/cpu:0'):
            images_src = inputs(FLAGS, files, is_training=True)
        # build model
        model = ICmodel(FLAGS, data_format=FLAGS.data_format,
                        input_range=FLAGS.input_range,
                        output_range=FLAGS.output_range,
                        multiGPU=FLAGS.multiGPU, use_fp16=FLAGS.use_fp16,
                        image_channels=FLAGS.image_channels,
                        input_height=FLAGS.patch_height,
                        input_width=FLAGS.patch_width,
                        batch_size=FLAGS.batch_size)
        g_loss, d_loss = model.build_train(images_src)

        # lr decay operator
        def _get_val_window(lr, lr_last, lr_decay_op):
            # Creates the validation window variable and an op that, after
            # lr_decay_op has run, rescales the window increment and adds it
            # to the window. Returns (window variable, update op).
            with tf.variable_scope('validation_window') as scope:
                val_window = tf.Variable(30.0, trainable=False,
                                         dtype=tf.float64,
                                         name='validation_window_size')
                val_window_inc_base = (
                    10.0 * np.log(1 - FLAGS.lr_decay_factor) / np.log(0.5))
                val_window_inc = tf.Variable(val_window_inc_base,
                                             trainable=False,
                                             dtype=tf.float64,
                                             name='validation_window_inc')
                tf.summary.scalar('val_window', val_window)
                tf.summary.scalar('val_window_inc', val_window_inc)
                with tf.control_dependencies([lr_decay_op]):
                    def f1_t():
                        # lr > learning_rate * 0.1
                        return tf.assign(val_window_inc,
                                         val_window_inc * 0.9,
                                         use_locking=True)

                    def f2_t():
                        # lr_last > learning_rate * 0.1 >= lr
                        return tf.assign(val_window_inc,
                                         val_window_inc_base,
                                         use_locking=True)

                    def f2_f():
                        # learning_rate * 0.1 >= lr_last
                        return tf.assign(val_window_inc,
                                         val_window_inc * 0.95,
                                         use_locking=True)

                    val_window_inc = tf.cond(
                        lr > FLAGS.learning_rate * 0.1, f1_t,
                        lambda: tf.cond(lr_last > FLAGS.learning_rate * 0.1,
                                        f2_t, f2_f))
                    val_window_op = tf.assign_add(val_window, val_window_inc,
                                                  use_locking=True)
                return val_window, val_window_op

        if use_val_decay:
            g_lr_decay_op = model.lr_decay()
            val_window, val_window_op = _get_val_window(
                model.g_lr, model.g_lr_last, g_lr_decay_op)

        # training step and op
        global_step = tf.train.get_or_create_global_step()
        g_train_op = model.train(global_step)
        # a saver object which will save all the variables
        saver = tf.train.Saver(var_list=model.g_svars, max_to_keep=1 << 16,
                               save_relative_paths=True)
        if FLAGS.pretrain_dir and not FLAGS.restore:
            saver0 = tf.train.Saver(var_list=model.g_rvars)
        # save the graph
        saver.export_meta_graph(os.path.join(FLAGS.train_dir, 'model.meta'),
                                as_text=False, clear_devices=True,
                                clear_extraneous_savers=True)
        # monitored session
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            log_device_placement=FLAGS.log_device_placement)
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[tf.train.StopAtStepHook(last_step=max_steps),
                       tf.train.NanTensorHook(g_loss),
                       tf.train.NanTensorHook(d_loss),
                       LoggerHook([g_loss, d_loss], steps_per_epoch)],
                config=config,
                log_step_count_steps=FLAGS.log_frequency) as mon_sess:
            # options
            sess = helper.get_session(mon_sess)
            if FLAGS.timeline_steps > 0:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
            # restore pre-trained model
            if FLAGS.pretrain_dir and not FLAGS.restore:
                saver0.restore(sess,
                               os.path.join(FLAGS.pretrain_dir, 'model'))
            # get variables; the window variable only exists when
            # validation-driven decay is enabled, so reading it
            # unconditionally would raise a NameError in the other modes
            if use_val_decay:
                val_window_ = int(np.round(sess.run(val_window)))
                lr_decay_last = val_window_

            # training session call
            def run_sess(options=None, run_metadata=None):
                mon_sess.run(g_train_op, options=options,
                             run_metadata=run_metadata)

            # run session
            while not mon_sess.should_stop():
                global_step_ = tf.train.global_step(sess, global_step)
                # collect timeline info
                trace_this_step = (
                    FLAGS.timeline_steps > 0 and
                    global_step_ // FLAGS.timeline_steps < 10 and
                    global_step_ % FLAGS.timeline_steps == 0)
                if trace_this_step:
                    run_sess(run_options, run_metadata)
                    # Create the Timeline object, and write it to a json
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    trace_path = os.path.join(
                        FLAGS.train_dir,
                        'timeline_{:0>7}.json'.format(global_step_))
                    # 'w', not 'a': appending to a trace left over from an
                    # earlier run would produce two concatenated JSON
                    # documents, which is not valid JSON
                    with open(trace_path, 'w') as f:
                        f.write(ctf)
                else:
                    run_sess()
                # save model periodically
                if FLAGS.save_steps > 0 and global_step_ % FLAGS.save_steps == 0:
                    saver.save(sess,
                               os.path.join(
                                   FLAGS.train_dir,
                                   'model_{:0>7}'.format(global_step_)),
                               write_meta_graph=False, write_state=False)
                # test model on validation set; with a negative divisor the
                # remainder is 0 exactly for multiples of abs(lr_decay_steps)
                if use_val_decay and global_step_ % FLAGS.lr_decay_steps == 0:
                    # get validation error on current model
                    val_batches_loss = [
                        sess.run(g_loss, feed_dict={images_src: batch})
                        for batch in val_src_batches[:val_batches]
                    ]
                    val_loss = np.mean(val_batches_loss)
                    val_losses.append(val_loss)
                    print('validation: step {}, val_loss = {:.8}'.format(
                        global_step_, val_loss))
                    # compare recent few losses to previous few losses, decay
                    # learning rate if not decreasing
                    if len(val_losses) >= lr_decay_last + val_window_:
                        val_current = np.sort(val_losses[-val_window_:])
                        val_previous = np.sort(
                            val_losses[-val_window_ * 2:-val_window_])
                        val_current = (np.mean(val_current),
                                       np.median(val_current),
                                       np.min(val_current))
                        val_previous = (np.mean(val_previous),
                                        np.median(val_previous),
                                        np.min(val_previous))
                        print(' statistics of {} losses (mean | median | min)'.format(val_window_))
                        print(' previous: {}'.format(val_previous))
                        print(' current: {}'.format(val_current))
                        # decay when mean + median did not improve
                        if val_current[0] + val_current[1] >= val_previous[0] + val_previous[1]:
                            lr_decay_last = len(val_losses)
                            val_window_, lr_ = sess.run(
                                (val_window_op, g_lr_decay_op))
                            val_window_ = int(np.round(val_window_))
                            print(' learning rate decayed to {}'.format(lr_))