def run_training(data):
    """Train the feed-forward network and evaluate it once training ends.

    Builds the graph through the ``network`` module, runs ``EPOCHS`` passes
    over the training data, writes a summary every 10 epochs and finally
    calls ``validate``.

    Args:
        data: Indexable container; ``data[0]`` is used as the training
            inputs and ``data[1]`` as the matching labels.
    """
    # TODO: read in data
    training_data = data[0]
    labels = data[1]
    # NOTE(review): ``test_data`` is never assigned in this function (the
    # original assignment is an unfinished, commented-out stub), so the
    # validate() call below depends on a module-level ``test_data`` -- confirm.
    with tf.Graph().as_default():
        input_pl, labels_pl = network.placeholder_inputs(BATCH_SIZE)
        # NOTE(review): the logits are built from ``training_data`` rather
        # than ``input_pl``; confirm this is what network.feedforward expects.
        logits = network.feedforward(training_data, NUM_HIDDEN1, NUM_HIDDEN2)
        loss = network.loss(logits, labels_pl)
        train_op = network.training(loss, ETA)
        eval_correct = network.evaluation(logits, labels_pl)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        sess.run(init)
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        training_size = len(training_data)
        for epoch in xrange(EPOCHS):
            start_time = time.time()
            # One optimisation step per BATCH_SIZE-sized slice of the data.
            # (The original spelled the constant ``BATCHSIZE`` here, which
            # raised NameError on the first epoch.)
            for _ in xrange(0, training_size, BATCH_SIZE):
                # NOTE(review): fill_dict receives the full data set, not the
                # current slice; confirm it draws its own batch internally.
                feed_dict = fill_dict(training_data, labels,
                                      input_pl, labels_pl)
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write a summary after every 10 epochs.
            if epoch % 10 == 0:
                print('Epoch %d: loss = %.2f (%.3f sec)'
                      % (epoch, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, epoch)
                summary_writer.flush()

        print('Evaluate with the validation set...')
        validate(sess, eval_correct, input_pl, labels_pl, test_data)
def main(_):
    """Train the policy network on expert rollouts loaded from disk.

    Reads observations/actions from ``ARGS.rollouts_file``, builds the
    network, restores the latest checkpoint when one exists (otherwise
    initialises the variables) and runs ``ARGS.training_steps`` mini-batch
    updates. Every 100 steps a checkpoint is saved and the loss over the
    whole data set is printed.

    Args:
        _: Unused positional argument.

    Raises:
        AssertionError: If observations and actions differ in length, or the
            number of samples is not a multiple of ``ARGS.batch_size``.
    """
    # Read the expert rollouts from disk.
    observations, actions = load_data(ARGS.rollouts_file)
    print("observations shape = " + str(observations.shape))
    print("actions shape = " + str(actions.shape))
    observation_length = observations.shape[1]
    action_length = actions.shape[1]
    assert (observations.shape[0] == actions.shape[0])
    assert (observations.shape[0] % ARGS.batch_size == 0)

    # Assemble the network.
    opl = tf.placeholder(tf.float32, shape=(None, observation_length),
                         name="observations")
    apl = tf.placeholder(tf.float32, shape=(None, action_length),
                         name="actions")
    logits = network.inference(opl, observation_length, ARGS.hidden1,
                               ARGS.hidden2, action_length)
    errors, loss = network.loss(logits, apl)
    global_step, train_op = network.training(loss, ARGS.learning_rate)

    # Initialize the network.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=10)
    sess = tf.Session()
    # A checkpoint directory can exist without containing a checkpoint yet;
    # latest_checkpoint() then returns None and the variables must be
    # initialised from scratch instead of restored.
    latest = None
    if os.path.exists(ARGS.checkpoint_dir):
        latest = tf.train.latest_checkpoint(ARGS.checkpoint_dir)
    if latest is not None:
        saver.restore(sess, latest)
    else:
        sess.run(init)

    # Train the network. Floor division keeps the batch count (and therefore
    # the batch index) an integer on Python 3 as well as Python 2.
    num_batches = observations.shape[0] // ARGS.batch_size
    for step in range(ARGS.training_steps):
        i = step % num_batches
        if i == 0:
            # Start of a pass over the data: reshuffle both arrays in step.
            p = np.random.permutation(observations.shape[0])
            observations = observations[p]
            actions = actions[p]
        start = int(i * ARGS.batch_size)
        stop = int((i + 1) * ARGS.batch_size)
        feed_dict = {opl: observations[start:stop], apl: actions[start:stop]}
        _, loss_value, step_value = sess.run([train_op, loss, global_step],
                                             feed_dict=feed_dict)
        if step % 100 == 0:
            basename = os.path.basename(ARGS.checkpoint_dir)
            checkpoint_file = os.path.join(ARGS.checkpoint_dir, basename)
            saver.save(sess, checkpoint_file, global_step=step_value)
            # Report the loss over the whole data set, not just this batch.
            loss_value = sess.run(loss, feed_dict={
                opl: observations,
                apl: actions
            })
            msg = "step {}; loss = {}".format(step_value, loss_value)
            print(msg)
def train():
    """Run ``FLAGS.max_steps`` training steps, reporting progress every 10."""
    with tf.Graph().as_default():
        step_counter = tf.Variable(0, trainable=False)

        # Inputs -> logits -> loss -> training op, all provided by `network`.
        batch_images, batch_labels = network.distorted_inputs()
        predictions = network.inference(batch_images)
        total_loss = network.loss(predictions, batch_labels)
        optimise = network.train(total_loss, step_counter)

        # The saver is constructed here but not used again in this function.
        saver = tf.train.Saver(tf.all_variables())

        initialise = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(initialise)

        # The input pipeline is queue based; its runners must be started
        # before the first training step.
        tf.train.start_queue_runners(sess=sess)

        for step in xrange(FLAGS.max_steps):
            tick = time.time()
            _, loss_value = sess.run([optimise, total_loss])
            elapsed = time.time() - tick

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 != 0:
                continue

            # Throughput figures for the step that just ran.
            rate = FLAGS.batch_input_size / elapsed
            print('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                  'sec/batch)' % (datetime.now(), step, loss_value,
                                  rate, float(elapsed)))
def train():
    """Optimise the network for ``FLAGS.max_steps`` steps.

    A progress line (time, step, loss, examples/sec, sec/batch) is printed on
    every tenth step. Training aborts with an AssertionError if the loss
    becomes NaN.
    """
    log_template = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Build the model graph.
        images, labels = network.distorted_inputs()
        loss = network.loss(network.inference(images), labels)
        train_op = network.train(loss, global_step)

        # Created for parity with the checkpointing variants of this
        # routine; nothing is saved from here.
        saver = tf.train.Saver(tf.all_variables())

        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)

        for step in xrange(FLAGS.max_steps):
            started = time.time()
            step_result = sess.run([train_op, loss])
            duration = time.time() - started
            loss_value = step_result[1]

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                report = (datetime.now(),
                          step,
                          loss_value,
                          FLAGS.batch_input_size / duration,
                          float(duration))
                print(log_template % report)
def train():
    """Train the network, writing summaries and checkpoints along the way.

    Progress is printed every 10 steps, summaries are written every 100
    steps, and a checkpoint is saved every 1000 steps and on the last step.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Graph construction: inputs -> logits -> loss -> training op.
        images, labels = network.distorted_inputs()
        logits = network.inference(images)
        loss = network.loss(logits, labels)
        train_op = network.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')

        for step in xrange(FLAGS.max_steps):
            began = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - began

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # Console progress.
            if step % 10 == 0:
                examples_per_sec = FLAGS.batch_input_size / duration
                print('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)' % (datetime.now(), step, loss_value,
                                      examples_per_sec, float(duration)))

            # Summaries for the event file.
            if step % 100 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Periodic checkpoint, plus one on the very last step.
            is_last_step = (step + 1) == FLAGS.max_steps
            if is_last_step or step % 1000 == 0:
                saver.save(sess, checkpoint_path, global_step=step)
def train_all(epochs_count=EPOCHS_COUNT, batch_size=BATCH_SIZE):
    """Train the module-level ``model`` and validate after every epoch.

    For each epoch every batch index in ``range(batches_count)`` is run
    through the model, the mean batch loss is printed and ``validate()`` is
    called.

    Args:
        epochs_count: Number of passes over the batches.
        batch_size: Accepted for interface compatibility; not read here.
    """
    model.train()
    for epoch_index in range(epochs_count):
        loss_sum = 0
        for batch_index in range(batches_count):
            features = get_batch_features_tensor(batch_index).to(device)
            predictions = model(features)

            optimizer.zero_grad()
            targets = get_batch_labels_tensor(batch_index)
            batch_loss = network.loss(predictions, targets)
            loss_sum += batch_loss.item()

            batch_loss.backward()
            optimizer.step()

        mean_loss = loss_sum / batches_count
        print("Epoch number:", epoch_index + 1)
        print("Training loss:", mean_loss)
        validate()
        print("---------------------")
def train():
    """Train the network on TFRecord data for ``FLAGS.max_steps`` steps.

    Progress is printed every 10 steps, summaries are written every 50 steps
    and a checkpoint is saved every 1000 steps and on the final step.
    Training aborts with an AssertionError if the loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Read the image data and build batches of images and labels.
        float_image, label = tfrecord.train_data_read(
            tfrecord_path=FLAGS.train_data)
        images, labels = tfrecord.create_batch(float_image, label,
                                               count_num=FLAGS.train_num)
        logits = network.inference(images)
        # Loss computation.
        loss = network.loss(logits, labels)
        # Training op.
        train_op = network.train(loss, global_step)
        # Saver for model checkpoints.
        saver = tf.train.Saver(tf.global_variables())
        # Merge all summary ops.
        summary_op = tf.summary.merge_all()
        # Initialise all variables. Uses the same API generation as
        # tf.global_variables() / tf.summary.* above instead of the
        # deprecated tf.initialize_all_variables().
        init = tf.global_variables_initializer()

        # Start running the graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the input queues.
        tf.train.start_queue_runners(sess=sess)

        # Pass the graph itself; the ``graph_def`` keyword is deprecated.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                              '%.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 50 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save a model checkpoint.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
# Label/value pairs describing the settings of this run.
parameter_table = [["Initial parameters", parameter_path],
                   ["Ranking loss", ranking_loss],
                   ["SPP", spp],
                   ["Pooling", args.pooling],
                   ['Experiment', experiment_name],
                   ['Embedding dim', embedding_dim],
                   ['Batch size', batch_size_trn],
                   ['Initial LR', initial_lr],
                   ['Momentum', momentum_coeff],
                   ['LR Step size', step_size],
                   ['LR Step factor', step_factor],
                   ['Total Steps', total_steps]]

# Input tensors for the training and validation databases.
training_images = inputs(args.training_db, batch_size_trn, None, True, augment_training_data)
test_images = inputs(args.validation_db, batch_size_val, None, False)

# np.load(...).item() unwraps a 0-d object array into the object it holds.
# NOTE(review): presumably a dict of pretrained weights -- confirm; recent
# NumPy versions also need allow_pickle=True to load such a file.
net_data = np.load(parameter_path).item()
var_dict= nw.get_variable_dict(net_data)

with tf.variable_scope("ranker") as scope:
    # Training branch.
    feature_vec = nw.build_alexconvnet(training_images, var_dict, embedding_dim, spp, args.pooling)
    L, p = nw.loss(feature_vec, nw.build_loss_matrix(batch_size_trn), ranking_loss)
    # Everything built after this call reuses the variables created above,
    # so the validation branch shares weights with the training branch.
    scope.reuse_variables()
    val_feature_vec = nw.build_alexconvnet(test_images, var_dict, embedding_dim, spp, args.pooling)
    L_val, p_val = nw.loss(val_feature_vec, nw.build_loss_matrix(batch_size_val), ranking_loss)

# NOTE(review): ``lr`` is created but not passed to AdamOptimizer() in the
# code visible here, so the optimizer runs with its default learning rate
# unless ``lr`` is wired in elsewhere -- confirm.
lr = tf.Variable(initial_lr)
opt = tf.train.AdamOptimizer()
# Only the training loss ``L`` is optimised.
grads = opt.compute_gradients(L)
apply_grad_op = opt.apply_gradients(grads)

init = tf.global_variables_initializer()
saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

# Let the session grow its GPU memory use on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
def train():
    """Train CIFAR-10 for a number of steps.

    Builds the input pipeline and model through the ``network`` module and
    runs the training op inside a ``MonitoredTrainingSession`` until
    ``FLAGS.max_steps`` is reached. Loss and throughput are logged every
    ``FLAGS.log_frequency`` steps.
    """
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force input pipelines to CPU:0 to avoid operations sometimes ending
        # up on GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, labels = network.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = network.inference(images)

        # Calculate loss.
        loss = network.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = network.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    # Time elapsed since the previous log line, i.e. over
                    # the last ``log_frequency`` steps.
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = (FLAGS.log_frequency *
                                        FLAGS.batch_size / duration)
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    # The original message misspelled "examples" and never
                    # closed its parenthesis.
                    format_str = (
                        "%s: step %d, loss = %.2f "
                        "(%.1f examples/sec; %.3f sec/batch)"
                    )
                    print(format_str % (datetime.now(), self._step,
                                        loss_value, examples_per_sec,
                                        sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)
        ) as mon_sess:
            print(
                '********************************Successfully created session'
            )
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def main():
    """Build the network graph and train it until the training set is used up.

    Training runs while ``train_sets.complete`` is false. The loss is printed
    and a summary written every 1000 steps, the test set is evaluated every
    25000 steps and a checkpoint is saved every 50000 steps.
    """
    print("initial model generator")
    with tf.Graph().as_default():
        train_sets = dataset.get_datasets(main_path, EPIWidth,
                                          disp_precision, 'train')
        test_sets = dataset.get_datasets(main_path, EPIWidth,
                                         disp_precision, 'test')

        global_step = tf.Variable(0, trainable=False)

        images_placeholder_v = tf.placeholder(tf.float32,
                                              shape=(None, 9, EPIWidth, 1))
        images_placeholder_u = tf.placeholder(tf.float32,
                                              shape=(None, 9, EPIWidth, 1))
        labels_placeholder = tf.placeholder(tf.int32, shape=None)
        prop_placeholder = tf.placeholder('float')
        phase_train = tf.placeholder(tf.bool, name='phase_train')

        logits = network.inference_ds(images_placeholder_u,
                                      images_placeholder_v,
                                      prop_placeholder, phase_train,
                                      disp_precision)
        logits_softmax = network.softmax(logits)
        loss = network.loss(logits_softmax, labels_placeholder)
        train_op = network.training(loss, 1e-4, global_step)
        # Named ``eval_op`` rather than ``eval`` to avoid shadowing the
        # builtin.
        eval_op = network.evaluation(logits_softmax)

        summary = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        gpu_option = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_option))
        summary_writer = tf.summary.FileWriter(summary_path, sess.graph)
        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.get_checkpoint_state(checkpoint_path)
        if ckpt:
            # NOTE(review): both restore calls are commented out, so nothing
            # is actually restored even though the message says so.
            # saver.restore(sess, checkpoint_path + '/model.ckpt')  # use results trained on another platform
            # saver.restore(sess, ckpt.model_checkpoint_path)  # use results trained locally
            print("restore from checkpoint!")
        else:
            print("no checkpoint found!")

        # ``duration`` below is measured from this point, i.e. it is the
        # total time since training started, not the time of one step.
        start_time = time.time()
        step = 0
        while not train_sets.complete:
            feed_dict = fill_feed_dict(train_sets, images_placeholder_u,
                                       images_placeholder_v,
                                       labels_placeholder, prop_placeholder,
                                       phase_train, 'train')
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 1000 == 0:
                print('Step:%d: loss = %.2f (%.3f sec)'
                      % (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if step % 25000 == 24999:
                print('test Data Eval:')
                do_eval_true(sess, eval_op, logits_softmax,
                             images_placeholder_u, images_placeholder_v,
                             prop_placeholder, phase_train, test_sets)

            if step % 50000 == 49999:
                saver.save(sess, checkpoint_path + '/model.ckpt',
                           global_step=step)

            # Without this increment ``step`` stayed at 0: every iteration
            # logged a summary and the evaluation/checkpoint branches above
            # could never run.
            step += 1
LEARNING_RATE = float(os.environ['LEARNING_RATE'] or 0.1) RESTORE = ((os.environ['RESTORE'] or '') == 'true') or False learning_rate_value = LEARNING_RATE session_config = tf.ConfigProto(log_device_placement=True) session_config.gpu_options.allow_growth = True # this is required if want to use GPU as device. # see: https://github.com/tensorflow/tensorflow/issues/2292 session_config.allow_soft_placement = True if __name__ == "__main__": with tf.Graph().as_default() as g, tf.device(USE_DEVICE): # inference() input, deep_features = network.inference() labels, logits, cross_entropy = network.loss(deep_features) centroid_loss, centroids, spread = network.center_loss( deep_features, labels) # combine the two losses _lambda = tf.placeholder(dtype=tf.float32) total_loss = cross_entropy + _lambda / 2. * centroid_loss learning_rate, train, global_step = network.training(total_loss) eval = network.evaluation(logits, labels) init = tf.initialize_all_variables() with tf.Session(config=session_config) as sess, \ h5py.File(DUMP_FILE, 'a', libver='latest', swmr=True) as h5_file: # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
def run_training():
    """Train network for a number of epochs.

    The training data is held in graph constants and fed through a
    queue-based input pipeline. Training stops early once the loss drops to
    1e-5 or below. Progress and summaries are written every 100 steps and a
    checkpoint every 1000 steps and at the end of each epoch. The queue
    threads and the session are shut down on every exit path.
    """
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # Input data, pin to CPU because rest of pipeline is CPU-only
            with tf.device('/cpu:0'):
                input_data = tf.constant(training_data)
                input_labels = tf.constant(training_labels)
            # Local names avoid shadowing the ``input`` builtin.
            example, label = tf.train.slice_input_producer(
                [input_data, input_labels], num_epochs=FLAGS.num_epochs)
            label = tf.cast(label, tf.int32)
            inputs, labels = tf.train.batch([example, label],
                                            batch_size=FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = network.inference(inputs, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = network.loss(logits, labels)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = network.training(loss, FLAGS.learning_rate)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = network.evaluation(logits, labels)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create the op for initializing variables. Local variables are
        # included because the num_epochs counter of the input producer is
        # a local variable.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Run the Op to initialize the variables.
        sess.run(init_op)

        # Writer for summaries and the Graph; tf.summary.FileWriter belongs
        # to the same API generation as tf.summary.merge_all() above.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # And then after everything is built, start the training loop.
        try:
            for ep in xrange(FLAGS.num_epochs):
                for step in xrange(FLAGS.max_steps):
                    # Step count across epochs, so summaries and checkpoints
                    # are numbered monotonically instead of restarting at 0
                    # with every epoch.
                    total_step = ep * FLAGS.max_steps + step

                    start_time = time.time()
                    _, loss_value = sess.run([train_op, loss])
                    duration = time.time() - start_time

                    # Stop as soon as the loss has effectively reached zero.
                    if loss_value <= 0.00001:
                        print('Loss value: %.4f, done training for %d '
                              'epochs, %d steps.'
                              % (loss_value, ep, total_step))
                        return

                    # Write the summaries and print an overview fairly often.
                    if step % 100 == 0:
                        # Print status to stdout.
                        print('Epochs %d: loss = %.4f (%.3f sec)'
                              % (ep, loss_value, duration))
                        # Update the events file.
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, total_step)

                    # Save a checkpoint periodically.
                    if ((step + 1) % 1000 == 0
                            or (step + 1) == FLAGS.max_steps):
                        print('Saving')
                        saver.save(sess, FLAGS.train_dir,
                                   global_step=total_step)
        finally:
            # Runs on normal completion, on the early return above and on
            # errors: stop the enqueue threads and release the session.
            coord.request_stop()
            coord.join(threads)
            summary_writer.close()
            sess.close()
def main():
    """Fit a network to one image and record quality metrics while training.

    Command-line arguments (``sys.argv``):
        1: image name, 2: network name ('unet' | 'deep_decoder'),
        3: loss name ('mse' | 'l1' | 'mse_l1' | 'mse_with_tv_reg' |
        'mse_with_edge_reg'), 4: optimizer ('sgd' | 'adam'),
        5: learning rate, 6: number of iterations, 7: save interval in
        iterations. Exactly three further arguments (8-10) are read as the
        weights ``w_h``, ``w_v`` and ``w_mse``.

    The raw and averaged images are loaded through ``hf``, the network is
    trained to map a fixed noise input onto the raw image, and every
    ``ITERATIONS_TO_SAVE`` iterations the output image plus SNR/CNR/SSIM
    against the averaged image are written to a fresh folder below
    ``./results``. Invalid input is reported with a printed message and an
    early return.
    """
    # ---------------------------- USER INPUT ------------------------------
    # Training parameters:
    if len(sys.argv) < 8:
        print('Not enough input parameters.')
        return

    IMAGE_NAME = sys.argv[1]  # e.g. '1'
    NETWORK_NAME = sys.argv[2]  # 'unet', 'deep_decoder'
    # 'mse', 'l1', 'mse_l1', 'mse_with_tv_reg', 'mse_with_edge_reg'
    LOSS_NAME = sys.argv[3]
    OPTIMIZER_TYPE = sys.argv[4]  # 'sgd', 'adam'
    LEARNING_RATE = float(sys.argv[5])
    NUM_ITERATIONS = int(sys.argv[6])
    ITERATIONS_TO_SAVE = int(sys.argv[7])
    if len(sys.argv) == 11:
        w_h = float(sys.argv[8])
        w_v = float(sys.argv[9])
        w_mse = float(sys.argv[10])
    else:
        w_h = None
        w_v = None
        w_mse = None
    # -----------------------------------------------------------------------

    # Load images:
    RAW_FILENAME = os.path.join('Raw', '{}_Raw Image.tif'.format(IMAGE_NAME))
    AVERAGED_FILENAME = os.path.join(
        'Averaged', '{}_Averaged Image.tif'.format(IMAGE_NAME))
    # ``except Exception`` instead of a bare ``except`` so Ctrl-C and
    # SystemExit are no longer swallowed and reported as a load error.
    try:
        input_image = hf.get_training_image(RAW_FILENAME)
    except Exception:
        print("Error loading {}".format(RAW_FILENAME))
        return
    try:
        ground_truth = hf.get_training_image(AVERAGED_FILENAME)
    except Exception:
        print("Error loading {}".format(AVERAGED_FILENAME))
        return

    # Validate settings:
    VALID_NETWORK_NAMES = ["unet", "deep_decoder"]
    VALID_OPTIMIZER_TYPES = ["sgd", "adam"]
    VALID_LOSS_NAMES = [
        "mse", "l1", "mse_l1", "mse_with_tv_reg", "mse_with_edge_reg"
    ]
    if NETWORK_NAME not in VALID_NETWORK_NAMES:
        print("Error: {} network does not exist.".format(NETWORK_NAME))
        return
    if OPTIMIZER_TYPE not in VALID_OPTIMIZER_TYPES:
        print("Error: {} optimizer does not exist.".format(OPTIMIZER_TYPE))
        return
    if LOSS_NAME not in VALID_LOSS_NAMES:
        print("Error: {} loss does not exist.".format(LOSS_NAME))
        return

    # These two loss variants return extra terms that are fetched and
    # printed alongside the total loss.
    uses_reg_loss = LOSS_NAME in ("mse_with_edge_reg", "mse_with_tv_reg")

    # Create folder to save results; append "(n)" until the name is unused.
    SAVE_FOLDER = os.path.join('./results', IMAGE_NAME)
    count = 0
    CHECK_FOLDER = SAVE_FOLDER
    while os.path.exists(CHECK_FOLDER):
        count += 1
        CHECK_FOLDER = '{}({})'.format(SAVE_FOLDER, count)
    SAVE_FOLDER = CHECK_FOLDER
    # makedirs also creates './results' itself on a first run.
    os.makedirs(SAVE_FOLDER)

    WRITE_FILENAME = os.path.join(SAVE_FOLDER, 'metrics.txt')
    with open(WRITE_FILENAME, 'a') as wf:
        wf.write(
            'PARAMETERS\nNetwork: {}\nLoss: {}\nOptimizer: {}\nLearning rate: {}\nNumber of iterations: {}'
            .format(NETWORK_NAME, LOSS_NAME, OPTIMIZER_TYPE, LEARNING_RATE,
                    NUM_ITERATIONS))
        wf.write('\n\nw_h: {}\nw_v: {}\nw_mse: {}'.format(w_h, w_v, w_mse))
        wf.write('\n\nIteration\tLoss\tSNR\tCNR\tSSIM')

    # Get input noise:
    if NETWORK_NAME == "unet":
        input_noise = hf.get_noise_matrix(input_image.shape[1],
                                          input_image.shape[2], 32)
    elif NETWORK_NAME == "deep_decoder":
        # Floor division keeps the sizes integers on Python 3 (true division
        # would hand floats to get_noise_matrix).
        input_noise = hf.get_noise_matrix(input_image.shape[1] // (2**4),
                                          input_image.shape[2] // (2**4),
                                          64)

    # Save inputs:
    save_filename = os.path.join(SAVE_FOLDER, 'input_image.tif')
    imsave(save_filename, input_image[0, :, :, 0], cmap='gray')
    save_filename = os.path.join(SAVE_FOLDER, 'ground_truth.tif')
    imsave(save_filename, ground_truth[0, :, :, 0], cmap='gray')

    # Calculate initial metrics:
    snr_i = hf.calculate_metrics(ground_truth, input_image, 'snr',
                                 IMAGE_NAME)
    cnr_i = hf.calculate_metrics(ground_truth, input_image, 'cnr',
                                 IMAGE_NAME)
    ssim_i = hf.calculate_metrics(ground_truth, input_image, 'ssim',
                                  IMAGE_NAME)
    with open(WRITE_FILENAME, 'a') as wf:
        wf.write('\ninput_image\tN/A\t{}\t{}\t{}'.format(
            snr_i, cnr_i, ssim_i))

    # Placeholders:
    z = tf.placeholder(tf.float32,
                       shape=[1, None, None,
                              input_noise.shape[3]])  # input noise
    x = tf.placeholder(tf.float32, shape=[1, None, None, 1])  # input image

    # Network:
    y = network.inference(NETWORK_NAME,
                          z,
                          height=input_noise.shape[1],
                          width=input_noise.shape[2],
                          channels=input_noise.shape[3])
    if uses_reg_loss:
        loss, mse, edge_h, edge_v = network.loss(y, x, LOSS_NAME, w_h, w_v,
                                                 w_mse)
    else:
        loss = network.loss(y, x, LOSS_NAME)

    # Update moving mean and variance for batch normalization (if required):
    runs_update_ops = NETWORK_NAME == "deep_decoder"
    if runs_update_ops:
        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Create different optimizers here:
    if OPTIMIZER_TYPE == "sgd":
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE).minimize(loss)
    elif OPTIMIZER_TYPE == "adam":
        train_op = tf.train.AdamOptimizer(
            learning_rate=LEARNING_RATE).minimize(loss)

    # Everything fetched per iteration, assembled once. Layout:
    #   [update_op?] + [train_op, y, loss] + [mse, edge_h, edge_v]?
    fetches = [train_op, y, loss]
    if uses_reg_loss:
        fetches += [mse, edge_h, edge_v]
    if runs_update_ops:
        fetches = [update_op] + fetches
    feed_dict = {z: input_noise, x: input_image}

    # Start session:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Keep track of metrics:
        track_iter = []
        track_loss = []
        track_snr = []
        track_cnr = []
        track_ssim = []

        for i in range(NUM_ITERATIONS + 1):
            results = sess.run(fetches, feed_dict=feed_dict)
            if runs_update_ops:
                results = results[1:]  # drop the UPDATE_OPS result
            output_image, loss_i = results[1], results[2]
            if uses_reg_loss:
                mse_i, edge_h_i, edge_v_i = results[3:]

            if i % ITERATIONS_TO_SAVE != 0:
                continue

            # Save image:
            save_filename = os.path.join(SAVE_FOLDER,
                                         'iteration_{}.tif'.format(i))
            imsave(save_filename, output_image[0, :, :, 0], cmap='gray')

            # Calculate metrics:
            snr_i = hf.calculate_metrics(ground_truth, output_image, 'snr',
                                         IMAGE_NAME)
            cnr_i = hf.calculate_metrics(ground_truth, output_image, 'cnr',
                                         IMAGE_NAME)
            ssim_i = hf.calculate_metrics(ground_truth, output_image,
                                          'ssim', IMAGE_NAME)
            with open(WRITE_FILENAME, 'a') as wf:
                wf.write('\n{}\t{}\t{}\t{}\t{}'.format(
                    i, loss_i, snr_i, cnr_i, ssim_i))

            # Display:
            if uses_reg_loss:
                print(
                    'Iteration {}/{}\t| Loss: {}\tSNR: {}\tCNR: {}\tSSIM: {}\tMSE: {}\tEdge_h: {}\tEdge_v: {}'
                    .format(i, NUM_ITERATIONS, loss_i, snr_i, cnr_i, ssim_i,
                            mse_i, edge_h_i, edge_v_i))
            else:
                print(
                    'Iteration {}/{}\t| Loss: {}\tSNR: {}\tCNR: {}\tSSIM: {}'
                    .format(i, NUM_ITERATIONS, loss_i, snr_i, cnr_i, ssim_i))

            # Track:
            track_iter.append(i)
            track_loss.append(loss_i)
            track_snr.append(snr_i)
            track_cnr.append(cnr_i)
            track_ssim.append(ssim_i)

        # Plot:
        hf.plot_metrics(track_iter, track_loss, 'loss',
                        os.path.join(SAVE_FOLDER, 'loss.tif'))
        hf.plot_metrics(track_iter, track_snr, 'snr',
                        os.path.join(SAVE_FOLDER, 'snr.tif'))
        hf.plot_metrics(track_iter, track_cnr, 'cnr',
                        os.path.join(SAVE_FOLDER, 'cnr.tif'))
        hf.plot_metrics(track_iter, track_ssim, 'ssim',
                        os.path.join(SAVE_FOLDER, 'ssim.tif'))

    print('Completed.')
# Make the sibling ``tracker`` directory importable so that ``network``
# resolves from there.
sys.path.append(os.path.abspath(os.path.join(
    os.path.dirname(__file__),
    os.path.pardir, 'tracker')))
import network

# Load tensorflow
# NOTE(review): the context manager returned by as_default() is discarded
# (there is no ``with``), so this statement appears to have no effect and the
# ops below would be added to the process-wide default graph -- confirm.
tf.Graph().as_default()
batchSize = 1
delta = 1
# Two images are fed per label row (batchSize * delta * 2 images against
# batchSize * delta label rows of 4 values each).
imagePlaceholder = tf.placeholder(tf.float32, shape=(batchSize * delta * 2, 227, 227, 3))
labelsPlaceholder = tf.placeholder(tf.float32, shape=(batchSize * delta, 4))
# The learning rate is fed at run time instead of being fixed in the graph.
learningRate = tf.placeholder(tf.float32)
tfOutputs = network.inference(imagePlaceholder, num_unrolls=delta, train=True)
tfLossFull, tfLoss = network.loss(tfOutputs, labelsPlaceholder)
# Training optimises the first of the two returned losses.
train_op = network.training(tfLossFull, learningRate)
summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Let the session grow its GPU memory use on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
summary_writer = tf.summary.FileWriter('logs/train/caffe_copy', sess.graph)
ops = []

with sess.as_default():
    sess.run(init)

    # Imported here, after the TensorFlow graph and session are set up.
    import caffe
    caffe.set_mode_cpu()
    # Load caffe net
def main():
    """Iteratively retrain the policy on expert-labelled rollouts of itself.

    Starting from the rollouts in ``args.rollouts_file`` and the latest
    checkpoint in ``args.checkpoint_dir``, each iteration (after the first)
    trains the network on all data gathered so far, then runs
    ``args.num_rollouts`` episodes with the learned policy while recording
    the expert's action for every visited observation. The mean and standard
    deviation of the returns per iteration are written to
    ``args.stats_file`` as JSON.

    Raises:
        AssertionError: If the directory of ``args.stats_file`` is missing or
            observations and actions get out of step.
        ValueError: If ``args.checkpoint_dir`` contains no checkpoint.
    """
    args = get_parser().parse_args()
    observation_length = 17
    action_length = 6

    # Read the expert rollouts from disk.
    observations, actions = load_data(args.rollouts_file)
    print("observations shape = " + str(observations.shape))
    print("actions shape = " + str(actions.shape))

    # Make sure our files exist!
    assert (os.path.exists(os.path.dirname(os.path.abspath(args.stats_file))))

    # Load the expert.
    print("Loading and building expert policy.")
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print("Expert policy loaded and built.")

    # Assemble the network.
    opl = tf.placeholder(tf.float32, shape=(None, observation_length),
                         name="observations")
    apl = tf.placeholder(tf.float32, shape=(None, action_length),
                         name="actions")
    logits = network.inference(opl, observation_length, args.hidden1,
                               args.hidden2, action_length)
    errors, loss = network.loss(logits, apl)
    global_step, train_op = network.training(loss, args.learning_rate)

    with tf.Session() as sess:
        # Initialize the network.
        tf_util.initialize()
        saver = tf.train.Saver()
        # latest_checkpoint() returns None when the directory holds no
        # checkpoint; fail with a message that names the directory instead
        # of passing None on to restore().
        checkpoint = tf.train.latest_checkpoint(args.checkpoint_dir)
        if checkpoint is None:
            raise ValueError(
                "no checkpoint found in {}".format(args.checkpoint_dir))
        saver.restore(sess, checkpoint)

        env = gym.make("Walker2d-v1")
        max_steps = env.spec.timestep_limit

        avg_returns = []
        stddev_returns = []
        # Lists, so that the rollouts below can append new samples.
        observations = list(observations)
        actions = list(actions)

        for iteration in range(args.num_iterations):
            # Snapshot of everything collected so far. Kept under separate
            # names so the rollout loop's ``obs`` cannot be confused with
            # the training array.
            train_obs = np.array(observations)
            train_acts = np.array(actions)
            assert (train_obs.shape[0] == train_acts.shape[0])

            # Train the network.
            if iteration != 0:
                # At least one batch, so a data set smaller than the batch
                # size no longer causes a modulo-by-zero below.
                num_batches = max(
                    1, int(train_obs.shape[0] // args.batch_size))
                for step in range(args.training_steps):
                    i = step % num_batches
                    if i == 0:
                        # New pass over the data: reshuffle both arrays with
                        # the same permutation.
                        p = np.random.permutation(train_obs.shape[0])
                        train_obs = train_obs[p]
                        train_acts = train_acts[p]
                    start = int(i * args.batch_size)
                    stop = int((i + 1) * args.batch_size)
                    feed_dict = {
                        opl: train_obs[start:stop],
                        apl: train_acts[start:stop]
                    }
                    _, loss_value, step_value = sess.run(
                        [train_op, loss, global_step], feed_dict=feed_dict)
                    if step % 100 == 0:
                        # Report the loss over the whole data set.
                        loss_value = sess.run(loss, feed_dict={
                            opl: train_obs,
                            apl: train_acts
                        })
                        msg = "Iteration {}; step {}; loss = {}".format(
                            iteration, step_value, loss_value)
                        print(msg)

            # Generate new rollouts.
            rewards = []
            for i in range(args.num_rollouts):
                print("Iteration {}; rollout {}".format(iteration, i))
                obs = env.reset()
                done = False
                steps = 0
                totalr = 0
                while not done:
                    # Record what the expert would do in this state, but
                    # step the environment with the learned policy's action.
                    expert_action = policy_fn(obs[None, :])
                    observations.append(obs)
                    actions.append(expert_action[0])
                    action = sess.run(logits, feed_dict={opl: obs[None, :]})
                    obs, r, done, _ = env.step(action)
                    totalr += r
                    steps += 1
                    if steps >= max_steps:
                        break
                rewards.append(totalr)

            print("Iteration {}; average return {}".format(
                iteration, np.mean(rewards)))
            print("Iteration {}; stddev return {}".format(
                iteration, np.std(rewards)))
            avg_returns.append(np.mean(rewards))
            stddev_returns.append(np.std(rewards))

        with open(args.stats_file, "w") as f:
            stats = {
                "mean_return": avg_returns,
                "stddev_returns": stddev_returns
            }
            json.dump(stats, f, indent=4)
['Batch size', batch_size_trn], ['Initial LR', initial_lr], ['Momentum', momentum_coeff], ['LR Step size', step_size], ['LR Step factor', step_factor], ['Total Steps', total_steps]] training_images = inputs(args.training_db, batch_size_trn, None, True, augment_training_data) test_images = inputs(args.validation_db, batch_size_val, None, False) net_data = np.load(parameter_path).item() var_dict = nw.get_variable_dict(net_data) with tf.variable_scope("ranker") as scope: feature_vec = nw.build_alexconvnet(training_images, var_dict, embedding_dim, spp, args.pooling) L, p = nw.loss(feature_vec, nw.build_loss_matrix(batch_size_trn), ranking_loss) scope.reuse_variables() val_feature_vec = nw.build_alexconvnet(test_images, var_dict, embedding_dim, spp, args.pooling) L_val, p_val = nw.loss(val_feature_vec, nw.build_loss_matrix(batch_size_val), ranking_loss) lr = tf.Variable(initial_lr) opt = tf.train.AdamOptimizer() grads = opt.compute_gradients(L) apply_grad_op = opt.apply_gradients(grads) init = tf.global_variables_initializer()
LEARNING_RATE = float(os.environ['LEARNING_RATE'] or 0.1) RESTORE = ((os.environ['RESTORE'] or '') == 'true') or False learning_rate_value = LEARNING_RATE session_config = tf.ConfigProto(log_device_placement=True) session_config.gpu_options.allow_growth = True # this is required if want to use GPU as device. # see: https://github.com/tensorflow/tensorflow/issues/2292 session_config.allow_soft_placement = True if __name__ == "__main__": with tf.Graph().as_default() as g, tf.device(USE_DEVICE): # inference() input, deep_features = network.inference() labels, logits, cross_entropy = network.loss(deep_features) centroid_loss, centroids, spread = network.center_loss(deep_features, labels) # combine the two losses _lambda = tf.placeholder(dtype=tf.float32) total_loss = cross_entropy + _lambda / 2. * centroid_loss learning_rate, train, global_step = network.training(total_loss) eval = network.evaluation(logits, labels) init = tf.initialize_all_variables() with tf.Session(config=session_config) as sess, \ h5py.File(DUMP_FILE, 'a', libver='latest', swmr=True) as h5_file: # Merge all the summaries and write them out to /tmp/mnist_logs (by default) # to see the tensor graph, fire up the tensorboard with --logdir="./train"