# config1 = tf.ConfigProto(device_count={"CPU": cpu_num},
#                          inter_op_parallelism_threads=cpu_num,
#                          intra_op_parallelism_threads=cpu_num,
#                          log_device_placement=True)
# with tf.Session(config=config1) as sess:
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)  # create the FileWriter and write the graph to disk
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print("Finetune starting!")
    start_time = time.time()
    for epoch in range(config.training_epochs):
        avg_cost = 0.
        total_batch = int(config.n_samples / config.batch_size)
        for _ in range(total_batch):
            batch_xs = get_random_block_from_data(plane_image, config.batch_size)
            cost, _ = sess.run((sae.cost, sae.optimizer), feed_dict={image: batch_xs})
            avg_cost += cost / config.n_samples * config.batch_size

        # Display logs per epoch step
        # if epoch % config.display_step == 0:
        #     print("Epoch:", '%d,' % (epoch + 1), "Cost:", "{:.9f}".format(avg_cost),
        #           "Time/Epoch is ", (time.time() - start_time))

    # Average wall-clock time per batch over the whole fine-tuning run.
    print("total time is ", (time.time() - start_time) / (total_batch * config.training_epochs))

    coord.request_stop()
    coord.join(threads)

    # Compute AUC from the reconstruction error.
    # hidden_tmp, recon_err_tmp = sess.run((sae.hidden_out, sae.errtmp), feed_dict={image: plane_image})
    # recon_err_tmp = np.sum(recon_err_tmp, axis=1)
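
# A minimal sketch of the random-batch helper used above, following the standard
# TensorFlow autoencoder example this training loop appears to be based on: it
# samples a contiguous block of batch_size rows starting at a random offset.
# This is an assumption about the project's own get_random_block_from_data, which
# is defined elsewhere in the codebase and may differ (e.g. by shuffling rows).
def get_random_block_from_data(data, batch_size):
    start_index = np.random.randint(0, len(data) - batch_size)  # random start row
    return data[start_index:(start_index + batch_size)]         # contiguous block of rows
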
def train():
    """Train the SAE for a number of steps, using one tower per GPU."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * config.num_gpus.
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (config.n_samples / config.batch_size)  # number of parameter updates per epoch
        decay_steps = int(num_batches_per_epoch * config.num_epochs_per_decay)  # steps between learning-rate decays

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(config.initial_lr,
                                        global_step,
                                        decay_steps,
                                        config.lr_dacay_factor,
                                        staircase=True)

        # Note: the optimizer below uses a fixed learning rate; lr is only logged as a summary.
        opt = tf.train.AdamOptimizer(0.00005)

        # dataset = tf.data.Dataset.from_tensor_slices(plane_image)
        # dataset = dataset.repeat(config.training_epochs * config.num_gpus)
        # dataset = dataset.batch(config.batch_size)
        # iterator = dataset.make_initializable_iterator()
        # images = iterator.get_next()

        # input_image = tf.constant(plane_image)
        input1 = tf.placeholder(tf.float32, [None, config.input_size])
        input2 = tf.placeholder(tf.float32, [None, config.input_size])
        input_image = [input1, input2]
        # images = tf.placeholder(tf.float32, [None, config.input_size])

        # batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
        #     [images, labels], capacity=2 * FLAGS.num_gpus)

        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in xrange(config.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ("SAE_Tower", i)) as scope:
                        # Dequeues one batch for the GPU
                        # image_batch, label_batch = batch_queue.dequeue()

                        # Calculate the loss for one tower of the SAE model. This function
                        # constructs the entire model but shares the variables across
                        # all towers.
                        # images = tf.train.shuffle_batch([input_image],
                        #                                 batch_size=config.batch_size,
                        #                                 capacity=10000,
                        #                                 num_threads=10,
                        #                                 min_after_dequeue=100,
                        #                                 enqueue_many=True)
                        # loss = tower_loss(scope, images)
                        loss = tower_loss(scope, input_image[i])

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                        # Calculate the gradients for the batch of data on this tower.
                        grads = opt.compute_gradients(loss)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        # saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        # summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        # init = tf.global_variables_initializer()
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=True))
        sess.run(init)
        # sess.run(iterator.initializer)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        start_time = time.time()
        # summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        for epoch in range(config.training_epochs):
            avg_cost = 0.
            total_batch = int(config.n_samples / (config.num_gpus * config.batch_size))
            for _ in range(total_batch):
                batch_0 = get_random_block_from_data(plane_image, config.batch_size)
                batch_1 = get_random_block_from_data(plane_image, config.batch_size)
                # cost, _ = sess.run([loss, train_op], feed_dict={images: batch_xs})
                cost, _ = sess.run([loss, train_op],
                                   feed_dict={input_image[0]: batch_0,
                                              input_image[1]: batch_1})
                # cost = sess.run(sae.cost, feed_dict={image: batch_xs})
                avg_cost += cost / config.n_samples * config.batch_size

            # Writing a timeline file and adding memory/run-time stats to TensorBoard
            # already works here, but it increases the overall run time.
            # summary_writer.add_run_metadata(run_metadata, 'step%03d' % epoch)
            # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            # chrome_trace = fetched_timeline.generate_chrome_trace_format()
            # with open('timeline_gpu.json', 'w') as f:
            #     f.write(chrome_trace)

            # Display logs per epoch step
            # if epoch % config.display_step == 0:
            #     print("Epoch:", '%d,' % (epoch + 1), "Cost:", "{:.9f}".format(avg_cost),
            #           "Time/Epoch is ", (time.time() - start_time))

        print("TOTAL TIME IS ", time.time() - start_time)
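

# train() calls tower_loss() and average_gradients(), which are not shown in this
# section. The sketch below reproduces the gradient-averaging pattern from the
# TensorFlow CIFAR-10 multi-GPU example that this code follows; the project's own
# average_gradients may differ in detail, so treat this as an illustrative
# reference rather than the definitive implementation.
def average_gradients(tower_grads):
    """Average gradients variable-by-variable across all towers.

    Args:
        tower_grads: list over towers, each a list of (gradient, variable) pairs
            as returned by opt.compute_gradients().
    Returns:
        A list of (gradient, variable) pairs where each gradient is the mean of
        that variable's gradients over all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN)).
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(axis=0, values=grads), 0)
        # Variables are shared across towers, so the first tower's variable suffices.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads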