def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Small toy subgraph used for the swap-out experiments below
    # (not part of the MNIST model itself).
    a = bias_variable([3, 3])
    b = tf.constant(0.2, shape=[3, 3])
    c = tf.constant(10.0, shape=[3, 3])
    d = a + b
    e = tf.multiply(d, c)
    relu1 = tf.nn.relu(e, name='relu1')
    train_relu1 = tf.train.AdamOptimizer(1e-4).minimize(relu1)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    from tensorflow.python.profiler import model_analyzer
    from tensorflow.python.profiler import option_builder

    with tf.Session(config=get_sess_config()) as sess:
        many_runs_timeline = TimeLiner()

        # Mark the inputs of selected backward ops for swapping to host
        # memory via the private '_swap_to_host' attribute.
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/pool1/MaxPool_grad/MaxPoolGrad'
        )._set_attr(
            '_swap_to_host',
            attr_value_pb2.AttrValue(
                list=attr_value_pb2.AttrValue.ListValue(i=[0, 1])))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/conv1/Relu_grad/ReluGrad')._set_attr(
                '_swap_to_host', attr_value_pb2.AttrValue(i=1))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/pool2/MaxPool_grad/MaxPoolGrad'
        )._set_attr(
            '_swap_to_host',
            attr_value_pb2.AttrValue(
                list=attr_value_pb2.AttrValue.ListValue(i=[0, 1])))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/conv2/Relu_grad/ReluGrad')._set_attr(
                '_swap_to_host', attr_value_pb2.AttrValue(i=1))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/conv2/Conv2D_grad/Conv2DBackpropInput'
        )._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=2))
        # sess.graph.get_operation_by_name('pool1/MaxPool')._set_attr(
        #     '_swap_to_host', attr_value_pb2.AttrValue(i=0))
        # gradient_ops = sess.graph.get_operation_by_name(
        #     'adam_optimizer/gradients/conv2/Conv2D_grad/ShapeN')
        # gradient_ops._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=0))
        # gradient_ops._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=1))

        sess.run(tf.global_variables_initializer())
        profiler = model_analyzer.Profiler(sess.graph)

        # for i in range(20000):
        for i in range(FLAGS.iteration_count):
            batch = mnist.train.next_batch(FLAGS.batch_size)
            run_metadata = tf.RunMetadata()
            sess.run(
                train_step,
                feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5},
                options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata)
            # sess.run(train_relu1,
            #          feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5},
            #          options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
            #          run_metadata=run_metadata)
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            chrome_trace = trace.generate_chrome_trace_format(
                show_dataflow=True, show_memory=True)
            many_runs_timeline.update_timeline(chrome_trace)
            profiler.add_step(i, run_metadata)

            # Profile the timing of the model's operations.
            # opts = (option_builder.ProfileOptionBuilder(
            #     option_builder.ProfileOptionBuilder.time_and_memory())
            #     .select(['micros', 'bytes', 'occurrence', 'peak_bytes',
            #              'residual_bytes', 'output_bytes'])
            #     .order_by('name').build())
            # profiler.profile_operations(options=opts)

            # Generate a timeline for this step:
            opts = (option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.time_and_memory()
            ).with_step(i).with_timeline_output(
                "./timeline_output/step_" + FLAGS.mem_opt +
                str(FLAGS.batch_size) + str(FLAGS.iteration_count)).build())
            profiler.profile_graph(options=opts)

        chrome_trace_filename = str(FLAGS.batch_size) + str(FLAGS.mem_opt) + "new"
        graph_location = str(FLAGS.batch_size) + str(FLAGS.mem_opt) + "_swap_test.pbtxt"
        print('Saving graph to: %s' % graph_location)
        tf.train.write_graph(sess.graph_def, '.', graph_location, as_text=True)
        many_runs_timeline.save(chrome_trace_filename + '.ctf.json')
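# NOTE: TimeLiner is used above but not defined in this snippet. Below is a
# minimal sketch of such a trace-merging helper, assuming each input is the
# JSON string produced by Timeline.generate_chrome_trace_format(). The first
# trace is kept whole (including metadata events); later traces contribute
# only their timestamped events, so one Chrome trace file covers many runs.
import json


class TimeLiner(object):
    """Merges Chrome traces from several session runs into one file (sketch)."""

    def __init__(self):
        self._chrome_trace_dict = None

    def update_timeline(self, chrome_trace):
        trace = json.loads(chrome_trace)
        if self._chrome_trace_dict is None:
            # First run: keep everything, including metadata ('ph' == 'M') events.
            self._chrome_trace_dict = trace
        else:
            # Later runs: append only events that carry a timestamp.
            for event in trace['traceEvents']:
                if 'ts' in event:
                    self._chrome_trace_dict['traceEvents'].append(event)

    def save(self, filename):
        with open(filename, 'w') as f:
            json.dump(self._chrome_trace_dict, f)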
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
            EPSILON else None
        gc_enabled = args.gc_channels is not None
        lc_enabled = args.lc_channels is not None
        lc_channels = args.lc_channels
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            lc_enabled=lc_enabled,
            lc_channels=lc_channels,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None
        if lc_enabled:
            lc_batch = reader.dequeue_lc(args.batch_size)
        else:
            lc_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality,
        local_condition_channels=args.lc_channels)

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    local_condition_batch=lc_batch,
                    l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate,
        momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # 'step' stays None if the loop never ran, so guard the comparison.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def testing(self, sess, test_writer):
    # =======================================
    if USE_ROS:
        import rospy
        from sensor_msgs.msg import PointCloud, Image
        from visualization_msgs.msg import MarkerArray, Marker
        from tools.data_visualize import Boxes_labels_Gen, Image_Gen, PointCloud_Gen

        rospy.init_node('rostensorflow')
        pub = rospy.Publisher('prediction', PointCloud, queue_size=1000)
        img_pub = rospy.Publisher('images_rgb', Image, queue_size=1000)
        box_pub = rospy.Publisher('label_boxes', MarkerArray, queue_size=1000)
        rospy.loginfo("ROS begins ...")
    # =======================================
    with tf.name_scope("Inference"):
        RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1]
        RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1]
        RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred + RNet_rpn_yaw_gt_delta
        rpn_rois_3d = self.net.get_output('rpn_rois')[1]

    with tf.name_scope('view_rpn_bv_tb'):
        roi_bv = self.net.get_output('rpn_rois')[0]
        data_bv = self.net.lidar_bv_data
        image_rpn = tf.reshape(test_show_rpn_tf(data_bv, roi_bv),
                               (1, 601, 601, -1))
        tf.summary.image('lidar_bv_test', image_rpn)
        merged = tf.summary.merge_all()

    with tf.name_scope('load_weights'):
        weights = self.args.weights
        if weights.endswith('.ckpt'):
            print('Loading test model weights from {:s}'.format(self.args.weights))
            self.saver.restore(sess, weights)
        else:
            print("error: Function [combinet_test.testing] can not load "
                  "weights {:s}!".format(self.args.weights))
            return 0

    # TODO: Essential step (before sess.run) for using vispy, because of a
    # bug in OpenGL or TensorFlow.
    vispy_init()
    timer = Timer()

    for idx in range(0, self.epoch):
        # index_ = input('Type a new index: ')
        blobs = self.dataset.get_minibatch(idx)
        feed_dict = {
            self.net.lidar3d_data: blobs['lidar3d_data'],
            self.net.lidar_bv_data: blobs['lidar_bv_data'],
            self.net.im_info: blobs['im_info'],
            self.net.calib: blobs['calib']}
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        timer.tic()
        pred_yaw_toshow_, rpn_rois_3d_, summary = \
            sess.run([RNet_rpn_yaw_pred_toshow, rpn_rois_3d, merged],
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
        timer.toc()

        if idx % 3 == 0 and cfg.TEST.DEBUG_TIMELINE:
            # View the trace in chrome://tracing
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file = open(cfg.LOG_DIR + '/' + 'testing-step-' +
                              str(idx).zfill(7) + '.ctf.json', 'w')
            trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
            trace_file.close()

        if idx % cfg.TEST.ITER_DISPLAY == 0:
            print('Test: %06d/%06d speed: %.4f s / iter' %
                  (idx + 1, self.epoch, timer.average_time))

        if VISION_DEBUG:
            scan = blobs['lidar3d_data']
            img = blobs['image_data']
            cubic_cls_value = np.zeros([cfg.TRAIN.RPN_POST_NMS_TOP_N],
                                       dtype=np.float32)
            boxes = BoxAry_Theta(pre_box3d=rpn_rois_3d_,
                                 pre_theta_value=pred_yaw_toshow_,
                                 pre_cube_cls=cubic_cls_value)
            if USE_ROS:
                from tools.data_visualize import PointCloud_Gen, Boxes_labels_Gen, Image_Gen
                pointcloud = PointCloud_Gen(scan)
                label_boxes = Boxes_labels_Gen(boxes, ns='Predict')
                img_ros = Image_Gen(img)
                pub.publish(pointcloud)
                img_pub.publish(img_ros)
                box_pub.publish(label_boxes)
            else:
                pcd_vispy(scan, img, boxes, index=idx,
                          save_img=True,  # cfg.TEST.SAVE_IMAGE
                          visible=False,
                          name='CubicNet testing')

        if idx % 1 == 0 and cfg.TEST.TENSORBOARD:
            test_writer.add_summary(summary, idx)

    print('Testing process has done, happy every day!')
def train(self, train_data, train_label, valid_data=None, valid_label=None,
          learning_rate=0.01, max_epochs=1000, keep_training=False):
    """Define the training process of the model.

    :param train_data: input training data, shaped [sample_number, data]
    :param train_label: labels of the training data, shaped [sample_number, labels]
    :param valid_data: validation data, shaped [sample_number, data]
    :param valid_label: validation labels, shaped [sample_number, labels]
    :param learning_rate: learning rate of the optimizer
    :param max_epochs: maximum number of training epochs
    :param keep_training: whether to continue training an existing model
    """
    # The original condition `if valid_data or valid_label is None` tested
    # the truthiness of valid_data, not whether it was None.
    if valid_data is None or valid_label is None:
        valid_data = self.fashion_data.test.images
        valid_label = self.fashion_data.test.labels

    # Initialize all parameters
    # Fashion data image size
    pixel_size = train_data.shape[1]
    class_number = train_label.shape[1]

    graph = tf.Graph()
    with graph.as_default(), tf.device('cpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Define input samples for each layer
        with tf.name_scope('Inputs'):
            x_sample, y_sample = self.create_placeholders(pixel_size, class_number)

        # Define output layer
        hidden_layer1 = self.add_layer(x_sample, pixel_size, 300, 'layer1',
                                       activation_function=tf.nn.sigmoid)
        if FLAGS.REGULARIZATION == 'drop out':
            keep_prob = tf.placeholder(tf.float32)
            probability = 0.5
            hidden_layer1 = tf.nn.dropout(hidden_layer1, keep_prob)
        prediction = self.add_layer(hidden_layer1, 300, class_number, 'layer2',
                                    activation_function=tf.nn.softmax)
        if FLAGS.REGULARIZATION == 'drop out':
            prediction = tf.nn.dropout(prediction, keep_prob)

        with tf.name_scope("Training"):
            # Define the loss function using cross entropy
            with tf.name_scope("Loss_Function"):
                cross_entropy = tf.reduce_mean(
                    -tf.reduce_sum(y_sample * tf.log(prediction + FLAGS.EPSI),
                                   reduction_indices=1))
            tf.summary.scalar("Loss/train", cross_entropy)
            if FLAGS.OPTIMIZER == 'Adam':
                training = tf.train.AdamOptimizer(learning_rate).minimize(
                    cross_entropy, global_step=global_step)
            elif FLAGS.OPTIMIZER == 'Momentum':
                training = tf.train.MomentumOptimizer(
                    learning_rate, momentum=0.7).minimize(
                        cross_entropy, global_step=global_step)
            else:
                training = tf.train.GradientDescentOptimizer(
                    learning_rate).minimize(cross_entropy,
                                            global_step=global_step)

        with tf.name_scope("Accuracy"):
            correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                          tf.argmax(y_sample, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                                      name='accuracy')
            tf.summary.scalar('accuracy', accuracy)

        # Create data pipeline
        batch_size = 512
        minibatch = InputPipeline(batch_size, train_data, train_label)
        minibatch.schedule(buffer_size=10000)
        x_batch, y_batch = minibatch.next()
        iterator_initializer, mini_dict = minibatch.initializer()
        num_minibatch = int(train_data.shape[0] / batch_size)

        folder_name = (FLAGS.SUMMARY_FOLDER, FLAGS.TIMELINE_FOLDER,
                       FLAGS.LOG_FOLDER, FLAGS.CHECKINGPOINT_FOLDER)
        for folder in folder_name:
            if not os.path.exists(folder):
                os.makedirs(folder)
        f_result = open(FLAGS.LOG_FOLDER +
                        '/training result_{:.0f}.txt'.format(time.time()), 'a')

        # Initialize a session and saver
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()

        # Initialize TensorBoard summaries. Note both merged summaries
        # contain the same ops; they are evaluated with different feeds below.
        merged_train = tf.summary.merge_all()
        merged_test = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.SUMMARY_FOLDER + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(FLAGS.SUMMARY_FOLDER + '/test',
                                            sess.graph)

        # Set runtime statistics options
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        start_time = time.time()
        if FLAGS.VERBOSE:
            print('Training Start!')
            print('Optimizer:', FLAGS.OPTIMIZER)
            print('Regularization method:', FLAGS.REGULARIZATION)

        # Start the training process; the number of iterations is
        # determined by max_epochs.
        if keep_training is True:
            saver.restore(sess,
                          tf.train.latest_checkpoint(FLAGS.CHECKINGPOINT_FOLDER))
            if FLAGS.VERBOSE:
                print("Restore model from ", FLAGS.CHECKINGPOINT_FOLDER)

        for epoch in range(max_epochs):
            sess.run(iterator_initializer, mini_dict)
            # Start an epoch and train through all mini batches
            for step in range(num_minibatch):
                global_step_value = sess.run(global_step)
                batch_x_sample, batch_y_sample = sess.run([x_batch, y_batch])
                if FLAGS.REGULARIZATION == 'drop out':
                    sess.run(training, feed_dict={x_sample: batch_x_sample,
                                                  y_sample: batch_y_sample,
                                                  keep_prob: probability})
                else:
                    sess.run(training, feed_dict={x_sample: batch_x_sample,
                                                  y_sample: batch_y_sample})
            epoch = int(global_step_value / num_minibatch) + 1

            # Evaluate and write summaries every 30 epochs
            if epoch % 30 == 0 or epoch == max_epochs:
                if FLAGS.REGULARIZATION == 'drop out':
                    loss_train = sess.run(cross_entropy,
                                          feed_dict={x_sample: train_data,
                                                     y_sample: train_label,
                                                     keep_prob: 1})
                    loss_test = sess.run(cross_entropy,
                                         feed_dict={x_sample: valid_data,
                                                    y_sample: valid_label,
                                                    keep_prob: 1})
                    acc_test = sess.run(accuracy,
                                        feed_dict={x_sample: valid_data,
                                                   y_sample: valid_label,
                                                   keep_prob: 1})
                    train_summary = sess.run(merged_train,
                                             feed_dict={x_sample: train_data,
                                                        y_sample: train_label,
                                                        keep_prob: 1},
                                             options=run_options,
                                             run_metadata=run_metadata)
                    test_summary = sess.run(merged_test,
                                            feed_dict={x_sample: valid_data,
                                                       y_sample: valid_label,
                                                       keep_prob: 1},
                                            options=run_options,
                                            run_metadata=run_metadata)
                else:
                    loss_train = sess.run(cross_entropy,
                                          feed_dict={x_sample: train_data,
                                                     y_sample: train_label})
                    loss_test = sess.run(cross_entropy,
                                         feed_dict={x_sample: valid_data,
                                                    y_sample: valid_label})
                    acc_test = sess.run(accuracy,
                                        feed_dict={x_sample: valid_data,
                                                   y_sample: valid_label})
                    train_summary = sess.run(merged_train,
                                             feed_dict={x_sample: train_data,
                                                        y_sample: train_label},
                                             options=run_options,
                                             run_metadata=run_metadata)
                    test_summary = sess.run(merged_test,
                                            feed_dict={x_sample: valid_data,
                                                       y_sample: valid_label},
                                            options=run_options,
                                            run_metadata=run_metadata)

                # Add TensorBoard summaries
                train_writer.add_run_metadata(run_metadata, 'epoch%d' % epoch)
                train_writer.add_summary(train_summary, global_step=epoch)
                test_writer.add_summary(test_summary, global_step=epoch)

                result_log = "Epoch: {}, Accuracy: {:.3f}, Loss train: {:.3f}," \
                             " Loss test: {:.3f}\n".format(epoch, acc_test,
                                                           loss_train, loss_test)
                if FLAGS.VERBOSE:
                    print('Adding run metadata for epoch:', epoch)
                    print(result_log)
                # Save training logs to a txt file
                f_result.write('Adding run metadata for epoch:{}\n'.format(epoch))
                f_result.write(result_log)

                # Save runtime statistics
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open(FLAGS.TIMELINE_FOLDER +
                          '/timeline_epoch_{}.json'.format(epoch),
                          'w') as f_tracing:
                    f_tracing.write(chrome_trace)

        duration = time.time() - start_time
        if FLAGS.VERBOSE:
            print('Training for {} epochs took {:.3f} sec.\n'.format(epoch, duration))
            print('Training process finished')
        f_result.write('Training for {} epochs took {:.3f} sec.'.format(epoch, duration))
        f_result.close()

        # Save the trained model to .ckpt files
        saver.save(sess, FLAGS.CHECKINGPOINT_FOLDER + '/project1_trained_model')
        sess.close()
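# NOTE: InputPipeline above is a project-specific helper that is not shown.
# Below is a minimal tf.data-based sketch with the same interface; the method
# names schedule/next/initializer are inferred from the call sites above and
# are assumptions, not the project's actual implementation.
class InputPipeline(object):
    """Minimal tf.data wrapper matching the call sites above (a sketch)."""

    def __init__(self, batch_size, data, labels):
        self._batch_size = batch_size
        self._data = data
        self._labels = labels
        # Feed the arrays through placeholders so they are not baked into
        # the graph as constants.
        self._data_ph = tf.placeholder(data.dtype, data.shape)
        self._labels_ph = tf.placeholder(labels.dtype, labels.shape)
        self._dataset = tf.data.Dataset.from_tensor_slices(
            (self._data_ph, self._labels_ph))

    def schedule(self, buffer_size):
        # Shuffle, batch, and repeat indefinitely.
        self._dataset = (self._dataset.shuffle(buffer_size)
                         .batch(self._batch_size).repeat())
        self._iterator = self._dataset.make_initializable_iterator()

    def next(self):
        return self._iterator.get_next()

    def initializer(self):
        # Returns the initializer op plus the feed needed to run it.
        feed = {self._data_ph: self._data, self._labels_ph: self._labels}
        return self._iterator.initializer, feed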
def profile(run_metadata, epoch=0):
    with open('profs/timeline_step' + str(epoch) + '.json', 'w') as f:
        # Create the Timeline object, and write it to a json file
        fetched_timeline = timeline.Timeline(run_metadata.step_stats)
        chrome_trace = fetched_timeline.generate_chrome_trace_format()
        f.write(chrome_trace)
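# A minimal usage sketch for profile(), assuming an existing session `sess`
# and a training op `train_op` (both hypothetical names):
# run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
# run_metadata = tf.RunMetadata()
# sess.run(train_op, options=run_options, run_metadata=run_metadata)
# profile(run_metadata, epoch=3)  # writes profs/timeline_step3.json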
    result = tf.matmul(matrix1, matrix2)
    return result


if __name__ == "__main__":
    batch_run = 4
    image_size = 32
    count = 0
    results = []

    # define the graph
    while count < batch_run:
        results.append(matrix_mul(image_size))
        count = count + 1

    # build options for perf tracing
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # build tensorboard graph
    tf_writer = tf.summary.FileWriter("./tensorboard_multi_ops_group",
                                      graph=tf.get_default_graph())

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    init = tf.global_variables_initializer()
    sess.run(init)

    start = time.time()
    final_result = sess.run(tf.group(*results),
                            options=options,
                            run_metadata=run_metadata)
    end = time.time()

    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    # The original passed the literal '%d' through unformatted; interpolate
    # the batch count into the file name instead.
    with open('timeline_02_step_%d.json' % batch_run, 'w') as f:
        f.write(chrome_trace)
    print("benchmark time: {}".format(end - start))
def run_op_benchmark(self,
                     sess,
                     op_or_tensor,
                     feed_dict=None,
                     burn_iters=2,
                     min_iters=10,
                     store_trace=False,
                     store_memory_usage=True,
                     name=None,
                     extras=None,
                     mbs=0):
    """Run an op or tensor in the given session. Report the results.

    Args:
      sess: `Session` object to use for timing.
      op_or_tensor: `Operation` or `Tensor` to benchmark.
      feed_dict: A `dict` of values to feed for each op iteration (see the
        `feed_dict` parameter of `Session.run`).
      burn_iters: Number of burn-in iterations to run.
      min_iters: Minimum number of iterations to use for timing.
      store_trace: Boolean, whether to run an extra untimed iteration and
        store the trace of iteration in returned extras. The trace will be
        stored as a string in Google Chrome trace format in the extras field
        "full_trace_chrome_format". Note that trace will not be stored in
        test_log_pb2.TestResults proto.
      store_memory_usage: Boolean, whether to run an extra untimed iteration,
        calculate memory usage, and store that in extras fields.
      name: (optional) Override the BenchmarkEntry name with `name`.
        Otherwise it is inferred from the top-level method name.
      extras: (optional) Dict mapping string keys to additional benchmark
        info. Values may be either floats or values that are convertible to
        strings.
      mbs: (optional) The number of megabytes moved by this op, used to
        calculate the ops throughput.

    Returns:
      A `dict` containing the key-value pairs that were passed to
      `report_benchmark`. If `store_trace` option is used, then
      `full_chrome_trace_format` will be included in return dictionary even
      though it is not passed to `report_benchmark` with `extras`.
    """
    for _ in range(burn_iters):
        sess.run(op_or_tensor, feed_dict=feed_dict)

    deltas = [None] * min_iters
    for i in range(min_iters):
        start_time = time.time()
        sess.run(op_or_tensor, feed_dict=feed_dict)
        end_time = time.time()
        delta = end_time - start_time
        deltas[i] = delta

    extras = extras if extras is not None else {}
    unreported_extras = {}
    if store_trace or store_memory_usage:
        run_options = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)
        run_metadata = config_pb2.RunMetadata()
        sess.run(op_or_tensor,
                 feed_dict=feed_dict,
                 options=run_options,
                 run_metadata=run_metadata)
        tl = timeline.Timeline(run_metadata.step_stats)

        if store_trace:
            unreported_extras["full_trace_chrome_format"] = (
                tl.generate_chrome_trace_format())

        if store_memory_usage:
            step_stats_analysis = tl.analyze_step_stats(show_memory=True)
            allocator_maximums = step_stats_analysis.allocator_maximums
            for k, v in allocator_maximums.items():
                extras["allocator_maximum_num_bytes_%s" % k] = v.num_bytes

    def _median(x):
        if not x:
            return -1
        s = sorted(x)
        l = len(x)
        lm1 = l - 1
        return (s[l // 2] + s[lm1 // 2]) / 2.0

    def _mean_and_stdev(x):
        if not x:
            return -1, -1
        l = len(x)
        mean = sum(x) / l
        if l == 1:
            return mean, -1
        variance = sum([(e - mean) * (e - mean) for e in x]) / (l - 1)
        return mean, math.sqrt(variance)

    median_delta = _median(deltas)

    benchmark_values = {
        "iters": min_iters,
        "wall_time": median_delta,
        "extras": extras,
        "name": name,
        "throughput": mbs / median_delta
    }
    self.report_benchmark(**benchmark_values)

    mean_delta, stdev_delta = _mean_and_stdev(deltas)
    unreported_extras["wall_time_mean"] = mean_delta
    unreported_extras["wall_time_stdev"] = stdev_delta
    benchmark_values["extras"].update(unreported_extras)
    return benchmark_values
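# A minimal usage sketch, assuming this method lives on a tf.test.Benchmark
# subclass; the class name MatmulBenchmark and method benchmark_matmul are
# illustrative, not from the original source:
class MatmulBenchmark(tf.test.Benchmark):
    # Hypothetical benchmark that times a single 1024x1024 matmul.
    def benchmark_matmul(self):
        with tf.Graph().as_default(), tf.Session() as sess:
            a = tf.random_normal([1024, 1024])
            b = tf.random_normal([1024, 1024])
            prod = tf.matmul(a, b)
            results = self.run_op_benchmark(
                sess, prod.op, burn_iters=5, min_iters=50,
                store_trace=True, name="matmul_1024")
            # The Chrome trace is returned in the extras dict but is not
            # passed on to report_benchmark:
            trace_json = results["extras"]["full_trace_chrome_format"]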
def prediction_callback(self, input_msg):
    tic = timeit.default_timer()
    print("subscribed to prediction input")

    tic0 = timeit.default_timer()
    feed_dict = {
        self.car1: multiarray_to_numpy(input_msg.car1),
        self.car2: multiarray_to_numpy(input_msg.car2),
        self.extras: multiarray_to_numpy(input_msg.extras),
        self.traj_lengths: multiarray_to_numpy(input_msg.traj_lengths),
        self.sample_ct: [input_msg.sample_ct]
    }
    if input_msg.car1_future.data:
        feed_dict[self.car1_future] = multiarray_to_numpy(
            input_msg.car1_future)
    else:
        feed_dict[self.car1_future_x] = multiarray_to_numpy(
            input_msg.car1_future_x)
        feed_dict[self.car1_future_y] = multiarray_to_numpy(
            input_msg.car1_future_y)
    toc0 = timeit.default_timer()
    print("constructing feed_dict took: ", toc0 - tic0, " (s), running tf!")

    tic0 = timeit.default_timer()
    if profile:
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        c1top32 = self.sess.run(self.c1top32,
                                feed_dict=feed_dict,
                                options=options,
                                run_metadata=run_metadata)
        fetched_timeline = timeline.Timeline(run_metadata.step_stats)
        chrome_trace = fetched_timeline.generate_chrome_trace_format()
        with open('/home/schmrlng/Dropbox/timeline' +
                  os.environ["CUDA_VISIBLE_DEVICES"] + '_0.json', 'w') as f:
            f.write(chrome_trace)

        feed_dict[self.car1_future] = c1top32
        feed_dict[self.sample_ct] = [input_msg.sample_ct * 64]
        feed_dict.pop(self.car1_future_x)  # should be unnecessary
        feed_dict.pop(self.car1_future_y)  # should be unnecessary
        run_metadata = tf.RunMetadata()
        c1best, r = self.sess.run([self.c1best, self.r],
                                  feed_dict=feed_dict,
                                  options=options,
                                  run_metadata=run_metadata)
        fetched_timeline = timeline.Timeline(run_metadata.step_stats)
        chrome_trace = fetched_timeline.generate_chrome_trace_format()
        with open('/home/schmrlng/Dropbox/timeline' +
                  os.environ["CUDA_VISIBLE_DEVICES"] + '_1.json', 'w') as f:
            f.write(chrome_trace)
    else:
        c1top32 = self.sess.run(self.c1top32, feed_dict=feed_dict)
        feed_dict[self.car1_future] = c1top32
        feed_dict[self.sample_ct] = [input_msg.sample_ct * 64]
        feed_dict.pop(self.car1_future_x)  # should be unnecessary
        feed_dict.pop(self.car1_future_y)  # should be unnecessary
        c1best, r = self.sess.run([self.c1best, self.r], feed_dict=feed_dict)
    toc0 = timeit.default_timer()
    print("done running tf!, took (s): ", toc0 - tic0)

    tic0 = timeit.default_timer()
    output_msg = prediction_output()
    output_msg.y = numpy_to_multiarray(c1best)
    output_msg.r = numpy_to_multiarray(r)
    self.pub.publish(output_msg)
    toc0 = timeit.default_timer()
    toc = timeit.default_timer()
    print("output_msg constructed and published, took (s): ", toc0 - tic0)
    print("total time taken (s): ", toc - tic)
def main_LSTM(args):
    '''
    Implementation of the original LSTM approach
    (https://github.com/KhaledSaleh/driving_behaviour_classification)
    '''
    # set config params specific to the original code
    config = Config()
    config.training_volume = args.training_volume
    config.input_dim = args.input_dim
    config.encoding_dim = args.encoding_dim
    config.scale = args.scale

    if args.runtime_measurement:
        config.n_time_measures = 10
    else:
        config.n_time_measures = 1

    # load preprocessed data
    data = load_dataset(args.dataset, config)
    X_train = data[0]
    X_test = data[1]
    y_train = data[2]
    y_test = data[3]
    config = data[4]

    logs = []

    # if the train/test data is not already a list, wrap it in one
    if type(X_train) == list:
        print("given data is already a list")
        X_train_list = X_train
        X_test_list = X_test
        y_train_list = y_train
        y_test_list = y_test
    else:
        X_train_list = [X_train]
        X_test_list = [X_test]
        y_train_list = [y_train]
        y_test_list = [y_test]

    ###########################################################################
    # statistical iteration
    ###########################################################################
    acc_mean = []
    f1_mean = []
    for stat_it in range(args.stat_iterations):
        logger.info('Statistical iteration: ' + str(stat_it))

        # train for each element in the list (this is why we need list form,
        # even if it contains only one element)
        logger.info('Training data contains ' + str(len(X_train_list)) +
                    ' training instances...')
        scores = []
        accs = []
        for it in range(len(X_train_list)):
            logger.info('.......')
            logger.info('instance ' + str(it) + ':')
            X_train = X_train_list[it]
            X_test = X_test_list[it]
            y_train = y_train_list[it]
            y_test = y_test_list[it]

            # use only a fraction of the training samples (if given)
            X_train = X_train[1:int(X_train.shape[0] * config.training_volume), :]
            y_train = y_train[1:int(y_train.shape[0] * config.training_volume), :]

            config.n_inputs = X_train.shape[2]
            config.train_count = len(X_train)
            config.test_data_count = len(X_test)
            config.n_steps = len(X_train[0])
            config.n_classes = len(np.unique(y_train))

            logger.info('Training dataset shape: ' + str(X_train.shape) +
                        str(y_train.shape))
            logger.info('Test dataset shape: ' + str(X_test.shape) +
                        str(y_test.shape))

            graph = tf.Graph()
            with graph.as_default():
                X = tf.compat.v1.placeholder(
                    tf.float32, [None, config.n_steps, config.n_inputs],
                    name="X")
                Y = tf.compat.v1.placeholder(
                    tf.float32, [None, config.n_classes], name="Y")
                pred_Y = LSTM_Network(X, config)

                # Loss, optimizer, evaluation
                l2 = config.lambda_loss_amount * \
                    sum(tf.nn.l2_loss(tf_var)
                        for tf_var in tf.compat.v1.trainable_variables())
                # Softmax loss and L2
                cost = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(logits=pred_Y,
                                                            labels=Y),
                    name="cost") + l2
                optimizer = tf.compat.v1.train.AdamOptimizer(
                    learning_rate=config.learning_rate).minimize(cost)

                correct_pred = tf.equal(tf.argmax(pred_Y, 1), tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred,
                                                  dtype=tf.float32))

                saver = tf.compat.v1.train.Saver()

            with tf.compat.v1.Session(
                    graph=graph,
                    config=tf.compat.v1.ConfigProto(
                        log_device_placement=False)) as sess:
                if not args.test:
                    init_op = tf.compat.v1.global_variables_initializer()
                    sess.run(init_op)
                    best_accuracy = 0.0
                    # Start training: loop over epochs and batches
                    for i in range(config.training_epochs):
                        starttime = time()
                        for start, end in zip(
                                range(0, config.train_count, config.batch_size),
                                range(config.batch_size,
                                      config.train_count + 1,
                                      config.batch_size)):
                            sess.run(optimizer,
                                     feed_dict={X: X_train[start:end],
                                                Y: one_hot(y_train[start:end],
                                                           config.n_classes)})
                        saver.save(sess, os.path.join("./weights", 'LSTM_model'))

                        # Test completely at every epoch: calculate accuracy
                        pred_out, accuracy_out, loss_out = sess.run(
                            [pred_Y, accuracy, cost],
                            feed_dict={X: X_test,
                                       Y: one_hot(y_test, config.n_classes)})
                        logs.append(time() - starttime)
                        print("Training iter: {},".format(i) +
                              " Test accuracy : {},".format(accuracy_out) +
                              " Loss : {}".format(loss_out))
                        best_accuracy = max(best_accuracy, accuracy_out)
                    print("")

                    mean_epoch_time = np.mean(logs)
                    overall_time = np.sum(logs)
                    logger.info("Mean epoch time: " + str(mean_epoch_time))
                    logger.info("Overall training time: " + str(overall_time))
                    logger.info("Final test accuracy: {}".format(accuracy_out))
                    logger.info("Best epoch's test accuracy: {}".format(
                        best_accuracy))
                    print("")
                # start testing the trained model
                else:
                    saver.restore(sess, os.path.join("./weights", 'LSTM_model'))
                    t1 = time()
                    pred_out, accuracy_out, loss_out = sess.run(
                        [pred_Y, accuracy, cost],
                        feed_dict={X: X_test,
                                   Y: one_hot(y_test, config.n_classes)})
                    inference_time = time() - t1
                    print(" Test accuracy : {},".format(accuracy_out) +
                          " Loss : {}".format(loss_out))

            ###################################################################
            # evaluation of results
            ###################################################################
            pred_test_bool = pred_out.argmax(1)

            # runtime measurement
            t = []
            traces = []
            options = tf.compat.v1.RunOptions(
                trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
            run_metadata = tf.compat.v1.RunMetadata()
            for i in range(config.n_time_measures):
                with tf.compat.v1.Session(
                        graph=graph,
                        config=tf.compat.v1.ConfigProto(
                            log_device_placement=False)) as sess:
                    init_op = tf.compat.v1.global_variables_initializer()
                    sess.run(init_op)
                    t1 = time()
                    sess.run([pred_Y, accuracy, cost],
                             feed_dict={X: X_test,
                                        Y: one_hot(y_test, config.n_classes)},
                             options=options,
                             run_metadata=run_metadata)
                    inference_time = time() - t1
                    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    traces.append(chrome_trace)
                    t.append(inference_time)
            with open('./logs/LSTM_ts_preproc_timeline_test.json', 'w') as f:
                f.write(traces[-1])

            # Take the median over all timed runs. (The original took the
            # median of the last scalar measurement, which is a no-op.)
            inference_time = np.median(t)
            logger.info("Inference time: " + str(inference_time))
            logger.info("Inference time of one sequence [ms]: " +
                        str(inference_time * 1000 / X_test.shape[0]))

            logger.info('Accuracy on training data: ')
            report = classification_report(y_test.astype(int), pred_test_bool,
                                           output_dict=True)
            logger.info(classification_report(y_test.astype(int),
                                              pred_test_bool))
            accs.append(report['accuracy'])

            logger.info("Confusion matrix:")
            confusion_matrix = metrics.confusion_matrix(y_test.astype(int),
                                                        pred_test_bool)
            logger.info(confusion_matrix)

            # f1 score
            f1 = f1_score(y_test.astype(int), pred_test_bool,
                          average='weighted')
            scores.append(f1)
            logger.info("F1 Score: " + str(f1))

        # add results to the statistical result arrays
        acc_mean.append(np.mean(accs))
        f1_mean.append(np.mean(scores))

    # save as mat files
    save_dic = {"report": report,
                "confusion_matrix": confusion_matrix,
                "config": config,
                "pred": pred_out,
                "label": y_test,
                "f1": np.mean(f1_mean),
                "acc_mean": np.mean(acc_mean)}
    savemat("results/" + args.dataset + "/results_origNet_" +
            str(config.training_volume) + ".mat", save_dic)

    logger.info('Accuracy results of statistical repetitions: ' + str(acc_mean))
    logger.info('F1 scores of statistical repetitions: ' + str(f1_mean))

    # write all scores to an extra file
    logger.info('Mean Score: ' + str(np.mean(f1_mean)))
    logger.info('Mean Accuracy: ' + str(np.mean(acc_mean)))
    with open("results/results_" + args.dataset + "_LSTM.txt", 'a') as file:
        file.write(str(args.stat_iterations) + '\t' +
                   str(round(np.mean(f1_mean), 3)) + '\t' +
                   str(round(np.mean(acc_mean), 3)) + '\t' +
                   str(round(np.std(f1_mean), 3)) + '\t' +
                   str(round(np.std(acc_mean), 3)) + '\t' +
                   str(args.training_volume) + '\n')
threads = [
    threading.Thread(group=None, target=run_op, args=(op, ))
    for op in (enqueue_zeros, enqueue_ones)
]
if reverse:
    threads.reverse()
for t in threads:
    t.start()

# wait for threads to finish
for t in threads:
    t.join()

# generate merged timeline
merged_metadata = tf.RunMetadata()
for run_metadata in run_metadatas:
    merged_metadata.MergeFrom(run_metadata)

tl = timeline.Timeline(merged_metadata.step_stats)
ctf = tl.generate_chrome_trace_format()
with open(sys.argv[0] + '_%s_timeline.json' % (reverse), 'w') as f:
    f.write(ctf)

assert sess.run(queue.size()) == 2 * n
result = sess.run(queue.dequeue_many(2 * n))
padding = np.array([0])
diffs = np.concatenate([padding, result]) - np.concatenate([result, padding])
print("Interleaving detected: %s" % (abs(diffs).sum() > 2))
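# NOTE: run_op and run_metadatas are not defined in this fragment. A
# plausible minimal sketch, assuming each thread traces its own sess.run and
# appends the metadata to a shared list (list.append is thread-safe in
# CPython), so the per-thread traces can be merged above:
run_metadatas = []


def run_op(op):
    # Run the enqueue op with full tracing and record its metadata.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(op, options=run_options, run_metadata=run_metadata)
    run_metadatas.append(run_metadata)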
def train_model(self, sess, max_iters):
    """Network training loop."""
    # Returns a RoIDataLayer object wrapping self._roidb, self._num_classes,
    # self._perm and self._cur.
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # RPN
    # classification loss
    # Reshape the output of the 'rpn_cls_score_reshape' layer from
    # (1, n, n, 18) to (-1, 2), where the 2 columns hold the
    # foreground/background classification scores.
    rpn_cls_score = tf.reshape(
        self.net.get_output('rpn_cls_score_reshape'), [-1, 2])
    # Output [0] of the 'rpn-data' layer is rpn_label with shape
    # (1, 1, A * height, width); it stores the label (-1, 0, 1) of every
    # anchor.
    # Issue 1: the label layout along the data-reading axis looks questionable.
    rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])
    # Gather the rpn_cls_score entries whose rpn_label is not -1 and rebuild
    # rpn_cls_score from them.
    rpn_cls_score = tf.reshape(
        tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))),
        [-1, 2])
    # Gather the rpn_label entries that are not -1 and rebuild rpn_label.
    rpn_label = tf.reshape(
        tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
    # Score loss: tf.nn.sparse_softmax_cross_entropy_with_logits takes logits
    # and labels with the same leading dimension (shape[0]); they are the
    # last layer's output and the labels respectively.
    # NOTE: this function returns a vector; use tf.reduce_sum for the total
    # cross entropy and tf.reduce_mean for the loss.
    # Issue 2: logits and labels should have matching shapes, but here they
    # differ, which looks questionable.
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=rpn_cls_score, labels=rpn_label))

    # bounding box regression L1 loss
    # The 'rpn_bbox_pred' layer stores (dx, dy, dw, dh) for bbox regression.
    rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
    # 'rpn-data'[1] returns an array of per-anchor regression targets
    # (dx, dy, dw, dh) with shape (len(inds_inside), 4), i.e.
    # (anchors.shape[0], 4); reshape it to (1, height, width, A * 4).
    rpn_bbox_targets = tf.transpose(
        self.net.get_output('rpn-data')[1], [0, 2, 3, 1])
    # rpn_bbox_inside_weights: anchors labeled 1 get (1.0, 1.0, 1.0, 1.0);
    # reshape to (1, height, width, A * 4).
    rpn_bbox_inside_weights = tf.transpose(
        self.net.get_output('rpn-data')[2], [0, 2, 3, 1])
    # rpn_bbox_outside_weights: anchors labeled 0 or 1 are all initialized to
    # (1/num_examples, 1/num_examples, 1/num_examples, 1/num_examples), where
    # num_examples is the total number of anchors labeled 0 or 1;
    # reshape to (1, height, width, A * 4).
    rpn_bbox_outside_weights = tf.transpose(
        self.net.get_output('rpn-data')[3], [0, 2, 3, 1])
    # Compute the smooth L1 loss.
    rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred,
                                             rpn_bbox_targets,
                                             rpn_bbox_inside_weights,
                                             rpn_bbox_outside_weights)
    # rpn_smooth_l1 is a tensor; reduce it into a scalar loss.
    rpn_loss_box = tf.reduce_mean(
        tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # R-CNN
    # classification loss
    # Output of the final score branch (fc layer).
    cls_score = self.net.get_output('cls_score')
    # label: the selected proposals are combined with the ground truth into
    # all_roi; the qualifying rois are filtered out of all_roi, and these are
    # their labels.
    label = tf.reshape(self.net.get_output('roi-data')[1], [-1])
    # Compare these roi labels with the final score branch output to get the
    # loss.
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                       labels=label))

    # bounding box regression L1 loss
    # Output of the final bbox branch (fc layer).
    bbox_pred = self.net.get_output('bbox_pred')
    bbox_targets = self.net.get_output('roi-data')[2]
    bbox_inside_weights = self.net.get_output('roi-data')[3]
    bbox_outside_weights = self.net.get_output('roi-data')[4]
    # Compute the smooth L1 loss.
    smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets,
                                         bbox_inside_weights,
                                         bbox_outside_weights)
    # smooth_l1 is a tensor; reduce it into a scalar loss.
    loss_box = tf.reduce_mean(
        tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    # final loss: sum of the four losses
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    # optimizer and learning rate
    global_step = tf.Variable(0, trainable=False)
    # cfg.TRAIN.LEARNING_RATE is 0.001, cfg.TRAIN.STEPSIZE is 50000.
    # tf.train.exponential_decay(initial_lr, global_step, decay_steps,
    # decay_rate, staircase): with staircase=True the rate decays in discrete
    # plateaus every decay_steps; with False it decays at every iteration.
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
                                    cfg.TRAIN.STEPSIZE, 0.1, staircase=True)
    # cfg.TRAIN.MOMENTUM is 0.9
    momentum = cfg.TRAIN.MOMENTUM
    # gradient descent with momentum coefficient 0.9
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
        loss, global_step=global_step)

    # initialize variables
    sess.run(tf.global_variables_initializer())
    if self.pretrained_model is not None:
        # Load the pretrained model if one is given.
        print('Loading pretrained model weights from {:s}'.format(
            self.pretrained_model))
        self.net.load(self.pretrained_model, sess, self.saver, True)

    last_snapshot_iter = -1
    timer = Timer()  # records the current time
    for iter in range(max_iters):
        # get one batch
        blobs = data_layer.forward()

        # Make one SGD update; fill in the placeholders.
        feed_dict = {self.net.data: blobs['data'],
                     self.net.im_info: blobs['im_info'],
                     self.net.keep_prob: 0.5,
                     self.net.gt_boxes: blobs['gt_boxes']}

        run_options = None
        run_metadata = None
        # False by default
        if cfg.TRAIN.DEBUG_TIMELINE:
            run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        timer.tic()
        rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, _ = \
            sess.run([rpn_cross_entropy, rpn_loss_box, cross_entropy,
                      loss_box, train_op],
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
        timer.toc()

        if cfg.TRAIN.DEBUG_TIMELINE:
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            # long() is Python 2 only; use int() for py3. # clw modify: for py3
            trace_file = open(
                str(int(time.time() * 1000)) + '-train-timeline.ctf.json', 'w')
            trace_file.write(
                trace.generate_chrome_trace_format(show_memory=False))
            trace_file.close()

        if (iter + 1) % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, '
                  'rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' %
                  (iter + 1, max_iters,
                   rpn_loss_cls_value + rpn_loss_box_value +
                   loss_cls_value + loss_box_value,
                   rpn_loss_cls_value, rpn_loss_box_value,
                   loss_cls_value, loss_box_value,
                   lr.eval()))  # clw modify: for py3
            print('speed: {:.3f}s / iter'.format(
                timer.average_time))  # clw modify: for py3

        if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = iter
            self.snapshot(sess, iter)

    if last_snapshot_iter != iter:
        self.snapshot(sess, iter)
def train_loop(sess, train_step, global_step, optlist, args, trainset,
               validationset, disable_training, enable_tf_timeline):
    train_loop_logger = logger(int(args["task_index"]), "Train Loop")
    train_loop_logger.start_timer()

    options = None
    run_metadata = None
    many_runs_timeline = None
    if enable_tf_timeline:
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        # timeliner: a trace-merging helper analogous to the TimeLiner
        # sketch shown earlier in this document.
        many_runs_timeline = timeliner()

    # counter stuff
    trainset.reset()
    validationset.reset()

    # restore weights belonging to graph
    epochs_completed = 0
    if not args['restart']:
        last_model = tf.train.latest_checkpoint(args['modelpath'])
        print("Restoring model %s." % last_model)
        model_saver.restore(sess, last_model)

    # losses
    train_loss = 0.
    train_batches = 0
    total_batches = 0
    train_time = 0

    # do training
    while not sess.should_stop():
        train_iteration_logger = logger(int(args['task_index']),
                                        "Training Iteration",
                                        epochs_completed)
        train_iteration_logger.start_timer()

        # increment total batch counter
        total_batches += 1

        # get next batch
        images, labels, normweights, _, _ = trainset.next_batch(
            args['train_batch_size_per_node'])
        # set weights to one
        normweights[:] = 1.

        # set up feed dict:
        feed_dict = {
            variables['images_']: images,
            variables['labels_']: labels,
            variables['weights_']: normweights,
            variables['keep_prob_']: args['dropout_p']
        }

        if not disable_training:
            # update weights
            start_time = time.time()
            if args['create_summary']:
                _, gstep, summary, tmp_loss = sess.run(
                    [train_step, global_step, train_summary, loss_fn],
                    feed_dict=feed_dict,
                    options=options,
                    run_metadata=run_metadata)
                if enable_tf_timeline:
                    fetched_timeline = timeline.Timeline(
                        run_metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    many_runs_timeline.update_timeline(chrome_trace)
            else:
                _, gstep, tmp_loss = sess.run(
                    [train_step, global_step, loss_fn],
                    feed_dict=feed_dict,
                    options=options,
                    run_metadata=run_metadata)
                if enable_tf_timeline:
                    fetched_timeline = timeline.Timeline(
                        run_metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    many_runs_timeline.update_timeline(chrome_trace)

            # update kfac parameters
            if optlist:
                sess.run(optlist[0], feed_dict=feed_dict, options=options,
                         run_metadata=run_metadata)
                if enable_tf_timeline:
                    fetched_timeline = timeline.Timeline(
                        run_metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    many_runs_timeline.update_timeline(chrome_trace)
                if gstep % args["kfac_inv_update_frequency"] == 0:
                    sess.run(optlist[1], feed_dict=feed_dict, options=options,
                             run_metadata=run_metadata)
                    if enable_tf_timeline:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format()
                        many_runs_timeline.update_timeline(chrome_trace)

            end_time = time.time()
            train_time += end_time - start_time

            # increment train loss and batch number
            train_loss += tmp_loss
            train_batches += 1

            # determine if we give a short update:
            if gstep % args['display_interval'] == 0:
                print(time.time(), "REPORT rank", args["task_index"],
                      "global step %d., average training loss %g (%.3f sec/batch)" %
                      (gstep, train_loss / float(train_batches),
                       train_time / float(train_batches)))

        # check if epoch is done
        if trainset._epochs_completed > epochs_completed:
            epochs_completed = trainset._epochs_completed
            print(time.time(), "COMPLETED rank", args["task_index"],
                  "epoch %d, average training loss %g (%.3f sec/batch)" %
                  (epochs_completed, train_loss / float(train_batches),
                   train_time / float(train_batches)))

            # reset counters
            train_loss = 0.
            train_batches = 0
            train_time = 0

            # compute validation loss:
            # reset variables
            validation_loss = 0.
            validation_batches = 0

            # iterate over batches
            while True:
                # get next batch
                images, labels, normweights, weights, _ = \
                    validationset.next_batch(
                        args['validation_batch_size_per_node'])
                # set weights to 1:
                normweights[:] = 1.
                weights[:] = 1.

                if not disable_training:
                    # compute loss. (Both branches are normalized to yield a
                    # scalar tmp_loss; the original fetched [loss_fn] as a
                    # list in one branch and indexed it in both.)
                    if args['create_summary']:
                        summary, tmp_loss = sess.run(
                            [validation_summary, loss_fn],
                            feed_dict={variables['images_']: images,
                                       variables['labels_']: labels,
                                       variables['weights_']: normweights,
                                       variables['keep_prob_']: 1.0})
                    else:
                        tmp_loss = sess.run(
                            loss_fn,
                            feed_dict={variables['images_']: images,
                                       variables['labels_']: labels,
                                       variables['weights_']: normweights,
                                       variables['keep_prob_']: 1.0})

                    # add loss
                    validation_loss += tmp_loss
                    validation_batches += 1

                    # update accuracy
                    sess.run(accuracy_fn[1],
                             feed_dict={variables['images_']: images,
                                        variables['labels_']: labels,
                                        variables['weights_']: normweights,
                                        variables['keep_prob_']: 1.0})

                    # update auc
                    sess.run(auc_fn[1],
                             feed_dict={variables['images_']: images,
                                        variables['labels_']: labels,
                                        variables['weights_']: normweights,
                                        variables['keep_prob_']: 1.0})

                # check if full pass done
                if validationset._epochs_completed > 0:
                    validationset.reset()
                    break

            print(time.time(),
                  "COMPLETED epoch %d, average validation loss %g" %
                  (epochs_completed,
                   validation_loss / float(validation_batches)))
            validation_accuracy = sess.run(accuracy_fn[0])
            print(time.time(),
                  "COMPLETED epoch %d, average validation accu %g" %
                  (epochs_completed, validation_accuracy))
            validation_auc = sess.run(auc_fn[0])
            print(time.time(),
                  "COMPLETED epoch %d, average validation auc %g" %
                  (epochs_completed, validation_auc))

        if enable_tf_timeline:
            many_runs_timeline.save('Timeliner_output.json')

        train_iteration_logger.end_timer()

    if enable_tf_timeline:
        many_runs_timeline.save('Timeliner_output.json')

    train_loop_logger.end_timer()
def benchmark_model(self, warmup_runs, bm_runs, num_threads,
                    trace_filename=None):
    """Benchmark model."""
    if self.tensorrt:
        print('Using tensorrt ', self.tensorrt)
        self.build_and_save_model()
        graphdef = self.freeze_model()

    if num_threads > 0:
        print('num_threads for benchmarking: {}'.format(num_threads))
        sess_config = tf.ConfigProto(
            intra_op_parallelism_threads=num_threads,
            inter_op_parallelism_threads=1)
    else:
        sess_config = tf.ConfigProto()

    # rewriter_config_pb2.RewriterConfig.OFF
    sess_config.graph_options.rewrite_options.dependency_optimization = 2
    if self.use_xla:
        sess_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_2)

    with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
        inputs = tf.placeholder(tf.float32, name='input',
                                shape=self.inputs_shape)
        output = self.build_model(inputs, is_training=False)

        img = np.random.uniform(size=self.inputs_shape)

        sess.run(tf.global_variables_initializer())
        if self.tensorrt:
            fetches = [inputs.name] + [i.name for i in output]
            goutput = self.convert_tr(graphdef, fetches)
            inputs, output = goutput[0], goutput[1:]

        if not self.use_xla:
            # Don't use tf.group because XLA removes the whole graph for
            # tf.group.
            output = tf.group(*output)

        for i in range(warmup_runs):
            start_time = time.time()
            sess.run(output, feed_dict={inputs: img})
            print('Warm up: {} {:.4f}s'.format(i, time.time() - start_time))

        print('Start benchmark runs total={}'.format(bm_runs))
        timev = []
        for i in range(bm_runs):
            if trace_filename and i == (bm_runs // 2):
                run_options = tf.RunOptions()
                run_options.trace_level = tf.RunOptions.FULL_TRACE
                run_metadata = tf.RunMetadata()
                sess.run(output, feed_dict={inputs: img},
                         options=run_options, run_metadata=run_metadata)
                tf.logging.info('Dumping trace to %s' % trace_filename)
                trace_dir = os.path.dirname(trace_filename)
                if not tf.io.gfile.exists(trace_dir):
                    tf.io.gfile.makedirs(trace_dir)
                with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                    from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                    trace = timeline.Timeline(
                        step_stats=run_metadata.step_stats)
                    trace_file.write(
                        trace.generate_chrome_trace_format(show_memory=True))

            start_time = time.time()
            sess.run(output, feed_dict={inputs: img})
            timev.append(time.time() - start_time)

        timev.sort()
        timev = timev[2:bm_runs - 2]
        print('{} {}runs {}threads: mean {:.4f} std {:.4f} min {:.4f} max {:.4f}'
              .format(self.model_name, len(timev), num_threads,
                      np.mean(timev), np.std(timev), np.min(timev),
                      np.max(timev)))
def run(self, *args_, **kwargs_):
    if self._end_trace:
        ret = self.sess.run(*args_, **kwargs_)
    elif not self._end_trace and self.step_cnt < self.start_step:
        ret = self.sess.run(*args_, **kwargs_)
        self.step_cnt += 1
    elif not self._end_trace and self.step_cnt < self.end_step:
        ret = self.sess.run(*args_,
                            options=self.run_options,
                            run_metadata=self.run_metadata,
                            **kwargs_)

        # Create the Timeline object and collect its events as json
        tl = timeline.Timeline(self.run_metadata.step_stats)
        ctf = json.loads(tl.generate_chrome_trace_format())
        self.traces["traceEvents"] += ctf["traceEvents"]
        print("Add the {}th step of traces".format(self.step_cnt))
        self.step_cnt += 1

        ### Create the DAG
        if self.dag is None:
            self.dag = nx.DiGraph()
            for trace in ctf["traceEvents"]:
                if trace["ph"] == "M" or "args" not in trace:
                    continue
                op = trace["args"]["op"]
                name = trace["args"]["name"]

                ### Add nodes to the DAG
                if name not in self.dag.nodes:
                    self.dag.add_node(name)

                ### Add dependency info
                for k, v in trace["args"].items():
                    if "input" in k:
                        self.dag.add_edge(v, name)

        try:
            # The original passed self.dag.cycle, which does not exist;
            # nx.find_cycle takes the graph itself and raises
            # NetworkXNoCycle when the graph is acyclic.
            not_found = False
            nx.find_cycle(self.dag)
        except Exception:
            not_found = True
        assert not_found

        def flatten_fetch_list(fetch_list):
            if not isinstance(fetch_list, (list, tuple)):
                return [fetch_list]
            else:
                result_list = []
                for op in fetch_list:
                    result_list += flatten_fetch_list(op)
                return result_list

        ### Output traces
        if self.step_cnt == self.end_step:
            fd = kwargs_.get("feed_dict")
            tensor_names, tensor_shape_ops = self.tensor_shape_ops
            out_shapes = self.sess.run(tensor_shape_ops, feed_dict=fd)
            self.tensor_shapes = {}
            for name, shape in zip(tensor_names, out_shapes):
                self.tensor_shapes[name] = [int(s) for s in list(shape)]

            # collect feed dict meta
            self.fetches = [
                tensor.name for tensor in flatten_fetch_list(args_[0])
            ]
            for key, tensor in fd.items():
                shape_as_list = [int(dim) for dim in tensor.shape]
                dtype_as_str = (str(tensor.dtype).split("\'")[1]
                                if "\'" in str(tensor.dtype)
                                else str(tensor.dtype)).split("_ref")[0]
                self.feed_dict_meta[key.op.name] = {
                    "shape": shape_as_list,
                    "dtype": dtype_as_str
                }
            self._end_trace = True
            self.output_traces()

    ### Return all fetches
    return ret
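# NOTE: the enclosing class is not shown in this snippet. Below is a minimal
# hypothetical constructor consistent with the attributes run() uses; the
# class name TraceSession is an assumption, and tensor_shape_ops /
# output_traces are left out since their definitions are not available here.
class TraceSession(object):
    # Hypothetical wrapper: traces steps start_step..end_step of a session.
    def __init__(self, sess, start_step=10, end_step=20):
        self.sess = sess
        self.start_step = start_step
        self.end_step = end_step
        self.step_cnt = 0
        self._end_trace = False
        self.dag = None
        self.traces = {"traceEvents": []}
        self.feed_dict_meta = {}
        self.run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE)
        self.run_metadata = tf.RunMetadata()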
def main():
    args = get_arguments()

    data_dir = 'midi-Corpus/' + args.data_set + '/'
    logdir = data_dir + 'max_dilation=%d_reps=%d/' % (args.max_dilation_pow,
                                                      args.expansion_reps)
    print('*************************************************')
    print(logdir)
    print('*************************************************')
    sys.stdout.flush()
    restore_from = logdir

    if not os.path.exists(logdir):
        os.makedirs(logdir)

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    wavenet_params = loadParams(args.max_dilation_pow, args.expansion_reps,
                                args.dil_chan, args.res_chan, args.skip_chan)
    with open(logdir + 'wavenet_params.json', 'w') as outfile:
        json.dump(wavenet_params, outfile)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        gc_enabled = False
        # data queue for the training set
        train_dir = data_dir + 'train/'
        train_reader = MidiReader(
            train_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size)
        train_batch = train_reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=BATCH_SIZE,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=False,
        global_condition_channels=None,
        global_condition_cardinality=train_reader.gc_category_cardinality)

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    print('constructing training loss')
    sys.stdout.flush()
    train_loss, target_output, prediction = net.loss(
        input_batch=train_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=args.l2_regularization_strength)

    print('constructing validation loss')
    sys.stdout.flush()

    print('making optimizer')
    sys.stdout.flush()
    optimizer = optimizer_factory['adam'](learning_rate=args.learning_rate,
                                          momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(train_loss, var_list=trainable)

    print('setting up tensorboard')
    sys.stdout.flush()
    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    valid_input = tf.placeholder(dtype=tf.float32, shape=(1, None, 88))
    valid_loss, valid_target_output, valid_prediction = net.loss(
        input_batch=valid_input,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=args.l2_regularization_strength)

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    print('saver')
    sys.stdout.flush()
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(), max_to_keep=5)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    print('thread stuff')
    sys.stdout.flush()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step

    # load validation data
    validation_audio = load_all_audio(data_dir + 'valid/')
    num_valid_files = len(validation_audio)
    valid_loss_values = np.zeros((int(np.ceil(args.num_steps / 50)), ))
    vl_ind = 0

    print('optimization time')
    sys.stdout.flush()
    min_valid_loss = 1e10
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, train_loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))

                valid_losses_step = np.zeros((num_valid_files, ))
                for i in range(num_valid_files):
                    audio_i = np.expand_dims(validation_audio[i], 0)
                    valid_losses_step[i] = sess.run(valid_loss,
                                                    {valid_input: audio_i})
                valid_loss_value_step = np.mean(valid_losses_step)
                valid_loss_values[vl_ind] = valid_loss_value_step
                np.savez(logdir + 'validation.npz',
                         validation_loss=valid_loss_values)
                vl_ind += 1

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))

                if valid_loss_value_step < min_valid_loss:
                    min_valid_loss = valid_loss_value_step
                    save(saver, sess, logdir, step)
                    last_saved_step = step
            else:
                summary, loss_value, _ = sess.run(
                    [summaries, train_loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
            sys.stdout.flush()
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # 'step' stays None if the loop never ran, so guard the comparison.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def train(self, data, valid_data): stop_batch = self.stop_batch if self.run_opt.is_distrib: self._init_sess_distrib() else: self._init_sess_serial() self.print_head() fp = None if self.run_opt.is_chief: fp = open(self.disp_file, "a") cur_batch = self.sess.run(self.global_step) is_first_step = True self.cur_batch = cur_batch self.run_opt.message( "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" % (self.sess.run(self.learning_rate), self.lr.value(cur_batch), self.lr.decay_steps_, self.lr.decay_rate_, self.lr.value(stop_batch))) prf_options = None prf_run_metadata = None if self.profiling: prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) prf_run_metadata = tf.RunMetadata() train_time = 0 while cur_batch < stop_batch: batch_data = data.get_batch(sys_probs=self.sys_probs, auto_prob_style=self.auto_prob_style) feed_dict_batch = {} for kk in batch_data.keys(): if kk == 'find_type' or kk == 'type': continue if 'find_' in kk: feed_dict_batch[self.place_holders[kk]] = batch_data[kk] else: feed_dict_batch[self.place_holders[kk]] = np.reshape( batch_data[kk], [-1]) for ii in ['type']: feed_dict_batch[self.place_holders[ii]] = np.reshape( batch_data[ii], [-1]) for ii in ['natoms_vec', 'default_mesh']: feed_dict_batch[self.place_holders[ii]] = batch_data[ii] feed_dict_batch[self.place_holders['is_training']] = True if self.display_in_training and is_first_step: self.test_on_the_fly(fp, valid_data, feed_dict_batch) is_first_step = False if self.timing_in_training: tic = time.time() self.sess.run([self.train_op], feed_dict=feed_dict_batch, options=prf_options, run_metadata=prf_run_metadata) if self.timing_in_training: toc = time.time() if self.timing_in_training: train_time += toc - tic cur_batch = self.sess.run(self.global_step) self.cur_batch = cur_batch if self.display_in_training and (cur_batch % self.disp_freq == 0): tic = time.time() self.test_on_the_fly(fp, valid_data, feed_dict_batch) toc = time.time() test_time = toc - tic if self.timing_in_training: self._message( "batch %7d training time %.2f s, testing time %.2f s" % (cur_batch, train_time, test_time)) train_time = 0 if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.run_opt.is_chief: if self.saver is not None: self.saver.save(self.sess, os.getcwd() + "/" + self.save_ckpt) self._message("saved checkpoint %s" % self.save_ckpt) if self.run_opt.is_chief: fp.close() if self.profiling and self.run_opt.is_chief: fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) chrome_trace = fetched_timeline.generate_chrome_trace_format() with open(self.profiling_file, 'w') as f: f.write(chrome_trace)
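# Note: the trainer above reuses one RunMetadata for every profiled sess.run,
# and each traced run overwrites its step_stats, so the chrome trace written
# at the end reflects only the last step. A minimal sketch of accumulating the
# per-step traces instead (hypothetical helper; assumes each input is the JSON
# string returned by timeline.Timeline(...).generate_chrome_trace_format()):
import json

class ChromeTraceMerger(object):
    def __init__(self):
        self._events = []

    def update(self, chrome_trace):
        # Collect this step's trace events.
        self._events.extend(json.loads(chrome_trace)['traceEvents'])

    def save(self, path):
        # Write one merged trace that chrome://tracing can load.
        with open(path, 'w') as f:
            json.dump({'traceEvents': self._events}, f)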
def run_model(model, horovod=False, gpu_num=1, output=None, steptime=False, profile=False, timeline=False, loss=False, session=1, step=1, batchsize=None, graph=False): # TODO: description # cannot dump graph if timeline or profile is On if graph and (timeline or profile): raise ValueError("cannot dump graph togother with timeline or tfprof") with tf.Graph().as_default(): times_list = [] losses_list = [] op, _loss = tf_model.get_model(model, batchsize, horovod=horovod) # set gpus available config = tf.ConfigProto() if horovod is True: config.gpu_options.allow_growth = False config.gpu_options.visible_device_list = str(hvd.local_rank()) # print('DEBUG: ', str(hvd.local_rank())) else: # buildup gpus='0,1,2...' config.gpu_options.allow_growth = False gpus = ','.join(map(str, range(gpu_num))) print('DEBUG: gpus=%s' % gpus) config.gpu_options.visible_device_list = gpus for i in range(session): sess = tf.Session(config=config) sess.run(tf.global_variables_initializer()) times = [] losses = [] opts = None run_metadata = None # the dump graph mode on if graph: opts = tf.RunOptions(output_partition_graphs=True) run_metadata = tf.RunMetadata() # the profile mode on elif profile or timeline: # create runOptions and run_metadata object opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() if profile: # Create a profiler. profiler = model_analyzer.Profiler(sess.graph) for n in range(step): start_time = time.time() # run model if loss is True: res = sess.run([op, _loss], options=opts, run_metadata=run_metadata) losses.append(res[1]) else: res = sess.run(op, options=opts, run_metadata=run_metadata) train_time = time.time() - start_time times.append(train_time) # print steptime and loss at realtime if loss is True: print('Sess%d/%d Step%d/%d: time=%.2fms loss=%.2f' % (i + 1, session, n + 1, step, train_time * 1000, res[1])) else: print('Sess%d/%d Step%d/%d: time=%.2fms' % (i + 1, session, n + 1, step, train_time * 1000)) if (not graph) and profile: profiler.add_step(step=step, run_meta=run_metadata) times_list.append(times) losses_list.append(losses) if output is not None: # make folder if it not exist try: if not os.path.exists(output): os.makedirs(output) except (FileExistsError): print("") file_loss = '_lossOn' if loss else '' file_trace = '_traceOn' if profile or timeline else '' file_horovod = '_hvdRank%d' % hvd.rank() if horovod else '' file_batchsize = '_bs%d' % batchsize if batchsize is not None\ else '_bsDefault' file_gpunum = '_gpunum%d' % gpu_num if steptime is True: filename = '%s%s%s%s%s%s_steptime.csv' %\ (model, file_batchsize, file_loss, file_trace, file_horovod, file_gpunum) output_csv(filename, times_list, path=output, scale=1000) if loss is True: filename = '%s%s%s%s%s%s_loss.csv' % \ (model, file_batchsize, file_loss, file_trace, file_horovod, file_gpunum) output_csv(filename, losses_list, path=output) if graph: # save each partition of graph with _output_shapes attr if horovod: graph_dir = os.path.join( output, '%s%s%s%s_partitionGraph' % (model, file_batchsize, file_loss, file_gpunum), str(hvd.rank())) if not os.path.exists(graph_dir): os.makedirs(graph_dir) save_partition_graph_shapes(run_metadata, graph_dir, 'graph') else: save_partition_graph_shapes( run_metadata, output, '%s%s%s%s%s_partitionGraph' % (model, file_batchsize, file_loss, file_horovod, file_gpunum)) if profile is True: filename = '%s%s%s%s%s_gpunum%d.profile' % \ (model, file_batchsize, file_loss, file_trace, file_horovod, gpu_num) filepath = output + '/' + filename 
generate_tfprof_profile(profiler, filepath) if timeline is True: filename = '%s%s%s%s%s_gpunum%d.timeline' % \ (model, file_batchsize, file_loss, file_trace, file_horovod, gpu_num) filepath = output + '/' + filename tl = _timeline.Timeline(run_metadata.step_stats) ctf = tl.generate_chrome_trace_format() with open(filepath, 'w') as f: f.write(ctf)
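# `generate_tfprof_profile(profiler, filepath)` is assumed by run_model above.
# A minimal sketch of such a helper using the tf.profiler option builder
# (an illustration under that assumption, not the original implementation):
from tensorflow.python.profiler import option_builder

def generate_tfprof_profile(profiler, filepath):
    # Dump a graph-wide time-and-memory profile to `filepath`.
    opts = (option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.time_and_memory())
        .with_file_output(filepath).build())
    profiler.profile_graph(options=opts)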
def train(train_data, test_data=None, sampler_name='Uniform'): G = train_data[0] features = train_data[1] id_map = train_data[2] class_map = train_data[4] if isinstance(list(class_map.values())[0], list): num_classes = len(list(class_map.values())[0]) else: num_classes = len(set(class_map.values())) if not features is None: # pad with dummy zero vector features = np.vstack([features, np.zeros((features.shape[1], ))]) context_pairs = train_data[3] if FLAGS.random_context else None placeholders = construct_placeholders(num_classes) minibatch = NodeMinibatchIterator(G, id_map, placeholders, class_map, num_classes, batch_size=FLAGS.batch_size, max_degree=FLAGS.max_degree, context_pairs=context_pairs) adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape) adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info") adj_shape = adj_info.get_shape().as_list() if FLAGS.model == 'mean_concat': # Create model if sampler_name == 'Uniform': sampler = UniformNeighborSampler(adj_info) elif sampler_name == 'ML': sampler = MLNeighborSampler(adj_info, features) elif sampler_name == 'FastML': sampler = FastMLNeighborSampler(adj_info, features) if FLAGS.samples_3 != 0: layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2), SAGEInfo("node", sampler, FLAGS.samples_3, FLAGS.dim_3) ] elif FLAGS.samples_2 != 0: layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2) ] else: layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1) ] # modified model = SupervisedGraphsage(num_classes, placeholders, features, adj_info, minibatch.deg, layer_infos, concat=True, model_size=FLAGS.model_size, sigmoid_loss=FLAGS.sigmoid, identity_dim=FLAGS.identity_dim, logging=True) elif FLAGS.model == 'mean_add': # Create model if sampler_name == 'Uniform': sampler = UniformNeighborSampler(adj_info) elif sampler_name == 'ML': sampler = MLNeighborSampler(adj_info, features) elif sampler_name == 'FastML': sampler = FastMLNeighborSampler(adj_info, features) if FLAGS.samples_3 != 0: layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2), SAGEInfo("node", sampler, FLAGS.samples_3, FLAGS.dim_3) ] elif FLAGS.samples_2 != 0: layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2) ] else: layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1) ] # modified model = SupervisedGraphsage(num_classes, placeholders, features, adj_info, minibatch.deg, layer_infos, concat=False, model_size=FLAGS.model_size, sigmoid_loss=FLAGS.sigmoid, identity_dim=FLAGS.identity_dim, logging=True) elif FLAGS.model == 'gcn': if sampler_name == 'Uniform': sampler = UniformNeighborSampler(adj_info) elif sampler_name == 'ML': sampler = MLNeighborSampler(adj_info, features) elif sampler_name == 'FastML': sampler = FastMLNeighborSampler(adj_info, features) layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, 2 * FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, 2 * FLAGS.dim_2) ] model = SupervisedGraphsage(num_classes, placeholders, features, adj_info, minibatch.deg, layer_infos=layer_infos, aggregator_type="gcn", model_size=FLAGS.model_size, concat=False, sigmoid_loss=FLAGS.sigmoid, identity_dim=FLAGS.identity_dim, logging=True) elif FLAGS.model == 'graphsage_seq': sampler = UniformNeighborSampler(adj_info) layer_infos = 
[ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2) ] model = SupervisedGraphsage(num_classes, placeholders, features, adj_info, minibatch.deg, layer_infos=layer_infos, aggregator_type="seq", model_size=FLAGS.model_size, sigmoid_loss=FLAGS.sigmoid, identity_dim=FLAGS.identity_dim, logging=True) elif FLAGS.model == 'graphsage_maxpool': if sampler_name == 'Uniform': sampler = UniformNeighborSampler(adj_info) elif sampler_name == 'ML': sampler = MLNeighborSampler(adj_info, features) elif sampler_name == 'FastML': sampler = FastMLNeighborSampler(adj_info, features) #sampler = UniformNeighborSampler(adj_info) layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2) ] model = SupervisedGraphsage(num_classes, placeholders, features, adj_info, minibatch.deg, layer_infos=layer_infos, aggregator_type="maxpool", model_size=FLAGS.model_size, sigmoid_loss=FLAGS.sigmoid, identity_dim=FLAGS.identity_dim, logging=True) elif FLAGS.model == 'graphsage_meanpool': if sampler_name == 'Uniform': sampler = UniformNeighborSampler(adj_info) elif sampler_name == 'ML': sampler = MLNeighborSampler(adj_info, features) elif sampler_name == 'FastML': sampler = FastMLNeighborSampler(adj_info, features) #sampler = UniformNeighborSampler(adj_info) layer_infos = [ SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1), SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2) ] model = SupervisedGraphsage(num_classes, placeholders, features, adj_info, minibatch.deg, layer_infos=layer_infos, aggregator_type="meanpool", model_size=FLAGS.model_size, sigmoid_loss=FLAGS.sigmoid, identity_dim=FLAGS.identity_dim, logging=True) else: raise Exception('Error: model name unrecognized.') config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement) config.gpu_options.allow_growth = True #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION config.allow_soft_placement = True # Initialize session sess = tf.Session(config=config) merged = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(log_dir(sampler_name), sess.graph) # Save model model_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) saver = tf.train.Saver(var_list=model_vars) model_path = './model/' + FLAGS.train_prefix.split( "/")[-1] + '-' + model_prefix() + '-' + sampler_name model_path += hyper_prefix() if not os.path.exists(model_path): os.makedirs(model_path) # Init variables sess.run(tf.global_variables_initializer(), feed_dict={adj_info_ph: minibatch.adj}) # Restore params of ML sampler model if sampler_name == 'ML' or sampler_name == 'FastML': sampler_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="MLsampler") #pdb.set_trace() saver_sampler = tf.train.Saver(var_list=sampler_vars) if FLAGS.allhop_rewards: sampler_model_path = './model/MLsampler-' + FLAGS.train_prefix.split( '/')[-1] + '-' + model_prefix() + '-allhops' else: sampler_model_path = './model/MLsampler-' + FLAGS.train_prefix.split( '/')[-1] + '-' + model_prefix() + '-lasthop' sampler_model_path += hyper_prefix() saver_sampler.restore(sess, sampler_model_path + 'model.ckpt') # Train model total_steps = 0 avg_time = 0.0 epoch_val_costs = [] train_adj_info = tf.assign(adj_info, minibatch.adj) val_adj_info = tf.assign(adj_info, minibatch.test_adj) val_cost_ = [] val_f1_mic_ = [] val_f1_mac_ = [] duration_ = [] epoch_laps_ = [] ln_acc = sparse.csr_matrix((adj_shape[0], adj_shape[0]), dtype=np.float32) lnc_acc = 
sparse.csr_matrix((adj_shape[0], adj_shape[0]), dtype=np.int32) ln_acc = ln_acc.tolil() lnc_acc = lnc_acc.tolil() #learning_rate = [0.01, 0.001, 0.0001] learning_rate = [FLAGS.learning_rate] for lr_iter in range(len(learning_rate)): for epoch in range(FLAGS.epochs): epoch_time = time.time() minibatch.shuffle() iter = 0 print('Epoch: %04d' % (epoch + 1)) epoch_val_costs.append(0) while not minibatch.end(): # Construct feed dictionary feed_dict, labels = minibatch.next_minibatch_feed_dict() if list(feed_dict.values())[0] != FLAGS.batch_size: break feed_dict.update({placeholders['dropout']: FLAGS.dropout}) feed_dict.update( {placeholders['learning_rate']: learning_rate[lr_iter]}) t = time.time() # Training step outs = sess.run([ merged, model.opt_op, model.loss, model.preds, model.loss_node, model.loss_node_count ], feed_dict=feed_dict) train_cost = outs[2] if iter % FLAGS.validate_iter == 0: # Validation sess.run(val_adj_info.op) if FLAGS.validate_batch_size == -1: val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate( sess, model, minibatch, FLAGS.batch_size) else: val_cost, val_f1_mic, val_f1_mac, duration = evaluate( sess, model, minibatch, FLAGS.validate_batch_size) # accumulate val results val_cost_.append(val_cost) val_f1_mic_.append(val_f1_mic) val_f1_mac_.append(val_f1_mac) duration_.append(duration) # sess.run(train_adj_info.op) epoch_val_costs[-1] += val_cost if total_steps % FLAGS.print_every == 0: summary_writer.add_summary(outs[0], total_steps) # Print results avg_time = (avg_time * total_steps + time.time() - t) / (total_steps + 1) ln = outs[4].values ln_idx = outs[4].indices ln_acc[ln_idx[:, 0], ln_idx[:, 1]] += ln lnc = outs[5].values lnc_idx = outs[5].indices lnc_acc[lnc_idx[:, 0], lnc_idx[:, 1]] += lnc if total_steps % FLAGS.print_every == 0: train_f1_mic, train_f1_mac = calc_f1(labels, outs[3]) print("Iter:", '%04d' % iter, "train_loss=", "{:.5f}".format(train_cost), "train_f1_mic=", "{:.5f}".format(train_f1_mic), "val_loss=", "{:.5f}".format(val_cost), "val_f1_mic=", "{:.5f}".format(val_f1_mic), "time per iter=", "{:.5f}".format(avg_time)) iter += 1 total_steps += 1 if total_steps > FLAGS.max_total_steps: break epoch_laps = time.time() - epoch_time epoch_laps_.append(epoch_laps) print("Epoch time=", "{:.5f}".format(epoch_laps)) if total_steps > FLAGS.max_total_steps: break print("avg time per epoch=", "{:.5f}".format(np.mean(epoch_laps_))) # Save model save_path = saver.save(sess, model_path + 'model.ckpt') print('model is saved at %s' % save_path) # Save loss node and count loss_node_path = './loss_node/' + FLAGS.train_prefix.split( '/')[-1] + '-' + model_prefix() + '-' + sampler_name loss_node_path += hyper_prefix() if not os.path.exists(loss_node_path): os.makedirs(loss_node_path) loss_node = sparse.save_npz(loss_node_path + 'loss_node.npz', sparse.csr_matrix(ln_acc)) loss_node_count = sparse.save_npz(loss_node_path + 'loss_node_count.npz', sparse.csr_matrix(lnc_acc)) print('loss and count per node is saved at %s' % loss_node_path) print("Optimization Finished!") sess.run(val_adj_info.op) # test val_cost_ = [] val_f1_mic_ = [] val_f1_mac_ = [] duration_ = [] print("Writing test set stats to file (don't peek!)") # timeline if FLAGS.timeline == True: run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() else: run_options = None run_metadata = None for iter in range(10): val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate( sess, model, minibatch, FLAGS.batch_size, run_options, run_metadata, test=True)
print("Full validation stats:", "loss=", "{:.5f}".format(val_cost), "f1_micro=", "{:.5f}".format(val_f1_mic), "time=", "{:.5f}".format(duration)) val_cost_.append(val_cost) val_f1_mic_.append(val_f1_mic) duration_.append(duration) print("mean: loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format( np.mean(val_cost_), np.mean(val_f1_mic_), np.mean(duration_))) # write test results with open(log_dir(sampler_name) + "test_stats.txt", "w") as fp: for iter in range(10): fp.write("loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format( val_cost_[iter], val_f1_mic_[iter], duration_[iter])) fp.write("mean: loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format( np.mean(val_cost_), np.mean(val_f1_mic_), np.mean(duration_))) fp.write("variance: loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format( np.var(val_cost_), np.var(val_f1_mic_), np.var(duration_))) # create timeline object, and write it to a json if FLAGS.timeline == True: tl = timeline.Timeline(run_metadata.step_stats) ctf = tl.generate_chrome_trace_format(show_memory=True) with open(log_dir(sampler_name) + 'timeline.json', 'w') as f: print('timeline written at %s' % (log_dir(sampler_name) + 'timelnie.json')) f.write(ctf) sess.close() tf.reset_default_graph()
def im_detect(sess, net, im, boxes=None): """Detect object classes in an image given object proposals. Arguments: net (caffe.Net): Fast R-CNN network to use im (ndarray): color image to test (in BGR order) boxes (ndarray): R x 4 array of object proposals Returns: scores (ndarray): R x K array of object class scores (K includes background as object category 0) boxes (ndarray): R x (4*K) array of predicted bounding boxes """ blobs, im_scales = _get_blobs(im, boxes) # When mapping from image ROIs to feature map ROIs, there's some aliasing # (some distinct image ROIs get mapped to the same feature ROI). # Here, we identify duplicate feature ROIs, so we only compute features # on the unique subset. if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: v = np.array([1, 1e3, 1e6, 1e9, 1e12]) hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) _, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True) blobs['rois'] = blobs['rois'][index, :] boxes = boxes[index, :] if cfg.TEST.HAS_RPN: im_blob = blobs['data'] blobs['im_info'] = np.array( [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) # forward pass if cfg.TEST.HAS_RPN: feed_dict = { net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0 } else: feed_dict = { net.data: blobs['data'], net.rois: blobs['rois'], net.keep_prob: 1.0 } run_options = None run_metadata = None if cfg.TEST.DEBUG_TIMELINE: run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() cls_score, cls_prob, bbox_pred, rois = sess.run([ net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'), net.get_output('rois') ], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) if cfg.TEST.HAS_RPN: assert len(im_scales) == 1, "Only single-image batch implemented" boxes = rois[:, 1:5] / im_scales[0] if cfg.TEST.SVM: # use the raw scores before softmax under the assumption they # were trained as linear SVMs scores = cls_score else: # use softmax estimated probabilities scores = cls_prob if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred pred_boxes = bbox_transform_inv(boxes, box_deltas) pred_boxes = _clip_boxes(pred_boxes, im.shape) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: # Map scores and predictions back to the original set of boxes scores = scores[inv_index, :] pred_boxes = pred_boxes[inv_index, :] if cfg.TEST.DEBUG_TIMELINE: trace = timeline.Timeline(step_stats=run_metadata.step_stats) trace_file = open( str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() return scores, pred_boxes
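# Note: `long` in the trace filename above exists only in Python 2; under
# Python 3 the equivalent is str(int(time.time() * 1000)).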
def train(self, names_train, y_train, names_valid, y_valid, model_path, batch_size=128, patience=1024000, stat_interval=1000, valid_interval=1000, summary_interval=1000, valid_batch_size=2048, profile=False): """Train a gender classifier on the name/gender pairs.""" start_time = time() def add_metric_summaries(mode, iteration, name2metric): """Add summary for metric.""" metric_summary = tf.Summary() for name, metric in name2metric.items(): metric_summary.value.add(tag='{}_{}'.format(mode, name), simple_value=metric) summary_writer.add_summary(metric_summary, global_step=iteration) def show_train_stats(epoch, iteration, losses, y_cat, y_cat_pred): # compute mean statistics loss = np.mean(losses) accuracy = accuracy_score(y_cat, y_cat_pred) score = accuracy - loss _LOGGER.info( 'Epoch={}, Iter={:,}, Mean Training Loss={:.4f}, Accuracy={:.4f}, ' 'Accuracy - Loss={:.4f}'.format(epoch, iteration, loss, accuracy, score)) add_metric_summaries( 'train', iteration, { 'cross_entropy': loss, 'accuracy': accuracy, 'accuracy - loss': score }) _LOGGER.info('\n{}'.format( classification_report(y_cat, y_cat_pred, digits=3))) return list(), list(), list() def validate(epoch, iteration, X, y, best_score, patience): """Validate the model on validation set.""" batch_generator = BatchGenerator(X, y, batch_size=valid_batch_size, valid=True) losses, y_cat, y_cat_pred = list(), list(), list() for X_batch, y_batch in batch_generator: X_batch, word_lens, char_lens = self._add_padding(X_batch) loss, y_pred = session.run( [nodes['loss'], nodes['y_pred']], feed_dict={ nodes['X']: X_batch, nodes['y']: y_batch, nodes['word_lens']: word_lens, nodes['char_lens']: char_lens, nodes['is_train']: False }, options=run_options, run_metadata=run_metadata) losses.append(loss) y_cat.extend(self._categorize_y(y_batch)) y_cat_pred.extend(self._categorize_y(y_pred)) # compute mean statistics loss = np.mean(losses) accuracy = accuracy_score(y_cat, y_cat_pred) score = accuracy - loss _LOGGER.info( 'Epoch={}, Iter={:,}, Validation Loss={:.4f}, Accuracy={:.4f}, ' 'Accuracy - Loss={:.4f}'.format(epoch, iteration, loss, accuracy, score)) add_metric_summaries( 'valid', iteration, { 'cross_entropy': loss, 'accuracy': accuracy, 'accuracy - loss': score }) _LOGGER.info('\n{}'.format( classification_report(y_cat, y_cat_pred, digits=3))) if score > best_score: _LOGGER.info( 'Best score (Accuracy - Loss) so far, save the model.') self._save(model_path, session) best_score = score if iteration * 2 > patience: patience = iteration * 2 _LOGGER.info('Increased patience to {:,}'.format(patience)) if run_metadata: with open(_VALID_PROFILE_FILE, 'w') as file_: file_.write( timeline.Timeline(run_metadata.step_stats). 
generate_chrome_trace_format()) return best_score, patience _LOGGER.info('Prepare inputs and other variables for the model...') self._fit_encoder(names_train + names_valid) X_train = self._encode_chars(names_train) X_valid = self._encode_chars(names_valid) train_size = len(X_train) train_batch_generator = BatchGenerator(X_train, y_train, batch_size) best_valid_score = np.float64('-inf') losses = list() y_cat = list() y_cat_pred = list() iteration = 0 # profiler run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) if profile else None run_metadata = tf.RunMetadata() if profile else None _LOGGER.info('Building the tensorflow graph...') self._build_graph() nodes = self._nodes session = tf.Session(graph=self._graph) summary_writer = tf.summary.FileWriter( os.path.join(model_path, self._tensorboard_dir), session.graph) self._visualize_embedding(model_path, summary_writer) session.run(nodes['init']) _LOGGER.info('Start fitting a model...') # iterate over batches for batch_id, (X_batch, y_batch) in enumerate(train_batch_generator): epoch = 1 + iteration // train_size if batch_id % summary_interval == 0: summaries = session.run(nodes['summaries']) summary_writer.add_summary(summaries, global_step=iteration) X_batch, word_lens, char_lens = self._add_padding(X_batch) # Predict labels and update the parameters _, loss, y_pred = session.run( [nodes['optimizer'], nodes['loss'], nodes['y_pred']], feed_dict={ nodes['X']: X_batch, nodes['y']: y_batch, nodes['word_lens']: word_lens, nodes['char_lens']: char_lens, nodes['is_train']: True }, options=run_options, run_metadata=run_metadata) losses.append(loss) y_cat.extend(self._categorize_y(y_batch)) y_cat_pred.extend(self._categorize_y(y_pred)) iteration += batch_size if run_metadata: with open(_TRAIN_PROFILE_FILE, 'w') as file_: file_.write( timeline.Timeline(run_metadata.step_stats). generate_chrome_trace_format()) if batch_id % stat_interval == 0: losses, y_cat, y_cat_pred = show_train_stats( epoch, iteration, losses, y_cat, y_cat_pred) if batch_id % valid_interval == 0: best_valid_score, patience = validate(epoch, iteration, X_valid, y_valid, best_valid_score, patience) if iteration > patience: _LOGGER.info( 'Iteration is more than patience, finish training.') break _LOGGER.info('Finished fitting the model.') _LOGGER.info( 'Best Validation Score (Accuracy - Cross-entropy Loss): {:.4f}'. format(best_valid_score)) # close the session session.close() end_time = time() _LOGGER.info('Took {:,} seconds to train the model.'.format( int(end_time - start_time))) return best_valid_score
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError(
                'Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')
    DATA_DIRECTORY = 'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon,D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\son'
    #DATA_DIRECTORY = 'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')
    #LOGDIR = None
    LOGDIR = './/logdir-wavenet//train//2019-03-27T20-27-18'
    parser.add_argument(
        '--logdir', type=str, default=LOGDIR,
        help='Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.')
    parser.add_argument(
        '--logdir_root', type=str, default=None,
        help='Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.')
    parser.add_argument(
        '--restore_from', type=str, default=None,
        help='Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.')
    CHECKPOINT_EVERY = 1000  # checkpoint save interval
    parser.add_argument(
        '--checkpoint_every', type=int, default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' +
        str(CHECKPOINT_EVERY) + '.')
    parser.add_argument('--eval_every', type=int, default=2,
                        help='Steps between eval on test data')
    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")
    try:
        directories = validate_directories(config, default_hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return
    logdir = directories['logdir']
    restore_from = directories['restore_from']
    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from
    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if default_hparams.l2_regularization_strength == 0:
        default_hparams.l2_regularization_strength = None
    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)
    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = default_hparams.silence_threshold if default_hparams.silence_threshold > EPSILON else None
        gc_enable = True  # before: num_speakers > 1; now: always True
        # DataFeederWavenet cuts the wav files into inputs: the leading
        # receptive_field samples are padded or taken from the previous chunk,
        # and each chunk is cut to size (receptive_field + sample_size).
        reader = DataFeederWavenet(
            coord,
            config.data_dir,
            batch_size=default_hparams.wavenet_batch_size,
            gc_enable=gc_enable,
            test_mode=False)
        # A second DataFeederWavenet for testing; it fetches exactly one file.
        reader_test = DataFeederWavenet(coord,
                                        config.data_dir,
                                        batch_size=1,
                                        gc_enable=gc_enable,
                                        test_mode=True,
                                        queue_size=1)
        audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id
    # Create train network.
    net = create_network(default_hparams,
                         default_hparams.wavenet_batch_size,
                         num_speakers,
                         is_training=True)
    net.add_loss(
        input_batch=audio_batch,
        local_condition=lc_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=default_hparams.l2_regularization_strength,
        upsample_type=default_hparams.upsample_type)
    net.add_optimizer(default_hparams, global_step)
    run_metadata = tf.RunMetadata()
    # Set up session
    # log_device_placement=False --> automatic cpu/gpu placement.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(
        var_list=tf.global_variables(),
        max_to_keep=default_hparams.max_checkpoints)  # maximum number of checkpoints to keep
    try:
        start_step = load(saver, sess, restore_from)  # load checkpoint
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
            start_step = 0
    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise
    ###########
    reader.start_in_session(sess, start_step)
    reader_test.start_in_session(sess, start_step)
    # Create test network. Because of the queue creation, the test network is
    # built after the session restore.
    net_test = create_network(default_hparams, 1, num_speakers,
                              is_training=False)
    if default_hparams.scalar_input:
        samples = tf.placeholder(tf.float32,
                                 shape=[net_test.batch_size, None])
        waveform = 2 * np.random.rand(net_test.batch_size).reshape(
            net_test.batch_size, -1) - 1
    else:
        # samples: mu-law encoded values, before one-hot conversion;
        # shape (batch_size, length).
        samples = tf.placeholder(tf.int32,
                                 shape=[net_test.batch_size, None])
        waveform = np.random.randint(default_hparams.quantization_channels,
                                     size=net_test.batch_size).reshape(
                                         net_test.batch_size, -1)
    upsampled_local_condition = tf.placeholder(
        tf.float32, shape=[net_test.batch_size, default_hparams.num_mels])
    speaker_id = tf.placeholder(tf.int32, shape=[net_test.batch_size])
    # Applies the Fast Wavenet Generation Algorithm (arXiv:1611.09482).
    next_sample = net_test.predict_proba_incremental(
        samples, upsampled_local_condition, speaker_id)
    sess.run(net_test.queue_initializer)
    # Three placeholders are used for testing: samples, speaker_id,
    # upsampled_local_condition.
    # Draw one mel-spectrogram for testing. If it were not fixed, the feeder
    # thread would keep reading new data; reader_test's role ends here.
    mel_input_test, speaker_id_test = sess.run(
        [reader_test.local_condition, reader_test.speaker_id])
    with tf.variable_scope('wavenet', reuse=tf.AUTO_REUSE):
        upsampled_local_condition_data = net_test.create_upsample(
            mel_input_test, upsample_type=default_hparams.upsample_type)
    # upsampled_local_condition_data_ is fed into the upsampled_local_condition
    # placeholder via feed_dict.
    upsampled_local_condition_data_ = sess.run(upsampled_local_condition_data)
    ######################################################
    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:
        while not coord.should_stop():
            start_time = time.time()
            if default_hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize])
            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
            if step % config.eval_every == 0:
                eval_step(sess, logdir, step, waveform,
                          upsampled_local_condition_data_, speaker_id_test,
                          mel_input_test, samples, speaker_id,
                          upsampled_local_condition, next_sample)
            if step >= default_hparams.num_steps:
                # An exception message is printed, but stopping here is intended.
                raise Exception('End xxx~~~yyy')
    except Exception as e:
        print('finally')
        log('Exiting due to exception: %s' % e, slack=True)
        #if step > last_saved_step:
        #    save(saver, sess, logdir, step)
        traceback.print_exc()
        coord.request_stop(e)
def train_mnist_cnn(FLAGS): # Config config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, inter_op_parallelism_threads=1) # Enable the custom optimizer using the rewriter config options # CRL-ORIG: config = ngraph_bridge.update_config(config) # Note: Additional configuration option to boost performance is to set the # following environment for the run: # OMP_NUM_THREADS=44 KMP_AFFINITY=granularity=fine,scatter # The OMP_NUM_THREADS number should correspond to the number of # cores in the system # Set Seed shuffle_batch = True if FLAGS.make_deterministic: seed = 1 tf.random.set_random_seed(seed) shuffle_batch = False supported_optimizers = ["adam", "sgd"] assert (FLAGS.optimizer in supported_optimizers), "Optimizer not supported" # Import data mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) # Create the model x = tf.placeholder(tf.float32, [None, 784]) # Define loss and optimizer y_ = tf.placeholder(tf.float32, [None, 10]) # Build the graph for the deep net y_conv, keep_prob = deepnn(x) with tf.name_scope('loss'): cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv) cross_entropy = tf.reduce_mean(cross_entropy) optimizer_scope = FLAGS.optimizer + "_optimizer" with tf.name_scope(optimizer_scope): if FLAGS.optimizer == "adam": train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) elif FLAGS.optimizer == "sgd": train_step = tf.train.GradientDescentOptimizer(1e-4).minimize( cross_entropy) with tf.name_scope('accuracy'): correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) correct_prediction = tf.cast(correct_prediction, tf.float32) accuracy = tf.reduce_mean(correct_prediction) tf.summary.scalar('Training accuracy', accuracy) tf.summary.scalar('Loss function', cross_entropy) graph_location = "./tf-profile-train" print('Saving graph to: %s' % graph_location) merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter(graph_location) train_writer.add_graph(tf.get_default_graph()) saver = tf.train.Saver() with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) train_loops = FLAGS.train_loop_count loss_values = [] for i in range(train_loops): batch = mnist.train.next_batch(FLAGS.batch_size, shuffle=shuffle_batch) if i % 10 == 0: t = time.time() train_accuracy = accuracy.eval(feed_dict={ x: batch[0], y_: batch[1], keep_prob: 1.0 }) #tf.summary.scalar('Training accuracy', train_accuracy) print('step %d, training accuracy %g, %g sec to evaluate' % (i, train_accuracy, time.time() - t)) t = time.time() run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() _, summary, loss = sess.run([train_step, merged, cross_entropy], feed_dict={ x: batch[0], y_: batch[1], keep_prob: 0.5 }, options=run_options, run_metadata=run_metadata) train_writer.add_run_metadata(run_metadata, 'cnn_' + "step_{}".format(i), i) if (i >= 100) and ( i < 105): # Only write timelines for steps 100 through 104 fetched_timeline = timeline.Timeline(run_metadata.step_stats) chrome_trace = fetched_timeline.generate_chrome_trace_format() with open(graph_location + '/timeline_{}.json'.format(i), 'w') as f: f.write(chrome_trace) loss_values.append(loss) print('step %d, loss %g, %g sec for training step' % (i, loss, time.time() - t)) train_writer.add_summary(summary, i) print("Training finished. 
Running test") num_test_images = FLAGS.test_image_count x_test = mnist.test.images[:num_test_images] y_test = mnist.test.labels[:num_test_images] test_accuracy = accuracy.eval(feed_dict={ x: x_test, y_: y_test, keep_prob: 1.0 }) print('test accuracy %g' % test_accuracy) saver.save(sess, FLAGS.model_dir) return loss_values, test_accuracy
def train_model(job_id): model_type_list = cfg_para.multi_model_type num_layer_list = cfg_para.multi_num_layer activation_list = cfg_para.multi_activation batch_size_list = cfg_para.multi_batch_size learning_rate_list = cfg_para.multi_learning_rate optimizer_list = cfg_para.multi_opt model_type = model_type_list[job_id] num_layer = num_layer_list[job_id] activation = activation_list[job_id] batch_size = batch_size_list[job_id] learning_rate = learning_rate_list[job_id] optimizer = optimizer_list[job_id] num_epoch = cfg_para.multi_num_epoch train_dataset = cfg_para.multi_train_dataset use_tf_timeline = cfg_para.multi_use_tb_timeline use_cpu = cfg_para.multi_use_cpu if use_cpu: train_device = '/cpu:0' else: train_device = '/gpu:0' model_name = '{0}-{1}-{2}-{3}-{4}-{5}-{6}-{7}'.format( job_id, model_type, num_layer, batch_size, learning_rate, optimizer, num_epoch, train_dataset) ########################################## # load dataset ########################################## img_width, img_height, num_channel, num_class = load_dataset_para( train_dataset) train_feature_input, train_label_input = load_train_dataset(train_dataset) ########################################## # build model ########################################## features = tf.placeholder(tf.float32, [None, img_width, img_height, num_channel]) labels = tf.placeholder(tf.int64, [None, num_class]) dm = ModelImporter(model_type, str(job_id), num_layer, img_height, img_width, num_channel, num_class, batch_size, optimizer, learning_rate, activation, batch_padding=False) model_entity = dm.get_model_entity() model_logit = model_entity.build(features, is_training=True) train_op = model_entity.train(model_logit, labels) ########################################## # train model ########################################## step_time = 0 step_count = 0 config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True if train_dataset == 'imagenet': image_list = sorted(os.listdir(train_feature_input)) with tf.device(train_device): with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) num_batch = train_label_input.shape[0] // batch_size for e in range(num_epoch): for i in range(num_batch): print('epoch %d / %d, step %d / %d' % (e + 1, num_epoch, i + 1, num_batch)) if i != 0: start_time = timer() batch_offset = i * batch_size batch_end = (i + 1) * batch_size if train_dataset == 'imagenet': batch_list = image_list[batch_offset:batch_end] train_feature_batch = load_imagenet_raw( train_feature_input, batch_list, img_height, img_width) else: train_feature_batch = train_feature_input[ batch_offset:batch_end] train_label_batch = train_label_input[ batch_offset:batch_end] if use_tf_timeline: profile_path = cfg_path.profile_path run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() sess.run(train_op, feed_dict={ features: train_feature_batch, labels: train_label_batch }, options=run_options, run_metadata=run_metadata) trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( profile_path + '/' + str(model_type) + '-' + str(batch_size) + '-' + str(i) + '.json', 'w') trace_file.write( trace.generate_chrome_trace_format( show_dataflow=True, show_memory=True)) else: sess.run(train_op, feed_dict={ features: train_feature_batch, labels: train_label_batch }) if i != 0: end_time = timer() dur_time = end_time - start_time print("step time:", dur_time) step_time += dur_time step_count += 1 step_time_result = f'average step time 
(ms) of {model_name}: {step_time / step_count * 1000}' return step_time_result
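# Note: train_model excludes the first batch of every epoch from timing, so
# step_count stays 0 when num_batch == 1 and the final f-string would divide
# by zero; guarding the result with `if step_count:` avoids that edge case.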
def main():
    configproto = tf.ConfigProto()
    configproto.gpu_options.allow_growth = True
    configproto.log_device_placement = args.log_device
    configproto.allow_soft_placement = args.soft_placement
    configproto.inter_op_parallelism_threads = args.num_cores
    configproto.intra_op_parallelism_threads = args.num_cores
    with tf.Graph().as_default(), tf.Session(config=configproto) as sess:
        run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE) if show_run_meta else None
        run_metadata = tf.RunMetadata() if show_run_meta else None
        model = graph_moudle.Model()
        model.init_global_step()
        vt, vs, vo = model.model_setup()
        tf.initialize_all_variables().run()
        cnt = 0
        for var in vs:
            cnt += 1
            str_line = str(cnt) + '. ' + str(var.name) + ': ' + str(
                var.get_shape())
            print(str_line)
        ssll = input('aaaaa')  # pause so the variable list can be inspected
        np.random.seed(1234567890)
        qs = np.random.randint(0, args.vocab_size,
                               [10, args.batchsize, args.max_sent_length])
        qsm = np.ones_like(qs, dtype=np.float32)
        qsm[:, :, -1:] = 0
        ts = np.random.randint(0, args.vocab_size,
                               [10, args.batchsize, 3, args.max_sent_length])
        tsm = np.ones_like(ts, dtype=np.float32)
        g = np.random.randint(1, 3, (10, args.batchsize))
        for i in range(400):
            bs = i % 10
            if bs == 0:
                # np.random.shuffle shuffles in place and returns None, so use
                # np.random.permutation to obtain the shuffled index array.
                idx = np.random.permutation(10 * args.batchsize)
                qs = np.reshape(qs, [10 * args.batchsize, -1])[idx]
                ts = np.reshape(ts, [10 * args.batchsize, -1])[idx]
                g = np.reshape(g, [10 * args.batchsize, -1])[idx]
                qs = np.reshape(qs, qsm.shape)
                ts = np.reshape(ts, tsm.shape)
                g = np.reshape(g, (10, args.batchsize))
            stime = time.time()
            loss, regu_loss = 0, 0
            step, loss, pair_loss, regu_loss, acc, acc01, score, _ = \
                model.run_epoch(sess, [qs[bs], qsm[bs], ts[bs], tsm[bs], g[bs], True],
                                run_options=run_options, run_metadata=run_metadata)
            '''
            pair_loss, acc, acc01, score = \
                model.run_epoch(sess, [qs[bs], qsm[bs], ts[bs], tsm[bs], g[bs], False],
                                run_options=run_options, run_metadata=run_metadata)
            '''
            print(loss, pair_loss, regu_loss, acc, acc01, time.time() - stime)
            print(score[0, :], g[bs][0])
        if show_run_meta:
            tl = timeline.Timeline(run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format(show_memory=True)
            with open(args.log_dir_path + '/timeline.json', 'w') as f:
                f.write(ctf)
def train(hparams): """Build and train the model as specified in hparams""" ckptsdir = str(Path(hparams.modeldir, "ckpts")) # build training and eval graphs train_tuple = create_model(hparams, tf.contrib.learn.ModeKeys.TRAIN) eval_tuple = create_model(hparams, tf.contrib.learn.ModeKeys.EVAL) with train_tuple.graph.as_default(): initializer = tf.global_variables_initializer() train_tables_initializer = tf.tables_initializer() with eval_tuple.graph.as_default(): local_initializer = tf.local_variables_initializer() eval_tables_initializer = tf.tables_initializer() # Summary writers summary_writer = tf.summary.FileWriter(hparams.modeldir, train_tuple.graph, max_queue=25, flush_secs=30) if hparams.saved is not None: # load checkpoint train_tuple.model.saver.restore(train_tuple.session, hparams.saved) else: train_tuple.session.run([initializer]) start_time = process_time() # initialize the training dataset train_tuple.session.run([train_tables_initializer]) train_tuple.session.run([train_tuple.iterator.initializer]) # initialize the eval table only once eval_tuple.session.run([eval_tables_initializer]) # finalize the graph train_tuple.graph.finalize() profile_next_step = False profiled = False # Train until the dataset throws an error (at the end of num_epochs) while True: step_time = [] try: curr_time = process_time() if False: #if not profiled and profile_next_step: print("Running training step with profiling") # run profiling _, train_loss, global_step, _, summary, metadata = train_tuple.model.\ train_with_profile(train_tuple.session, summary_writer) # write the metadata out to a chrome trace file trace = timeline.Timeline(step_stats=metadata.step_stats) with open(hparams.modeldir + "/timeline.ctf.json", "w") as tracefile: tracefile.write(trace.generate_chrome_trace_format()) profile_next_step = False profiled = True else: _, train_loss, global_step, _, summary = train_tuple.model.train( train_tuple.session) step_time.append(process_time() - curr_time) # write train summaries if global_step == 1: summary_writer.add_summary(summary, global_step) if global_step % 15 == 0: summary_writer.add_summary(summary, global_step) print("Step: %d, Training Loss: %f, Avg Sec/Step: %2.2f" % (global_step, train_loss, np.mean(step_time))) if global_step % 100 == 0: step_time = [] profile_next_step = True # Do one evaluation checkpoint_path = train_tuple.model.saver.save( train_tuple.session, ckptsdir + "/ckpt", global_step=global_step) print(checkpoint_path) eval_tuple.model.saver.restore(eval_tuple.session, checkpoint_path) eval_tuple.session.run( [eval_tuple.iterator.initializer, local_initializer]) while True: try: eval_loss, eval_acc, eval_summary, _ = eval_tuple.model.eval( eval_tuple.session) # summary_writer.add_summary(summary, global_step) except tf.errors.OutOfRangeError: print("Step: %d, Eval Loss: %f, Eval Accuracy: %f" % (global_step, eval_loss, eval_acc)) summary_writer.add_summary(eval_summary, global_step) break except tf.errors.OutOfRangeError: print("- End of Trainig -") break # End of training summary_writer.close() print("Total Training Time: %4.2f" % (process_time() - start_time))
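# The profiling branch in train() above is currently disabled (`if False:`)
# and relies on a `train_with_profile` method on the model. A minimal sketch
# of such a method (an assumption; `self.train_fetches` is a hypothetical
# stand-in for the model's five usual train fetches
# [train_op, loss, global_step, lr, summary]):
import tensorflow as tf

def train_with_profile(self, session, summary_writer):
    # Run one training step with full tracing and return the usual fetches
    # plus the RunMetadata, matching the 6-tuple unpacked by the caller.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    metadata = tf.RunMetadata()
    results = session.run(self.train_fetches,
                          options=run_options, run_metadata=metadata)
    summary_writer.add_run_metadata(metadata, 'step_%d' % results[2])
    return results + [metadata]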
def train_step(self, sess, train_op, global_step, train_step_kwargs): """Function that takes a gradient step and specifies whether to stop. Args: sess: The current session. train_op: An `Operation` that evaluates the gradients and returns the total loss. global_step: A `Tensor` representing the global training step. train_step_kwargs: A dictionary of keyword arguments. Returns: The total loss and a boolean indicating whether or not to stop training. Raises: ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is not. """ start_time = time.time() trace_run_options = None run_metadata = None if 'should_trace' in train_step_kwargs: if 'logdir' not in train_step_kwargs: raise ValueError( 'logdir must be present in train_step_kwargs when ' 'should_trace is present') if sess.run(train_step_kwargs['should_trace']): trace_run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() total_loss, np_global_step = sess.run([train_op, global_step], options=trace_run_options, run_metadata=run_metadata) time_elapsed = time.time() - start_time # self.debug_training(sess,global_step) if run_metadata is not None: tl = timeline.Timeline(run_metadata.step_stats) trace = tl.generate_chrome_trace_format() trace_filename = os.path.join(train_step_kwargs['logdir'], 'tf_trace-%d.json' % np_global_step) logging.info('Writing trace to %s', trace_filename) file_io.write_string_to_file(trace_filename, trace) if 'summary_writer' in train_step_kwargs: train_step_kwargs['summary_writer'].add_run_metadata( run_metadata, 'run_metadata-%d' % np_global_step) if 'should_log' in train_step_kwargs: if sess.run(train_step_kwargs['should_log']): logging.info('global step %d: loss = %.4f (%.2f sec/step)', np_global_step, total_loss, time_elapsed) # TODO(nsilberman): figure out why we can't put this into sess.run. The # issue right now is that the stop check depends on the global step. The # increment of global step often happens via the train op, which used # created using optimizer.apply_gradients. # # Since running `train_op` causes the global step to be incremented, one # would expected that using a control dependency would allow the # should_stop check to be run in the same session.run call: # # with ops.control_dependencies([train_op]): # should_stop_op = ... # # However, this actually seems not to work on certain platforms. if 'should_stop' in train_step_kwargs: should_stop = sess.run(train_step_kwargs['should_stop']) else: should_stop = False return total_loss, should_stop
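# A minimal sketch of how `train_step_kwargs` for the function above might be
# assembled (hypothetical wiring; the intervals and the stop threshold are
# assumptions):
import tensorflow as tf

def make_train_step_kwargs(global_step, logdir, summary_writer=None,
                           trace_every=100, log_every=10, max_steps=100000):
    kwargs = {
        'logdir': logdir,  # required whenever should_trace is present
        'should_trace': tf.equal(tf.mod(global_step, trace_every), 0),
        'should_log': tf.equal(tf.mod(global_step, log_every), 0),
        'should_stop': tf.greater_equal(global_step, max_steps),
    }
    if summary_writer is not None:
        kwargs['summary_writer'] = summary_writer
    return kwargs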
feed_dict={ x: batch_xs, y_true: batch_ys, keep_prob: 0.5 }, options=run_options, run_metadata=run_metadata) writer.add_summary(summary_str, i) if (i % 10) == 0: test_xs, test_ys = mnist.test.next_batch(100) #test_xs, test_ys = [mnist.test.images, mnist.test.labels] train_acc = sess.run(accuracy, feed_dict={ x: batch_xs, y_true: batch_ys, keep_prob: 1 }) test_acc = sess.run(accuracy, feed_dict={ x: test_xs, y_true: test_ys, keep_prob: 1 }) print('Step %.4d : train_err = %.2f%% ; test_err = %.2f%%' % (i, (1 - train_acc) * 100, (1 - test_acc) * 100)) trace = timeline.Timeline(step_stats=run_metadata.step_stats) with open('/tmp/layers/timeline.ctf.json', 'w') as outfile: outfile.write(trace.generate_chrome_trace_format())
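# Note: the snippet above writes to /tmp/layers/timeline.ctf.json and assumes
# that directory already exists; creating it first, e.g. with
# os.makedirs('/tmp/layers', exist_ok=True), avoids an IOError on a fresh machine.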
step = None last_saved_step = saved_global_step minvalloss = 10000 try: for step in range(saved_global_step + 1, args.num_steps): start_time = time.time() if args.store_metadata and step % 50 == 0: # Slow run that stores extra information for debugging. print('Storing metadata') run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) summary, trloss_value, _ = sess.run([summaries, trloss, optim], options=run_options, run_metadata=run_metadata) writer.add_summary(summary, step) writer.add_run_metadata(run_metadata, 'step_{:04d}'.format(step)) tl = timeline.Timeline(run_metadata.step_stats) timeline_path = os.path.join(logdir, 'timeline.trace') with open(timeline_path, 'w') as f: f.write(tl.generate_chrome_trace_format(show_memory=True)) else: summary, trloss_value, _ = sess.run([summaries, trloss, optim]) writer.add_summary(summary, step) duration = time.time() - start_time print('step {:d} - trloss = {:.3f}, ({:.3f} sec/step)'.format( step, trloss_value, duration)) if step % args.checkpoint_every == 0: valloss_value = sess.run(valloss) print('validateLoss = {:.3f}, ({:.3f} sec/step)'.format( valloss_value, duration)) if (valloss_value < minvalloss):
def training(self, sess, train_writer): with tf.name_scope('loss_function'): RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1] RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1] RNet_rpn_yaw_gt = self.net.get_output( 'rpn_rois' )[1][:, -1] #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw RNet_rpn_yaw_gt_new = RNet_rpn_yaw_gt - RNet_rpn_yaw_gt_delta RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred + RNet_rpn_yaw_gt_delta rpn_cls_labels = self.net.get_output( 'rpn_rois' )[1][:, -2] #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw RNet_rpn_yaw_pred = self.angle_trans(RNet_rpn_yaw_pred) RNet_rpn_yaw_gt_new = self.angle_trans(RNet_rpn_yaw_gt_new) debug_pred = tf.multiply(rpn_cls_labels, self.angle_trans(RNet_rpn_yaw_pred)) debug_gt = tf.multiply(rpn_cls_labels, self.angle_trans(RNet_rpn_yaw_gt_new)) tower_l1_loss = self.Rnet_modified_smooth_l1( sigma=3, bbox_pred=RNet_rpn_yaw_pred, bbox_targets=RNet_rpn_yaw_gt_new) tower_l1_loss_keep_positive = tf.multiply(rpn_cls_labels, tower_l1_loss) loss = tf.reduce_sum(tower_l1_loss_keep_positive) / ( 1e-5 + tf.reduce_sum( tf.cast(tf.not_equal(tower_l1_loss_keep_positive, 0.0), dtype=tf.float32))) with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.90, name='decay-Lr') Optimizer = tf.train.AdamOptimizer(lr) var_and_grad = Optimizer.compute_gradients( loss, var_list=tf.trainable_variables()) train_op = Optimizer.minimize(loss, global_step=global_step) with tf.name_scope('debug_board'): tf.summary.scalar('total_loss', loss) glb_var = tf.trainable_variables() for i in range(len(glb_var)): tf.summary.histogram(glb_var[i].name, glb_var[i]) tf.summary.image('theta', self.net.get_output('RNet_theta')[0], max_outputs=50) merged = tf.summary.merge_all() #hxd: before the next summary ops with tf.name_scope('epoch_valid'): epoch_cube_theta = tf.placeholder(dtype=tf.float32) epoch_cube_theta_sum_op = tf.summary.scalar( 'valid_los', epoch_cube_theta) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: if True: # #full graph restore print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load(self.args.weights, sess, self.saver, True) else: # #part graph restore # # METHOD one # ref_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=['vgg_feat_fc']) # saver1 = tf.train.Saver(ref_vars) # saver1.restore(sess, self.args.weights) # # METHOD two reader = pywrap_tensorflow.NewCheckpointReader( self.args.weights) var_to_shape_map = reader.get_variable_to_shape_map() with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope: for key in var_to_shape_map: try: var = tf.get_variable(key, trainable=False) sess.run(var.assign(reader.get_tensor(key))) print " Assign pretrain model: " + key except ValueError: print " Ignore variable:" + key trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to train: ', trainable_var_for_chk timer = Timer() rpn_rois_3d = self.net.get_output('rpn_rois')[1] if DEBUG: pass # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow vispy_init() i = 0 training_series = range(self.epoch) #self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = 
                self.dataset.get_minibatch(data_idx, 'train')  # get one batch
                feed_dict = {
                    self.net.lidar3d_data: blobs['lidar3d_data'],
                    self.net.lidar_bv_data: blobs['lidar_bv_data'],
                    self.net.im_info: blobs['im_info'],
                    self.net.keep_prob: 0.5,
                    self.net.gt_boxes_bv: blobs['gt_boxes_bv'],
                    self.net.gt_boxes_3d: blobs['gt_boxes_3d'],
                    self.net.gt_boxes_corners: blobs['gt_boxes_corners'],
                    self.net.calib: blobs['calib'],
                }
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                timer.tic()
                debug_pred_, delta_, RNet_rpn_yaw_gt_delta_, rpn_rois_3d_, loss_, \
                    RNet_rpn_yaw_pred_toshow_, debug_gt_, merged_, _ = \
                    sess.run([debug_pred, tower_l1_loss_keep_positive,
                              RNet_rpn_yaw_gt_delta, rpn_rois_3d, loss,
                              RNet_rpn_yaw_pred_toshow, debug_gt, merged,
                              train_op],
                             feed_dict=feed_dict,
                             options=run_options,
                             run_metadata=run_metadata)
                # debug_pred_, delta_, RNet_rpn_yaw_gt_delta_, rpn_rois_3d_, \
                #     RNet_rpn_yaw_pred_toshow_, debug_gt_, merged_, = \
                #     sess.run([debug_pred, tower_l1_loss_keep_positive,
                #               RNet_rpn_yaw_gt_delta, rpn_rois_3d,
                #               RNet_rpn_yaw_pred_toshow, debug_gt, merged],
                #              feed_dict=feed_dict, options=run_options,
                #              run_metadata=run_metadata)
                timer.toc()

                if iter % cfg.TRAIN.ITER_DISPLAY == 0:
                    print 'Iter: %d/%d, Serial_num: %s, Speed: %.3fs/iter, Loss: %.3f' % (
                        iter, self.args.epoch_iters * self.epoch,
                        blobs['serial_num'], timer.average_time, loss_)
                    print 'theta_delta: ',
                    # Loop over j here (not i): i is the visualization frame
                    # counter incremented in the DEBUG block below, and the
                    # original code clobbered it.
                    for j in range(50):
                        if delta_[j] != 0.0:
                            print '%6.3f' % (delta_[j]),
                    print '\nPredicted angle: ',
                    for j in range(50):
                        if debug_pred_[j] != 0.0:
                            print '%6.3f' % (debug_pred_[j]),
                    print '\nGt yaw angle: ',
                    for j in range(50):
                        if debug_gt_[j] != 0.0:
                            print '%6.3f' % (debug_gt_[j]),
                    print '\n'

                if iter % 20 == 0 and cfg.TRAIN.TENSORBOARD:
                    train_writer.add_summary(merged_, iter)

                if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter == 100):
                    # View the resulting trace with chrome://tracing
                    trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                    trace_file = open(
                        cfg.LOG_DIR + '/' + 'training-step-' +
                        str(iter).zfill(7) + '.ctf.json', 'w')
                    trace_file.write(
                        trace.generate_chrome_trace_format(show_memory=False))
                    trace_file.close()

                if DEBUG:
                    scan = blobs['lidar3d_data']
                    # All-zero cube classification scores (was np.ones(...) * 0).
                    cubic_cls_value = np.zeros([cfg.TRAIN.RPN_POST_NMS_TOP_N],
                                               dtype=np.float32)
                    boxes = BoxAry_Theta(
                        gt_box3d=blobs['gt_boxes_3d'],
                        pre_box3d=rpn_rois_3d_,
                        pre_theta_value=RNet_rpn_yaw_pred_toshow_,
                        pre_cube_cls=cubic_cls_value
                    )  # RNet_rpn_yaw_pred_toshow_  rpn_rois_3d_[:, -1]
                    pcd_vispy(scan, boxes=boxes, name='CubicNet training',
                              index=i, vis_size=(800, 600), save_img=False,
                              visible=False)
                    i += 1

            if cfg.TRAIN.EPOCH_MODEL_SAVE:  # iter % 2000 == 0 and:
                self.snapshot(sess, iter)

            if cfg.TRAIN.USE_VALID:  # TODO: complete the validation process
                with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)):
                    print 'Valid the net at the end of epoch_{} ...'.format(epo_cnt + 1)
                    valid_loss_total = 0.0
                    for data_idx in range(self.val_epoch):  # self.val_epoch
                        blobs = self.dataset.get_minibatch(data_idx, 'valid')
                        feed_dict_ = {
                            self.net.lidar3d_data: blobs['lidar3d_data'],
                            self.net.lidar_bv_data: blobs['lidar_bv_data'],
                            self.net.im_info: blobs['im_info'],
                            self.net.keep_prob: 0.5,
                            self.net.gt_boxes_bv: blobs['gt_boxes_bv'],
                            self.net.gt_boxes_3d: blobs['gt_boxes_3d'],
                            self.net.gt_boxes_corners: blobs['gt_boxes_corners'],
                            self.net.calib: blobs['calib'],
                        }
                        loss_valid = sess.run(loss, feed_dict=feed_dict_)
                        # train_writer.add_summary(valid, data_idx)
                        valid_loss_total += loss_valid
                        if cfg.TRAIN.VISUAL_VALID and data_idx % 20 == 0:
                            print 'Valid step: {:d}/{:d} , theta_loss = {:.3f}' \
                                .format(data_idx + 1, self.val_epoch, float(loss_valid))
                        if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD:
                            pass
                            # train_writer.add_summary(valid_result_, data_idx / 20 + epo_cnt * 1000)

                    valid_summary = tf.summary.merge([epoch_cube_theta_sum_op])
                    valid_res = sess.run(
                        valid_summary,
                        feed_dict={
                            epoch_cube_theta:
                                float(valid_loss_total) / self.val_epoch
                        })
                    train_writer.add_summary(valid_res, epo_cnt + 1)
                    print 'Validation of epoch_{}: theta_loss_total = {:.3f}\n' \
                        .format(epo_cnt + 1, float(valid_loss_total) / self.val_epoch)

            random.shuffle(training_series)  # shuffle the training series

        print 'Training has finished, enjoy every day!'
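# The loop above writes one Chrome-trace JSON file per profiled step. Below is
# a minimal sketch (merge_chrome_traces is a hypothetical helper, not part of
# the original code) for concatenating several of those .ctf.json files so
# they can be inspected in a single chrome://tracing session. It assumes each
# file holds a {"traceEvents": [...]} dictionary, which is what
# Timeline.generate_chrome_trace_format() emits.
import glob
import json

def merge_chrome_traces(pattern, output_path):
    merged = {'traceEvents': []}
    for path in sorted(glob.glob(pattern)):
        with open(path) as f:
            trace = json.load(f)
        # Event timestamps are absolute, so events from different steps
        # line up one after another on the shared timeline.
        merged['traceEvents'].extend(trace.get('traceEvents', []))
    with open(output_path, 'w') as f:
        json.dump(merged, f)

# Example:
#   merge_chrome_traces(cfg.LOG_DIR + '/training-step-*.ctf.json',
#                       cfg.LOG_DIR + '/training-all-steps.ctf.json')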
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError('Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    DATA_DIRECTORY = '.\\data\\moon,.\\data\\son'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    # LOGDIR = './/logdir-wavenet//train//2018-12-21T22-58-10'
    parser.add_argument('--logdir', type=str, default=LOGDIR,
                        help='Directory in which to store the logging information for '
                        'TensorBoard. If the model already exists, it will restore the '
                        'state and will continue training. Cannot use with '
                        '--logdir_root and --restore_from.')
    parser.add_argument('--logdir_root', type=str, default=None,
                        help='Root directory to place the logging output and generated '
                        'model. These are stored under the dated subdirectory of '
                        '--logdir_root. Cannot use with --logdir.')
    parser.add_argument('--restore_from', type=str, default=None,
                        help='Directory in which to restore the model from. This creates '
                        'the new model under the dated directory in --logdir_root. '
                        'Cannot use with --logdir.')

    CHECKPOINT_EVERY = 1000  # interval (in steps) between checkpoint saves
    parser.add_argument('--checkpoint_every', type=int, default=CHECKPOINT_EVERY,
                        help='How many steps to save each checkpoint after. Default: ' +
                        str(CHECKPOINT_EVERY) + '.')

    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = num_speakers > 1

        # DataFeederWavenet slices the wav files into network inputs: each
        # example is cut to (receptive_field + sample_size) samples, and the
        # leading receptive_field samples are either zero-padded or carried
        # over from the previous slice.
        reader = DataFeederWavenet(
            coord,
            config.data_dir,
            batch_size=hparams.wavenet_batch_size,
            receptive_field=WaveNetModel.calculate_receptive_field(
                hparams.filter_width, hparams.dilations, hparams.scalar_input,
                hparams.initial_filter_width),
            gc_enable=gc_enable)

        if gc_enable:
            audio_batch, lc_batch, gc_id_batch = (reader.inputs_wav,
                                                  reader.local_condition,
                                                  reader.speaker_id)
        else:
            # Bug fix: the original read `self.local_condition`, but there is
            # no `self` inside main(); gc_id_batch is also defined here so
            # that add_loss() below never hits an undefined name.
            audio_batch, lc_batch = reader.inputs_wav, reader.local_condition
            gc_id_batch = None
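        # WaveNetModel.calculate_receptive_field (used above) is defined
        # elsewhere. As a hedged sketch, the reference tensorflow-wavenet
        # implementation computes the number of past samples one output
        # depends on as
        #
        #     receptive_field = (filter_width - 1) * sum(dilations) + 1
        #     receptive_field += (initial_filter_width - 1) if scalar_input \
        #         else (filter_width - 1)
        #
        # e.g. filter_width=2 and dilations=[1, 2, 4, 8] with one-hot input
        # give (2 - 1) * 15 + 1 + (2 - 1) = 17 samples.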
    # Create network.
    net = WaveNetModel(
        batch_size=hparams.wavenet_batch_size,
        dilations=hparams.dilations,
        filter_width=hparams.filter_width,
        residual_channels=hparams.residual_channels,
        dilation_channels=hparams.dilation_channels,
        quantization_channels=hparams.quantization_channels,
        out_channels=hparams.out_channels,
        skip_channels=hparams.skip_channels,
        use_biases=hparams.use_biases,  # True
        scalar_input=hparams.scalar_input,
        initial_filter_width=hparams.initial_filter_width,
        global_condition_channels=hparams.gc_channels,
        global_condition_cardinality=num_speakers,
        local_condition_channels=hparams.num_mels,
        upsample_factor=hparams.upsample_factor,
        train_mode=True)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    net.add_loss(input_batch=audio_batch,
                 local_condition=lc_batch,
                 global_condition_batch=gc_id_batch,
                 l2_regularization_strength=hparams.l2_regularization_strength)
    net.add_optimizer(hparams, global_step)

    run_metadata = tf.RunMetadata()

    # Set up session. log_device_placement=False lets TensorFlow place ops on
    # CPU/GPU automatically without logging the placement.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model; max_to_keep caps how many
    # checkpoints are retained.
    saver = tf.train.Saver(var_list=tf.global_variables(),
                           max_to_keep=hparams.max_checkpoints)

    try:
        start_step = load(saver, sess, restore_from)  # load checkpoint
        if is_overwritten_training or start_step is None:
            # Reset global_step to 0 for new or overwritten trainings
            # (the first training step then becomes 1).
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally "
              "overwriting the previous model.")
        raise

    ###########

    start_step = sess.run(global_step)
    last_saved_step = start_step
    step = start_step  # bug fix: step was referenced before assignment on the first iteration
    try:
        reader.start_in_session(sess, start_step)
        while not coord.should_stop():
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run([global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step >= hparams.num_steps:
                # An error message is printed, but stopping here is intended.
                raise Exception('End xxx~~~yyy')
    except Exception as e:
        print('finally')
        # if step > last_saved_step:
        #     save(saver, sess, logdir, step)
        coord.request_stop(e)
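# The save()/load() helpers called in main() are imported from elsewhere and
# their bodies are not shown above. Below is a hedged sketch of what such
# helpers typically do in WaveNet training scripts; the names are
# hypothetical so they do not shadow the real imports, and the actual helpers
# may log or name files differently.
import os
import tensorflow as tf

def save_checkpoint_sketch(saver, sess, logdir, step):
    # Write a checkpoint named model.ckpt-<step> under logdir.
    checkpoint_path = os.path.join(logdir, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=step)

def load_checkpoint_sketch(saver, sess, logdir):
    # Restore the newest checkpoint in logdir and return its global step,
    # or None when no checkpoint exists yet.
    ckpt = tf.train.get_checkpoint_state(logdir)
    if ckpt and ckpt.model_checkpoint_path:
        step = int(ckpt.model_checkpoint_path.split('-')[-1])
        saver.restore(sess, ckpt.model_checkpoint_path)
        return step
    return None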