def model(isTrain, isTrainBn):
    """Build a two-layer hard-swish network on a [None, 1, 1, 2] input.

    The function also reads the names ``isQuant``, ``input_min`` and
    ``input_max``, which are not defined inside it.

    Args:
        isTrain: when truthy together with ``isQuant``, a fake-quant op named
            ``x0_1`` is inserted right after the input placeholder.
        isTrainBn: forwarded as ``training`` to both batch-norm layers.

    Returns:
        ``(tf_input, tf_label, x, end_point)``: the input placeholder, the
        label placeholder, the ``Xoutput`` tensor and a one-element list
        containing that same tensor.
    """
    def hard_swish(scope_name, tensor):
        # x * relu6(x + 3) / 6, with 1/6 spelled as the literal 0.16666667.
        with tf.variable_scope(scope_name):
            gate = tf.nn.relu6(tensor + 3)
            return tensor * gate * 0.16666667

    end_point = []
    tf_input = tf.placeholder(dtype=tf.float32, shape=[None, 1, 1, 2],
                              name='tf_input')
    tf_label = tf.placeholder(dtype=tf.int32, shape=[None], name='tf_label')

    # Quantize the raw input only for quantization-aware training.
    tf_input_1 = tf_input
    if isTrain and isQuant:
        tf_input_1 = tf.fake_quant_with_min_max_args(
            tf_input, input_min, input_max, name='x0_1')

    net = tf.layers.separable_conv2d(tf_input_1, filters=10000, kernel_size=1,
                                     use_bias=False, name='L1')
    net = tf.layers.batch_normalization(net, training=isTrainBn, fused=True,
                                        name='L1_bn')
    net = hard_swish('L1_hard_swish', net)

    net = tf.layers.conv2d(net, filters=4, kernel_size=1, use_bias=False,
                           name='L2')
    net = tf.layers.batch_normalization(net, training=isTrainBn, fused=True,
                                        name='L2_bn')
    net = hard_swish('L2_hard_swish', net)

    if isQuant:
        net = tf.fake_quant_with_min_max_args(net, 0, 6)

    net = tf.layers.conv2d(net, filters=2, kernel_size=1, use_bias=True,
                           name='FCN')
    net = tf.layers.flatten(net, name='Xflatten')
    net = tf.identity(net, 'Xoutput')
    end_point.append(net)
    return tf_input, tf_label, net, end_point
def model(x):
    """Build a float-in, float-out 2x2 average-pool graph.

    ``x`` is reshaped to [-1, 28, 28, 1], fake-quantized to the range
    [-1, 3] with 8 bits, average-pooled (2x2 window, stride 2, SAME padding)
    and fake-quantized again with the same range.

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    quant_args = dict(min=-1.0, max=3.0, num_bits=8)

    image = tf.reshape(x, [-1, 28, 28, 1])
    quantized_image = tf.fake_quant_with_min_max_args(image, **quant_args)
    pooled = tf.nn.avg_pool(quantized_image,
                            ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1],
                            padding='SAME')
    y = tf.fake_quant_with_min_max_args(pooled, name='ys', **quant_args)
    return y, variables
def model(x):
    """Build a float-in, float-out depth-to-space graph.

    ``x`` is reshaped to [-1, 14, 14, 4], fake-quantized to [-1, 3] with
    8 bits, rearranged with ``tf.depth_to_space`` (block size 2) and
    fake-quantized again with the same range.

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` is an empty dict.
    """
    variables = {}
    quant_args = dict(min=-1.0, max=3.0, num_bits=8)

    blocks = tf.reshape(x, [-1, 14, 14, 4])
    blocks = tf.fake_quant_with_min_max_args(blocks, **quant_args)
    expanded = tf.depth_to_space(blocks, 2)
    y = tf.fake_quant_with_min_max_args(expanded, name='ys', **quant_args)
    return y, variables
def model(x):
    """Build a float-in, float-out element-wise multiply graph.

    A variable ``W`` of shape [10, 28, 28, 1] is created with
    ``generate_variable`` (defined elsewhere) and fake-quantized to [-1, 3];
    ``x`` is reshaped to [-1, 28, 28, 1] and fake-quantized the same way.
    Their product is fake-quantized to [-3, 9].

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` maps ``'W'`` to the weight variable.
    """
    operand_quant = dict(min=-1.0, max=3.0, num_bits=8)

    weights = generate_variable([10, 28, 28, 1], name='W')
    variables = {'W': weights}
    quantized_weights = tf.fake_quant_with_min_max_args(weights,
                                                        **operand_quant)

    image = tf.reshape(x, [-1, 28, 28, 1])
    image = tf.fake_quant_with_min_max_args(image, **operand_quant)

    product = tf.multiply(image, quantized_weights)
    # Product of two values in [-1, 3] lies in [-3, 9].
    y = tf.fake_quant_with_min_max_args(product, min=-3.0, max=9.0, name='ys')
    return y, variables
def model(x):
    """Build a constant-pad graph whose quantized output op is named ``ys``.

    ``x`` is reshaped to [-1, 14, 14, 4], fake-quantized to [-1, 3] with
    8 bits, zero-padded by (1, 2) along axis 1 and (2, 1) along axis 2, and
    fake-quantized again with the same range.

    The pad op is intentionally left unnamed: giving both the pad and the
    final fake-quant op the name ``ys`` makes TensorFlow rename the second
    one to ``ys_1``, so a lookup of ``ys`` would return the un-quantized
    pad output instead of the model output.

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    x_2d = tf.reshape(x, [-1, 14, 14, 4])
    x_2d = tf.fake_quant_with_min_max_args(x_2d, min=-1.0, max=3.0,
                                           num_bits=8)

    # [before, after] padding per axis; batch and channel axes are untouched.
    pad_value = [[0, 0], [1, 2], [2, 1], [0, 0]]
    y = tf.pad(x_2d, pad_value, "CONSTANT")
    y = tf.fake_quant_with_min_max_args(y, min=-1.0, max=3.0, num_bits=8,
                                        name='ys')
    return y, variables
def model(x):
    """Build a reduce-mean graph with fake-quantized input and output.

    ``x`` is reshaped to [-1, 28, 28, 1], fake-quantized to [-1, 3] with
    8 bits, averaged with ``tf.reduce_mean`` and fake-quantized again.

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    quant_args = dict(min=-1.0, max=3.0, num_bits=8)

    image = tf.reshape(x, [-1, 28, 28, 1])
    quantized_image = tf.fake_quant_with_min_max_args(image, **quant_args)

    # NOTE(review): the axis list repeats entries (-1 and 1 each appear
    # twice); kept exactly as written -- confirm the intended axes.
    reduction_axes = [-1, 1, 1, -1]
    mean = tf.reduce_mean(quantized_image, reduction_axes)

    y = tf.fake_quant_with_min_max_args(mean, name='ys', **quant_args)
    return y, variables
def model(is_train, is_train_bn):
    """Build a single conv + batch-norm graph on a [None, 1, 1, 1] input.

    The function also reads ``input_min`` and ``input_max``, which are not
    defined inside it.

    Args:
        is_train: when truthy, the input placeholder is fake-quantized
            (op ``x0_1``); when falsy, the output is fake-quantized to
            [-1, 1] instead (op ``x5``).
        is_train_bn: forwarded as ``training`` to the batch-norm layer.

    Returns:
        ``(tf_input, tf_label, x, end_point)``: the two placeholders, the
        final tensor, and the list of intermediate tensors collected along
        the way.
    """
    end_point = []
    tf_input = tf.placeholder(dtype=tf.float32, shape=[None, 1, 1, 1],
                              name='tf_input')
    tf_label = tf.placeholder(dtype=tf.float32, shape=[None, 1],
                              name='tf_label')

    tf_input_1 = tf_input
    if is_train:
        tf_input_1 = tf.fake_quant_with_min_max_args(
            tf_input, input_min, input_max, name='x0_1')

    with tf.variable_scope('X1'):
        net = tf.layers.conv2d(tf_input_1, filters=1, kernel_size=1,
                               use_bias=True, name='X1')
        end_point.append(net)
        net = tf.layers.batch_normalization(net, training=is_train_bn,
                                            name='X1/bn', fused=False)
        end_point.append(net)
        with tf.variable_scope('hard_swish'):
            # The gate ops are added to the graph but are not multiplied
            # back into ``net``; the hard-swish product is disabled here.
            gate = tf.nn.relu6(net) - 3
            gate = tf.fake_quant_with_min_max_args(gate, -3, 3)
        end_point.append(net)

    net = tf.layers.flatten(net, name='Xflatten')
    net = tf.identity(net, 'Xoutput')
    end_point.append(net)

    if not is_train:
        # TODO: how to use
        net = tf.fake_quant_with_min_max_args(net, -1, 1, name='x5')
        end_point.append(net)
    return tf_input, tf_label, net, end_point
def model(x):
    """Build a slice graph with a fake-quantized output.

    ``x`` is reshaped to [2, 14, 56, 5]; a [1, 6, 1, 5] window starting at
    offset [1, 7, 21, 0] is sliced out and fake-quantized to [-1, 3].

    Returns:
        ``(y, variables)`` where ``y`` is the fake-quant op named ``ys``
        and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    slice_begin = [1, 7, 21, 0]
    slice_size = [1, 6, 1, 5]

    reshaped = tf.reshape(x, [2, 14, 56, 5])
    window = tf.slice(reshaped, slice_begin, slice_size)
    y = tf.fake_quant_with_min_max_args(window, min=-1.0, max=3.0, name='ys')
    return y, variables
def model(x):
    """Build a bilinear-resize graph with a fake-quantized output.

    ``x`` is reshaped to [-1, 28, 28, 1], resized to 54x54 with
    ``tf.image.resize_bilinear`` and fake-quantized to [-1, 3].

    Returns:
        ``(y, variables)`` where ``y`` is the fake-quant op named ``ys``
        and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    image = tf.reshape(x, [-1, 28, 28, 1])
    resized = tf.image.resize_bilinear(image, [54, 54])
    y = tf.fake_quant_with_min_max_args(resized, min=-1.0, max=3.0,
                                        name='ys')
    return y, variables
def produce_low_resolution(input, k=3, blur_size=3, blur_sigma=0.5):
    """Produce low-resolution versions of the high-resolution batch `input`.

    The pipeline is: Gaussian blur with a `blur_size` x `blur_size` kernel of
    standard deviation `blur_sigma`, downsampling by `k`, bicubic
    interpolation back to the static height/width of `input`, clipping to
    [0, 1] and fake quantization over [0, 1].

    Args:
        input: 4-D tensor whose static shape is known; its last axis gives
            the channel count and axes 1-2 give the target size.
        k: stride used for downsampling.
        blur_size: side length of the Gaussian kernel.
        blur_sigma: standard deviation of the Gaussian kernel.

    Returns:
        The fake-quantized low-resolution tensor.
    """
    static_shape = input.get_shape().as_list()
    n_channels = static_shape[3]
    target_size = static_shape[1:3]

    # Blur each channel independently with the same Gaussian kernel.
    blur_kernel = gaussian_kernel(n_channels, blur_size, blur_sigma)
    blurred = tf.nn.depthwise_conv2d_native(input, blur_kernel,
                                            [1, 1, 1, 1], 'VALID')

    # A strided 1x1 depthwise convolution with unit weights keeps every
    # k-th pixel, i.e. downsamples without further filtering.
    pick_kernel = tf.ones([1, 1, n_channels, 1])
    downsampled = tf.nn.depthwise_conv2d_native(blurred, pick_kernel,
                                                [1, k, k, 1], 'VALID')

    upsampled = tf.image.resize_bicubic(downsampled, target_size)
    clipped = tf.clip_by_value(upsampled, 0, 1)
    return tf.fake_quant_with_min_max_args(clipped, min=0, max=1)
def model(x):
    """Build a leaky-ReLU graph with a fake-quantized output.

    ``x`` is reshaped to [-1, 28, 28, 1], passed through
    ``tf.nn.leaky_relu`` and fake-quantized to [-1, 3].

    Returns:
        ``(y, variables)`` where ``y`` is the fake-quant op named ``ys``
        and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    image = tf.reshape(x, [-1, 28, 28, 1])
    activated = tf.nn.leaky_relu(image)
    y = tf.fake_quant_with_min_max_args(activated, min=-1.0, max=3.0,
                                        name='ys')
    return y, variables
def model(x):
    """Build a float-in, float-out 3x3 conv + bias + ReLU graph.

    ``x`` is reshaped to [-1, 14, 14, 4] (not fake-quantized here). The
    [3, 3, 4, 32] kernel is fake-quantized to [-1, 1] and the 32-element
    bias to [-0.4, 0.4], both with 8 bits. The ReLU output is fake-quantized
    to [0, 108.4].

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` maps ``'W'``/``'b'`` to the raw variables.
    """
    image = tf.reshape(x, [-1, 14, 14, 4])

    kernel = weight_variable([3, 3, 4, 32], name='W')
    bias = bias_variable([32], name='b')
    variables = {'W': kernel, 'b': bias}

    quantized_kernel = tf.fake_quant_with_min_max_args(
        kernel, min=-1.0, max=1.0, num_bits=8)
    quantized_bias = tf.fake_quant_with_min_max_args(
        bias, min=-0.4, max=0.4, num_bits=8)

    conv = tf.nn.conv2d(image, quantized_kernel, strides=[1, 1, 1, 1],
                        padding='SAME')
    activated = tf.nn.relu(conv + quantized_bias)

    # Output bound: 3 * 1.0 * (3 * 3 * 4) + 0.4 = 108.4
    y = tf.fake_quant_with_min_max_args(activated, min=0.0, max=108.4,
                                        name='ys')
    return y, variables
def dec_relu(input, enable_quantization=False):
    """Apply ReLU6, optionally followed by fake quantization over [0, 6].

    Args:
        input: tensor to activate.
        enable_quantization: when truthy, the ReLU6 output is wrapped in a
            fake-quant op named ``fq_relu``.

    Returns:
        The ReLU6 output, fake-quantized if requested.
    """
    activated = tf.nn.relu6(input)
    if not enable_quantization:
        return activated
    return tf.fake_quant_with_min_max_args(activated, min=0.0, max=6.0,
                                           name="fq_relu")
def model(x):
    """Build a float-in, float-out 5x5 conv + bias + ReLU graph.

    ``x`` is reshaped to [-1, 28, 28, 1] and fake-quantized to [-1, 3]. The
    [5, 5, 1, 32] kernel is fake-quantized to [-1, 1]; the bias is added
    without quantization. The ReLU output is fake-quantized to [0, 75.4].

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` maps ``'W'``/``'b'`` to the raw variables.
    """
    image = tf.reshape(x, [-1, 28, 28, 1])
    quantized_image = tf.fake_quant_with_min_max_args(
        image, min=-1.0, max=3.0, num_bits=8)

    kernel = weight_variable([5, 5, 1, 32], name='W')
    bias = bias_variable([32], name='b')
    variables = {'W': kernel, 'b': bias}

    quantized_kernel = tf.fake_quant_with_min_max_args(
        kernel, min=-1.0, max=1.0, num_bits=8)

    conv = tf.nn.conv2d(quantized_image, quantized_kernel,
                        strides=[1, 1, 1, 1], padding='SAME')
    # The bias is used as-is (its fake-quant step is disabled in this model).
    activated = tf.nn.relu(conv + bias)

    # Output bound: 3 * 1.0 * (5 * 5 * 1) + 0.4 = 75.4
    y = tf.fake_quant_with_min_max_args(activated, min=0.0, max=75.4,
                                        name='ys')
    return y, variables
def model(x):
    """Build a float-in, float-out softmax graph with a quantized output.

    ``x`` is passed through ``tf.nn.softmax`` and the result is
    fake-quantized to [0, 1] with 8 bits.

    The softmax op is intentionally left unnamed: giving both the softmax
    and the final fake-quant op the name ``ys`` makes TensorFlow rename the
    second one to ``ys_1``, so a lookup of ``ys`` would return the
    un-quantized softmax instead of the model output.

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` is an empty dict (nothing to save).
    """
    variables = {}
    y = tf.nn.softmax(x)
    y = tf.fake_quant_with_min_max_args(y, min=0.0, max=1.0, num_bits=8,
                                        name='ys')
    return y, variables
def model(x):
    """Build a stride-2 transposed-convolution graph with quantized weights.

    ``x`` is reshaped to [-1, 14, 14, 4] and up-sampled to the fixed output
    shape [10, 28, 28, 8] by ``tf.nn.conv2d_transpose`` using a
    [3, 3, 8, 4] kernel. The result is fake-quantized to [0, 108.4].

    Fixes relative to the previous revision:
      * the convolution now consumes the fake-quantized kernel ``W2``
        (previously ``W2`` was built but the raw ``W`` was used);
      * ``b2`` is now derived from the bias ``b`` (previously it quantized
        ``W`` a second time).

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` maps ``'W'``/``'b'`` to the raw variables.
    """
    variables = {}
    x_2d = tf.reshape(x, [-1, 14, 14, 4])

    W = weight_variable([3, 3, 8, 4], name='W')
    b = bias_variable([32], name='b')
    variables['W'] = W
    variables['b'] = b

    W2 = tf.fake_quant_with_min_max_args(W, min=-1.0, max=1.0, num_bits=8)
    # NOTE(review): the quantized bias is still not added to the output, as
    # before -- its 32 elements do not match the 8 output channels. Confirm
    # whether a bias add (with an 8-element bias) is intended.
    b2 = tf.fake_quant_with_min_max_args(b, min=-0.4, max=0.4, num_bits=8)

    y = tf.nn.conv2d_transpose(x_2d, W2,
                               output_shape=[10, 28, 28, 8],
                               strides=[1, 2, 2, 1],
                               padding='SAME')
    y = tf.fake_quant_with_min_max_args(y, min=0.0, max=108.4, name='ys')
    return y, variables
def main(args=None):
    """Rebuild the inference graph, restore a checkpoint and export it.

    Requires ``FLAGS.train_path`` (checkpoint to restore) and
    ``FLAGS.output_dir`` to be set. The graph takes a flat ``input``
    placeholder, drops everything past the first three channels, scales by
    1/255, runs ``model.base_layers`` and ``model.ssd_layers``, and exposes
    two fake-quantized outputs named ``offset`` and ``confidence``.

    Args:
        args: unused.
    """
    assert FLAGS.train_path, 'train_path is not set.'
    assert FLAGS.output_dir, 'output_dir is not set.'

    side = model.IMAGE_SIZE
    with tf.Graph().as_default():
        image_ph = tf.placeholder(
            tf.float32, [side * side * INPUT_CHANNEL], name='input')
        image = tf.reshape(image_ph, [side, side, INPUT_CHANNEL])
        # Remove the alpha channel.
        image = image[:, :, :3]

        normalized_image = tf.multiply(image, 1.0 / 255.0)
        normalized_image = tf.expand_dims(normalized_image, axis=0)

        feature_map = model.base_layers(normalized_image, is_train=False)
        ssd_logits = model.ssd_layers(feature_map, is_train=False)
        ssd_logits = tf.reshape(ssd_logits,
                                [-1, model.OFFSET + model.CLASSES])

        # The first four columns are box offsets, the rest are class scores.
        # The ops are created for their names; the Python handles are unused.
        offsets = tf.nn.tanh(ssd_logits[:, :4])
        tf.fake_quant_with_min_max_args(offsets, min=-6, max=6, name='offset')
        scores = tf.nn.sigmoid(ssd_logits[:, 4:])
        tf.fake_quant_with_min_max_args(scores, min=-6, max=6,
                                        name='confidence')

        saver = tf.train.Saver(tf.global_variables())
        with tf.Session() as sess:
            saver.restore(sess, FLAGS.train_path)
            _export_graph(sess)
            _export_boxes_position(feature_map, FLAGS.output_dir)
def model(x):
    """Build a float-in, float-out fully-connected + ReLU graph.

    ``x`` (not fake-quantized here) is multiplied by a [784, 10] weight
    matrix fake-quantized to [0, 256], a 10-element bias fake-quantized to
    [-0.4, 0.4] is added, and the ReLU output is fake-quantized to
    [-0.0, 2352.4]. All fake-quant ops use 8 bits.

    Returns:
        ``(y, variables)`` where ``y`` is the output fake-quant op named
        ``ys`` and ``variables`` maps ``'W'``/``'b'`` to the raw variables.
    """
    weights = weight_variable([784, 10], name='W')
    bias = bias_variable([10], name='b')
    variables = {'W': weights, 'b': bias}

    quantized_weights = tf.fake_quant_with_min_max_args(
        weights, min=0.0, max=256.0, num_bits=8)
    quantized_bias = tf.fake_quant_with_min_max_args(
        bias, min=-0.4, max=0.4, num_bits=8)

    # Output bound: 3 * 1.0 * 784 + 0.4 = 2352.4
    logits = tf.matmul(x, quantized_weights)
    activated = tf.nn.relu(tf.add(logits, quantized_bias))
    y = tf.fake_quant_with_min_max_args(activated, min=-0.0, max=2352.4,
                                        num_bits=8, name='ys')
    return y, variables
def _create_weights_node(self, weights_data):
    """Create a float32 weights constant and return it fake-quantized.

    The quantization thresholds are looked up with ``self._get_thresholds``
    under the key ``<current name scope>/weights``; the un-quantized
    constant is registered through ``self._add_reference_node``.

    Args:
        weights_data: value for the ``weights`` constant.

    Returns:
        The fake-quant op named ``quantized_weights``.
    """
    threshold_key = _get_name_scope() + "/weights"
    w_min, w_max = self._get_thresholds(threshold_key)

    weights_node = tf.constant(weights_data, tf.float32, name="weights")
    self._add_reference_node(weights_node)

    return tf.fake_quant_with_min_max_args(
        weights_node, w_min, w_max, name="quantized_weights")
def _cell_output(self, net, output_type=None):
    """Fake-quantize a cell's output and register it as a reference node.

    Args:
        net: tensor to quantize.
        output_type: ``"fixed"`` selects the hard-coded range [-1, 1]; any
            other value looks the range up with ``self._get_thresholds``
            under the key ``<current name scope>/output``.

    Returns:
        The fake-quant op named ``output``.
    """
    threshold_key = _get_name_scope() + "/output"
    if output_type != "fixed":
        i_min, i_max = self._get_thresholds(threshold_key)
    else:
        i_min, i_max = -1, 1

    quantized = tf.fake_quant_with_min_max_args(net, i_min, i_max,
                                                name="output")
    self._add_reference_node(quantized)
    return quantized
def fake_quantize_tensor(input_tensor, quantization_bits, min_val, max_val,
                         name):
    """Fake-quantize a tensor inside its own name scope and summarize it.

    Args:
        input_tensor: tensor to quantize.
        quantization_bits: bit width passed as ``num_bits``.
        min_val: lower clamp of the quantization range.
        max_val: upper clamp of the quantization range.
        name: used both for the enclosing name scope and for the op.

    Returns:
        The fake-quantized tensor, after it has been passed to
        ``variable_summaries``.
    """
    with tf.name_scope(name):
        # TODO: min and max have to be supplied by the caller for now; they
        # are not derived from the tensor's actual value range.
        quantized_tensor = tf.fake_quant_with_min_max_args(
            input_tensor,
            min=min_val,
            max=max_val,
            num_bits=quantization_bits,
            narrow_range=False,
            name=name)
        variable_summaries(quantized_tensor)
    return quantized_tensor
def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size,
                              stride):
    """Build a depth-wise separable convolution layer.

    The layer is a depth-wise convolution followed by a 1x1 point-wise
    convolution. Each stage is followed by batch norm when ``is_training``
    is truthy, and by fake quantization over [-8, 8] when
    ``activations_bits`` is at most 8. ``is_training``, ``activations_bits``
    and ``slim`` are read from the enclosing scope.

    Fix: the second fake-quant result used to be compared to ``bn`` with
    ``==`` and thrown away, so the point-wise output was returned
    un-quantized. It is now assigned.

    Args:
        inputs: input tensor.
        num_pwc_filters: number of point-wise convolution filters.
        sc: scope prefix used for all sub-layer scopes.
        kernel_size: kernel size of the depth-wise convolution.
        stride: stride of the depth-wise convolution.

    Returns:
        The (optionally normalized and quantized) point-wise output.
    """
    # Skip the point-wise stage here by setting num_outputs=None.
    depthwise_conv = slim.separable_convolution2d(
        inputs,
        num_outputs=None,
        stride=stride,
        depth_multiplier=1,
        kernel_size=kernel_size,
        scope=sc + '/depthwise_conv')
    if is_training:
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
    else:
        bn = depthwise_conv
    if activations_bits <= 8:
        bn = tf.fake_quant_with_min_max_args(bn, min=-8, max=8,
                                             num_bits=activations_bits)

    pointwise_conv = slim.convolution2d(
        bn,
        num_pwc_filters,
        kernel_size=[1, 1],
        scope=sc + '/pointwise_conv')
    if is_training:
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
    else:
        bn = pointwise_conv
    if activations_bits <= 8:
        # Assignment, not comparison: the quantized tensor must replace bn.
        bn = tf.fake_quant_with_min_max_args(bn, min=-8, max=8,
                                             num_bits=activations_bits)
    return bn
def quantize_test(input_tensor):
    """Fake-quantize ``input_tensor`` over the fixed range [-0.3, 0.3].

    Reads ``sess`` and ``quantization_bits`` from the enclosing scope.

    Returns:
        The fake-quantized tensor, after it has been passed to
        ``variable_summaries``.
    """
    with tf.name_scope('quantized_tensor'):
        # NOTE(review): the tensor's extreme values are still evaluated in
        # the session, but the results are discarded in favour of the
        # hard-coded range below -- confirm whether the lookups are needed.
        max_val_index = tf.argmax(input_tensor, output_type=tf.int32)
        sess.run(input_tensor[max_val_index])
        min_val_index = tf.argmin(input_tensor, output_type=tf.int32)
        sess.run(input_tensor[min_val_index])

        max_val = .3
        min_val = -.3

        quantized_tensor = tf.fake_quant_with_min_max_args(
            input_tensor,
            min=min_val,
            max=max_val,
            num_bits=quantization_bits,
            narrow_range=False,
            name='quantized_tensor')
        variable_summaries(quantized_tensor)
    return quantized_tensor
def _get_outputs_from_inputs(input_tensors, detection_model,
                             output_collection_name, pipeline_config,
                             half=False, quantize=False):
    """Run the detection model on the inputs and register output nodes.

    Args:
        input_tensors: tensors fed to the model.
        detection_model: object providing ``preprocess``, ``predict`` and
            ``postprocess``.
        output_collection_name: forwarded to the output-node helper.
        pipeline_config: configuration object; only read when ``half`` is
            truthy, to obtain the SSD fixed-shape resizer height and width.
        half: when truthy, preprocessing and postprocessing are skipped,
            the inputs are used as given and the image shape is taken from
            ``pipeline_config``.
        quantize: when truthy (and ``half`` is falsy), the float-cast inputs
            are fake-quantized over [0, 255] before preprocessing.

    Returns:
        The result of ``_add_half_output_tensor_nodes`` when ``half`` is
        truthy, otherwise the result of ``_add_output_tensor_nodes``.

    Raises:
        NotImplementedError: when ``half`` is truthy and the fixed-shape
            resizer config cannot be read from ``pipeline_config``.
    """
    if half:
        preprocessed_inputs = input_tensors
        try:
            fixed_shape_resizer_config = (
                pipeline_config.model.ssd.image_resizer.fixed_shape_resizer)
        except Exception:
            # Narrowed from a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are no longer turned into NotImplementedError.
            raise NotImplementedError(
                "Half Graph Expoter Is Only For SSD Structure!")
        true_image_shapes = [
            1,
            fixed_shape_resizer_config.height,
            fixed_shape_resizer_config.width,
            3,
        ]
    else:
        inputs = tf.to_float(input_tensors)
        if quantize:
            inputs = tf.fake_quant_with_min_max_args(inputs, 0, 255)
        preprocessed_inputs, true_image_shapes = detection_model.preprocess(
            inputs)

    output_tensors = detection_model.predict(preprocessed_inputs,
                                             true_image_shapes, half)

    if half:
        return _add_half_output_tensor_nodes(output_tensors,
                                             output_collection_name)
    postprocessed_tensors = detection_model.postprocess(
        output_tensors, true_image_shapes)
    return _add_output_tensor_nodes(postprocessed_tensors,
                                    output_collection_name)
def downscale_model(input_tensor, input_tensor_lr, scale=2):
    """Build the residual down-scaling network.

    Structure: one 3x3 conv (1 -> 64 channels) with ReLU, five residual
    blocks of two 3x3 convs (64 -> 64) each, then a strided 3x3 conv
    (64 -> 1) whose output is added to ``input_tensor_lr``, clamped to
    [0, 1] and fake-quantized over [0, 1].

    Variables are named ``conv_NN_w`` / ``conv_NN_b`` with NN = 00 for the
    first layer, 01..10 for the residual blocks and 19 for the last layer.

    Args:
        input_tensor: input to the first convolution (1 channel expected by
            the kernel shape).
        input_tensor_lr: tensor added to the output of the strided
            convolution.
        scale: spatial stride of the final convolution.

    Returns:
        The quantized, normalized down-sampled tensor.
    """
    unit_strides = [1, 1, 1, 1]

    def conv_with_bias(features, index, kernel_shape, strides):
        # Create the layer's kernel and bias, then apply conv + bias_add.
        kernel = tf.get_variable(
            "conv_%02d_w" % index, kernel_shape,
            initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.get_variable(
            "conv_%02d_b" % index, [kernel_shape[-1]],
            initializer=tf.constant_initializer(0))
        conv = tf.nn.conv2d(features, kernel, strides=strides, padding='SAME')
        return tf.nn.bias_add(conv, bias)

    tensor = tf.nn.relu(
        conv_with_bias(input_tensor, 0, [3, 3, 1, 64], unit_strides))

    # Each iteration builds one residual block; the blocks are cascaded.
    for block in range(5):
        shortcut = tensor
        tensor = tf.nn.relu(
            conv_with_bias(tensor, 2 * block + 1, [3, 3, 64, 64],
                           unit_strides))
        residual = conv_with_bias(tensor, 2 * block + 2, [3, 3, 64, 64],
                                  unit_strides)
        tensor = tf.nn.relu(tf.add(residual, shortcut))

    # Down-scaling conv layer (stride = scale) that also merges the
    # channels into one.
    downscaled = conv_with_bias(tensor, 19, [3, 3, 64, 1],
                                [1, scale, scale, 1])
    combined = tf.add(downscaled, input_tensor_lr)

    # relu6(6 * v) / 6 clamps v to [0, 1] so the image can be processed
    # further as a normalized picture.
    tensor = tf.nn.relu6(combined * 6) / 6
    tensor = tf.fake_quant_with_min_max_args(tensor, min=0, max=1)

    # This is the down-sampled image that is encoded and transmitted between
    # transmitter and receiver; it is now quantized and normalized.
    tensor_downsampled = tensor
    return tensor_downsampled
def main(_):
    """Train the model, validating and checkpointing along the way.

    All configuration is read from the module-level ``FLAGS``. The function
    builds the input pipeline and graph (optionally with fake quantization
    when ``FLAGS.quantize`` is set), trains with gradient descent following
    the staged learning-rate schedule, evaluates on the validation set every
    ``FLAGS.eval_step_interval`` steps, saves checkpoints every
    ``FLAGS.save_step_interval`` steps and finally reports accuracy on the
    testing set.

    Raises:
        Exception: if ``--how_many_training_steps`` and ``--learning_rate``
            do not have the same number of comma-separated entries.
    """
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already
    # have training data of your own, use `--data_url= ` on the command line
    # to avoid downloading.
    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate,
        FLAGS.clip_duration_ms,
        FLAGS.window_size_ms,
        FLAGS.window_stride_ms,
        FLAGS.feature_bin_count,
        FLAGS.preprocess)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url,
        FLAGS.data_dir,
        FLAGS.silence_percentage,
        FLAGS.unknown_percentage,
        FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage,
        FLAGS.testing_percentage,
        model_settings,
        FLAGS.summaries_dir)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int(
        (FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # The number of steps and the learning rates are comma-separated lists
    # that define the rate at each training stage. For example
    # --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # runs 13,000 steps in total: 10,000 at 0.001, then 3,000 at 0.0001.
    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    input_placeholder = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')
    if FLAGS.quantize:
        fingerprint_min, fingerprint_max = input_data.get_features_range(
            model_settings)
        fingerprint_input = tf.fake_quant_with_min_max_args(
            input_placeholder, fingerprint_min, fingerprint_max)
    else:
        fingerprint_input = input_placeholder

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        is_training=True)

    # Define loss and optimizer.
    ground_truth_input = tf.placeholder(
        tf.int64, [None], name='groundtruth_input')

    # Optionally add runtime checks to spot NaNs or other symptoms of
    # numerical errors during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'), tf.control_dependencies(
            control_dependencies):
        learning_rate_input = tf.placeholder(
            tf.float32, [], name='learning_rate_input')
        optimizer = tf.train.GradientDescentOptimizer(learning_rate_input)
        train_step = optimizer.minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(
        ground_truth_input, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(
        tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge the 'eval' summaries and set up the summary writers.
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    labels_path = os.path.join(FLAGS.train_dir,
                               FLAGS.model_architecture + '_labels.txt')
    with gfile.GFile(labels_path, 'w') as labels_file:
        labels_file.write('\n'.join(audio_processor.words_list))

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is: the rate of the
        # first stage whose cumulative step count covers this step.
        steps_covered = 0
        for stage_steps, stage_rate in zip(training_steps_list,
                                           learning_rates_list):
            steps_covered += stage_steps
            if training_step <= steps_covered:
                learning_rate_value = stage_rate
                break

        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)

        # Run the graph with this batch of training data.
        train_fetches = [
            merged_summaries,
            evaluation_step,
            cross_entropy_mean,
            train_step,
            increment_global_step,
        ]
        train_feed = {
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        }
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            train_fetches, feed_dict=train_feed)
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                        (training_step, learning_rate_value,
                         train_accuracy * 100, cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for offset in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, offset,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))
                # Run a validation step and capture the summaries for
                # TensorBoard.
                validation_summary, validation_accuracy, conf_matrix = (
                    sess.run(
                        [merged_summaries, evaluation_step,
                         confusion_matrix],
                        feed_dict={
                            fingerprint_input: validation_fingerprints,
                            ground_truth_input: validation_ground_truth,
                            dropout_prob: 1.0
                        }))
                validation_writer.add_summary(validation_summary,
                                              training_step)
                # The last batch may be smaller; weight it accordingly.
                batch_size = min(FLAGS.batch_size, set_size - offset)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if training_step % FLAGS.save_step_interval == 0 or is_last_step:
            checkpoint_path = os.path.join(
                FLAGS.train_dir, FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

    # Final evaluation on the testing set.
    set_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for offset in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, offset, model_settings, 0.0, 0.0, 0, 'testing',
            sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - offset)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
def main(_):
    """Train the model, validating and checkpointing along the way.

    All configuration is read from the module-level ``FLAGS``. The function
    builds the input pipeline and graph (optionally with fake quantization
    when ``FLAGS.quantize`` is set), trains with gradient descent following
    the staged learning-rate schedule, evaluates on the validation set every
    ``FLAGS.eval_step_interval`` steps, saves checkpoints every
    ``FLAGS.save_step_interval`` steps and finally reports accuracy on the
    testing set.

    Raises:
        Exception: if ``--how_many_training_steps`` and ``--learning_rate``
            do not have the same number of comma-separated entries.
    """
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already
    # have training data of your own, use `--data_url= ` on the command line
    # to avoid downloading.
    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate,
        FLAGS.clip_duration_ms,
        FLAGS.window_size_ms,
        FLAGS.window_stride_ms,
        FLAGS.feature_bin_count,
        FLAGS.preprocess)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url,
        FLAGS.data_dir,
        FLAGS.silence_percentage,
        FLAGS.unknown_percentage,
        FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage,
        FLAGS.testing_percentage,
        model_settings,
        FLAGS.summaries_dir)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int(
        (FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # The number of steps and the learning rates are comma-separated lists
    # that define the rate at each training stage. For example
    # --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # runs 13,000 steps in total: 10,000 at 0.001, then 3,000 at 0.0001.
    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    input_placeholder = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')
    if FLAGS.quantize:
        fingerprint_min, fingerprint_max = input_data.get_features_range(
            model_settings)
        fingerprint_input = tf.fake_quant_with_min_max_args(
            input_placeholder, fingerprint_min, fingerprint_max)
    else:
        fingerprint_input = input_placeholder

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        is_training=True)

    # Define loss and optimizer.
    ground_truth_input = tf.placeholder(
        tf.int64, [None], name='groundtruth_input')

    # Optionally add runtime checks to spot NaNs or other symptoms of
    # numerical errors during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'), tf.control_dependencies(
            control_dependencies):
        learning_rate_input = tf.placeholder(
            tf.float32, [], name='learning_rate_input')
        optimizer = tf.train.GradientDescentOptimizer(learning_rate_input)
        train_step = optimizer.minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(
        ground_truth_input, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(
        tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge the 'eval' summaries and set up the summary writers.
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    labels_path = os.path.join(FLAGS.train_dir,
                               FLAGS.model_architecture + '_labels.txt')
    with gfile.GFile(labels_path, 'w') as labels_file:
        labels_file.write('\n'.join(audio_processor.words_list))

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is: the rate of the
        # first stage whose cumulative step count covers this step.
        steps_covered = 0
        for stage_steps, stage_rate in zip(training_steps_list,
                                           learning_rates_list):
            steps_covered += stage_steps
            if training_step <= steps_covered:
                learning_rate_value = stage_rate
                break

        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)

        # Run the graph with this batch of training data.
        train_fetches = [
            merged_summaries,
            evaluation_step,
            cross_entropy_mean,
            train_step,
            increment_global_step,
        ]
        train_feed = {
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        }
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            train_fetches, feed_dict=train_feed)
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                        (training_step, learning_rate_value,
                         train_accuracy * 100, cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for offset in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, offset,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))
                # Run a validation step and capture the summaries for
                # TensorBoard.
                validation_summary, validation_accuracy, conf_matrix = (
                    sess.run(
                        [merged_summaries, evaluation_step,
                         confusion_matrix],
                        feed_dict={
                            fingerprint_input: validation_fingerprints,
                            ground_truth_input: validation_ground_truth,
                            dropout_prob: 1.0
                        }))
                validation_writer.add_summary(validation_summary,
                                              training_step)
                # The last batch may be smaller; weight it accordingly.
                batch_size = min(FLAGS.batch_size, set_size - offset)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if training_step % FLAGS.save_step_interval == 0 or is_last_step:
            checkpoint_path = os.path.join(
                FLAGS.train_dir, FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

    # Final evaluation on the testing set.
    set_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for offset in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, offset, model_settings, 0.0, 0.0, 0, 'testing',
            sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - offset)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
def create_rendered_images(batch_size, textures):
    """Build the graph that turns UV frames into noisy, blurred, quantized images.

    Placeholders named 'backgrounds' and 'frames' are created for the inputs.
    Every noise setting is a two-element placeholder with a default value whose
    entries are used as the (minval, maxval) of a `tf.random_uniform` draw; the
    defaults (1, 1 for multiplicative, 0, 0 for additive/stddev) make the
    corresponding noise a no-op. The blur kernel placeholder defaults to a 3x3
    identity kernel.

    Args:
        batch_size: Leading dimension used for the 'backgrounds' and 'frames'
            placeholders.
        textures: Texture data handed to `sample_bilinear` after texture noise
            has been applied. Its `.shape` is passed to `tf.random_normal`, so
            it presumably needs a fully defined static shape -- TODO confirm.

    Returns:
        The rendered images after `tf.fake_quant_with_min_max_args`
        (min=0, max=1, 8 bits), exposed through an identity op named
        'input_images'.
    """

    def _range_placeholder(default_pair, name):
        # Two-element placeholder holding the bounds of a uniform draw.
        return tf.placeholder_with_default(default_pair, [2], name=name)

    def _uniform(shape, bounds):
        # One uniform sample of `shape` between bounds[0] and bounds[1].
        return tf.random_uniform(shape, bounds[0], bounds[1])

    backgrounds_ = tf.placeholder(
        tf.float32, [batch_size, None, None, 3], name='backgrounds')
    frames_ = tf.placeholder(
        tf.float32, [batch_size, None, None, 4], name='frames')

    # Texture-space noise controls.
    tex_channel_mul = _range_placeholder(
        [1., 1.], 'texture_channel_multiplicative_noise')
    tex_channel_add = _range_placeholder(
        [0., 0.], 'texture_channel_additive_noise')
    tex_pixel_mul = _range_placeholder(
        [1., 1.], 'texture_pixel_multiplicative_noise')
    tex_pixel_add = _range_placeholder(
        [0., 0.], 'texture_pixel_additive_noise')
    tex_gaussian_stddev = _range_placeholder(
        [0., 0.], 'texture_gaussian_noise_stddev')

    # Image-space noise controls.
    img_channel_mul = _range_placeholder(
        [1., 1.], 'image_channel_multiplicative_noise')
    img_channel_add = _range_placeholder(
        [0., 0.], 'image_channel_additive_noise')
    img_pixel_mul = _range_placeholder(
        [1., 1.], 'image_pixel_multiplicative_noise')
    img_pixel_add = _range_placeholder(
        [0., 0.], 'image_pixel_additive_noise')
    img_gaussian_stddev = _range_placeholder(
        [0., 0.], 'image_gaussian_noise_stddev')

    # 2-D blur kernel, expanded to [h, w, 1, 1] and tiled to [h, w, 3, 1] so it
    # can be used as a depthwise filter over the three image channels.
    identity_kernel = [[0., 0., 0.],
                       [0., 1., 0.],
                       [0., 0., 0.]]
    blur_kernel = tf.placeholder_with_default(
        identity_kernel, [None, None], name='image_gaussian_blur_kernel')
    blur_kernel = blur_kernel[:, :, tf.newaxis, tf.newaxis]
    blur_kernel = tf.tile(blur_kernel, [1, 1, 3, 1])

    # TODO: This could probably be made faster by moving the random elements
    # outside of the per-frame function.
    def render_frame(frame_):
        """Render a single frame (first three channels UVF, rest alpha)."""
        # Texture noise: per-channel ([3]) then scalar ([]) scale and offset,
        # followed by Gaussian noise with a uniformly drawn stddev.
        noisy_textures = textures
        noisy_textures = noisy_textures * _uniform([3], tex_channel_mul)
        noisy_textures = noisy_textures + _uniform([3], tex_channel_add)
        noisy_textures = noisy_textures * _uniform([], tex_pixel_mul)
        noisy_textures = noisy_textures + _uniform([], tex_pixel_add)
        texture_stddev = _uniform([], tex_gaussian_stddev)
        noisy_textures = noisy_textures + tf.random_normal(
            noisy_textures.shape, stddev=texture_stddev)
        # NOTE: the noisy textures are not clipped to [0, 1].

        # Render.
        uvf_ = frame_[..., :3]
        image_ = sample_bilinear(noisy_textures, uvf_)

        # Composite onto the background.
        # FIXME: This only really works with batch_size=1
        alpha_ = frame_[..., 3:]
        foreground = image_ * alpha_
        image_ = foreground + backgrounds_[0] * (1 - alpha_)

        # Blur the image; depthwise_conv2d wants a leading batch axis.
        batched_image = image_[tf.newaxis, :, :, :]
        batched_image = tf.nn.depthwise_conv2d(
            batched_image, blur_kernel, strides=[1, 1, 1, 1], padding='SAME')
        image_ = batched_image[0]

        # Blur the alpha channel with the first channel slice of the kernel.
        batched_alpha = alpha_[tf.newaxis, :, :, :]
        alpha_kernel = blur_kernel[:, :, :1, :]
        batched_alpha = tf.nn.depthwise_conv2d(
            batched_alpha, alpha_kernel, strides=[1, 1, 1, 1], padding='SAME')
        alpha_ = batched_alpha[0]

        # Recomposite the blurred image onto the background.
        # FIXME: This only really works with batch_size=1
        foreground = image_ * alpha_
        image_ = foreground + backgrounds_[0] * (1 - alpha_)

        # Image noise, mirroring the texture noise above. The noise shape is
        # taken dynamically because the frame's spatial size is not static.
        image_ = image_ * _uniform([3], img_channel_mul)
        image_ = image_ + _uniform([3], img_channel_add)
        image_ = image_ * _uniform([], img_pixel_mul)
        image_ = image_ + _uniform([], img_pixel_add)
        noise_shape = tf.shape(image_)
        image_stddev = _uniform([], img_gaussian_stddev)
        image_ = image_ + tf.random_normal(noise_shape, stddev=image_stddev)
        # NOTE: the noisy image is not clipped to [0, 1].
        return image_

    rendered = tf.map_fn(render_frame, frames_, dtype=tf.float32)
    # TODO: Can we move image compositing to out of render_frame?
    # TODO: Move noising of image to out of render_frame to here

    rendered = tf.fake_quant_with_min_max_args(
        rendered, min=0., max=1., num_bits=8)
    return tf.identity(rendered, name='input_images')
def main(_):
    """Train the speech model selected by FLAGS, then evaluate it on the test split.

    The function builds the model and training graph, optionally restores a
    checkpoint, and runs the training loop. During training it validates every
    `FLAGS.eval_step_interval` steps and saves a checkpoint every
    `FLAGS.save_step_interval` steps (both also on the last step). The training
    accuracy of every step and every validation accuracy are additionally
    appended to two hard-coded text files. After training, accuracy and a
    confusion matrix are computed over the 'testing' split.

    Args:
        _: Unused.

    Raises:
        Exception: If the step-count and learning-rate lists differ in length,
            or if quantization is requested with an unknown preprocess mode.
    """
    # Make every log message visible.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    label_words = input_data.prepare_words_list(FLAGS.wanted_words.split(','))
    model_settings = models.prepare_model_settings(
        len(label_words), FLAGS.sample_rate, FLAGS.clip_duration_ms,
        FLAGS.window_size_ms, FLAGS.window_stride_ms,
        FLAGS.feature_bin_count, FLAGS.preprocess)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage,
        model_settings, FLAGS.summaries_dir)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # Work out the learning rate for each training phase. It is often effective
    # to start with a high rate and finish with a lower one, so the step counts
    # and rates are given as comma-separated lists, one entry per phase. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # runs 13,000 training loops in total: 0.001 for the first 10,000 and
    # 0.0001 for the final 3,000.
    training_steps_list = [
        int(count) for count in FLAGS.how_many_training_steps.split(',')]
    learning_rates_list = [
        float(rate) for rate in FLAGS.learning_rate.split(',')]
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    input_placeholder = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')
    if FLAGS.quantize:
        # Fake-quantization range of the input for each preprocess mode.
        fingerprint_ranges = {
            'average': (0.0, 2048.0),
            'mfcc': (-247.0, 30.0),
        }
        if FLAGS.preprocess not in fingerprint_ranges:
            raise Exception('Unknown preprocess mode "%s" (should be "mfcc" or'
                            ' "average")' % (FLAGS.preprocess))
        fingerprint_min, fingerprint_max = fingerprint_ranges[FLAGS.preprocess]
        fingerprint_input = tf.fake_quant_with_min_max_args(
            input_placeholder, fingerprint_min, fingerprint_max)
    else:
        fingerprint_input = input_placeholder

    logits, dropout_prob = models.create_model(
        fingerprint_input, model_settings, FLAGS.model_architecture,
        is_training=True)

    # Define the loss and the optimizer.
    ground_truth_input = tf.placeholder(
        tf.int64, [None], name='groundtruth_input')

    # Optionally add runtime checks that spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        control_dependencies = [tf.add_check_numerics_ops()]

    # Create the back-propagation and training-evaluation machinery.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'):
        with tf.control_dependencies(control_dependencies):
            learning_rate_input = tf.placeholder(
                tf.float32, [], name='learning_rate_input')
            optimizer = tf.train.GradientDescentOptimizer(learning_rate_input)
            train_step = optimizer.minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(
        ground_truth_input, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge the 'eval' summaries and write them under FLAGS.summaries_dir.
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save the list of words.
    labels_path = os.path.join(FLAGS.train_dir,
                               FLAGS.model_architecture + '_labels.txt')
    with gfile.GFile(labels_path, 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Text files that collect one accuracy value per line.
    train_result_path = 'E:\\speech_rocognition_demo\\method3\\tf-keywords\\result\\result_original\\train_low_latency_conv.txt'
    validation_result_path = 'E:\\speech_rocognition_demo\\method3\\tf-keywords\\result\\result_original\\validation_low_latency_conv.txt'

    def _append_line(path, text):
        # Append mode plus a trailing newline keeps one entry per line.
        with open(path, 'a') as file_handle:
            file_handle.write(text + '\n')

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Find the learning rate of the phase this step belongs to.
        phase_end_step = 0
        for phase_steps, phase_rate in zip(training_steps_list,
                                           learning_rates_list):
            phase_end_step += phase_steps
            if training_step <= phase_end_step:
                learning_rate_value = phase_rate
                break

        # Pull the audio samples used for this training step.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)

        # Run the graph on this batch of training data.
        train_fetches = [
            merged_summaries,
            evaluation_step,
            cross_entropy_mean,
            train_step,
            increment_global_step,
        ]
        train_feed = {
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        }
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            train_fetches, feed_dict=train_feed)
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate_value, train_accuracy * 100,
             cross_entropy_value))

        # Record this step's training accuracy (as a percentage string).
        _append_line(train_result_path, str(train_accuracy * 100))

        is_last_step = (training_step == training_steps_max)
        if is_last_step or (training_step % FLAGS.eval_step_interval) == 0:
            validation_size = audio_processor.set_size('validation')
            validation_total = 0
            validation_conf = None
            for offset in xrange(0, validation_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, offset,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))
                # Run a validation step and capture the merged summaries for
                # TensorBoard.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary, training_step)
                # Weight each batch by its size; the last one may be short.
                examples_in_batch = min(FLAGS.batch_size,
                                        validation_size - offset)
                validation_total += (
                    (validation_accuracy * examples_in_batch) / validation_size)
                if validation_conf is None:
                    validation_conf = conf_matrix
                else:
                    validation_conf += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (validation_conf))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, validation_total * 100,
                             validation_size))

            # Record this validation accuracy (as a percentage string).
            _append_line(validation_result_path, str(validation_total * 100))

        # Save the model checkpoint periodically.
        if training_step % FLAGS.save_step_interval == 0 or is_last_step:
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

    # Final evaluation over the 'testing' split.
    test_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', test_size)
    test_total = 0
    test_conf = None
    for offset in xrange(0, test_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, offset, model_settings, 0.0, 0.0, 0, 'testing',
            sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        examples_in_batch = min(FLAGS.batch_size, test_size - offset)
        test_total += (test_accuracy * examples_in_batch) / test_size
        if test_conf is None:
            test_conf = conf_matrix
        else:
            test_conf += conf_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (test_conf))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                    (test_total * 100, test_size))
def main(_):
    """Train the three-layer fully connected model on simulated data.

    Data comes from `generate_simulated_data()`. The function builds the model
    and training graph (optionally fake-quantized), then runs the training
    loop. Every `FLAGS.eval_step_interval` steps, and on the last step, it
    evaluates on the test arrays; every `FLAGS.save_step_interval` steps, and
    on the last step, it saves a checkpoint.

    Args:
        _: Unused.

    Raises:
        Exception: If the step-count and learning-rate lists differ in length.
    """
    NUM_INPUTS = 4
    NUM_CLASSES = 9

    # The data, split between train and test sets; inputs are cast to uint8.
    x_train, y_train, x_test, y_test = generate_simulated_data()
    x_train = x_train.astype('uint8')
    x_test = x_test.astype('uint8')
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Labels are cast to int64 to match the ground-truth placeholder dtype.
    y_train = y_train.astype('int64')
    y_test = y_test.astype('int64')

    tf.logging.set_verbosity(tf.logging.INFO)
    sess = tf.InteractiveSession()

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed
    # by lower levels towards the end, the number of steps and learning rates
    # can be specified as comma-separated lists to define the rate at each
    # stage. For example --how_many_training_steps=10000,3000
    # --learning_rate=0.001,0.0001 will run 13,000 training loops in total,
    # with a rate of 0.001 for the first 10,000, and 0.0001 for the final 3,000.
    training_steps_list = [
        int(count) for count in FLAGS.how_many_training_steps.split(',')]
    learning_rates_list = [
        float(rate) for rate in FLAGS.learning_rate.split(',')]
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    input_placeholder = tf.placeholder(
        tf.float32, [None, NUM_INPUTS], name='graph_input')
    if FLAGS.quantize:
        input_min, input_max = 0, 256
        graph_input = tf.fake_quant_with_min_max_args(
            input_placeholder, input_min, input_max)
    else:
        graph_input = input_placeholder

    logits, dropout_prob = models.create_three_fc_model(
        graph_input, NUM_INPUTS, 20, 20, NUM_CLASSES, is_training=True)

    # Define loss and optimizer.
    ground_truth_input = tf.placeholder(
        tf.int64, [None], name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms
    # of numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        control_dependencies = [tf.add_check_numerics_ops()]

    # Create the back propagation and training evaluation machinery.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'):
        with tf.control_dependencies(control_dependencies):
            learning_rate_input = tf.placeholder(
                tf.float32, [], name='learning_rate_input')
            optimizer = tf.train.GradientDescentOptimizer(learning_rate_input)
            train_step = optimizer.minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(
        ground_truth_input, predicted_indices, num_classes=NUM_CLASSES)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge the 'eval' summaries and write them under FLAGS.summaries_dir.
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    start_step = 1

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Find the learning rate of the phase this step belongs to.
        phase_end_step = 0
        for phase_steps, phase_rate in zip(training_steps_list,
                                           learning_rates_list):
            phase_end_step += phase_steps
            if training_step <= phase_end_step:
                learning_rate_value = phase_rate
                break

        # Slice this step's batch out of the training arrays; the start index
        # wraps around the number of training samples.
        index = (training_step * FLAGS.batch_size) % x_train.shape[0]
        batch_end = index + FLAGS.batch_size
        train_fingerprints = x_train[index:batch_end]
        train_ground_truth = y_train[index:batch_end]

        # Run the graph with this batch of training data.
        train_fetches = [
            merged_summaries,
            evaluation_step,
            cross_entropy_mean,
            train_step,
            increment_global_step,
        ]
        train_feed = {
            graph_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        }
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            train_fetches, feed_dict=train_feed)
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate_value, train_accuracy * 100,
             cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if is_last_step or (training_step % FLAGS.eval_step_interval) == 0:
            # Validation runs over the test arrays.
            validation_size = y_test.shape[0]
            validation_total = 0
            validation_conf = None
            for offset in xrange(0, validation_size, FLAGS.batch_size):
                validation_fingerprints = x_test[offset:offset + FLAGS.batch_size]
                validation_ground_truth = y_test[offset:offset + FLAGS.batch_size]
                # Run a validation step and capture the merged summaries for
                # TensorBoard.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        graph_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary, training_step)
                # Weight each batch by its size; the last one may be short.
                examples_in_batch = min(FLAGS.batch_size,
                                        validation_size - offset)
                validation_total += (
                    (validation_accuracy * examples_in_batch) / validation_size)
                if validation_conf is None:
                    validation_conf = conf_matrix
                else:
                    validation_conf += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (validation_conf))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, validation_total * 100,
                             validation_size))

        # Save the model checkpoint periodically.
        if training_step % FLAGS.save_step_interval == 0 or is_last_step:
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)
# Script: exercise a Keras model, convert it to TensorFlow Lite, then convert
# a small fake-quantized graph to a uint8 TensorFlow Lite model and load it.
# NOTE(review): `model` is not defined in this part of the file; it is
# presumably a compiled tf.keras model created earlier -- confirm.
x = np.random.random((1, 3))
y = np.random.random((1, 3, 3))
model.train_on_batch(x, y)
model.predict(x)

# Save tf.keras model in HDF5 format.
keras_file = "keras_model.h5"
tf.keras.models.save_model(model, keras_file)

# Convert to TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file)
tflite_model = converter.convert()
# Use a context manager so the file is flushed and closed deterministically.
with open("converted_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

# Build a tiny graph: image plus a constant, with a fake-quant op on the
# output that fixes the output range to [0, 1].
img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3))
const = tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.])
val = img + const
out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output")

with tf.Session() as sess:
    converter = tf.lite.TFLiteConverter.from_session(sess, [img], [out])
    converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
    input_arrays = converter.get_input_arrays()
    converter.quantized_input_stats = {
        input_arrays[0]: (0., 1.)
    }  # mean, std_dev
    tflite_model = converter.convert()
    # Same file name as above: this overwrites the Keras-derived model.
    with open("converted_model.tflite", "wb") as tflite_file:
        tflite_file.write(tflite_model)

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()