def __call__(self, image_input, training=False, keep_prob=1.0):
    """Runs the CNN producing the embeddings and the gradients.

    :param image_input: Image input to produce embeddings for.
        [batch_size, image_size, image_size, 1]
    :param training: A flag indicating training or evaluation.
    :param keep_prob: A tf placeholder of type tf.float32 indicating the
        amount of dropout applied (unused by this body).
    :return: Feature map from 'Cell_17' of size [batch_size, 11, 11, 4032].
    """
    with tf.variable_scope('NAS', reuse=self.reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=training):
            with slim.arg_scope(nasnet.nasnet_large_arg_scope(
                    weight_decay=FLAGS.weight_decay)):
                _, endpoints = nasnet.build_nasnet_large(
                    image_input,
                    num_classes=None,
                    is_training=training,
                    final_endpoint='global_pool')
    # 'Cell_17' is the last normal cell before global pooling.
    feature_map = endpoints['Cell_17']
    self.reuse = True
    return feature_map
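# Illustrative standalone sketch (not from the source) of the same 'Cell_17'
# extraction, assuming the slim package layout used by the snippets below
# (`nets.nasnet`) and TF 1.x with tf.contrib.slim.
import tensorflow as tf
from nets.nasnet import nasnet

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, shape=[None, 331, 331, 3])
with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
    # num_classes=None skips the classifier head; 'global_pool' stops the
    # graph right after the last cell and the pooling op.
    _, endpoints = nasnet.build_nasnet_large(
        images, num_classes=None, is_training=False,
        final_endpoint='global_pool')
feature_map = endpoints['Cell_17']  # [batch, 11, 11, 4032]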
def testNoAuxHeadLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    for use_aux_head in (True, False):
        tf.reset_default_graph()
        inputs = tf.random_uniform((batch_size, height, width, 3))
        tf.train.create_global_step()
        with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
            _, end_points = nasnet.build_nasnet_large(
                inputs, num_classes, use_aux_head=use_aux_head)
        self.assertEqual('AuxLogits' in end_points, use_aux_head)
def testBuildPreLogitsLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = None

    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        net, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    self.assertFalse('AuxLogits' in end_points)
    self.assertFalse('Predictions' in end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 4032])
def testOverrideHParamsLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    config = nasnet.large_imagenet_config()
    config.set_hparam('data_format', 'NCHW')
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        _, end_points = nasnet.build_nasnet_large(
            inputs, num_classes, config=config)
    self.assertListEqual(end_points['Stem'].shape.as_list(),
                         [batch_size, 336, 42, 42])
def testNoAuxHeadLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    for use_aux_head in (True, False):
        tf.reset_default_graph()
        inputs = tf.random_uniform((batch_size, height, width, 3))
        tf.train.create_global_step()
        config = nasnet.large_imagenet_config()
        config.set_hparam('use_aux_head', int(use_aux_head))
        with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
            _, end_points = nasnet.build_nasnet_large(inputs, num_classes,
                                                      config=config)
        self.assertEqual('AuxLogits' in end_points, use_aux_head)
def testNoAuxHeadLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    for use_aux_head in (True, False):
        tf.compat.v1.reset_default_graph()
        inputs = tf.random.uniform((batch_size, height, width, 3))
        tf.compat.v1.train.create_global_step()
        config = nasnet.large_imagenet_config()
        config.set_hparam('use_aux_head', int(use_aux_head))
        with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
            _, end_points = nasnet.build_nasnet_large(inputs, num_classes,
                                                      config=config)
        self.assertEqual('AuxLogits' in end_points, use_aux_head)
def nasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
    """Defines the default arg scope for NASNet-A Large for object detection.

    This provides a small edit to switch batch norm training on and off.

    Args:
      is_batch_norm_training: Boolean indicating whether to train with batch
        norm.

    Returns:
      An `arg_scope` to use for the NASNet Large Model.
    """
    imagenet_scope = nasnet.nasnet_large_arg_scope()
    with arg_scope(imagenet_scope):
        with arg_scope([slim.batch_norm],
                       is_training=is_batch_norm_training) as sc:
            return sc
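# Illustrative usage sketch (not from the source): one way to build the
# detection backbone under the scope above with batch norm frozen during
# fine-tuning; `preprocessed_inputs` is a hypothetical image tensor.
preprocessed_inputs = tf.placeholder(tf.float32, shape=[None, 331, 331, 3])
with slim.arg_scope(nasnet_large_arg_scope_for_detection(
        is_batch_norm_training=False)):
    _, end_points = nasnet.build_nasnet_large(
        preprocessed_inputs, num_classes=None, is_training=False,
        final_endpoint='Cell_11')
rpn_features = end_points['Cell_11']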
def nasnet_large(inputs, is_training, opts):
    with slim.arg_scope(nasnet.nasnet_large_arg_scope(
            weight_decay=opts.weight_decay,
            batch_norm_decay=opts.batch_norm_decay,
            batch_norm_epsilon=opts.batch_norm_epsilon)):
        config = nasnet.large_imagenet_config()
        config.set_hparam('dense_dropout_keep_prob', opts.dropout_keep_prob)
        config.set_hparam('use_aux_head', int(opts.create_aux_logits))
        return nasnet.build_nasnet_large(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            config=config)
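# Illustrative usage sketch (not from the source): `opts` is any object
# exposing the hyperparameter attributes read above; argparse.Namespace works.
from argparse import Namespace

opts = Namespace(weight_decay=4e-5, batch_norm_decay=0.9997,
                 batch_norm_epsilon=0.001, dropout_keep_prob=0.5,
                 create_aux_logits=True, num_classes=1001)
images = tf.placeholder(tf.float32, shape=[None, 331, 331, 3])
logits, end_points = nasnet_large(images, is_training=False, opts=opts)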
def testBuildLogitsLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        logits, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])
def testAllEndPointsShapesLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    tf.compat.v1.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        _, end_points = nasnet.build_nasnet_large(inputs, num_classes)

    endpoints_shapes = {
        'Stem': [batch_size, 42, 42, 336],
        'Cell_0': [batch_size, 42, 42, 1008],
        'Cell_1': [batch_size, 42, 42, 1008],
        'Cell_2': [batch_size, 42, 42, 1008],
        'Cell_3': [batch_size, 42, 42, 1008],
        'Cell_4': [batch_size, 42, 42, 1008],
        'Cell_5': [batch_size, 42, 42, 1008],
        'Cell_6': [batch_size, 21, 21, 2016],
        'Cell_7': [batch_size, 21, 21, 2016],
        'Cell_8': [batch_size, 21, 21, 2016],
        'Cell_9': [batch_size, 21, 21, 2016],
        'Cell_10': [batch_size, 21, 21, 2016],
        'Cell_11': [batch_size, 21, 21, 2016],
        'Cell_12': [batch_size, 11, 11, 4032],
        'Cell_13': [batch_size, 11, 11, 4032],
        'Cell_14': [batch_size, 11, 11, 4032],
        'Cell_15': [batch_size, 11, 11, 4032],
        'Cell_16': [batch_size, 11, 11, 4032],
        'Cell_17': [batch_size, 11, 11, 4032],
        'Reduction_Cell_0': [batch_size, 21, 21, 1344],
        'Reduction_Cell_1': [batch_size, 11, 11, 2688],
        'global_pool': [batch_size, 4032],
        # Logits and predictions
        'AuxLogits': [batch_size, num_classes],
        'Logits': [batch_size, num_classes],
        'Predictions': [batch_size, num_classes]
    }
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
        tf.compat.v1.logging.info(
            'Endpoint name: {}'.format(endpoint_name))
        expected_shape = endpoints_shapes[endpoint_name]
        self.assertTrue(endpoint_name in end_points)
        self.assertListEqual(
            end_points[endpoint_name].get_shape().as_list(),
            expected_shape)
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Extracts features using the first half of the NASNet network.
    We construct the network in `align_feature_maps=True` mode, which means
    that all VALID paddings in the network are changed to SAME padding so
    that the feature maps are aligned.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]

    Raises:
      ValueError: If the created network is missing the required activation.
    """
    del scope

    if len(preprocessed_inputs.get_shape().as_list()) != 4:
        raise ValueError('`preprocessed_inputs` must be 4 dimensional, got '
                         'a tensor of shape %s'
                         % preprocessed_inputs.get_shape())

    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        _, end_points = nasnet.build_nasnet_large(
            preprocessed_inputs, num_classes=None,
            is_training=self._is_training,
            is_batchnorm_training=self._train_batch_norm,
            final_endpoint='Cell_11')

    # Note that both 'Cell_10' and 'Cell_11' have equal depth = 2016.
    rpn_feature_map = tf.concat([end_points['Cell_10'],
                                 end_points['Cell_11']], 3)

    # nasnet.py does not maintain the batch size in the first dimension.
    # This workaround lets us retain the batch dimension below.
    batch = preprocessed_inputs.get_shape().as_list()[0]
    shape_without_batch = rpn_feature_map.get_shape().as_list()[1:]
    rpn_feature_map_shape = [batch] + shape_without_batch
    rpn_feature_map.set_shape(rpn_feature_map_shape)

    return rpn_feature_map
def create(size):
    inputs = tf.placeholder(tf.float32, shape=(None, size, size, 3),
                            name="InputHolder")
    tf.train.get_or_create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        logits, end_points = nasnet.build_nasnet_large(
            inputs, 1001, is_training=False)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, 'pretrained/large/model.ckpt')
        pb_visual_writer = tf.summary.FileWriter('training')
        pb_visual_writer.add_graph(sess.graph)
        saver.save(sess, "pretrained/large/nasnet_dp.ckpt")
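# Illustrative call (not from the source): NASNet-A Large expects 331x331
# inputs, and the written graph can then be inspected with TensorBoard
# (tensorboard --logdir training).
create(331)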
def testAllEndPointsShapesLargeModel(self):
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        _, end_points = nasnet.build_nasnet_large(inputs, num_classes)

    endpoints_shapes = {
        'Stem': [batch_size, 42, 42, 336],
        'Cell_0': [batch_size, 42, 42, 1008],
        'Cell_1': [batch_size, 42, 42, 1008],
        'Cell_2': [batch_size, 42, 42, 1008],
        'Cell_3': [batch_size, 42, 42, 1008],
        'Cell_4': [batch_size, 42, 42, 1008],
        'Cell_5': [batch_size, 42, 42, 1008],
        'Cell_6': [batch_size, 21, 21, 2016],
        'Cell_7': [batch_size, 21, 21, 2016],
        'Cell_8': [batch_size, 21, 21, 2016],
        'Cell_9': [batch_size, 21, 21, 2016],
        'Cell_10': [batch_size, 21, 21, 2016],
        'Cell_11': [batch_size, 21, 21, 2016],
        'Cell_12': [batch_size, 11, 11, 4032],
        'Cell_13': [batch_size, 11, 11, 4032],
        'Cell_14': [batch_size, 11, 11, 4032],
        'Cell_15': [batch_size, 11, 11, 4032],
        'Cell_16': [batch_size, 11, 11, 4032],
        'Cell_17': [batch_size, 11, 11, 4032],
        'Reduction_Cell_0': [batch_size, 21, 21, 1344],
        'Reduction_Cell_1': [batch_size, 11, 11, 2688],
        'global_pool': [batch_size, 4032],
        # Logits and predictions
        'AuxLogits': [batch_size, num_classes],
        'Logits': [batch_size, num_classes],
        'Predictions': [batch_size, num_classes]
    }
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
        tf.logging.info('Endpoint name: {}'.format(endpoint_name))
        expected_shape = endpoints_shapes[endpoint_name]
        self.assertTrue(endpoint_name in end_points)
        self.assertListEqual(
            end_points[endpoint_name].get_shape().as_list(),
            expected_shape)
def write_weights(path, nas_type):
    checkpoints_dir = os.path.join(
        path, 'checkpoints',
        'NASNet-A_Large_331' if nas_type == 'large' else 'NASNet-A_Mobile_224')
    print('checkpoints_dir', checkpoints_dir)
    weights_dir = os.path.join(
        path, 'weights',
        'NASNet-A_Large_331' if nas_type == 'large' else 'NASNet-A_Mobile_224')
    print('weights_dir', weights_dir)

    # download model
    file_checkpoint = os.path.join(checkpoints_dir, 'model.ckpt.index')
    if not tf.gfile.Exists(file_checkpoint):
        tf.gfile.MakeDirs(checkpoints_dir)
        dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)
    file_checkpoint = os.path.join(checkpoints_dir, 'model.ckpt')

    with tf.Graph().as_default():
        # Create model architecture
        image_size = 224 if nas_type == 'mobile' else 331
        print('image_size', image_size)
        num_classes = 1001
        inputs_np = np.ones((1, image_size, image_size, 3), dtype=np.float32)
        #inputs_np = np.load(weights_dir + '/input.npy')
        print('input', inputs_np.shape)
        inputs = tf.constant(inputs_np, dtype=tf.float32)
        with slim.arg_scope(nasnet_mobile_arg_scope() if nas_type == 'mobile'
                            else nasnet_large_arg_scope()):
            build_nasnet = getattr(
                nasnet,
                'build_nasnet_mobile' if nas_type == 'mobile'
                else 'build_nasnet_large')
            logits, _ = build_nasnet(inputs, num_classes=num_classes,
                                     is_training=False)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Initialize model
            init_fn = slim.assign_from_checkpoint_fn(
                file_checkpoint, slim.get_model_variables())
            init_fn(sess)

            # Display model variables
            for v in slim.get_model_variables():
                print('name = {}, shape = {}'.format(v.name, v.get_shape()))

            # Create graph
            os.system("rm -rf logs")
            os.system("mkdir -p logs")
            writer = tf.summary.FileWriter('logs',
                                           graph=tf.get_default_graph())

            # conv0
            dump_conv2d(sess=sess, path=weights_dir, name='conv0')
            dump_bn(sess=sess, path=weights_dir, name='conv0_bn')

            # cell_stem
            dump_cell_stem_0(sess=sess, path=weights_dir, name='cell_stem_0')
            dump_cell_stem_1(sess=sess, path=weights_dir, name='cell_stem_1')

            # 4 normal cells per block for mobile, 6 for large
            num_normal_cells = 4 if nas_type == 'mobile' else 6
            cell_id = 0
            for i in range(3):
                dump_first_cell(sess=sess, path=weights_dir,
                                name='cell_' + str(cell_id))
                cell_id += 1
                for _ in range(num_normal_cells - 1):
                    dump_normal_cell(sess=sess, path=weights_dir,
                                     name='cell_' + str(cell_id))
                    cell_id += 1
                if i < 2:
                    dump_reduction_cell(sess=sess, path=weights_dir,
                                        name='reduction_cell_' + str(i))
                else:
                    dump_final_layer(sess, weights_dir, name='final_layer')
def pnasnet_large_arg_scope(weight_decay=4e-5,
                            batch_norm_decay=0.9997,
                            batch_norm_epsilon=0.001):
    """Default arg scope for the PNASNet Large ImageNet model."""
    return nasnet.nasnet_large_arg_scope(
        weight_decay, batch_norm_decay, batch_norm_epsilon)
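# Illustrative usage sketch (not from the source): wrap the PNASNet builder
# from the same slim package in this scope, mirroring the NASNet snippets
# above; `pnasnet.build_pnasnet_large` is assumed importable from
# nets.nasnet.pnasnet.
images = tf.placeholder(tf.float32, shape=[None, 331, 331, 3])
with slim.arg_scope(pnasnet_large_arg_scope()):
    logits, end_points = pnasnet.build_pnasnet_large(
        images, num_classes=1001, is_training=False)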
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the NASNet-A
    network after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    del scope

    # Note that we always feed into 2 layers of equal depth,
    # where the first N channels correspond to the previous hidden layer
    # and the second N channels correspond to the final hidden layer.
    hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)

    # Note that what follows is largely a copy of build_nasnet_large() within
    # nasnet.py. We are copying to minimize code pollution in slim.

    # TODO(shlens,skornblith): Determine the appropriate drop path schedule.
    # For now the schedule is the default (1.0->0.7 over 250,000 train steps).
    hparams = nasnet.large_imagenet_config()
    if not self._is_training:
        hparams.set_hparam('drop_path_keep_prob', 1.0)

    # Calculate the total number of cells in the network.
    # -- Add 2 for the reduction cells.
    total_num_cells = hparams.num_cells + 2
    # -- And add 2 for the stem cells for ImageNet training.
    total_num_cells += 2

    normal_cell = nasnet_utils.NasNetANormalCell(
        hparams.num_conv_filters, hparams.drop_path_keep_prob,
        total_num_cells, hparams.total_training_steps)
    reduction_cell = nasnet_utils.NasNetAReductionCell(
        hparams.num_conv_filters, hparams.drop_path_keep_prob,
        total_num_cells, hparams.total_training_steps)
    with arg_scope([slim.dropout, nasnet_utils.drop_path],
                   is_training=self._is_training):
        with arg_scope([slim.batch_norm],
                       is_training=self._train_batch_norm):
            with arg_scope([slim.avg_pool2d, slim.max_pool2d, slim.conv2d,
                            slim.batch_norm, slim.separable_conv2d,
                            nasnet_utils.factorized_reduction,
                            nasnet_utils.global_avg_pool,
                            nasnet_utils.get_channel_index,
                            nasnet_utils.get_channel_dim],
                           data_format=hparams.data_format):
                # This corresponds to the cell number just past 'Cell_11'
                # used by _extract_proposal_features().
                start_cell_num = 12
                # Note that this number equals:
                # start_cell_num + 2 stem cells + 1 reduction cell
                true_cell_num = 15

                with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
                    net = _build_nasnet_base(
                        hidden_previous,
                        hidden,
                        normal_cell=normal_cell,
                        reduction_cell=reduction_cell,
                        hparams=hparams,
                        true_cell_num=true_cell_num,
                        start_cell_num=start_cell_num)

    proposal_classifier_features = net
    return proposal_classifier_features
def write_weights(path):
    checkpoints_dir = os.path.join(path, 'checkpoints', 'NASNet-A_Large_331')
    print('checkpoints_dir', checkpoints_dir)
    weights_dir = os.path.join(path, 'weights', 'NASNet-A_Large_331')
    print('weights_dir', weights_dir)

    # download model
    file_checkpoint = os.path.join(checkpoints_dir, 'model.ckpt.index')
    if not tf.gfile.Exists(file_checkpoint):
        tf.gfile.MakeDirs(checkpoints_dir)
        dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)
    file_checkpoint = os.path.join(checkpoints_dir, 'model.ckpt')

    with tf.Graph().as_default():
        # Create model architecture
        image_size = 331
        print('image_size', image_size)
        inputs_np = np.ones((1, image_size, image_size, 3), dtype=np.float32)
        #inputs_np = np.load(weights_dir + '/input.npy')
        print('input', inputs_np.shape)
        inputs = tf.constant(inputs_np, dtype=tf.float32)
        with slim.arg_scope(nasnet_large_arg_scope()):
            logits, _ = nasnet.build_nasnet_large(inputs, num_classes=1001,
                                                  is_training=False)

        with tf.Session() as sess:
            # Initialize model
            init_fn = slim.assign_from_checkpoint_fn(
                file_checkpoint, slim.get_model_variables())
            init_fn(sess)

            # Display model variables
            for v in slim.get_model_variables():
                print('name = {}, shape = {}'.format(v.name, v.get_shape()))

            # Create graph
            os.system("rm -rf logs")
            os.system("mkdir -p logs")
            writer = tf.summary.FileWriter('logs',
                                           graph=tf.get_default_graph())

            # conv0
            dump_conv2d(sess=sess, path=weights_dir, name='conv0')
            dump_bn(sess=sess, path=weights_dir, name='conv0_bn')

            # cell_stem
            dump_cell_stem_0(sess=sess, path=weights_dir, name='cell_stem_0')
            dump_cell_stem_1(sess=sess, path=weights_dir, name='cell_stem_1')

            # first block: cells 0-5, then reduction_cell_0
            dump_first_cell(sess=sess, path=weights_dir, name='cell_0')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_1')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_2')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_3')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_4')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_5')
            dump_reduction_cell(sess=sess, path=weights_dir,
                                name='reduction_cell_0')

            # second block: cells 6-11, then reduction_cell_1
            dump_first_cell(sess=sess, path=weights_dir, name='cell_6')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_7')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_8')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_9')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_10')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_11')
            dump_reduction_cell(sess=sess, path=weights_dir,
                                name='reduction_cell_1')

            # third block: cells 12-17, then the final layer
            dump_first_cell(sess=sess, path=weights_dir, name='cell_12')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_13')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_14')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_15')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_16')
            dump_normal_cell(sess=sess, path=weights_dir, name='cell_17')
            dump_final_layer(sess, weights_dir, name='final_layer')
def process(lines, split_index_list, output_file, model, image_size,
            batch_size):
    frame_ids = []
    frame_id_to_path = {}
    frame_id_to_image_ids = {}
    image_id_to_path = {}
    image_id_to_coordinates = {}
    for i in range(len(split_index_list) - 1):
        frame_path = lines[split_index_list[i]].rstrip()
        frame_id = ntpath.basename(frame_path)
        frame_ids.append(frame_id)
        frame_id_to_path[frame_id] = frame_path
        num_images = split_index_list[i + 1] - split_index_list[i] - 1
        image_ids = []
        for j in range(num_images):
            line = lines[split_index_list[i] + j + 1]
            fields = line.rstrip().split("\t")
            image_path = fields[0]
            image_id = ntpath.basename(image_path)
            coordinates = "\t".join(fields[1:5])
            image_path = os.path.join(IMAGES_FOLDER, image_id)
            image_id_to_path[image_id] = image_path
            image_id_to_coordinates[image_id] = coordinates
            image_ids.append(image_id)
        frame_id_to_image_ids[frame_id] = image_ids

        # accumulate frames until a full batch is available
        if (len(image_id_to_path) < batch_size
                and i + 1 < len(split_index_list) - 1) or len(frame_ids) == 0:
            continue
        print(frame_id)
        output = open(output_file, "a")
        with tf.Session(graph=tf.Graph()) as sess:
            # pick the arg scope matching the requested model, then run the
            # same batch-processing routine under it
            if model == 'inception_v1':
                scope = inception.inception_v1_arg_scope()
            elif model == 'inception_v2':
                scope = inception.inception_v2_arg_scope()
            elif model == 'inception_v3':
                scope = inception.inception_v3_arg_scope()
            elif model == 'inception_v4':
                scope = inception.inception_v4_arg_scope()
            elif model in ('resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152'):
                scope = resnet_v1.resnet_arg_scope()
            elif model in ('mobilenet_v1_0.25_128', 'mobilenet_v1_0.50_160',
                           'mobilenet_v1_1.0_224'):
                scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)
            elif model == 'inception_resnet_v2':
                scope = inception_resnet_v2.inception_resnet_v2_arg_scope()
            elif model == 'nasnet_mobile':
                scope = nasnet.nasnet_mobile_arg_scope()
            elif model == 'nasnet_large':
                scope = nasnet.nasnet_large_arg_scope()
            elif model == 'vgg_16':
                scope = vgg.vgg_arg_scope()
            with slim.arg_scope(scope):
                process_super_batch(frame_ids, frame_id_to_path,
                                    frame_id_to_image_ids, image_id_to_path,
                                    image_id_to_coordinates, output, model,
                                    image_size, sess)
        output.close()
        frame_ids = []
        frame_id_to_path = {}
        frame_id_to_image_ids = {}
        image_id_to_path = {}
        image_id_to_coordinates = {}
def run_training(path_db, pid, category, task_id, path_unknown,
                 pretrained_dir, tensorflow_dir, path_save,
                 num_epochs=1000, batch_size=32, finetune_last_layer=False,
                 data_augmentation=True, mix_up=False,
                 network_model='inception-v3', restore_all_parameters=False,
                 initial_learning_rate=0.0002,
                 learning_rate_decay_factor=0.7, num_epochs_before_decay=2):
    ##### start parameters for creating TFRecord files #####
    #validation_size = 0.1
    validation_size = 0.0
    num_shards = 2
    random_seed = 0
    ##### end parameters for creating TFRecord files #####

    dataset_dir = os.path.join(path_db, pid, category)
    log_dir = path_save
    tfrecord_filename = pid + '_' + category
    if _dataset_exists(dataset_dir=dataset_dir, _NUM_SHARDS=num_shards,
                       output_filename=tfrecord_filename):
        print('Dataset files already exist. Overwrite them.')

    photo_filenames, class_names = _get_filenames_and_classes(
        dataset_dir, path_unknown)

    # dictionary for class name and class ID
    class_names_to_ids = dict(zip(class_names, range(len(class_names))))

    # number of validation examples
    num_validation = int(validation_size * len(photo_filenames))

    # divide to training and validation data
    random.seed(random_seed)
    random.shuffle(photo_filenames)
    training_filenames = photo_filenames[num_validation:]
    validation_filenames = photo_filenames[:num_validation]

    # find available GPU ID
    gpu_id = gpu_utils.pick_gpu_lowest_memory()

    # if log directory does not exist, create log directory and dataset
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
        print('found lowest memory gpu id : ' + str(gpu_id))
        _convert_dataset(gpu_id, 'train', training_filenames,
                         class_names_to_ids, dataset_dir=dataset_dir,
                         tfrecord_filename=tfrecord_filename,
                         _NUM_SHARDS=num_shards)
        _convert_dataset(gpu_id, 'validation', validation_filenames,
                         class_names_to_ids, dataset_dir=dataset_dir,
                         tfrecord_filename=tfrecord_filename,
                         _NUM_SHARDS=num_shards)
        labels_to_class_names = dict(zip(range(len(class_names)),
                                         class_names))
        write_label_file(labels_to_class_names, dataset_dir)
        print('finished creating dataset ' + tfrecord_filename)

    # start training
    output_label_filepath = os.path.join(dataset_dir, 'labels.txt')

    if network_model not in ('inception-v4', 'inception-v3', 'resnet-v2-50',
                             'resnet-v2-152', 'vgg-16', 'mobilenet-v1',
                             'nasnet-large', 'nasnet-mobile'):
        print("invalid network model : " + network_model)
        sys.exit()

    # find pretrained model
    if os.path.exists(os.path.join(log_dir, 'model.ckpt')):
        checkpoint_file = os.path.join(log_dir, 'model.ckpt')
    else:
        if network_model == 'inception-v4':
            checkpoint_file = os.path.join(
                pretrained_dir, 'inception_resnet_v2_2016_08_30.ckpt')
        elif network_model == 'inception-v3':
            checkpoint_file = os.path.join(pretrained_dir,
                                           'inception_v3.ckpt')
        elif network_model == 'resnet-v2-50':
            checkpoint_file = os.path.join(pretrained_dir,
                                           'resnet_v2_50.ckpt')
        elif network_model == 'resnet-v2-152':
            checkpoint_file = os.path.join(pretrained_dir,
                                           'resnet_v2_152.ckpt')
        elif network_model == 'vgg-16':
            checkpoint_file = os.path.join(pretrained_dir, 'vgg_16.ckpt')
        elif network_model == 'mobilenet-v1':
            checkpoint_file = os.path.join(pretrained_dir,
                                           'mobilenet_v1_1.0_224.ckpt')
        elif network_model == 'nasnet-large':
            checkpoint_file = os.path.join(pretrained_dir,
                                           'nasnet-a_large_04_10_2017',
                                           'model.ckpt')
        elif network_model == 'nasnet-mobile':
            checkpoint_file = os.path.join(pretrained_dir,
                                           'nasnet-a_mobile_04_10_2017',
                                           'model.ckpt')
        else:
            print("invalid network model : " + network_model)
            sys.exit()

    # set image size
    if network_model in ('inception-v4', 'inception-v3', 'resnet-v2-50',
                         'resnet-v2-152'):
        image_size = 299
    elif network_model in ('vgg-16', 'mobilenet-v1', 'nasnet-mobile'):
        image_size = 224
    elif network_model == 'nasnet-large':
        image_size = 331
    else:
        print("invalid network model : " + network_model)
        sys.exit()

    # create the file pattern of TFRecord files
    file_pattern = tfrecord_filename + '_%s_*.tfrecord'
    file_pattern_for_counting = tfrecord_filename

    labels_to_name, label_list = load_labels(output_label_filepath)
    num_classes = len(label_list)

    # create a dataset description
    items_to_descriptions = {
        'image': 'A 3-channel RGB coloured image that is either '
                 + ','.join(label_list),
        'label': 'A label that is as such -- ' + ','.join(
            [str(key) + ':' + labels_to_name[key]
             for key in labels_to_name.keys()])
    }

    # start training
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)

        # create dataset and load one batch
        dataset = get_split('train', dataset_dir, file_pattern,
                            file_pattern_for_counting, labels_to_name,
                            num_classes, items_to_descriptions)
        images, _, labels = load_batch(dataset, batch_size=batch_size,
                                       data_augmentation=data_augmentation,
                                       mix_up=mix_up, height=image_size,
                                       width=image_size)

        # number of steps to take before decaying the learning rate and
        # batches per epoch
        num_batches_per_epoch = int(dataset.num_samples / batch_size)
        # because one step is one batch processed
        num_steps_per_epoch = num_batches_per_epoch
        decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)

        # create model for inference
        finetune_vars = []
        if network_model == 'inception-v4':
            with slim.arg_scope(inception_resnet_v2_arg_scope()):
                logits, end_points = inception_resnet_v2(
                    images, num_classes=dataset.num_classes,
                    is_training=True)
            finetune_vars = ['InceptionResnetV2/Logits',
                             'InceptionResnetV2/AuxLogits']
        elif network_model == 'inception-v3':
            with slim.arg_scope(inception_v3_arg_scope()):
                logits, end_points = inception_v3(
                    images, num_classes=dataset.num_classes,
                    is_training=True)
            finetune_vars = ['InceptionV3/Logits', 'InceptionV3/AuxLogits']
        elif network_model == 'resnet-v2-50':
            with slim.arg_scope(resnet_arg_scope()):
                logits, end_points = resnet_v2_50(
                    images, num_classes=dataset.num_classes,
                    is_training=True)
            finetune_vars = ['resnet_v2_50/logits']
        elif network_model == 'resnet-v2-152':
            with slim.arg_scope(resnet_arg_scope()):
                logits, end_points = resnet_v2_152(
                    images, num_classes=dataset.num_classes,
                    is_training=True)
            finetune_vars = ['resnet_v2_152/logits']
        elif network_model == 'vgg-16':
            with slim.arg_scope(vgg_arg_scope()):
                logits, _ = vgg_16(images,
                                   num_classes=dataset.num_classes,
                                   is_training=True)
            finetune_vars = ['vgg_16/fc8']
        elif network_model == 'mobilenet-v1':
            with slim.arg_scope(mobilenet_v1_arg_scope()):
                logits, end_points = mobilenet_v1(
                    images, num_classes=dataset.num_classes,
                    is_training=True)
            finetune_vars = ['MobilenetV1/Logits']
        elif network_model == 'nasnet-large':
            with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
                logits, end_points = nasnet.build_nasnet_large(
                    images, dataset.num_classes)
            finetune_vars = ['final_layer', 'aux_11',
                             'cell_stem_0/comb_iter_0/left/global_step']
        elif network_model == 'nasnet-mobile':
            with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
                logits, end_points = nasnet.build_nasnet_mobile(
                    images, dataset.num_classes)
            finetune_vars = ['final_layer', 'aux_7']
        else:
            print("invalid network model : " + network_model)
            sys.exit()

        # define the scopes that you want to exclude for restoration
        exclude = []
        if not restore_all_parameters:
            exclude = finetune_vars
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)

        if mix_up:
            labels.set_shape([batch_size, dataset.num_classes])
            logits.set_shape([batch_size, dataset.num_classes])
            loss = tf.losses.sigmoid_cross_entropy(labels, logits)
        else:
            # perform one-hot-encoding of the labels (Try one-hot-encoding
            # within the load_batch function!)
            one_hot_labels = slim.one_hot_encoding(labels,
                                                   dataset.num_classes)
            # performs the equivalent to
            # tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced
            # with checks
            loss = tf.losses.softmax_cross_entropy(
                onehot_labels=one_hot_labels, logits=logits)
        # obtain the regularization losses as well
        total_loss = tf.losses.get_total_loss()

        # create the global step for monitoring the learning_rate and
        # training
        global_step = tf.train.get_or_create_global_step()

        # define your exponentially decaying learning rate
        lr = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=learning_rate_decay_factor,
            staircase=True)

        # define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        # create train_op
        if finetune_last_layer:
            variables_to_train = get_variables_to_train_by_scopes(
                finetune_vars)
            print("finetune variables : " + str(variables_to_train))
            train_op = slim.learning.create_train_op(
                total_loss, optimizer,
                variables_to_train=variables_to_train)
        else:
            train_op = slim.learning.create_train_op(total_loss, optimizer)

        # define prediction matrix
        if network_model in ('inception-v4', 'inception-v3', 'mobilenet-v1',
                             'nasnet-large', 'nasnet-mobile'):
            predictions = tf.argmax(end_points['Predictions'], 1)
            probabilities = end_points['Predictions']
        elif network_model in ('resnet-v2-50', 'resnet-v2-152'):
            predictions = tf.argmax(end_points['predictions'], 1)
            probabilities = end_points['predictions']
        elif network_model == 'vgg-16':
            predictions = tf.argmax(logits, 1)
            probabilities = tf.nn.softmax(logits)
        else:
            print("invalid network model : " + network_model)
            sys.exit()
        if mix_up:
            argmax_labels = tf.argmax(labels, 1)
            accuracy, accuracy_update = \
                tf.contrib.metrics.streaming_accuracy(predictions,
                                                      argmax_labels)
        else:
            accuracy, accuracy_update = \
                tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        # create summaries
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        # define training step function that runs both the train_op and
        # metrics_op and updates the global_step concurrently
        def train_step(sess, train_op, global_step):
            # check the time for each sess run
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run(
                [train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            # run the logging to print some results
            logging.info('global step %s: loss: %.4f (%.2f sec/step)',
                         global_step_count, total_loss, time_elapsed)

            return total_loss, int(global_step_count)

        # create a saver function that actually restores the variables
        # from a checkpoint file
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        # define your supervisor for running a managed session
        sv = tf.train.Supervisor(logdir=log_dir, summary_op=None,
                                 init_fn=restore_fn)

        # run the managed session
        start_train_time = time.time()
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                visible_device_list=str(gpu_id),
                per_process_gpu_memory_fraction=0.4))
        with sv.prepare_or_wait_for_session(config=session_config) as sess:
            # initialized so early-cancel returns are well defined
            global_step_count = 0
            for step in range(num_steps_per_epoch * num_epochs):
                # check if training task is not canceled
                if not controller.check_train_task_alive(pid, category,
                                                         task_id):
                    print('Training task is canceled.')
                    sv.stop()
                    return (False, "", "", output_label_filepath,
                            global_step_count)

                # at the start of every epoch, show the vital information:
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch %s/%s',
                                 step / num_batches_per_epoch + 1,
                                 num_epochs)
                    learning_rate_value, accuracy_value = sess.run(
                        [lr, accuracy])
                    logging.info('Current Learning Rate: %s',
                                 learning_rate_value)
                    logging.info('Current Streaming Accuracy: %s',
                                 accuracy_value)

                    # optionally, print your logits and predictions for a
                    # sanity check that things are going fine
                    logits_value, probabilities_value, predictions_value, \
                        labels_value = sess.run(
                            [logits, probabilities, predictions, labels])
                    print('logits: \n', logits_value)
                    print('Probabilities: \n', probabilities_value)
                    print('predictions: \n', predictions_value)
                    print('Labels:\n:', labels_value)

                # log the summaries every 10 steps
                if step % 10 == 0:
                    loss, global_step_count = train_step(sess, train_op,
                                                         sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)
                # if not, simply run the training step
                else:
                    loss, global_step_count = train_step(sess, train_op,
                                                         sv.global_step)

                # if specific time passes, save model for evaluation
                time_elapsed_train = time.time() - start_train_time
                print('training time : ' + str(time_elapsed_train))

            # log the final training loss and accuracy
            logging.info(
                'Training Progress : %.2f %% ',
                100.0 * step / float(num_steps_per_epoch * num_epochs))
            logging.info('Final Loss: %s', loss)
            logging.info('Global Step: %s', global_step_count)
            logging.info('Final Accuracy: %s', sess.run(accuracy))

            # after all the training has been done, save the log files and
            # checkpoint model
            logging.info('Finished training! Saving model to disk now.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

            # save graph definition file
            output_graph_filepath = os.path.join(log_dir, 'graph.pb')
            export_graph_command_exec = "./network/export_slim_graph.py"
            if not os.path.exists(export_graph_command_exec):
                print("fatal error, cannot find command : "
                      + export_graph_command_exec)
                sys.exit()
            export_graph_command_env = os.environ.copy()
            export_graph_command_env["CUDA_VISIBLE_DEVICES"] = ''
            export_graph_command = []
            export_graph_command.append(sys.executable)
            export_graph_command.append(export_graph_command_exec)
            export_graph_command.append(network_model)
            export_graph_command.append(str(dataset.num_classes))
            export_graph_command.append(output_graph_filepath)
            print("start exec:" + " ".join(export_graph_command))
            proc = subprocess.Popen(export_graph_command,
                                    env=export_graph_command_env)
            print("export graph process ID=" + str(proc.pid))
            controller.upsert_train_child_process(task_id, proc.pid)
            proc.communicate()
            controller.delete_train_child_process(task_id, proc.pid)
            print("finish exec:" + " ".join(export_graph_command))
            if not controller.check_train_task_alive(pid, category, task_id):
                print('Training task is canceled.')
                sv.stop()
                return (False, "", "", output_label_filepath,
                        global_step_count)

            # save frozen graph, optimized graph, and quantized graph from
            # graph definition and checkpoint
            latest_checkpoint_filepath = tf.train.latest_checkpoint(log_dir)
            # you can check output node name by
            # tensorflow/tools/graph_transforms::summarize_graph
            # https://github.com/tensorflow/models/tree/master/research/slim#Export
            output_node_names = ""
            if network_model == 'inception-v4':
                output_node_names = "InceptionResnetV2/Logits/Predictions"
            elif network_model == 'inception-v3':
                output_node_names = ("InceptionV3/AuxLogits/SpatialSqueeze,"
                                     "InceptionV3/Predictions/Reshape_1")
            elif network_model == 'resnet-v2-50':
                output_node_names = "resnet_v2_50/predictions/Reshape_1"
            elif network_model == 'resnet-v2-152':
                output_node_names = "resnet_v2_152/predictions/Reshape_1"
            elif network_model == 'vgg-16':
                output_node_names = "vgg_16/fc8/squeezed"
            elif network_model == 'mobilenet-v1':
                output_node_names = "MobilenetV1/Predictions/Reshape_1"
            elif network_model in ('nasnet-large', 'nasnet-mobile'):
                output_node_names = "final_layer/predictions"
            else:
                print("invalid network model : " + network_model)
                sys.exit()

            output_frozen_graph_filepath = os.path.join(log_dir,
                                                        'frozen_graph.pb')
            freeze_graph_command_exec = os.path.join(
                tensorflow_dir,
                "bazel-bin/tensorflow/python/tools/freeze_graph")
            if not os.path.exists(freeze_graph_command_exec):
                print("fatal error, cannot find command : "
                      + freeze_graph_command_exec)
                sys.exit()
            freeze_graph_command_env = os.environ.copy()
            freeze_graph_command_env["CUDA_VISIBLE_DEVICES"] = ''
            freeze_graph_command = []
            freeze_graph_command.append(freeze_graph_command_exec)
            freeze_graph_command.append("--input_graph="
                                        + output_graph_filepath)
            freeze_graph_command.append("--input_checkpoint="
                                        + latest_checkpoint_filepath)
            freeze_graph_command.append("--input_binary=true")
            freeze_graph_command.append("--output_graph="
                                        + output_frozen_graph_filepath)
            freeze_graph_command.append("--output_node_names="
                                        + output_node_names)
            print("start exec:" + " ".join(freeze_graph_command))
            proc = subprocess.Popen(freeze_graph_command,
                                    env=freeze_graph_command_env)
            print("freeze graph process ID=" + str(proc.pid))
            controller.upsert_train_child_process(task_id, proc.pid)
            proc.communicate()
            controller.delete_train_child_process(task_id, proc.pid)
            print("finish exec:" + " ".join(freeze_graph_command))
            if not controller.check_train_task_alive(pid, category, task_id):
                print('Training task is canceled.')
                sv.stop()
                return (False, "", "", output_label_filepath,
                        global_step_count)

            output_optimized_graph_filepath = os.path.join(
                log_dir, 'optimized_graph.pb')
            optimize_graph_command_exec = os.path.join(
                tensorflow_dir,
                "bazel-bin/tensorflow/python/tools/optimize_for_inference")
            if not os.path.exists(optimize_graph_command_exec):
                print("fatal error, cannot find command : "
                      + optimize_graph_command_exec)
                sys.exit()
            optimize_graph_command_env = os.environ.copy()
            optimize_graph_command_env["CUDA_VISIBLE_DEVICES"] = ''
            optimize_graph_command = []
            optimize_graph_command.append(optimize_graph_command_exec)
            optimize_graph_command.append("--input="
                                          + output_frozen_graph_filepath)
            optimize_graph_command.append("--output="
                                          + output_optimized_graph_filepath)
            optimize_graph_command.append("--input_names=input")
            optimize_graph_command.append("--output_names="
                                          + output_node_names)
            optimize_graph_command.append("--frozen_graph=true")
            print("start exec:" + " ".join(optimize_graph_command))
            proc = subprocess.Popen(optimize_graph_command,
                                    env=optimize_graph_command_env)
            print("optimize graph process ID=" + str(proc.pid))
            controller.upsert_train_child_process(task_id, proc.pid)
            proc.communicate()
            controller.delete_train_child_process(task_id, proc.pid)
            print("finish exec:" + " ".join(optimize_graph_command))
            if not controller.check_train_task_alive(pid, category, task_id):
                print('Training task is canceled.')
                sv.stop()
                return (False, "", "", output_label_filepath,
                        global_step_count)

            output_quantized_graph_filepath = os.path.join(
                log_dir, 'quantized_graph.pb')
            quantize_graph_command_exec = os.path.join(
                tensorflow_dir,
                "bazel-bin/tensorflow/tools/quantization/quantize_graph")
            if not os.path.exists(quantize_graph_command_exec):
                print("fatal error, cannot find command : "
                      + quantize_graph_command_exec)
                sys.exit()
            quantize_graph_command_env = os.environ.copy()
            quantize_graph_command_env["CUDA_VISIBLE_DEVICES"] = ''
            quantize_graph_command = []
            quantize_graph_command.append(quantize_graph_command_exec)
            quantize_graph_command.append("--input="
                                          + output_optimized_graph_filepath)
            quantize_graph_command.append("--output="
                                          + output_quantized_graph_filepath)
            quantize_graph_command.append("--input_node_names=input")
            quantize_graph_command.append("--output_node_names="
                                          + output_node_names)
            quantize_graph_command.append("--mode=eightbit")
            print("start exec:" + " ".join(quantize_graph_command))
            proc = subprocess.Popen(quantize_graph_command,
                                    env=quantize_graph_command_env)
            print("quantize graph process ID=" + str(proc.pid))
            controller.upsert_train_child_process(task_id, proc.pid)
            proc.communicate()
            controller.delete_train_child_process(task_id, proc.pid)
            print("finish exec:" + " ".join(quantize_graph_command))
            if not controller.check_train_task_alive(pid, category, task_id):
                print('Training task is canceled.')
                sv.stop()
                return (False, "", "", output_label_filepath,
                        global_step_count)

            return (True, output_optimized_graph_filepath,
                    output_quantized_graph_filepath, output_label_filepath,
                    global_step_count)
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the NASNet-A
    network after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    del scope

    # Note that we always feed into 2 layers of equal depth,
    # where the first N channels correspond to the previous hidden layer
    # and the second N channels correspond to the final hidden layer.
    hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)

    # Note that what follows is largely a copy of build_nasnet_large() within
    # nasnet.py. We are copying to minimize code pollution in slim.

    # pylint: disable=protected-access
    hparams = nasnet._large_imagenet_config(is_training=self._is_training)
    # pylint: enable=protected-access

    # Calculate the total number of cells in the network.
    # -- Add 2 for the reduction cells.
    total_num_cells = hparams.num_cells + 2
    # -- And add 2 for the stem cells for ImageNet training.
    total_num_cells += 2

    normal_cell = nasnet_utils.NasNetANormalCell(
        hparams.num_conv_filters, hparams.drop_path_keep_prob,
        total_num_cells, hparams.total_training_steps)
    reduction_cell = nasnet_utils.NasNetAReductionCell(
        hparams.num_conv_filters, hparams.drop_path_keep_prob,
        total_num_cells, hparams.total_training_steps)
    with arg_scope([slim.dropout, nasnet_utils.drop_path],
                   is_training=self._is_training):
        with arg_scope([slim.batch_norm],
                       is_training=self._train_batch_norm):
            with arg_scope([slim.avg_pool2d, slim.max_pool2d, slim.conv2d,
                            slim.batch_norm, slim.separable_conv2d,
                            nasnet_utils.factorized_reduction,
                            nasnet_utils.global_avg_pool,
                            nasnet_utils.get_channel_index,
                            nasnet_utils.get_channel_dim],
                           data_format=hparams.data_format):
                # This corresponds to the cell number just past 'Cell_11'
                # used by _extract_proposal_features().
                start_cell_num = 12
                # Note that this number equals:
                # start_cell_num + 2 stem cells + 1 reduction cell
                true_cell_num = 15

                with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
                    net = _build_nasnet_base(
                        hidden_previous,
                        hidden,
                        normal_cell=normal_cell,
                        reduction_cell=reduction_cell,
                        hparams=hparams,
                        true_cell_num=true_cell_num,
                        start_cell_num=start_cell_num)

    proposal_classifier_features = net
    return proposal_classifier_features
def Train(input_dir, output_dir):
    # some parameters
    batch_shape = [batch_size, FLAGS.img_size, FLAGS.img_size, 3]

    with tf.Graph().as_default():
        # Prepare graph
        train_img = tf.placeholder(
            tf.float32, shape=[None, FLAGS.img_size, FLAGS.img_size, 3])
        train_label = tf.placeholder(tf.float32, shape=[None, 2])
        with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
            logits, end_points = nasnet.build_nasnet_large(
                train_img, num_classes=2, is_training=True)
        predict = tf.argmax(end_points['Predictions'], 1)
        logits = end_points['Logits']
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=train_label,
                                                    logits=logits))
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train_step = optimizer.minimize(cost)
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(predict, tf.argmax(train_label, 1)),
                    tf.float32))

        # Run computation
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            if FineTune:
                from tensorflow.python import pywrap_tensorflow
                reader = pywrap_tensorflow.NewCheckpointReader(FLAGS.model)
                var_to_shape_map = reader.get_variable_to_shape_map()
                for key in var_to_shape_map:
                    if 'logits' in key.lower():
                        print('tensor_name: ', key)
                # exclude the classifier-head variables so the new 2-class
                # head is trained from scratch
                exclusion = ['Logits/', 'final_layer/',
                             'aux_11/aux_logits', 'FC/']
                except_logitis = slim.get_variables_to_restore(
                    exclude=exclusion)
                #print(except_logitis)
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.model, except_logitis, ignore_missing_vars=True)
                init_fn(sess)
                saver = tf.train.Saver()
            else:
                saver = tf.train.Saver(
                    slim.get_model_variables(scope=model_type))
                saver.restore(sess, FLAGS.model)

            for epoch in range(epoch_num):
                train_acc = []
                count = 0
                train_loss = []
                val_acc = []
                val_loss = []
                epoch_learning_rate = init_learning_rate
                for raw_images, true_labels in load_images_with_true_label(
                        input_dir, train=True):
                    labels = one_hot(np.array(true_labels), 2)
                    img = np.array((raw_images / 255.0) * 2.0 - 1.0)
                    img = np.reshape(
                        img,
                        [batch_size, FLAGS.img_size, FLAGS.img_size, 3])
                    train_feed_dict = {
                        train_img: img,
                        train_label: labels,
                        learning_rate: epoch_learning_rate
                    }
                    sess.run(train_step, feed_dict=train_feed_dict)
                    batch_loss = cost.eval(feed_dict=train_feed_dict)
                    batch_acc = accuracy.eval(feed_dict=train_feed_dict)
                    train_acc.append(batch_acc)
                    train_loss.append(batch_loss)
                    count += 1
                    if count % 100 == 0:
                        print('acc: ', np.mean(np.array(train_acc)),
                              ' loss: ', np.mean(np.array(train_loss)))
                saver.save(sess=sess,
                           save_path='./train_model/%s_%s.ckpt'
                           % (model_type, epoch))
                for raw_images, true_labels in load_images_with_true_label(
                        output_dir, train=False):
                    labels = one_hot(np.array(true_labels), 2)
                    img = np.array((raw_images / 255.0) * 2.0 - 1.0)
                    img = np.reshape(
                        img,
                        [batch_size, FLAGS.img_size, FLAGS.img_size, 3])
                    val_feed_dict = {
                        train_img: img,
                        train_label: labels,
                        learning_rate: epoch_learning_rate
                    }
                    batch_acc = sess.run(accuracy, feed_dict=val_feed_dict)
                    val_acc.append(batch_acc)
                    count += 1
                print('val_acc: ', np.mean(np.array(val_acc)))
def __init__(self, X1, X2, num_classes, skipcon_attn=False,
             stop_gradient_sim=False, logit_concat_sim=False):

    def get_nasnet(inputs, num_classes):
        with tf.variable_scope('Nasnet_block', [inputs],
                               reuse=tf.AUTO_REUSE):
            logits, end_points = nasnet.build_nasnet_large(
                inputs, num_classes)
        return logits, end_points

    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        logits_1, end_points_1 = get_nasnet(X1, num_classes)
        features_1 = end_points_1['global_pool']
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        logits_2, end_points_2 = get_nasnet(X2, num_classes)
        features_2 = end_points_2['global_pool']

    if skipcon_attn:
        features_1 += tf.reduce_sum(features_1, [1, 2])
        features_2 += tf.reduce_sum(features_2, [1, 2])

    logits_1_support = logits_1
    logits_2_support = logits_2
    if stop_gradient_sim:
        features_1 = tf.stop_gradient(features_1)
        features_2 = tf.stop_gradient(features_2)
        logits_1_support = tf.stop_gradient(logits_1_support)
        logits_2_support = tf.stop_gradient(logits_2_support)

    def dual_vector_fc(feat_x, reuse=None):
        with tf.variable_scope("dual_vector_fc_v1") as scope:
            v = fully_connected(feat_x, 1024, activation_fn=tf.nn.relu,
                                scope=scope, reuse=reuse)
        with tf.variable_scope("dual_vector_fc_v2") as scope:
            v = fully_connected(v, 512, activation_fn=None,
                                scope=scope, reuse=reuse)
        return v

    # optionally concatenate the (possibly gradient-stopped) logits onto
    # the pooled features before the projection head
    if logit_concat_sim:
        feat_in_1 = tf.concat([features_1, logits_1_support], axis=-1)
        feat_in_2 = tf.concat([features_2, logits_2_support], axis=-1)
    else:
        feat_in_1 = features_1
        feat_in_2 = features_2

    # pass through 2 dense layers (feature -> 1024 -> 512 -> output)
    feat_attn_1 = dual_vector_fc(feat_in_1)
    feat_attn_2 = dual_vector_fc(feat_in_2, reuse=True)

    # normalize features
    normalize_a = tf.nn.l2_normalize(feat_attn_1, axis=1)
    normalize_b = tf.nn.l2_normalize(feat_attn_2, axis=1)

    # calculate similarity of features using cosine similarity, then shift
    # and scale so the target can approach 1
    sim_12 = tf.reduce_sum(tf.multiply(normalize_a, normalize_b), axis=1)
    sim_12 = (sim_12 - 0.5) * 32

    self.feature_vector = tf.add(feat_attn_1, 0, name="feature_vector")
    self.logits_1 = tf.add(logits_1, 0, name="logit_1")
    self.logits_2 = logits_2
    self.feat_attn_1 = feat_attn_1
    self.feat_attn_2 = feat_attn_2
    self.feat_map = tf.add(features_1, 0, name="feature_map")
    self.similarity = tf.expand_dims(sim_12, axis=-1)
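# Quick numeric check (illustrative, not from the source) that the
# normalize-multiply-sum pattern above computes cosine similarity:
# compare it against the dot-product definition on toy vectors.
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 0.0, 1.0])
cos = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
na, nb = a / np.linalg.norm(a), b / np.linalg.norm(b)
assert np.isclose(np.sum(na * nb), cos)  # both ~0.598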
    with slim.arg_scope(
            inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits, _ = inception_resnet_v2.inception_resnet_v2(
            processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_resnet_v2.ckpt'),
        slim.get_model_variables('InceptionResnetV2'))
elif model == 'nasnet_large':
    checkpoints_dir = 'C:/Users/cdcd/Downloads/imageClass/nasnet-a_large'
    image_size = nasnet.build_nasnet_large.default_image_size
    processed_image = inception_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch
    # norm parameters.
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        logits, _ = nasnet.build_nasnet_large(
            processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'model.ckpt'),
        slim.get_model_variables(''))
elif model == 'resnet_v2_50':
    checkpoints_dir = 'C:/Users/cdcd/Downloads/imageClass/resnet_v2_50'
    image_size = resnet_v2.resnet_v2_50.default_image_size
    processed_image = inception_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch
    # norm parameters.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(
            processed_images, num_classes=1001, is_training=False)