def predict(self, preprocessed_inputs):
    """Predict prediction tensors from inputs tensor.

    Outputs of this function can be passed to loss or postprocess functions.

    Args:
        preprocessed_inputs: A float32 tensor with shape [batch_size,
            height, width, num_channels] representing a batch of images.

    Returns:
        prediction_dict: A dictionary holding prediction tensors to be
            passed to the Loss or Postprocess functions.
    """
    # Honour an instance-level training flag when the object carries one;
    # otherwise keep the previous behaviour (training mode).
    is_training = getattr(self, '_is_training', True)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, _ = resnet_v1.resnet_v1_50(
            preprocessed_inputs, num_classes=None, is_training=is_training)

    # Drop axes 1 and 2 so the dense layer receives a rank-2 tensor.
    net = tf.squeeze(net, axis=[1, 2])
    logits = slim.fully_connected(net, num_outputs=self.num_classes,
                                  activation_fn=None, scope='Predict')
    return {'logits': logits}
def res_fcn_32s(inputs, num_classes, is_training):
    """Build an FCN-32s style segmentation graph on top of ResNet-50.

    Args:
        inputs: Image batch tensor fed to `resnet_v1.resnet_v1_50`.
        num_classes: Number of output classes; also the channel count of
            the bilinear upsampling kernel.
        is_training: Forwarded to the ResNet builder.

    Returns:
        A pair `(output, res_variables)`: the 32x upsampled prediction map
        and a dict mapping each variable's name, stripped of the
        'res_fcn_32s/' prefix and the ':0' suffix, to the variable.
    """
    with tf.variable_scope('res_fcn_32s'):
        # Backbone: ResNet-50 run fully convolutionally (no global pooling)
        # with an overall output stride of 32.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            features, _ = resnet_v1.resnet_v1_50(inputs,
                                                 num_classes,
                                                 is_training=is_training,
                                                 global_pool=False,
                                                 output_stride=32)

        # The backbone output is 32 times smaller than the input, so a
        # single transposed convolution with a fixed bilinear kernel
        # restores the input resolution (hence "32s").
        bilinear_kernel = tf.constant(
            upsampling.bilinear_upsample_weights(32, num_classes))
        dims = tf.shape(features)
        full_size = tf.stack([dims[0], dims[1] * 32, dims[2] * 32, dims[3]])
        output = tf.nn.conv2d_transpose(features,
                                        bilinear_kernel,
                                        output_shape=full_size,
                                        strides=[1, 32, 32, 1])

        # Key each variable by its name without the 12-character
        # 'res_fcn_32s/' prefix and the trailing ':0', so the names line up
        # with those of a network built outside this scope.
        res_variables = {
            var.name[12:-2]: var
            for var in slim.get_variables('res_fcn_32s')
        }

    return output, res_variables
def resnet_tensorboard():
    """Build ResNet-50 and ResNet-101 and dump the graph for TensorBoard.

    Both networks share one input placeholder. After the variables are
    initialised, the session graph is written to a summary directory and
    the writer is closed. Nothing is returned.
    """
    x_input = tf.placeholder(dtype=tf.float32,
                             shape=(None, image_size, image_size, 3))

    # Settings common to both network builders.
    common = dict(num_classes=1000,
                  is_training=False,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  store_non_strided_activations=False,
                  reuse=False)

    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(x_input, scope='resnet_v1_50', **common)
        resnet_v1.resnet_v1_101(x_input, scope='resnet_v1_101', **common)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # Creating the writer with a graph is what emits the graph event.
        writer = tf.summary.FileWriter(
            '/Users/alexwang/data/resnet_summary', graph=sess.graph)
        writer.close()
def main():
    """Freeze ResNet-50 up to block4 and run the frozen graph once.

    Restores './resnet_v1_50.ckpt', converts the variables feeding the
    'resnet_v1_50/block4' end point to constants, imports the result into a
    fresh graph, evaluates it on a batch of ones and prints the output
    shape.
    """
    ckpt_path = './resnet_v1_50.ckpt'

    X = tf.placeholder(tf.float32, shape=[None, 96, 96, 3], name='input')
    with slim.arg_scope(resnet_arg_scope()):
        _, end_points = resnet_v1_50(X, num_classes=1000, is_training=False)

    final_layer_to_load = end_points['resnet_v1_50/block4']
    output_tensor_name = final_layer_to_load.name
    # Node names are tensor names without the ':<index>' suffix.
    output_node_name = output_tensor_name.split(':')[0]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, ckpt_path)
        frozen_graph_def = convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=[output_node_name])

    frozen_graph = tf.Graph()
    with frozen_graph.as_default():
        tf.import_graph_def(frozen_graph_def, name='')

    # Use a context manager so the second session is released as well; the
    # original left it open and rebound the name of the first session.
    with tf.Session(graph=frozen_graph) as frozen_sess:
        res = frozen_sess.run(
            output_tensor_name,
            {'input:0': np.ones(shape=[12, 96, 96, 3])})

    print("out shape: {}".format(res.shape))
def model(images, name):
    """Build the ResNet-50 classification graph and return softmax scores.

    Args:
        images: Input data; cast to float32 and reshaped to
            [-1, 64, 64, 1] before use.
        name: Passed through unchanged as the first return value.

    Returns:
        A pair `(name, soft)` where `soft` is the softmax over the
        network's 'resnet_v1_50/spatial_squeeze' end point.
    """
    # Build the neural network as a computation graph.
    grey = tf.reshape(tf.cast(images, tf.float32), [-1, 64, 64, 1])
    # Upscale the single-channel images to 224x224 and replicate the
    # channel to get three channels.
    resized = tf.image.resize_images(grey, [224, 224])
    rgb = tf.image.grayscale_to_rgb(resized)

    _, end_points = resnet_v1.resnet_v1_50(
        rgb,
        num_classes=2,
        is_training=False,
        global_pool=True,
        output_stride=None,
        spatial_squeeze=True,
        store_non_strided_activations=False,
        reuse=None,
        scope='resnet_v1_50')

    squeezed_logits = end_points["resnet_v1_50/spatial_squeeze"]
    return name, tf.nn.softmax(squeezed_logits)
def endpoints(image, is_training):
    """Run ResNet-50 on `image` and return its end points.

    Args:
        image: Rank-4 image tensor; `_RGB_MEAN` is subtracted per channel.
        is_training: Forwarded to the ResNet builder.

    Returns:
        A pair `(end_points, 'resnet_v1_50')`. The dict additionally holds
        'model_output' and 'global_pool', both set to the mean of the
        block4 activations over axes 1 and 2.

    Raises:
        ValueError: If `image` is not rank 4.
    """
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    rgb_mean = tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))
    centered = image - rgb_mean

    scope_args = resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)
    with tf.contrib.slim.arg_scope(scope_args):
        _, end_points = resnet_v1_50(centered,
                                     num_classes=None,
                                     is_training=is_training,
                                     global_pool=True)

    pooled = tf.reduce_mean(
        end_points['resnet_v1_50/block4'], [1, 2], name='pool5')
    end_points['model_output'] = pooled
    end_points['global_pool'] = pooled

    return end_points, 'resnet_v1_50'
def predict(self, preprocessed_inputs):
    """Compute class logits for a batch of preprocessed images.

    Args:
        preprocessed_inputs: Image batch tensor fed to ResNet-50.

    Returns:
        A dict with a single key 'logits' holding the output of the final
        fully connected layer (`self.num_classes` outputs, no activation).
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        features, _ = resnet_v1.resnet_v1_50(
            preprocessed_inputs,
            num_classes=None,
            is_training=self._is_training)

    # Remove axes 1 and 2 before the dense layer.
    flat_features = tf.squeeze(features, axis=[1, 2])
    logits = slim.fully_connected(flat_features,
                                  num_outputs=self.num_classes,
                                  activation_fn=None,
                                  scope='Predict')
    return {'logits': logits}
def build_FPN(images, config, is_training, backbone='resnet50'):
    """Build a feature pyramid network on a ResNet backbone.

    Args:
        images: Image batch, [batch, h, w, channels].
        config: Object providing `TOP_DOWN_PYRAMID_SIZE`, the channel count
            of every pyramid level.
        is_training: Forwarded to the ResNet builder.
        backbone: Either 'resnet50' or 'resnet101'.

    Returns:
        Dict with keys 'P2', 'P3', 'P4', 'P5' holding the feature maps of
        the pyramid levels, each [batch, height, width, channels].

    Raises:
        ValueError: If `backbone` is not a supported name. (Previously this
            only printed a message and then failed with a KeyError.)
    """
    # backbone name -> (network scope, index of the block3 unit used as C4)
    backbone_specs = {
        'resnet50': ('resnet_v1_50', 5),
        'resnet101': ('resnet_v1_101', 22),
    }
    if backbone not in backbone_specs:
        raise ValueError("Unknown backbone : %s" % (backbone,))
    net_scope, c4_unit = backbone_specs[backbone]
    if backbone == 'resnet50':
        build_backbone = resnet_v1.resnet_v1_50
    else:
        build_backbone = resnet_v1.resnet_v1_101

    # Build the backbone network.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        _, end_points = build_backbone(
            images, is_training=is_training, scope=net_scope)

    pyramid = {
        'C2': end_points['%s/block1/unit_2/bottleneck_v1' % net_scope],
        'C3': end_points['%s/block2/unit_3/bottleneck_v1' % net_scope],
        'C4': end_points[
            '%s/block3/unit_%d/bottleneck_v1' % (net_scope, c4_unit)],
        'C5': end_points['%s/block4/unit_3/bottleneck_v1' % net_scope],
    }

    # Build the FPN top-down pathway. The order in which the layers are
    # created is kept as before so automatically generated layer names do
    # not change.
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(
                pyramid['C5'], config.TOP_DOWN_PYRAMID_SIZE, 1)
            for i in range(4, 1, -1):
                # Resize the coarser level to the spatial size of C<i>.
                upshape = tf.shape(pyramid['C%d' % i])
                u = tf.image.resize_bilinear(
                    pyramid_feature['P%d' % (i + 1)],
                    size=(upshape[1], upshape[2]))
                # 1x1 lateral connection, then merge and smooth with 3x3.
                c = slim.conv2d(
                    pyramid['C%d' % i], config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
def resnet_forward(self, x, layer, scope):
    """Return one ResNet-50 end point for the image batch `x`.

    Args:
        x: Image tensor; rescaled with 255 * (0.5 * (x + 1)) before the
            channel means are subtracted (presumably x lies in [-1, 1] -
            confirm with callers).
        layer: End point name relative to 'resnet_v1_50/'.
        scope: Not used by this method.

    Returns:
        The requested end point tensor.

    Side effects:
        Sets `self.reuse_resnet` to True after the network is built.
    """
    pixels = 255.0 * (0.5 * (x + 1.0))

    # Subtract the per-channel means (RGB means from the VGG paper).
    mean = tf.constant([123.68, 116.779, 103.939],
                       dtype=tf.float32,
                       shape=[1, 1, 1, 3],
                       name='img_mean')
    centered = pixels - mean

    # Send through ResNet.
    with slim.arg_scope(resnet_arg_scope()):
        _, layers = resnet_v1_50(centered,
                                 num_classes=1000,
                                 is_training=False,
                                 reuse=self.reuse_resnet)
    self.reuse_resnet = True

    return layers['resnet_v1_50/' + layer]
def net_feats(ims, num_classes=None, is_training=True, reuse=False, scope=''):
    """Compute CNN features for `ims` with the globally selected network.

    The module-level `net_type` chooses the backbone: 'resnet' builds
    ResNet-50 and flattens its first output, 'vgg' builds VGG-16 and
    returns its first output unchanged.

    Args:
        ims: Image batch tensor.
        num_classes: Forwarded to ResNet-50 only; the VGG-16 branch always
            passes `num_classes=None`.
        is_training: Forwarded to the network builder.
        reuse: Forwarded to the network builder.
        scope: Prefix for the network's variable scope.

    Returns:
        The feature tensor.

    Raises:
        ValueError: If `net_type` is neither 'resnet' nor 'vgg'.
            (Previously this surfaced as an unbound-variable error.)
    """
    if net_type == 'resnet':
        feats = resnet_v1.resnet_v1_50(ims,
                                       num_classes=num_classes,
                                       reuse=reuse,
                                       is_training=is_training,
                                       scope='%s_resnet_v1_50' % scope)[0]
        feats = slim.flatten(feats)
    elif net_type == 'vgg':
        feats = vgg.vgg_16(ims,
                           num_classes=None,
                           scope='%s_vgg_16' % scope,
                           reuse=reuse,
                           is_training=is_training)[0]
    else:
        raise ValueError('Unsupported net_type: %r' % (net_type,))

    # A single pre-formatted string prints the same text under Python 2 and
    # Python 3, unlike the former print statement.
    print('CNN feature shape: %s' % (shape(feats),))
    return feats
def tensorflow_resnet_model(self, images):
    """Classify './laska.png' with a restored ResNet-50 and print the ranking.

    Builds ResNet-50 on a fresh placeholder, restores
    './resnet_v1_50.ckpt', runs one image through the network and prints
    every class name with its score, highest score first.

    Args:
        images: Unused; the graph is fed from an internal placeholder. Kept
            so the call signature stays the same.
    """
    image_shape = [224, 224, 3]
    inputs = tf.placeholder(dtype=tf.float32,
                            shape=[self.config.BATCH_SIZE] + image_shape)

    with tf.contrib.slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v1_50(inputs, num_classes=1000)
    # NOTE(review): if 'predictions' is already a softmax output, this
    # applies softmax twice - confirm against the resnet_v1_50 in use.
    probs = tf.nn.softmax(end_points['predictions'])

    saver = tf.train.Saver()

    img1 = imread('./laska.png', mode='RGB')
    img1 = imresize(img1, (224, 224))

    # The context manager releases the session, which the original never
    # closed.
    with tf.Session() as sess:
        saver.restore(sess, "./resnet_v1_50.ckpt")
        # A single image is fed, so this presumably requires
        # BATCH_SIZE == 1 - TODO confirm.
        prob = sess.run(probs, feed_dict={inputs: [img1]})[0]

    print(len(prob))
    # Class indices sorted by descending score.
    preds = np.argsort(prob)[::-1]
    for p in preds:
        print(class_names[p], prob[p])
# Open the test set and report the shapes of its two datasets.
fid_test = h5py.File(path_test, 'r')
print("-" * 60)
print("test part")
s1_test = fid_test['sen1']
print(s1_test.shape)
s2_test = fid_test['sen2']
print(s2_test.shape)
class_num = 17
# Declare the input placeholder.
x = tf.placeholder(tf.float32, [None, 32, 32, 10], name="x")
# logits = model.model(x, False, 0.0, class_num, 0.0)
# logits, _ = inception_resnet_v2.inception_resnet_v2(x, class_num, is_training=False)
# ResNet-50 is used as a feature extractor; the classifier is the fully
# connected layer added below.
net, _ = resnet_v1.resnet_v1_50(x, is_training=False)
net = tf.squeeze(net, axis=[1, 2])  # drop axes 1 and 2
logits = slim.fully_connected(net, num_outputs=class_num, activation_fn=None, scope='predict')
# logits, _ = vgg.vgg_16(x, class_num, False)
# with slim.arg_scope(pre_trained_resnet.resnet_arg_scope(0.0)):
#     net, _ = pre_trained_resnet.resnet_v1_50(x)
# net = tf.squeeze(net, axis=[1, 2])  # drop axes 1 and 2
# logits = slim.fully_connected(net, num_outputs=class_num,
#                               activation_fn=None, scope='predict')
# From here on `logits` holds clipped softmax probabilities, not raw logits.
logits = tf.nn.softmax(logits)
logits = tf.clip_by_value(logits, 1e-10, 1)
# Predicted class index per example.
result = tf.argmax(logits, 1)
preprocessing_function=preprocess_vgg )
# Stack the two validation parts into single arrays.
vx = np.concatenate((vx1,vx2), axis=0)
vy = np.concatenate((vy1,vy2), axis=0)
print(vx.shape,vy.shape)
# Reset the graph: works around "Variable v1/weights already exists ...
# reuse=True or reuse=tf.AUTO_REUSE" errors.
tf.reset_default_graph()
# Model definition
# https://gist.github.com/omoindrot/dedc857cdc0e680dfb1be99762990c9c/
images = tf.placeholder(tf.float32, (None, 224, 224, 3), name='images')
labels = tf.placeholder(tf.int32, (None, len(classes)), name='labels')
is_training = tf.placeholder(tf.bool)
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, num_classes=len(classes))
# Enumerate the kinds of operations in the model
# A = set()
# for item in sess.graph.get_operations():
#     A.add(item.type)
# Saver used when restoring the whole model
restorer = tf.train.Saver()
# 1. Function that loads the existing weights. The W and b of the logits
#    layer are not used (their sizes do not match in the first place).
# weights/resnet_v1_50.ckpt was downloaded from http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz
# see https://github.com/NVIDIA-Jetson/tf_to_trt_image_classification/blob/master/scripts/download_models.sh
variables_to_restore = slim.get_variables_to_restore(exclude=['resnet_v1_50/logits/weights', 'resnet_v1_50/logits/biases'])
init_fn = slim.assign_from_checkpoint_fn('../weights/resnet_v1_50.ckpt', variables_to_restore, ignore_missing_vars=True)
print(IN, PB, OUT) else: exit(0) # 初期設定 tf.reset_default_graph() tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True sess = tf.Session(config=tf_config) # モデルの構築 images = tf.placeholder(tf.float32, (None, 224, 224, 3), name='images') labels = tf.placeholder(tf.int32, (None, 1, 1, 8), name='labels') with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=False, num_classes=8) # チェックポイントの読み込み saver = tf.train.Saver() saver.restore(save_path=IN, sess=sess) # freeze graph output_nodes = ['resnet_v1_50/SpatialSqueeze'] frozen_graph = tf.graph_util.convert_variables_to_constants( sess, sess.graph.as_graph_def(), output_node_names=output_nodes) from convert_relu6 import convertRelu6 frozen_graph = convertRelu6(frozen_graph) # pbとして保存 with open(PB, 'wb') as f:
classes = np.array(list_)
print len(classes)
# Split the second column of `classes` into train/validation/test sets,
# holding out 5% each for validation and test.
train_dataset, val_dataset, test_dataset = create_datasets(classes[:, 1], num_samples=NUM_IMAGES, val_fraction=0.05, test_fraction=0.05)
num_classes = len(classes) # should be 1000
print num_classes
# NOTE(review): the extent of this device block was reconstructed from a
# whitespace-collapsed source - confirm which statements belong inside it.
with tf.device('/gpu:1'):
    # Images arrive flattened per channel and are reshaped back below.
    x = tf.placeholder(tf.float32, shape=[None, input_data.IMAGE_WIDTH * input_data.IMAGE_HEIGHT, 3])
    y_ = tf.placeholder(tf.float32, shape=[None, num_classes])
    keep_prob = tf.placeholder(tf.float32)
    x_reshaped = tf.reshape(x, [-1, input_data.IMAGE_WIDTH, input_data.IMAGE_HEIGHT, 3])
    # NOTE(review): the class count is hard-coded to 1000 here instead of
    # using num_classes. If resnet_v1_50 returns a (logits, end_points)
    # pair, y_logit is a tuple rather than a tensor - confirm.
    y_logit = resnet_v1_50(x_reshaped,1000)
    print ("y_prediction's shape is:")
    print tf.shape(y_logit)
    print ("label y's shape is")
    print tf.shape(y_)
    # # Training
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_logit, labels=y_))
    train_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy)
    print tf.shape(tf.argmax(y_logit, 1))
    print tf.shape(tf.argmax(y_, 1))
    # Accuracy: fraction of examples whose arg-max prediction matches the
    # arg-max of the label row.
    correct_prediction = tf.equal(tf.argmax(y_logit, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
(num_val_records, num_classes))
# One iterator shared by both datasets; the two initializer ops below
# switch it between the training and the validation data.
iterator = tf.data.Iterator.from_structure(batched_train_dataset.output_types, batched_train_dataset.output_shapes)
images, labels = iterator.get_next()
print("image_shape:", images.shape)
print("labels:", labels.shape)
train_init_op = iterator.make_initializer(batched_train_dataset)
val_init_op = iterator.make_initializer(batched_val_dataset)
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    logits, _ = resnet_v1.resnet_v1_50(images, num_classes=num_classes, is_training=True)
# Remove all size-1 dimensions from the logits.
logits = tf.squeeze(logits)
# Restore everything from the checkpoint except the classifier layers,
# which are initialised separately below.
# NOTE(review): only "resnet_v1_50/logits" is visibly created here;
# "resnet_v1_50/AuxLogits" may match nothing - confirm.
variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=["resnet_v1_50/logits", "resnet_v1_50/AuxLogits"])
init_fn = tf.contrib.framework.assign_from_checkpoint_fn("./resnet_v1_50/resnet_v1_50.ckpt", variables_to_restore)
logits_variables = tf.contrib.framework.get_variables("resnet_v1_50/logits") + tf.contrib.framework.get_variables("resnet_v1_50/AuxLogits")
logits_init = tf.variables_initializer(logits_variables)
# Loss function:
predictions = tf.to_int32(tf.argmax(logits, 1))
import sys
tf.reset_default_graph()
# The first command-line argument selects which network to build.
model = sys.argv[1]
input_image = tf.placeholder(tf.float32, shape=(None, 224, 224, 3), name='input_placeholder')
# NOTE(review): any value other than the four handled below leaves
# `outputs` undefined.
if model == 'se':
    outputs = SE_ResNet(input_image, 1000, is_training=False, data_format='channels_last')
elif model == 'res':
    outputs = resnet_v1_50(input_image, 1000, is_training=False, scope='resnet_v1_50')
elif model == 'scale':
    outputs = scale_resnet_v1_50(input_image, 1000, is_training=False, scope='resnet_v1_50')
elif model == 'next':
    outputs = SE_ResNeXt(input_image, 1000, is_training=False, data_format='channels_last')
saver = tf.train.Saver()
with tf.Session() as sess:
def export():
    """Export a restored ResNet-50 classifier as a TensorFlow SavedModel.

    Reads the synset list and its metadata, builds an inference graph that
    parses serialized examples, runs ResNet-50 and maps the top-k indices
    to readable class names, restores the pre-trained checkpoint and
    writes a SavedModel with a classification signature (the default
    serving signature) and a prediction signature ('predict_images').
    """
    # Index -> synset mapping, one synset per line.
    with open(SYNSET_FILE) as synset_file:
        synsets = synset_file.read().splitlines()

    # Synset -> metadata mapping, read from a tab separated file.
    texts = {}
    with open(METADATA_FILE) as metadata_file:
        for row in metadata_file.read().splitlines():
            fields = row.split('\t')
            assert len(fields) == 2
            texts[fields[0]] = fields[1]

    with tf.Graph().as_default():
        # Input transformation: parse feature 'x' out of the serialized
        # examples.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'x': tf.FixedLenFeature(shape=[], dtype=tf.float32),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        # Reshape the input image to its original dimension.
        reshaped = tf.reshape(tf_example['x'], (1, 224, 224, 3))
        # tf.identity() is used only to give the tensor a name.
        input_tensor = tf.identity(reshaped, name='x')

        # Run inference.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, _ = resnet_v1.resnet_v1_50(input_tensor,
                                            1000,
                                            is_training=False)

        # Transform the output to a top-K result.
        values, indices = tf.nn.top_k(net, NUM_TOP_CLASSES)

        # Constant string tensor whose i'th element is the human readable
        # description for index i. Index 0 is an unused background class.
        # NOTE(review): with a 1000-way network this extra entry shifts
        # every label by one - confirm it is intended.
        class_descriptions = ['unused background']
        class_descriptions.extend(texts[synset] for synset in synsets)
        class_tensor = tf.constant(class_descriptions)
        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Restore variables from the pre-trained checkpoint.
            checkpoint = os.path.join(pre_trained_model_dir,
                                      "resnet_v1_50.ckpt")
            saver.restore(sess, checkpoint)
            print("Model", model_name, "restored.")

            # Export the inference model.
            output_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            print('Exporting trained model to', output_path)
            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Short aliases for the SavedModel helper namespaces.
            tensor_info = tf.saved_model.utils.build_tensor_info
            signature_utils = tf.saved_model.signature_def_utils
            signature_names = tf.saved_model.signature_constants

            # Build the signature_def_map.
            classify_inputs_info = tensor_info(serialized_tf_example)
            classes_info = tensor_info(classes)
            scores_info = tensor_info(values)

            classification_signature = signature_utils.build_signature_def(
                inputs={
                    signature_names.CLASSIFY_INPUTS: classify_inputs_info,
                },
                outputs={
                    signature_names.CLASSIFY_OUTPUT_CLASSES: classes_info,
                    signature_names.CLASSIFY_OUTPUT_SCORES: scores_info,
                },
                method_name=signature_names.CLASSIFY_METHOD_NAME)

            predict_inputs_info = tensor_info(input_tensor)
            prediction_signature = signature_utils.build_signature_def(
                inputs={'images': predict_inputs_info},
                outputs={
                    'classes': classes_info,
                    'scores': scores_info,
                },
                method_name=signature_names.PREDICT_METHOD_NAME)

            default_key = signature_names.DEFAULT_SERVING_SIGNATURE_DEF_KEY
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images': prediction_signature,
                    default_key: classification_signature,
                },
                main_op=tf.tables_initializer(),
                strip_default_attrs=True)

            builder.save()
            print('Successfully exported model to %s' % FLAGS.output_dir)
def resnet(images, OUTPUT_CLASS):
    """Return the first output of `resnet_v1_50` for `images`.

    Args:
        images: Input passed to `resnet_v1_50`.
        OUTPUT_CLASS: Passed to the builder as `num_classes`.

    Returns:
        The first element of the pair returned by `resnet_v1_50`.
    """
    network_outputs = resnet_v1_50(images, num_classes=OUTPUT_CLASS)
    logits, _ = network_outputs
    return logits
import tensorflow as tf
import resnet_v1
from tensorflow.examples.tutorials.mnist import input_data
from read_input import imgnet

# Load the dataset through the project's reader.
imgnet_reader = imgnet()
imgnet_reader.read_data_sets("../../big_data/Imagenet_dataset/")
x = tf.placeholder("float", shape=[None, 224, 224, 3])
y_ = tf.placeholder("float", shape=[None, 10])
# NOTE(review): `pred` is used as a probability tensor below (tf.log,
# tf.argmax); confirm that this resnet_v1_50 returns a single tensor of
# probabilities rather than a (logits, end_points) pair.
pred = resnet_v1.resnet_v1_50(x)
cross_entropy = -tf.reduce_sum(y_ * tf.log(pred))
# The optimiser setup is disabled, so no training step is defined here.
#update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#if mode == 'bn':
#    if update_ops:
#        updates = tf.group(*update_ops)
#    with tf.control_dependencies([updates]):
#        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#elif mode == 'ln' or mode == 'cln':
#    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Accuracy: fraction of examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# NOTE(review): neither `sess` nor `np` is defined in the visible part of
# this script - confirm they are provided elsewhere.
sess.run(tf.initialize_all_variables())
new_cn_val = -np.inf
for i in range(500):
    batch = imgnet_reader.next_batch(50)
    # Report accuracy on the current batch every 100 steps.
    if i % 100 == 0:
        cn_val = accuracy.eval(feed_dict={x: batch[0], y_: batch[1]})
        print("step %d, training accuracy %g" % (i, cn_val))
def __feature_extract(self, input_tensor):
    """Run ResNet-50 and return four of its end points.

    Args:
        input_tensor: Input passed to the ResNet builder.

    Returns:
        A list with the end points 'pool5', 'pool4', 'pool3' and 'pool2',
        in that order.
    """
    training_flag = self.__training
    _, end_points = resnet_v1.resnet_v1_50(inputs=input_tensor,
                                           is_training=training_flag,
                                           scope='resnet_v1_50')
    wanted = ('pool5', 'pool4', 'pool3', 'pool2')
    return [end_points[key] for key in wanted]
for item in y_score[i]: tmp_str += (str(item) + ' ') g.write(tmp_str + '\n') with tf.variable_scope('BN_switch'): is_training = tf.placeholder(tf.bool) with tf.variable_scope('Input'): with tf.variable_scope('Input_x'): x = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1]) with tf.variable_scope('Input_y'): y_ = tf.placeholder(tf.int32, shape=[None, NUM_LABELS]) y_conv, end_point = resnet_v1_50(inputs=x, num_classes=NUM_LABELS, is_training=is_training) with tf.name_scope('Loss'): slim.losses.softmax_cross_entropy(logits=y_conv, onehot_labels=y_) total_loss = slim.losses.get_total_loss() tf.summary.scalar('loss', total_loss) with tf.name_scope('Train_step'): train_step = tf.train.AdamOptimizer(lr).minimize(total_loss) with tf.name_scope('Accuracy'): y_conv_softmax = tf.nn.softmax(y_conv) distribution = [tf.arg_max(y_, 1), tf.arg_max(y_conv_softmax, 1)] correct_prediction = tf.equal(distribution[0], distribution[1]) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
def build_pspnet(inputs, label_size, num_classes, preset_model='PSPNet-Res50', pooling_type="MAX", weight_decay=1e-5, upscaling_method="bilinear", is_training=True, pretrained_dir="models"):
    """Build the PSPNet model.

    Args:
        inputs: The input tensor.
        label_size: Size of the final label tensor; needed for proper
            upscaling.
        num_classes: Number of classes.
        preset_model: Which ResNet to use for feature extraction:
            'PSPNet-Res50', 'PSPNet-Res101' or 'PSPNet-Res152'.
        pooling_type: Forwarded to `PyramidPoolingModule`.
        weight_decay: Weight decay for the ResNet arg scope.
        upscaling_method: "conv" or "bilinear" (case-insensitive); any
            other value skips the upscaling step.
        is_training: Forwarded to the backbone and, in this version, also
            to the head's batch norm and dropout, so dropout is disabled
            and batch norm uses its moving statistics when this is False.
        pretrained_dir: Directory containing the '<scope>.ckpt' files.

    Returns:
        A pair `(net, init_fn)`: the logits tensor and the function that
        loads the pre-trained ResNet weights.

    Raises:
        ValueError: If `preset_model` is not supported.
    """
    inputs = mean_image_subtraction(inputs)

    # preset name -> variable scope of the backbone (also the checkpoint
    # file stem). One table replaces three copy-pasted branches.
    backbone_scopes = {
        'PSPNet-Res50': 'resnet_v1_50',
        'PSPNet-Res101': 'resnet_v1_101',
        'PSPNet-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbone_scopes:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model))
    backbone_scope = backbone_scopes[preset_model]
    build_backbone = getattr(resnet_v1, backbone_scope)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        _, end_points = build_backbone(
            inputs, is_training=is_training, scope=backbone_scope)
    # PSPNet requires pre-trained ResNet weights.
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(pretrained_dir, backbone_scope + '.ckpt'),
        slim.get_model_variables(backbone_scope))

    # The pyramid pooling module works on the 'pool3' end point, at one
    # eighth of the label size.
    feature_map_shape = [int(x / 8.0) for x in label_size]
    psp = PyramidPoolingModule(end_points['pool3'],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    # Pass the training flag on; the defaults would keep batch norm and
    # dropout in training mode even at inference time.
    net = slim.batch_norm(net, is_training=is_training)
    net = tf.nn.relu(net)

    method = upscaling_method.lower()
    if method == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif method == "bilinear":
        net = Upsampling(net, label_size)

    net = slim.dropout(net, keep_prob=(0.9), is_training=is_training)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                      scope='logits')
    return net, init_fn
def model(images, text_scale=512, weight_decay=1e-5, is_training=True):
    """Define the detection model on top of slim's ResNet-50.

    Args:
        images: Input image batch; the mean image is subtracted first.
        text_scale: Multiplier applied to the sigmoid box-distance outputs.
        weight_decay: L2 weight decay for the backbone and fusion layers.
        is_training: Forwarded to the backbone and to batch norm.

    Returns:
        A pair `(F_score, F_geometry)`: a 1-channel sigmoid score map and
        a 5-channel geometry map (4 box channels plus 1 angle channel).
    """
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        _, end_points = resnet_v1.resnet_v1_50(images,
                                               is_training=is_training,
                                               scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        bn_settings = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=bn_settings,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone features, listed from 'pool5' down to 'pool2'.
            f = [end_points[key]
                 for key in ('pool5', 'pool4', 'pool3', 'pool2')]
            for idx, feature in enumerate(f):
                print('Shape of f_{} {}'.format(idx, feature.shape))

            num_outputs = [None, 128, 64, 32]
            g = []  # per-stage outputs handed to the next stage
            h = []  # per-stage merged features
            # The layers are created in the same order as before so that
            # automatically generated layer names stay the same.
            for stage in range(4):
                if stage == 0:
                    merged = f[stage]
                else:
                    width = num_outputs[stage]
                    joined = tf.concat([g[stage - 1], f[stage]], axis=-1)
                    reduced = slim.conv2d(joined, width, 1)
                    merged = slim.conv2d(reduced, width, 3)
                h.append(merged)

                if stage <= 2:
                    passed_on = unpool(merged)
                else:
                    passed_on = slim.conv2d(merged, num_outputs[stage], 3)
                g.append(passed_on)

                print('Shape of h_{} {}, g_{} {}'.format(
                    stage, h[stage].shape, stage, g[stage].shape))

            fused = g[3]
            # A sigmoid limits the range of the regression outputs; the
            # same is done for the angle map.
            F_score = slim.conv2d(fused, 1, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis aligned bbox and 1 channel rotation angle.
            geo_map = slim.conv2d(fused, 4, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * text_scale
            # Angle is between [-45, 45] degrees.
            angle_map = (slim.conv2d(fused, 1, 1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def build_refinenet(inputs, num_classes, preset_model='RefineNet-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models", label_size=None):
    """Build the RefineNet model.

    Args:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which ResNet to use for feature extraction:
            'RefineNet-Res50', 'RefineNet-Res101' or 'RefineNet-Res152'.
        weight_decay: Weight decay for the ResNet arg scope.
        is_training: Forwarded to the backbone.
        upscaling_method: "conv" or "bilinear" (case-insensitive); any
            other value skips the upscaling step.
        pretrained_dir: Directory containing the '<scope>.ckpt' files.
        label_size: Target size for bilinear upscaling. When omitted, the
            spatial size of `inputs` (axes 1 and 2) is used.

    Returns:
        A pair `(net, init_fn)`: the logits tensor and the function that
        loads the pre-trained ResNet weights.

    Raises:
        ValueError: If `preset_model` is not supported.
    """
    inputs = mean_image_subtraction(inputs)

    # preset name -> variable scope of the backbone (also the checkpoint
    # file stem).
    backbone_scopes = {
        'RefineNet-Res50': 'resnet_v1_50',
        'RefineNet-Res101': 'resnet_v1_101',
        'RefineNet-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbone_scopes:
        # The message now lists ResNet 50 too, which was always accepted.
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model))
    backbone_scope = backbone_scopes[preset_model]
    build_backbone = getattr(resnet_v1, backbone_scope)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        _, end_points = build_backbone(
            inputs, is_training=is_training, scope=backbone_scope)
    # RefineNet requires pre-trained ResNet weights.
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(pretrained_dir, backbone_scope + '.ckpt'),
        slim.get_model_variables(backbone_scope))

    f = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    # Bring every backbone feature to 256 channels, then refine from the
    # first entry of `f` to the last, feeding each block's output into the
    # next one.
    h = [slim.conv2d(feature, 256, 1) for feature in f]
    refined = RefineBlock(high_inputs=None, low_inputs=h[0])
    for lateral in h[1:]:
        refined = RefineBlock(refined, lateral)

    # Start the upscaling from the last refine block. The original read
    # `net` before assigning it (so both upscaling modes crashed) and then
    # built the logits from the non-upscaled tensor.
    net = refined

    method = upscaling_method.lower()
    if method == "conv":
        # NOTE(review): three x2 stages are kept as written; confirm the
        # overall factor matches the resolution of the refined features.
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif method == "bilinear":
        # `label_size` was an undefined name here. Fall back to the input
        # resolution when the caller does not supply a size.
        # NOTE(review): assumes Upsampling(tensor, size) accepts a tensor
        # valued size - confirm against the helper.
        target_size = label_size
        if target_size is None:
            target_size = tf.shape(inputs)[1:3]
        net = Upsampling(net, target_size)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                      scope='logits')
    return net, init_fn
def _feature_extractor(self, input, mode, scope=None, relu_leakiness=0.1):
    """Build the ResNet-50 + pyramid feature extractor and return a feature.

    NOTE(review): the nesting of the `with` scopes below was reconstructed
    from a whitespace-collapsed source; confirm the resulting variable-scope
    names against existing checkpoints.

    Args:
      input: Image tensor used as the default value of a placeholder named
        'input_image' with shape (None, 300, 300, 3).
      mode: Forwarded to every `resnet_utils.residual` call.
        NOTE(review): the ResNet backbone's `is_training` is derived from
        `self.mode`, not from this argument - confirm that is intended.
      scope: Optional prefix; when given, it is prepended (with a '/') to
        every end-point name in the pyramid map.
      relu_leakiness: Forwarded to every `resnet_utils.residual` call.

    Returns:
      A tuple `(feature, extra_train_ops)`: the spatially averaged output of
      the last residual block after `tf.nn.l2_normalize`, and the op list
      threaded through the `resnet_utils.residual` calls.
    """
    image = tf.placeholder_with_default(input, (None, 300, 300, 3),
                                        'input_image')
    pyramid_map = {
        'C1': 'FeatureX1/resnet_v1_50/conv1/Relu:0',
        'C2': 'FeatureX1/resnet_v1_50/block1/unit_2/bottleneck_v1',
        'C3': 'FeatureX1/resnet_v1_50/block2/unit_3/bottleneck_v1',
        'C4': 'FeatureX1/resnet_v1_50/block3/unit_5/bottleneck_v1',
        'C5': 'FeatureX1/resnet_v1_50/block4/unit_3/bottleneck_v1',
    }
    if scope is not None:
        # `.items()` works on Python 2 and 3 (`.iteritems()` is Python 2
        # only); a new dict avoids mutating the map while iterating over it.
        pyramid_map = {
            key: scope + "/" + value
            for key, value in pyramid_map.items()
        }

    with tf.variable_scope("FeatureX1"):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=0.000005)):
            logits, end_points = resnet_v1.resnet_v1_50(
                image, 1000, is_training=self.mode == 'train')
        pyramid = pyramid_network.build_pyramid(pyramid_map, end_points)

    extra_train_ops = []
    py_features = [pyramid['P5']]

    with tf.variable_scope("FeatureX2"):
        # (pyramid level, number of stride-2 residual blocks applied to it).
        # The first block of each level takes 256 input filters and activates
        # before the residual; later blocks take 64 and do not.
        for level, num_blocks in ((2, 3), (3, 2), (4, 1)):
            with tf.variable_scope("pyramid_%d" % level):
                x = pyramid['P%d' % level]
                for block in range(num_blocks):
                    is_first = block == 0
                    with tf.variable_scope("block_%d" % block):
                        x, extra_train_ops = resnet_utils.residual(
                            x,
                            256 if is_first else 64,
                            64,
                            resnet_utils.stride_arr(2),
                            mode,
                            extra_train_ops,
                            relu_leakiness,
                            activate_before_residual=is_first)
                py_features.append(x)

        # Merge P5 with the three reduced levels along the channel axis.
        x = tf.concat(py_features, axis=3, name='concat')
        with tf.variable_scope("block_0"):
            x, extra_train_ops = resnet_utils.residual(
                x,
                448,
                256,
                resnet_utils.stride_arr(2),
                mode,
                extra_train_ops,
                relu_leakiness,
                activate_before_residual=True)
        with tf.variable_scope("block_1"):
            x, extra_train_ops = resnet_utils.residual(
                x,
                256,
                256,
                resnet_utils.stride_arr(2),
                mode,
                extra_train_ops,
                relu_leakiness,
                activate_before_residual=False)

    global_avg = tf.reduce_mean(x, [1, 2], name='global_avg')
    # NOTE(review): normalizing along dimension 0 normalizes across the batch,
    # not per example - confirm this is intended before relying on it.
    feature = tf.nn.l2_normalize(global_avg, 0, name='Feature')
    return feature, extra_train_ops
def create_graph(self,
                 sess,
                 global_step,
                 training_iters,
                 learning_rate=0.001,
                 decay_rate=0.95,
                 momentum=0.2):
    """Build the ResNet-50 based segmentation graph, its losses and optimizer.

    Reads `self._networks_map`, `self.backbones`, `self.images`,
    `self.labels` and `self.class_num`, and stores the resulting tensors/ops
    on `self` (`outputs3`, `output_softmax`, `mask`, `oneHot_mask_flatten`,
    `labels_flatten`, `oneHot_label_reshape`, `output_softmax_reshape`,
    `d_loss`, `loss`, `t_loss`, `learning_rate_node`, `optimizer`).

    NOTE(review): the nesting of the `with` scopes below was reconstructed
    from a whitespace-collapsed source; confirm the resulting op/variable
    names (e.g. of "Mask") against any code that looks them up by name.

    Args:
      sess: Session whose graph the model is built in.
      global_step: Step variable passed to the decay schedule and `minimize`.
      training_iters: The decay schedule uses `training_iters * 4` as its
        `decay_steps`.
      learning_rate: Initial learning rate; also the rate given to Adam.
      decay_rate: Decay rate of the exponential schedule.
      momentum: Unused by the current (Adam) optimizer.
    """
    with sess.graph.as_default():
        pyramid_map = self._networks_map[self.backbones]
        _, end_points = resnet_v1.resnet_v1_50(self.images)
        p5 = end_points[pyramid_map["C5"]]
        p4 = end_points[pyramid_map["C4"]]
        p3 = end_points[pyramid_map["C3"]]
        p2 = end_points[pyramid_map["C2"]]
        p1 = end_points[pyramid_map["C1"]]

        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
            'fused': None,  # Use fused batch norm if possible.
        }

        # NOTE(review): the list is passed positionally, i.e. as
        # `default_name`, which is ignored because a scope name is given.
        with tf.variable_scope("ResNet_Unet", [p5, p4, p3, p2, p1]):
            with slim.arg_scope(
                [slim.conv2d],
                    weights_regularizer=slim.l2_regularizer(0.0001),
                    weights_initializer=slim.variance_scaling_initializer(),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params):
                with slim.arg_scope([slim.batch_norm], **batch_norm_params):
                    # --- attention on p5 ---
                    # Reduce p5 to 64 channels.
                    p5_64 = slim.conv2d(p5,
                                        64, [3, 3],
                                        stride=1,
                                        scope='channel_64_p5_conv3')
                    p5_64 = slim.conv2d(p5_64,
                                        64, [1, 1],
                                        stride=1,
                                        activation_fn=None,
                                        scope='channel_64_p5_conv1')

                    # channel se: gate from the spatial mean of p5
                    p5_cse = tf.reduce_mean(p5, [1, 2], keep_dims=True)
                    p5_cse_conv = slim.conv2d(p5_cse,
                                              64, [1, 1],
                                              stride=1,
                                              activation_fn=None,
                                              scope='cse_p5_conv')
                    p5_cse_sig = tf.sigmoid(p5_cse_conv, name='cse_p5_sig')
                    p5_cse = p5_64 * p5_cse_sig

                    # spatial se: single-channel gate from a 1x1 conv on p5
                    p5_sse = slim.conv2d(p5,
                                         1, [1, 1],
                                         stride=1,
                                         activation_fn=None,
                                         scope='sse_p5_conv')
                    # NOTE(review): this reuses the op name 'cse_p5_sig';
                    # kept as-is so existing graph names do not change.
                    p5_sse_sig = tf.sigmoid(p5_sse, name='cse_p5_sig')
                    p5_sse = p5_64 * p5_sse_sig

                    # shortcut
                    p5_64_shortcut = slim.conv2d(
                        p5,
                        64, [1, 1],
                        stride=1,
                        activation_fn=None,
                        scope='channel_64_p5_shortcut')
                    p5_att = p5_cse + p5_sse + p5_64_shortcut

                    # --- gau stages: p5 -> p4 -> p3 -> p2 -> p1 ---
                    # Each stage gates the lower-level map with the pooled
                    # higher-level map and adds the upsampled higher-level
                    # map. Op creation order matches the unrolled original.
                    fused, fused_name = p5_att, 'p5'
                    for low, low_name in ((p4, 'p4'), (p3, 'p3'), (p2, 'p2'),
                                          (p1, 'p1')):
                        pooled = tf.reduce_mean(fused, [1, 2], keep_dims=True)
                        pooled_conv = slim.conv2d(
                            pooled,
                            64, [1, 1],
                            stride=1,
                            activation_fn=None,
                            scope='gau_conv_%sgp' % fused_name)
                        gate = tf.sigmoid(pooled_conv,
                                          name='gau_%s_sig' % fused_name)
                        low_conv = slim.conv2d(low,
                                               64, [3, 3],
                                               stride=1,
                                               scope='gau_conv_%s' % low_name)
                        low_conv = slim.conv2d(low_conv,
                                               64, [1, 1],
                                               stride=1,
                                               activation_fn=None,
                                               scope='%s_conv1' % low_name)
                        attention = low_conv * gate
                        upsampled = tf.image.resize_bilinear(
                            fused, [tf.shape(low)[1],
                                    tf.shape(low)[2]],
                            name='gau_%s_upscale' % fused_name)
                        fused = attention + upsampled
                        fused_name = low_name
                    p1_add = fused
                    print("p1 is ", p1_add.get_shape())

                    # Resize to the input resolution and predict the mask.
                    outputs_128 = tf.image.resize_bilinear(
                        p1_add,
                        [tf.shape(self.images)[1],
                         tf.shape(self.images)[2]],
                        name='gau_p2_upscale')
                    outputs_128 = slim.conv2d(outputs_128,
                                              64, [3, 3],
                                              stride=1,
                                              scope="output_mask1")
                    outputs2 = slim.conv2d(outputs_128,
                                           self.class_num, [3, 3],
                                           stride=1,
                                           scope="output_mask2",
                                           activation_fn=None)
                    self.outputs3 = slim.conv2d(outputs2,
                                                self.class_num, [3, 3],
                                                stride=1,
                                                scope="output_mask3",
                                                activation_fn=None)

        self.output_softmax = self.pixel_wise_softmax(self.outputs3)
        self.mask = tf.argmax(self.output_softmax, axis=3, name="Mask")
        oneHot_mask = tf.one_hot(self.mask, self.class_num)
        self.oneHot_mask_flatten = slim.flatten(oneHot_mask)
        oneHot_label = tf.one_hot(self.labels, self.class_num)
        self.labels_flatten = slim.flatten(oneHot_label)
        self.oneHot_label_reshape = tf.reshape(oneHot_label,
                                               [-1, self.class_num])
        self.output_softmax_reshape = tf.reshape(self.output_softmax,
                                                 [-1, self.class_num])

        # NOTE(review): the dice term is computed from an argmax mask, which
        # has no gradient, so it presumably does not influence training -
        # confirm whether a soft (probability-based) dice was intended.
        self.d_loss = self.dice_coefficient_loss(self.labels_flatten,
                                                 self.oneHot_mask_flatten)

        # softmax_cross_entropy_with_logits applies softmax itself, so it must
        # receive the raw logits; feeding `output_softmax` (presumably already
        # normalized by pixel_wise_softmax) would apply softmax twice.
        logits_reshape = tf.reshape(self.outputs3, [-1, self.class_num])
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=self.oneHot_label_reshape, logits=logits_reshape))
        self.t_loss = self.loss + self.d_loss

        # NOTE(review): the decayed rate is built but Adam below is given the
        # constant `learning_rate` - confirm which one is intended.
        self.learning_rate_node = tf.train.exponential_decay(
            learning_rate=learning_rate,
            global_step=global_step,
            decay_steps=training_iters * 4,
            decay_rate=decay_rate,
            staircase=True)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.t_loss,
                                                  global_step=global_step)
def build_gcn(inputs,
              num_classes,
              preset_model='GCN-Res101',
              weight_decay=1e-5,
              is_training=True,
              upscaling_method="bilinear",
              pretrained_dir="models"):
    """Build the GCN model on top of a ResNet-v1 backbone.

    Arguments:
      inputs: The input tensor. It is passed through `mean_image_subtraction`
        before being fed to the backbone.
      num_classes: Number of classes (output channels of the final 1x1 conv).
        The intermediate blocks use a fixed 21 filters regardless of this
        value.
      preset_model: Which backbone to use for feature extraction. One of
        'GCN-Res50', 'GCN-Res101' or 'GCN-Res152'.
      weight_decay: Weight decay handed to `resnet_v1.resnet_arg_scope`.
      is_training: Forwarded to the ResNet builder.
      upscaling_method: Accepted for signature compatibility; not used here.
      pretrained_dir: Directory containing the `resnet_v1_*.ckpt` checkpoint.

    Returns:
      A tuple `(net, init_fn)`: the logits tensor and the function returned by
      `slim.assign_from_checkpoint_fn` for the backbone's model variables.

    Raises:
      ValueError: If `preset_model` is not one of the supported presets.
    """
    # preset name -> (backbone builder, variable scope == checkpoint stem)
    backbones = {
        'GCN-Res50': (resnet_v1.resnet_v1_50, 'resnet_v1_50'),
        'GCN-Res101': (resnet_v1.resnet_v1_101, 'resnet_v1_101'),
        'GCN-Res152': (resnet_v1.resnet_v1_152, 'resnet_v1_152'),
    }

    inputs = mean_image_subtraction(inputs)

    if preset_model not in backbones:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports "
            "GCN-Res50, GCN-Res101 and GCN-Res152" % (preset_model))
    build_backbone, backbone_scope = backbones[preset_model]

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        _, end_points = build_backbone(
            inputs, is_training=is_training, scope=backbone_scope)
        # GCN requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(pretrained_dir, backbone_scope + '.ckpt'),
            slim.get_model_variables(backbone_scope))

    res = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    # Decoder: every level gets a global-conv + boundary-refinement pass. From
    # the second level on, the upscaled result of the previous level is added
    # and refined again before the x2 upscale that feeds the next level.
    upscaled = None
    for feature in res:
        down = GlobalConvBlock(feature, n_filters=21, size=3)
        down = BoundaryRefinementBlock(down, n_filters=21, kernel_size=[3, 3])
        if upscaled is not None:
            down = tf.add(down, upscaled)
            down = BoundaryRefinementBlock(down,
                                           n_filters=21,
                                           kernel_size=[3, 3])
        upscaled = ConvUpscaleBlock(down,
                                    n_filters=21,
                                    kernel_size=[3, 3],
                                    scale=2)

    # Final refinement, one more x2 upscale, and the per-class 1x1 conv.
    net = BoundaryRefinementBlock(upscaled, n_filters=21, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=21, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=21, kernel_size=[3, 3])

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    return net, init_fn
def model(images, labels):
    """Build the training graph for a two-class ResNet-50 classifier.

    The images are cast to float32, reshaped to [-1, 64, 64, 1], resized to
    250x250, converted with `grayscale_to_rgb`, randomly cropped per image to
    [224, 224, 1] and randomly flipped left/right before entering the network.

    Args:
      images: Image data reshapeable to [-1, 64, 64, 1].
      labels: Class ids; they are one-hot encoded with depth 2 and compared
        with the `tf.argmax` of the logits (presumably int64 - TODO confirm).

    Returns:
      A tuple `(train_step, accuracy)`: the Adam minimize op and the mean
      accuracy tensor.
    """
    # Build the neural network as a computation graph.

    # Reshape to a batch of single-channel 64x64 images.
    as_float = tf.cast(images, tf.float32)
    batch = tf.reshape(as_float, [-1, 64, 64, 1])
    batch = tf.image.resize_images(batch, [250, 250])
    batch = tf.image.grayscale_to_rgb(batch)

    # NOTE(review): the crop size keeps only 1 channel right after the
    # grayscale_to_rgb conversion - confirm whether [224, 224, 3] was meant.
    def _random_crop(img):
        return tf.random_crop(img, [224, 224, 1])

    batch = tf.map_fn(_random_crop, batch)
    batch = tf.map_fn(tf.image.random_flip_left_right, batch)

    resnet_options = dict(
        num_classes=2,
        is_training=True,
        global_pool=True,
        output_stride=None,
        spatial_squeeze=True,
        store_non_strided_activations=False,
        reuse=None,
        scope='resnet_v1_50',
    )
    _, end_points = resnet_v1.resnet_v1_50(batch, **resnet_options)

    # Logits: the end point stored under the spatial-squeeze key.
    logits = end_points["resnet_v1_50/spatial_squeeze"]

    # Per-example loss against the one-hot encoded labels.
    one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=one_hot_labels, logits=logits)

    # Not returned; kept so the graph contains the same ops as before.
    probabilities = tf.nn.softmax(logits)

    predicted_class = tf.argmax(logits, 1)
    is_correct = tf.equal(predicted_class, labels)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    # Build the ops that (1) compute the gradients of the loss with respect to
    # every variable in the network and (2) update every variable by moving
    # it one learning-rate step along the gradient direction.
    optimizer = tf.train.AdamOptimizer(1e-4)
    train_step = optimizer.minimize(cross_entropy)
    return train_step, accuracy