def load(self, **kwargs):
    """Build the ResNet-v2-152 graph and restore its pretrained checkpoint.

    If the directory returned by ``get_model_path('resnet_v2_152')`` does not
    exist yet, it is created and the checkpoint archive is downloaded and
    unpacked into it before the weights are restored.

    Args:
        **kwargs: Must contain ``session``, the ``tf.Session`` the checkpoint
            is restored into.

    Raises:
        KeyError: If ``session`` is not supplied.
        AssertionError: If ``session`` is not a ``tf.Session``.
    """
    session = kwargs["session"]
    assert isinstance(session, tf.Session)

    # Instantiate the network once so the variables to restore exist.
    x_input = tf.placeholder(self.x_dtype, shape=(None, ) + self.x_shape)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_v2.resnet_v2_152(x_input,
                                num_classes=self.n_class,
                                is_training=False,
                                reuse=tf.AUTO_REUSE)

    model_path = get_model_path('resnet_v2_152')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
        archive_path = os.path.join(model_path,
                                    'resnet_v2_152_2017_04_14.tar.gz')
        urllib.request.urlretrieve(
            'http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz',
            archive_path, show_progress)
        # NOTE(review): the archive is fetched over plain HTTP and its member
        # names are extracted without validation; consider verifying a
        # checksum and rejecting members that escape ``model_path``.
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(archive_path) as tar:
            for file_name in tar.getnames():
                tar.extract(file_name, model_path)

    saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))
    saver.restore(session, os.path.join(model_path, 'resnet_v2_152.ckpt'))
def CNN(inputs):
    """Run ResNet-v2-152 on ``inputs`` and project the result to 256 filters.

    Everything is created under the ``"CNN"`` variable scope. Earlier
    experiments with other backbones (ResNet-50/101, Inception-v3) were left
    in the original as commented-out code and are not reproduced here.

    Args:
        inputs: Tensor handed unchanged to ``resnet_v2.resnet_v2_152``.

    Returns:
        The output of the final 1x1 convolution (256 filters).
    """
    with tf.variable_scope("CNN"):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            features, _ = resnet_v2.resnet_v2_152(inputs,
                                                  None,
                                                  is_training=True,
                                                  global_pool=False,
                                                  output_stride=16)

        # Pull the network straight down to 256 channels; the original note
        # gives the resulting shape as [N 1 256 256].
        with tf.variable_scope("Normalize"):
            features = slim.conv2d(features, 1024, [2, 2], [2, 1],
                                   normalizer_fn=slim.batch_norm,
                                   activation_fn=None)
            for depth in (512, 256):
                features = slim.conv2d(features, depth, [1, 1],
                                       normalizer_fn=slim.batch_norm,
                                       activation_fn=None)
        return features
def create_resnet_model(img_dim):
    """Build a ResNet-v2-152 graph, load its checkpoint and return its handles.

    Args:
        img_dim: Height and width passed to
            ``cnn_preprocessing.preprocess_for_eval`` and used for the batched
            ``images`` placeholder.

    Returns:
        dict with the keys ``images_placeholder``, ``block4``, ``session``,
        ``processed_image``, ``pre_image`` and ``probs``. ``probs`` is the
        first value returned by ``resnet_v2.resnet_v2_152``. The session is
        left open; closing it is the caller's job.
    """
    # Single raw image in, preprocessed image out.
    pre_image = tf.placeholder(tf.float32, [None, None, 3])
    processed_image = cnn_preprocessing.preprocess_for_eval(
        pre_image/255.0, img_dim, img_dim)

    # Batched, already-preprocessed images that feed the network.
    images = tf.placeholder(tf.float32, [None, img_dim, img_dim, 3])
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        probs, endpoints = resnet_v2.resnet_v2_152(
            images, num_classes=1001, is_training=False)

    block4 = endpoints['resnet_v2_152/block4']
    print(block4)

    init_fn = slim.assign_from_checkpoint_fn(
        'Data/CNNModels/resnet_v2_152.ckpt',
        slim.get_model_variables('resnet_v2_152'))
    sess = tf.Session()
    init_fn(sess)

    return dict(
        images_placeholder=images,
        block4=block4,
        session=sess,
        processed_image=processed_image,
        pre_image=pre_image,
        probs=probs,
    )
def RES(inputs, seq_len, reuse = False):
    """ResNet-v2-152 feature extractor followed by coordinate channels and an LSTM.

    All variables live under the ``"OCR"`` variable scope. Intermediate
    shapes are printed while the graph is built. Earlier backbone
    experiments (ResNet-50/101, ResNeXt-50, Inception-v3) that the original
    kept as commented-out code are not reproduced.

    Args:
        inputs: Tensor handed unchanged to ``resnet_v2.resnet_v2_152``.
        seq_len: Forwarded to ``LSTM``.
        reuse: ``reuse`` flag of the enclosing variable scope.

    Returns:
        Tuple ``(lstm_output, conv_features)`` where ``conv_features`` is the
        output of the ``"Normalize"`` projection, before coordinates are added.
    """
    with tf.variable_scope("OCR", reuse=reuse):
        print("inputs shape:",inputs.shape)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, _ = resnet_v2.resnet_v2_152(inputs,
                                             None,
                                             is_training=True,
                                             global_pool=False,
                                             output_stride=16)
        print("ResNet shape:",net.shape)

        # Pull the network straight down to 256 channels; the original note
        # gives the resulting shape as [N 1 512 256].
        with tf.variable_scope("Normalize"):
            net = slim.conv2d(net, 1024, [2,2], [2,1],
                              normalizer_fn=slim.batch_norm,
                              activation_fn=None)
            for depth in (512, 256):
                net = slim.conv2d(net, depth, [1,1],
                                  normalizer_fn=slim.batch_norm,
                                  activation_fn=None)

        conv_features = net
        print("ResNet shape:",net.shape)

        # Append coordinate information; embedd_size is the number of added
        # channels. Per the original note, max_width_height is the maximum
        # width after downscaling and the real maximum image width is
        # max_width_height * 4.
        with tf.variable_scope("Coordinates"):
            max_width_height = MAX_IMAGE_WIDTH//8
            embedd_size = 64
            net = Coordinates(net, max_width_height, embedd_size)
            print("Coordinates shape:",net.shape)

        with tf.variable_scope("LSTM"):
            net = tf.squeeze(net, squeeze_dims=1)
            print("SEQ shape:",net.shape)
            # Original note on the result: N, W*H, 256
            net = LSTM(net, 256+embedd_size, seq_len)
            print("lstm shape:",net.shape)

        return net, conv_features
def test_batch():
    """Predict a code for every test image and write the submission CSV.

    Rebuilds the training graph, restores
    ``./models_crop/transfer_learn_235000``, runs the network over every file
    in ``./data/my_data/test/`` in batches of ``BATCH`` and writes
    ``./csv/sample_submit_0528.csv`` with the columns ``file_name`` and
    ``file_code``.

    The last batch may be smaller than ``BATCH``; previously those trailing
    ``len(files) % BATCH`` images were dropped from the output.
    """
    TEST_DATA = './data/my_data/test/'
    BATCH = 5
    N_CHARS = 5  # second axis of the (-1, 5, 11) reshape below
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    tfx = tf.placeholder(tf.float32, [None, 60, 120, 3])
    tfy = tf.placeholder(tf.float32, [None, 5, 11])
    out, _ = resnet_v2.resnet_v2_152(tfx, num_classes=55)
    out = tf.reshape(out, (-1, 5, 11))

    # One cross-entropy term per character position, sliced on axis 1 so the
    # loss does not depend on the batch size.
    # NOTE(review): the loss and optimizer are never run here; presumably they
    # are built so Saver() sees the same variable set as the training graph.
    # Confirm before removing them.
    char_losses = [
        tf.losses.softmax_cross_entropy(tfy[:, k], out[:, k])
        for k in range(N_CHARS)
    ]
    loss = char_losses[0]
    for char_loss in char_losses[1:]:
        loss = loss + char_loss
    train_op = tf.train.MomentumOptimizer(0.0005, 0.9).minimize(loss)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    all_test_names = os.listdir(TEST_DATA)
    # Ceiling division so a final partial batch is still predicted.
    test_time = (len(all_test_names) + BATCH - 1) // BATCH
    rows = []

    # The context manager releases the session (and GPU memory) on exit.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        print('test batch')
        saver.restore(sess, './models_crop/transfer_learn_235000')

        for idx in range(test_time):
            batch_names = all_test_names[idx * BATCH:(idx + 1) * BATCH]
            img_bch = []
            for file_name in batch_names:
                img = cv2.imread(TEST_DATA + file_name)
                resize_img = resize(img, (60, 120))
                crop_img = np.asarray(resize_img, np.float32)
                crop_img = crop_img / 255.
                img_bch.append(crop_img)

            out_put = sess.run(out, feed_dict={tfx: img_bch})
            for file_name, scores in zip(batch_names, out_put):
                out_labels = scores.argmax(axis=1)
                # Class 10 is skipped when assembling the code string.
                out_str = ''.join(str(s) for s in out_labels if s != 10)
                rows.append((file_name, out_str))
            print('idx--all', idx, '--', test_time)

    # Build the frame once instead of growing it row by row with df.loc.
    df = pd.DataFrame(rows, columns=["file_name", "file_code"])
    df.to_csv("./csv/sample_submit_0528.csv", index=None)
def train():
    """Train ResNet-v2-152 to read 5-character codes from 60x120 images.

    The 55 network outputs are reshaped to ``(batch, 5, 11)`` and trained
    with one softmax cross-entropy term per character position. Runs
    1,000,000 steps, printing the loss every second step and saving a
    checkpoint every 5000 steps.
    """
    TRAIN_DATA = './data/my_data/train/'
    VAL_DATA = './data/my_data/val/'
    BATCH = 5
    N_CHARS = 5  # second axis of the (-1, 5, 11) reshape below
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    tfx = tf.placeholder(tf.float32, [None, 60, 120, 3])
    tfy = tf.placeholder(tf.float32, [None, 5, 11])
    out, _ = resnet_v2.resnet_v2_152(tfx, num_classes=55)
    out = tf.reshape(out, (-1, 5, 11))

    # Slice on axis 1 (character position), not axis 0 (sample). The previous
    # ``tfy[k]`` / ``out[k]`` form indexed samples and only covered the whole
    # batch because BATCH happens to equal 5. For BATCH == 5 both forms give
    # the same value.
    char_losses = [
        tf.losses.softmax_cross_entropy(tfy[:, k], out[:, k])
        for k in range(N_CHARS)
    ]
    loss = char_losses[0]
    for char_loss in char_losses[1:]:
        loss = loss + char_loss
    train_op = tf.train.MomentumOptimizer(0.0005, 0.9).minimize(loss)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    # Training and validation images are pooled; only the count is used here.
    all_train_names = os.listdir(TRAIN_DATA) + os.listdir(VAL_DATA)

    # The context manager releases the session (and GPU memory) on exit.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        for i in range(1000000):
            batch_idx = np.random.randint(0, len(all_train_names), BATCH)
            bat_imgs, bat_labels = load_img_labels_by_name(batch_idx)
            one_hot_labels = parse_labels(bat_labels)
            losses, _ = sess.run((loss, train_op),
                                 feed_dict={
                                     tfx: bat_imgs,
                                     tfy: one_hot_labels
                                 })
            if i % 2 == 0:
                print(i, 'loss', losses)
            if i % 5000 == 0:
                # NOTE(review): checkpoints go to ./models_vgg19/ although the
                # network is ResNet and test_batch() restores from
                # ./models_crop/ -- confirm the intended directory.
                saver.save(sess, './models_vgg19/transfer_learn_%d' % i)
def resnet152(inputs, num_classes, is_training, global_pool = True):
    """Build ResNet-v2-152 and return its logits and a predictions dict.

    Args:
        inputs: Tensor passed to ``resnet_v2.resnet_v2_152``.
        num_classes: Number of output classes.
        is_training: Whether to build the network in training mode. This is
            now forwarded to the network; it used to be ignored, so the
            network was always built with the library default.
        global_pool: Forwarded to ``resnet_v2.resnet_v2_152``.

    Returns:
        Tuple ``(logits, predictions)`` where ``predictions`` has the keys
        ``"classes"`` (argmax of the logits over axis 1) and
        ``"probabilities"`` (the network's ``"predictions"`` end point).
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_152(inputs,
                                                     num_classes,
                                                     is_training=is_training,
                                                     global_pool=global_pool,
                                                     reuse=tf.AUTO_REUSE)
    predictions = {
        "classes": tf.argmax(logits, axis=1),
        "probabilities": end_points["predictions"]
    }
    return logits, predictions
def logits_and_labels(self, xs_ph):
    """Return the ResNet-v2-152 logits and predicted labels for ``xs_ph``.

    The input is mapped through ``x * 2 - 1`` before it reaches the network
    (presumably [0, 1] -> [-1, 1]; the input range is not checked here).

    Args:
        xs_ph: Input tensor or placeholder.

    Returns:
        Tuple ``(logits, predicted_labels)``; the labels are the argmax over
        axis 1 of the network's ``'predictions'`` end point.
    """
    rescaled = xs_ph * 2.0 - 1.0
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        network_outputs = resnet_v2.resnet_v2_152(rescaled,
                                                  num_classes=self.n_class,
                                                  is_training=False,
                                                  reuse=tf.AUTO_REUSE)
    logits, end_points = network_outputs
    return logits, tf.argmax(end_points['predictions'], 1)
def __call__(self, ens_x_input, vgg_x_input, inc_x_input, tcd_x_input):
    """Constructs the ensemble and returns probabilities for the given inputs.

    The ensemble is a weighted sum of logits from Inception-ResNet-v2,
    Inception-v3, VGG-16 and ResNet-v2-152, plus 0.8 times the weighted
    auxiliary logits of the two Inception models.

    Args:
        ens_x_input: Input for Inception-ResNet-v2 (weight 0.7).
        vgg_x_input: Input for VGG-16 (weight 0.1) and ResNet-v2-152
            (weight 0.05).
        inc_x_input: Input for Inception-v3 (weight 0.2).
        tcd_x_input: Second input fed to both Inception models (weight 0.1
            each).

    Returns:
        Softmax of the combined logits.
    """
    reuse = True if self.built else None
    logits = None
    aux_logits = None
    weights = [[0.7, 0.1], [0.2, 0.1]]
    all_inputs = [[ens_x_input, tcd_x_input], [inc_x_input, tcd_x_input]]
    scopes = [
        inception_resnet_v2.inception_resnet_v2_arg_scope(),
        inception.inception_v3_arg_scope()
    ]
    models = [inception_resnet_v2.inception_resnet_v2, inception.inception_v3]
    # The second input of each model always reuses the variables created (or
    # reused) for the first input.
    reuse_flags = [reuse, True]

    for model_idx, model in enumerate(models):
        with slim.arg_scope(scopes[model_idx]):
            for idx, inputs in enumerate(all_inputs[model_idx]):
                result = model(inputs,
                               num_classes=self.num_classes,
                               is_training=False,
                               reuse=reuse_flags[idx])
                weight = weights[model_idx][idx]
                # [:, 1:] slices out the background class
                weighted_logits = result[0][:, 1:] * weight
                weighted_aux = result[1]['AuxLogits'][:, 1:] * weight
                # Identity check: ``== None`` on a tensor is not a reliable
                # "unset" test.
                if logits is None:
                    logits = weighted_logits
                    aux_logits = weighted_aux
                else:
                    logits += weighted_logits
                    aux_logits += weighted_aux

    with slim.arg_scope(vgg.vgg_arg_scope()):
        weight = 0.1
        result = vgg.vgg_16(vgg_x_input, num_classes=1000, is_training=False)
        logits += result[0] * weight

    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        weight = 0.05
        # Build in inference mode like every other ensemble member; without
        # is_training=False batch norm would use per-batch statistics.
        result = resnet_v2.resnet_v2_152(vgg_x_input,
                                         num_classes=self.num_classes,
                                         is_training=False,
                                         reuse=reuse)
        # NOTE(review): an axis-less squeeze would also drop a batch
        # dimension of size 1 -- confirm callers never pass a single image.
        logits += tf.squeeze(result[0])[:, 1:] * weight

    self.built = True
    aux_weight = 0.8
    logits += aux_logits * aux_weight
    predictions = layers_lib.softmax(logits)
    return predictions
def __call__(self, x_input):
    """Constructs model and return probabilities for given input.

    The input is normalised with ``image_normalize`` using entry 8 of
    ``normalization_method`` before it is fed to ResNet-v2-152. Variables are
    reused on every call after the first.

    Args:
        x_input: Input image tensor.

    Returns:
        The first input of the op that produces the ``'predictions'`` end
        point.
    """
    reuse = None
    if self.built:
        reuse = True

    normalized = image_normalize(x_input, normalization_method[8])
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_152(normalized,
                                                num_classes=self.num_classes,
                                                is_training=False,
                                                reuse=reuse)
    self.built = True

    # Strip off the extra reshape op at the output
    predictions = end_points['predictions']
    return predictions.op.inputs[0]
def choose_net(inputs, net_name):
    """Build the requested two-class network and return its score tensor.

    Args:
        inputs: Input tensor for the network.
        net_name: One of ``'vgg'`` (VGG-19), ``'resnet'`` (ResNet-v2-152),
            ``'fpn_vgg'`` or ``'fpn_res'`` (FPN on VGG-19 / ResNet-v1-101).

    Returns:
        The network output: the reshaped ``(-1, 2)`` logits for ``'resnet'``,
        the raw network output for ``'vgg'``, and ``net_scores`` of the FPN
        object for the FPN variants.

    Raises:
        ValueError: If ``net_name`` is not one of the supported names.
    """
    # FPN variant name -> backbone name understood by fpn.FPN.
    fpn_backbones = {'fpn_vgg': 'vgg_19', 'fpn_res': 'resnet_v1_101'}

    if net_name == 'vgg':
        net_out, _ = vgg.vgg_19(inputs, num_classes=2)
    elif net_name == 'resnet':
        net_out, _ = resnet_v2.resnet_v2_152(inputs, num_classes=2)
        net_out = tf.reshape(net_out, (-1, 2))
    elif net_name in fpn_backbones:
        fpn_net = fpn.FPN(inputs=inputs, net_name=fpn_backbones[net_name])
        net_out = fpn_net.net_scores
    else:
        supported = ['vgg', 'resnet'] + sorted(fpn_backbones)
        raise ValueError('the chosen model %r is not supported; expected one '
                         'of %s' % (net_name, ', '.join(supported)))
    return net_out
def _build_base_net(inputs, net_opts, is_training, reuse=None): "return a net from slim research nets. is_training must be an actual boolean, not a tensor, which is why we're abstracting this out." if net_opts['base_net'] == 'resnet_v1_152': with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope: inputs = _vgg_preprocess(inputs) base_net, _ = resnet_v1.resnet_v1_152( inputs, is_training=False if net_opts['is_batchnorm_fixed'] else is_training, global_pool=False, reuse=reuse) if DEBUG: print('resnet_out:') print(base_net.shape.as_list()) elif net_opts['base_net'] == 'resnet_v2_152': with slim.arg_scope(resnet_v2.resnet_arg_scope()) as scope: inputs = _vgg_preprocess(inputs) base_net, _ = resnet_v2.resnet_v2_152( inputs, is_training=False if net_opts['is_batchnorm_fixed'] else is_training, global_pool=False, reuse=reuse) if DEBUG: print('resnet_out:') print(base_net.shape.as_list()) elif net_opts['base_net'] == 'inception_v3': #WARNING: is_train for inception controls not just batch norm but dropout. So it's a little awkward. We may need more functionality here later TODO ''' TODO add inception preprocessing: https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/preprocessing_factory.py #WARNING untested. Not sure I fully understand slim scopes yet base_scope = 'InceptionV3' with slim.arg_scope(inception.inception_v3_arg_scope()) as scope: with slim.variable_scope(scope, base_scope, [inputs, None], reuse=False) as scope: with slim.arg_scope([layers_lib.batch_norm, layers_lib.dropout], is_training=is_training): base_net, _ = inception_v3_base(inputs,scope=scope) ''' elif net_opts['base_net'] == 'nothing': # a nothing for debugging purposes base_net = inputs else: raise Exception("basenet name not recognized") return base_net
def resnet(filenames, session_id, res, perplexity, early_exaggeration,
           learning_rate, dpi):
    """Extract ResNet-v2-152 features for a list of images and save them to CSV.

    Every image is loaded, expanded to three channels if it is 2-D, resized
    to 224x224 and pushed through a headless (``num_classes=None``)
    ResNet-v2-152 restored from ``models/resnet_v2_152.ckpt``.

    Args:
        filenames: Paths of the images to process.
        session_id: Forwarded to ``utils.save_features_to_csv_file``.
        res, perplexity, early_exaggeration, learning_rate, dpi: Only used to
            build the output file name; each is formatted with ``%d``.

    Returns:
        The feature array after ``squeeze()``.
    """
    # Clean up model
    tf.reset_default_graph()

    # Load images
    images = np.zeros((len(filenames), 224, 224, 3), dtype=np.float32)
    for i, imageName in enumerate(filenames):
        # Formatted so the output is the same on Python 2 and Python 3; the
        # old ``print i, imageName`` statement is a SyntaxError on Python 3.
        print('%d %s' % (i, imageName))
        img = skimage.io.imread(imageName)
        if len(img.shape) == 2:
            # 2-D black-and-white image, but the network needs 3 channels
            img = np.expand_dims(img, 2)
            img = np.repeat(img, 3, axis=2)
        # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this
        # call needs an older SciPy or a replacement resize.
        img = scipy.misc.imresize(img, (224, 224))
        images[i, :, :, :] = img

    in_images = tf.placeholder(tf.float32, images.shape)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        model, intermed = resnet_v2.resnet_v2_152(in_images,
                                                  None,
                                                  is_training=False)

    restored_variables = tf.contrib.framework.get_variables_to_restore()
    restorer = tf.train.Saver(restored_variables)
    with tf.Session() as sess:
        img_net_path = 'models/resnet_v2_152.ckpt'
        restorer.restore(sess, img_net_path)
        features = sess.run(model, feed_dict={in_images: images})

    # Clean up model
    tf.reset_default_graph()

    # Remove dimensions that are only 1 long.
    # NOTE(review): with a single input file this also removes the batch
    # dimension -- confirm the CSV writer copes with that.
    features = features.squeeze()
    utils.save_features_to_csv_file(
        features, filenames, session_id,
        'Resnet_features:Resolution:%d_Perplexity:%d_EarlyExaggeration:%d_LearningRate:%d_DPI:%d.csv'
        % (res, perplexity, early_exaggeration, learning_rate, dpi))
    return features
def main(_):
    """Run the selected ImageNet classifiers and write ensemble labels to a file.

    Each of the eleven checkpoints is loaded into its own graph, evaluated on
    every batch yielded by ``load_images`` and its per-image class
    probabilities collected. ``FLAGS.test_idx`` selects the models: ``20``
    runs all but model 3, a value in 1..11 runs only that model, and any
    other value runs all of them. The 1000-class models (ResNet-v1, VGG) get
    a zero "background" column prepended so every model yields 1001 classes.

    ``FLAGS.ensemble_type`` is ``'mean'`` (argmax of the averaged
    probabilities) or ``'vote'`` (median of the per-model argmax labels).

    Raises:
        ValueError: If ``FLAGS.ensemble_type`` is neither ``'mean'`` nor
            ``'vote'``. This is raised before any model is run; it used to
            surface as a NameError after all the work was done.
    """
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    ensemble_type = FLAGS.ensemble_type
    if ensemble_type not in ('mean', 'vote'):
        raise ValueError(
            "unsupported ensemble_type %r; expected 'mean' or 'vote'" %
            (ensemble_type,))
    test_idx = int(FLAGS.test_idx)

    tf.logging.set_verbosity(tf.logging.INFO)

    checkpoint_path_list = [
        FLAGS.checkpoint_path_inception_v1, FLAGS.checkpoint_path_inception_v2,
        FLAGS.checkpoint_path_inception_v3, FLAGS.checkpoint_path_inception_v4,
        FLAGS.checkpoint_path_inception_resnet_v2,
        FLAGS.checkpoint_path_resnet_v1_101,
        FLAGS.checkpoint_path_resnet_v1_152,
        FLAGS.checkpoint_path_resnet_v2_101,
        FLAGS.checkpoint_path_resnet_v2_152, FLAGS.checkpoint_path_vgg_16,
        FLAGS.checkpoint_path_vgg_19
    ]
    # Entry i is the normalization used for checkpoint i (same order as above).
    normalization_method = [
        'default', 'default', 'default', 'default', 'global', 'caffe_rgb',
        'caffe_rgb', 'default', 'default', 'caffe_rgb', 'caffe_rgb'
    ]

    pred_list = []
    filenames_list = []
    for idx, checkpoint_path in enumerate(checkpoint_path_list, 1):
        if test_idx == 20 and idx in [3]:
            continue
        if test_idx in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] and test_idx != idx:
            continue

        with tf.Graph().as_default():
            # Prepare graph. Models 1, 2, 6, 7, 10 and 11 get 224x224 inputs.
            _x_input = tf.placeholder(tf.float32, shape=batch_shape)
            if idx in [1, 2, 6, 7, 10, 11]:
                x_input = tf.image.resize_images(_x_input, [224, 224])
            else:
                x_input = _x_input
            x_input = image_normalize(x_input, normalization_method[idx - 1])

            if idx == 1:
                with slim.arg_scope(inception.inception_v1_arg_scope()):
                    _, end_points = inception.inception_v1(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 2:
                with slim.arg_scope(inception.inception_v2_arg_scope()):
                    _, end_points = inception.inception_v2(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 3:
                with slim.arg_scope(inception.inception_v3_arg_scope()):
                    _, end_points = inception.inception_v3(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 4:
                with slim.arg_scope(inception.inception_v4_arg_scope()):
                    _, end_points = inception.inception_v4(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 5:
                with slim.arg_scope(
                        inception.inception_resnet_v2_arg_scope()):
                    _, end_points = inception.inception_resnet_v2(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 6:
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    _, end_points = resnet_v1.resnet_v1_101(
                        x_input, num_classes=1000, is_training=False)
            elif idx == 7:
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    _, end_points = resnet_v1.resnet_v1_152(
                        x_input, num_classes=1000, is_training=False)
            elif idx == 8:
                with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                    _, end_points = resnet_v2.resnet_v2_101(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 9:
                with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                    _, end_points = resnet_v2.resnet_v2_152(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 10:
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    _, end_points = vgg.vgg_16(
                        x_input, num_classes=1000, is_training=False)
                    end_points['predictions'] = tf.nn.softmax(
                        end_points['vgg_16/fc8'])
            elif idx == 11:
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    _, end_points = vgg.vgg_19(
                        x_input, num_classes=1000, is_training=False)
                    end_points['predictions'] = tf.nn.softmax(
                        end_points['vgg_19/fc8'])

            # Fetch only the class probabilities. Running the whole
            # end_points dict would copy every intermediate activation out of
            # the session on each batch. The key is capitalised for some
            # models and lower-case for others.
            if 'Predictions' in end_points:
                predictions_op = end_points['Predictions']
            else:
                predictions_op = end_points['predictions']

            # Run computation
            saver = tf.train.Saver(slim.get_model_variables())
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=tf.train.Scaffold(saver=saver),
                checkpoint_filename_with_path=checkpoint_path,
                master=FLAGS.master)

            pred_in = []
            filenames_list = []
            with tf.train.MonitoredSession(
                    session_creator=session_creator) as sess:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    filenames_list.extend(filenames)
                    predictions = sess.run(predictions_op,
                                           feed_dict={_x_input: images})
                    if idx in [6, 7, 10, 11]:
                        # 1000-class models: prepend a zero background column
                        # sized from the actual batch, not FLAGS.batch_size.
                        predictions = predictions.reshape(-1, 1000)
                        background = np.zeros([predictions.shape[0], 1])
                        predictions = np.concatenate(
                            [background, predictions], axis=1)
                    pred_in.extend(predictions.reshape(-1, num_classes))
            pred_list.append(pred_in)

    # pred_list is model_num X batch X class_num
    if ensemble_type == 'mean':
        pred = np.mean(pred_list, axis=0)  # batch X class_num
        labels = np.argmax(pred, axis=1)  # batch
    else:
        pred = np.argmax(pred_list, axis=2)  # model_num X batch
        # NOTE(review): the "vote" is the median of the class indices, which
        # can be fractional for an even number of models -- confirm that a
        # majority vote (mode) was not intended.
        labels = np.median(pred, axis=0)

    with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
        for filename, label in zip(filenames_list, labels):
            out_file.write('{0},{1}\n'.format(filename, label))
def model_fn(model_name, batch_size):
    """Build the graph for ``model_name`` and return its Adam training op.

    Args:
        model_name: One of ``'vgg19'``, ``'resnet200'``, ``'resnet101'``,
            ``'resnet152'``, ``'nasnet_cifar'``, ``'mobile_net'``,
            ``'inceptionv3'``, ``'transformer'``, ``'bert'`` or ``'small'``.
        batch_size: Leading dimension of every placeholder that is created.

    Returns:
        The op returned by ``AdamOptimizer.minimize`` on the summed loss.

    Raises:
        ValueError: If ``model_name`` is not supported. This previously
            surfaced as an UnboundLocalError on ``loss``.
    """

    def image_classifier_loss(build_network, label_shape):
        # Shared recipe for the image classifiers: 224x224 RGB input,
        # 1000 outputs, element-wise sigmoid cross-entropy.
        x = tf.placeholder(tf.float32, shape=(batch_size, 224, 224, 3))
        y = tf.placeholder(tf.float32, shape=label_shape)
        output, _ = build_network(x, 1000)
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                       logits=output)

    flat_labels = (batch_size, 1000)
    # The ResNet branches use labels with two extra singleton dimensions.
    spatial_labels = (batch_size, 1, 1, 1000)

    if model_name == 'vgg19':
        loss = image_classifier_loss(vgg.vgg_19, flat_labels)
    elif model_name == 'resnet200':
        loss = image_classifier_loss(resnet_v2.resnet_v2_200, spatial_labels)
    elif model_name == 'resnet101':
        loss = image_classifier_loss(resnet_v2.resnet_v2_101, spatial_labels)
    elif model_name == 'resnet152':
        loss = image_classifier_loss(resnet_v2.resnet_v2_152, spatial_labels)
    elif model_name == 'nasnet_cifar':
        loss = image_classifier_loss(nasnet.build_nasnet_cifar, flat_labels)
    elif model_name == 'mobile_net':
        loss = image_classifier_loss(mobilenet_v2.mobilenet, flat_labels)
    elif model_name == 'inceptionv3':
        loss = image_classifier_loss(inception.inception_v3, flat_labels)
    elif model_name == 'transformer':
        dm = DatasetManager('wmt14')
        dm.maybe_download_data_files()
        dm.load_vocab()
        transformer = transf.Transformer(
            num_heads=8,
            d_model=512,
            d_ff=2048,
            model_name=model_name,
            tf_sess_config=dict(allow_soft_placement=True)
        )
        train_params = dict(
            learning_rate=1e-4,
            batch_size=batch_size,
            seq_len=10,
            max_steps=300000,
        )
        transformer.build_model('wmt14', dm.source_id2word,
                                dm.target_id2word, 0, **train_params)
        loss = transformer._loss
    elif model_name == 'bert':
        bert_large_config_path = 'bert/pre-trained/large/cased_L-24_H-1024_A-16/bert_config.json'
        bert_config = modeling.BertConfig.from_json_file(
            bert_large_config_path)
        model = new_model_fn_builder(bert_config)
        # Integer features are derived from float placeholders (scaled by
        # 100, then cast to int32).
        features = {}
        for key, shape in (('input_ids', (batch_size, 128)),
                           ('input_mask', (batch_size, 128)),
                           ('segment_ids', (batch_size, 128)),
                           ('start_positions', (batch_size,)),
                           ('end_positions', (batch_size,))):
            features[key] = tf.cast(
                100 * tf.placeholder(tf.float32, shape=shape), tf.int32)
        loss = model(features)
    elif model_name == 'small':
        slim = tf.contrib.slim
        x = tf.placeholder(tf.float32, shape=(batch_size, 224, 224, 3))
        y = tf.placeholder(tf.float32, shape=(batch_size, 1000))
        # The network input is a slice of one large trainable variable with
        # the same shape as the placeholder, not the placeholder itself.
        v = tf.get_variable(name='large_variable',
                            shape=(3000, 224, 224, 3),
                            trainable=True)
        x = tf.slice(v, [0, 0, 0, 0], tf.shape(x), name='large_slice')
        net = slim.max_pool2d(x, [2, 2], 2)
        net = slim.conv2d(net, 128, [5, 5], trainable=False)
        net = slim.max_pool2d(net, [2, 2], 2)
        net = slim.conv2d(net, 128, [5, 5], trainable=False)
        net = slim.max_pool2d(net, [2, 2], 2)
        net = slim.conv2d(net, 128, [5, 5], trainable=False)
        net = slim.max_pool2d(net, [2, 2], 2)
        net = slim.flatten(net)
        net = slim.fully_connected(net, 1024, activation_fn=tf.nn.sigmoid,
                                   trainable=False)
        net = slim.fully_connected(net, 1000, activation_fn=None,
                                   trainable=False)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=net)
    else:
        raise ValueError('unsupported model_name: %r' % (model_name,))

    # TODO: Make lr, beta, epsilon value of parameter
    optimizer = tf.train.AdamOptimizer(learning_rate=0.2,
                                       beta1=0.9,
                                       beta2=0.98,
                                       epsilon=1e-9).minimize(
                                           tf.reduce_sum(loss))
    return optimizer
def model_fn(model_name, batch_size):
    """Build the graph for ``model_name`` and return its Adam training op.

    Model-specific modules are imported lazily inside the branch that needs
    them.

    Args:
        model_name: One of ``"vgg19"``, ``"resnet200"``, ``"resnet101"``,
            ``"resnet152"``, ``"nasnet_cifar"``, ``"mobile_net"``,
            ``"inceptionv3"``, ``"transformer"``, ``"bert"`` or ``"small"``.
        batch_size: Leading dimension of every placeholder that is created.

    Returns:
        The op returned by ``AdamOptimizer.minimize`` on the summed loss.

    Raises:
        ValueError: If ``model_name`` is not supported. This previously
            surfaced as an UnboundLocalError on ``loss``.
    """

    def image_classifier_loss(build_network, label_shape):
        # Shared recipe for the image classifiers: 224x224 RGB input,
        # 1000 outputs, element-wise sigmoid cross-entropy.
        x = tf.placeholder(tf.float32, shape=(batch_size, 224, 224, 3))
        y = tf.placeholder(tf.float32, shape=label_shape)
        output, _ = build_network(x, 1000)
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                       logits=output)

    flat_labels = (batch_size, 1000)
    # The ResNet branches use labels with two extra singleton dimensions.
    spatial_labels = (batch_size, 1, 1, 1000)

    if model_name == "vgg19":
        from tensorflow.contrib.slim.nets import vgg
        loss = image_classifier_loss(vgg.vgg_19, flat_labels)
    elif model_name == "resnet200":
        from tensorflow.contrib.slim.nets import resnet_v2
        loss = image_classifier_loss(resnet_v2.resnet_v2_200, spatial_labels)
    elif model_name == "resnet101":
        from tensorflow.contrib.slim.nets import resnet_v2
        loss = image_classifier_loss(resnet_v2.resnet_v2_101, spatial_labels)
    elif model_name == "resnet152":
        from tensorflow.contrib.slim.nets import resnet_v2
        loss = image_classifier_loss(resnet_v2.resnet_v2_152, spatial_labels)
    elif model_name == "nasnet_cifar":
        from tensorflow.contrib.slim.nets import nasnet
        loss = image_classifier_loss(nasnet.build_nasnet_cifar, flat_labels)
    elif model_name == "mobile_net":
        from tensorflow.contrib.slim.nets import mobilenet_v2
        loss = image_classifier_loss(mobilenet_v2.mobilenet, flat_labels)
    elif model_name == "inceptionv3":
        from tensorflow.contrib.slim.nets import inception_v3
        loss = image_classifier_loss(inception_v3.inception_v3, flat_labels)
    elif model_name == "transformer":
        import modeltransformer.transformer as transf
        from modeltransformer.data import DatasetManager
        dm = DatasetManager("wmt14")
        dm.maybe_download_data_files()
        dm.load_vocab()
        transformer = transf.Transformer(
            num_heads=8,
            d_model=512,
            d_ff=2048,
            model_name=model_name,
            tf_sess_config=dict(allow_soft_placement=True))
        train_params = dict(
            learning_rate=1e-4,
            batch_size=batch_size,
            seq_len=10,
            max_steps=300000,
        )
        transformer.build_model("wmt14", dm.source_id2word,
                                dm.target_id2word, 0, **train_params)
        loss = transformer._loss
    elif model_name == "bert":
        from bert.runsquad import new_model_fn_builder
        import modeling
        bert_config = modeling.BertConfig.from_json_file(
            "bert/bert_large/bert_config.json")
        model = new_model_fn_builder(bert_config)
        # Integer features are derived from float placeholders (scaled by
        # 100, then cast to int32).
        features = {}
        for key, shape in (("input_ids", (batch_size, 128)),
                           ("input_mask", (batch_size, 128)),
                           ("segment_ids", (batch_size, 128)),
                           ("start_positions", (batch_size, )),
                           ("end_positions", (batch_size, ))):
            features[key] = tf.cast(
                100 * tf.placeholder(tf.float32, shape=shape), tf.int32)
        loss = model(features)
    elif model_name == "small":
        slim = tf.contrib.slim
        x = tf.placeholder(tf.float32, shape=(batch_size, 224, 224, 3))
        y = tf.placeholder(tf.float32, shape=(batch_size, 1000))
        # The network input is a slice of one large trainable variable with
        # the same shape as the placeholder, not the placeholder itself.
        v = tf.get_variable(name="large_variable",
                            shape=(3000, 224, 224, 3),
                            trainable=True)
        x = tf.slice(v, [0, 0, 0, 0], tf.shape(x), name="large_slice")
        net = slim.max_pool2d(x, [2, 2], 2)
        net = slim.conv2d(net, 128, [5, 5], trainable=False)
        net = slim.max_pool2d(net, [2, 2], 2)
        net = slim.conv2d(net, 128, [5, 5], trainable=False)
        net = slim.max_pool2d(net, [2, 2], 2)
        net = slim.conv2d(net, 128, [5, 5], trainable=False)
        net = slim.max_pool2d(net, [2, 2], 2)
        net = slim.flatten(net)
        net = slim.fully_connected(net,
                                   1024,
                                   activation_fn=tf.nn.sigmoid,
                                   trainable=False)
        net = slim.fully_connected(net,
                                   1000,
                                   activation_fn=None,
                                   trainable=False)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=net)
    else:
        raise ValueError("unsupported model_name: %r" % (model_name, ))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.2,
                                       beta1=0.9,
                                       beta2=0.98,
                                       epsilon=1e-9).minimize(
                                           tf.reduce_sum(loss))
    return optimizer
def main():
    """Fine-tune ResNet-v2-152 on the preprocessed data set and report accuracy.

    Loads six arrays (training/validation/testing images and labels) from
    ``INPUT_DATA``, builds the network with ``N_CLASSES`` outputs, restores
    the variables returned by ``get_tuned_variables()`` from ``CKPT_FILE``,
    and runs ``STEPS`` momentum-SGD updates over the variables returned by
    ``get_trainable_variables()``.  Every 30 steps (and on the last step) a
    checkpoint is written to ``TRAIN_FILE`` and the validation accuracy is
    printed; the test accuracy is printed once training finishes.
    """
    # Load preprocessed data.  The file is indexed as a 6-element sequence:
    # [train_x, train_y, valid_x, valid_y, test_x, test_y].
    # NOTE(review): if INPUT_DATA stores an object array, recent NumPy
    # releases need allow_pickle=True here -- confirm against the writer.
    processed_data = np.load(INPUT_DATA)
    training_images = processed_data[0]
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    n_training_example = len(training_images)
    print("%d training examples, %d validation examples and %d testing examples." % (
        n_training_example, len(validation_labels), len(testing_labels)))

    images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, _ = resnet_v2.resnet_v2_152(images, N_CLASSES, is_training=False)
    # Flatten (?, 1, 1, N_CLASSES) to (?, N_CLASSES).  The class count comes
    # from N_CLASSES so the reshape always agrees with the network head and
    # with the one-hot labels below.
    logits = tf.reshape(net, [-1, N_CLASSES])

    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Define loss function and training process.  The cross-entropy term is
    # registered in the losses collection; get_total_loss() then collects it.
    tf.losses.softmax_cross_entropy(
        tf.one_hot(labels, N_CLASSES), logits, weights=1.0)
    total_loss = tf.losses.get_total_loss()

    # Saver used only to restore the pre-trained subset of variables.
    variables_to_restore = get_tuned_variables()
    saver_pre_trained = tf.train.Saver(var_list=variables_to_restore)

    # Perform gradient descent on the trainable variables only.
    train_vars = get_trainable_variables()
    optimizer = tf.train.MomentumOptimizer(learning_rate=LEARNING_RATE, momentum=0.9)
    grads = optimizer.compute_gradients(total_loss, var_list=train_vars)
    minimize_op = optimizer.apply_gradients(grads)

    # Saver for the training checkpoints written during the loop.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise everything first, then overwrite the pre-trained subset.
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Loading tuned variables from %s' % CKPT_FILE)
        saver_pre_trained.restore(sess, CKPT_FILE)

        # [start, end) is the current mini-batch window.  It is clamped to
        # the data set size so that BATCH > n_training_example never yields
        # an empty slice.
        start = 0
        end = min(BATCH, n_training_example)
        for step in range(STEPS):
            _, loss = sess.run([minimize_op, total_loss], feed_dict={
                images: training_images[start:end],
                labels: training_labels[start:end]})

            if step % 30 == 0 or step + 1 == STEPS:
                saver.save(sess, TRAIN_FILE, global_step=step)
                validation_accuracy = sess.run(evaluation_step, feed_dict={
                    images: validation_images, labels: validation_labels})
                print('Step %d: Training loss is %.1f Validation accuracy = %.1f%%' % (
                    step, loss, validation_accuracy * 100.0))

            # Advance the window; wrap to the beginning once the data set
            # has been consumed.
            start = end
            if start >= n_training_example:
                start = 0
            end = min(start + BATCH, n_training_example)

        test_accuracy = sess.run(evaluation_step, feed_dict={
            images: testing_images, labels: testing_labels})
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
if label == 2: return handwaving[random.choice(handwaving.files[:-20])] if label == 3: return running[random.choice(running.files[:-20])] if label == 4: return jogging[random.choice(jogging.files[:-20])] if label == 5: return walking[random.choice(walking.files[:-20])] if TRAIN: if USE_LIVE_CNN: #Currently not used pretrained_input = tf.placeholder(tf.float32, shape=(None, 224, 224, 3)) with slim.arg_scope(resnet_v2.resnet_arg_scope()): net, endpoints = resnet_v2.resnet_v2_152(pretrained_input, is_training=False) pretrained_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) #Build TCN TCN = TemporalConvNet([1, 2048], [1, 1], [5, 3], [1, 1], [1, 1], conv1DOp, ClassificationCELoss(1e-3, 6)) TCN.buildNetwork(pretrained_input, tf.reshape(net, [-1, 2048])) TCN.initNetwork() #Load pretrained resnet saver = tf.train.Saver(pretrained_variables) with tf.Session() as sess: saver.restore(sess, 'pretrained/resnet_v2_152/resnet_v2_152.ckpt') else: #Use static preprocessed inputs
# One step consumes one batch; int() truncates, so a trailing partial batch
# is not counted.
num_batches_per_epoch = int(tr_num_samples / batch_size)
num_steps_per_epoch = num_batches_per_epoch

# Validation dataset: build it, run the shared preprocessing with 299, 299
# (presumably the target height/width -- confirm against
# preprocessing_images_2), then batch it.
va_dataset, labels_encoding, va_num_samples = build_picture_dataset(
    "datasets/dogscats/valid")
va_dataset = preprocessing_images_2(va_dataset, 299, 299).batch(batch_size)

# Sizes of the new classification head.
intermediary = 100
n_outputs = len(labels_encoding)

# The graph input comes from the training dataset iterator.
# (A feedable alternative would be:
#  X = tf.placeholder(tf.float32, shape=[None, 299, 299, 3], name="X"))
iterator = tr_dataset.make_initializable_iterator()
X, y = iterator.get_next()

with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    logits, end_points = resnet_v2.resnet_v2_152(
        X,
        num_classes=1001,
        is_training=training,
    )

# Created before the new dense layers are added; a Saver without var_list
# covers the saveable variables that exist when it is constructed.
resnetsaver = tf.train.Saver()

with tf.name_scope("new_output_layer"):
    dogcats_logits = tf.layers.dense(
        inputs=logits,
        units=intermediary,
        activation=tf.nn.relu,
        name="dogscats_logits",
    )
    dogcats_logits = tf.layers.dense(
        inputs=dogcats_logits,
        units=n_outputs,
        name="dogscats_logits2",
    )
reshaped = np.reshape(reshaped, [-1, 28, 28, 1]) # Reshape 784 to 28x28x1 # Reshape to 299x299 images, then duplicate the single monochrome channel # across 3 RGB layers resized = zoom(reshaped, [1.0, RESIZE_FACTOR, RESIZE_FACTOR, 1.0]) resized = np.repeat(resized, 3, 3) # add color channels return resized images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='images') labels = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name='labels') with slim.arg_scope(nn_architecture.resnet_arg_scope( )): # https://kwotsin.github.io/tech/2017/02/11/transfer-learning.html logits, endpoints = nn_architecture.resnet_v2_152(inputs=images, num_classes=10) retrain = [ 'resnet_v2_152/logits', 'resnet_v2_152/block4' ] # could also remove some of the input channels (probably Conv2d_1a_3x3) to get rid of 3 color channel inputs and accept monochrome variables_to_restore = slim.get_variables_to_restore( exclude=retrain ) # this checks the current graph, so no custom nodes can be defined at this point, only those from create_network() sess = tf.Session() # #tensorboard logging tf.summary.FileWriter("log/tensorboard", sess.graph) saver = tf.train.Saver(variables_to_restore) saver.restore(sess, CHECKPOINT_PATH)
from tensorflow.contrib.slim.nets import vgg as vgg
from tensorflow.contrib.slim.nets import resnet_v2 as resnet_v2
from tensorflow.contrib.slim.nets import resnet_v1 as resnet_v1
from tensorflow.python.client import device_lib
import os
import cv2

# Expose only the first GPU to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Weight for an experimental class-imbalance loss; the live loss below does
# not use it.
loss_unbalance_w = 1.05

# Print the names of the GPUs TensorFlow can see.
print([
    device.name
    for device in device_lib.list_local_devices()
    if device.device_type == 'GPU'
])

# Inputs: single-channel 224x224 images and two-column label rows.
tfx = tf.placeholder(tf.float32, shape=[None, 224, 224, 1])
tfy = tf.placeholder(tf.float32, shape=[None, 2])

# ResNet-v2-152 with a 2-way head; is_training is left at its default.
# (Earlier note, translated: "how about trying to upgrade VGG16 to VGG19?")
out, end_points = resnet_v2.resnet_v2_152(inputs=tfx, num_classes=2)
out = tf.reshape(out, shape=[-1, 2])

# Softmax cross-entropy between the label rows and the 2-way logits.
# Alternatives tried previously and left disabled: a dense 'vgg_out' head on
# flattened fc8 features, a hand-written cross-entropy weighted by
# loss_unbalance_w, and a vgg.vgg_16 backbone with the same loss/optimizer.
loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)
train_op = tf.train.MomentumOptimizer(
    learning_rate=0.0005, momentum=0.9).minimize(loss)

# Fraction of samples whose arg-max prediction matches the arg-max label.
correct_prediction = tf.equal(
    tf.argmax(out, axis=1),
    tf.argmax(tfy, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Session configuration.
# NOTE(review): gpu_options is built here but not attached to config in the
# visible code -- confirm whether later code passes it on.
config = tf.ConfigProto(allow_soft_placement=True)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
config.gpu_options.allow_growth = True