def _build_graph(self):
    # build model
    self.net = vgg.VGG19(model_weights=self.model_weights,
                         pooling_type=self.pooling_type,
                         verbose=self.verbose)
    self._initialize_images()
    self.net = self.net.build_model(self.content_img)

    style_loss = self.sum_style_loss()
    content_loss = self.sum_content_loss()

    # total variation denoising
    tv_loss = tf.image.total_variation(self.net['input'])

    alpha = self.content_weight
    beta = self.style_weight
    theta = self.tv_weight

    # linear combination between the loss components
    self.total_loss = alpha * content_loss + beta * style_loss + theta * tv_loss
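# sum_style_loss() and sum_content_loss() are defined elsewhere in this class.
# As a rough standalone illustration of what the content term usually reduces to
# (a hypothetical helper, not this class's actual method):
def content_layer_loss(p, x):
    # p: feature map of the content image, x: feature map of the generated image
    # mean squared difference between the two feature maps (Gatys et al.)
    return tf.reduce_mean(tf.square(x - p))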
# Step 0: Global Parameters
height = 224 >> 2
width = 224 >> 2  # assumed square inputs; width is not set in the original excerpt
channel = 3
n_outputs = 10
model_name = "models/vgg19/digists"
data_path = "../data_img/MNIST/train/"
epochs = 2
lr_rate = 0.0001
batch_size = 32

# Step 1: Create Model
# model = vgg.VGG11((height, width, channel), classes=n_outputs, filters=8)
# model = vgg.VGG13((height, width, channel), classes=n_outputs, filters=8)
# model = vgg.VGG16((height, width, channel), classes=n_outputs, filters=8)
model = vgg.VGG19((height, width, channel), classes=n_outputs, filters=8)

# Step 2: Define Metrics
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_rate),
              loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
print(model.summary())

if sys.argv[1] == "train":
    # Step 3: Load data
    X_train, Y_train, X_test, Y_test = loader.load_data(data_path, width, height, True, 0.8, False)

    # Step 4: Training
    # Create a callback that saves the model's weights after every epoch
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=model_name,
                                                     save_weights_only=True,
                                                     verbose=0,
                                                     save_freq="epoch")
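    # The excerpt stops after creating the checkpoint callback. A minimal sketch of
    # the fit call that would typically follow inside this "train" branch (standard
    # Keras API; assumes loader.load_data returned NumPy arrays as unpacked above):
    model.fit(X_train, Y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_test, Y_test),
              callbacks=[cp_callback])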
The original caffemodel (www.robots.ox.ac.uk/~vgg/research/very_deep/) has been
translated into numpy's ndarray:
https://mega.nz/#!xZ8glS6J!MAnE91ND_WyfZ_8mvkuSa2YcA7q-1ehfSm-Q1fxOvvs

This implementation is adapted from:
https://github.com/machrisaa/tensorflow-vgg.git
'''
import sys
sys.path.append('./utils')

import numpy as np
import tensorflow as tf

import img
import vgg

# load images and vgg19 coefficients
bgr = np.array([
    img.convert_img(img.resize_img(img.load_img('./data/img/tiger.jpg'))),
    img.convert_img(img.resize_img(img.load_img('./data/img/file.jpg'))),
])
vgg19 = vgg.VGG19('./data/vgg19.npy')

# build vgg19
_, height, width, _ = bgr.shape
x_bgr = vgg19.input_bgr(height, width)
vgg19.build_upto(x_bgr, 'prob')

# object classification
with tf.Session() as sess:
    prob = vgg19.layers['prob'].eval(feed_dict={x_bgr: bgr})
    vgg19.predict(prob)
def load_and_train(options):
    # unpack parameters
    sty_imgs = options.sty_imgs
    sty_weights = np.array(options.sty_weights)
    cont_img = options.cont_img
    output_file = options.output_file
    output_scale = options.output_scale
    learn_rate = options.learn_rate
    alpha = np.float32(options.alpha)
    beta = np.float32(options.beta)
    num_epoch = options.num_epoch
    vgg19_loc = options.vgg19_loc

    # load images and vgg19 coefficients
    sty_features = load_sty_features()
    cont_feature = load_cont_feature()
    cont = load_cont_img(cont_img, output_scale)
    vgg_obj = vgg.VGG19(vgg19_loc)
    cont_ten = comp_cont_ten(cont, cont_feature, vgg_obj)
    gram, gram_coef = comp_sty_gram(load_sty_imgs(sty_imgs), sty_weights,
                                    sty_features, vgg_obj)

    # model
    cont_remix = tf.Variable(cont)
    vgg_obj.build_upto(cont_remix, 'pool5', False)

    # style loss function
    gamma = np.float32(1.0 / len(sty_features))
    gram_style = {}
    for style in sty_features:
        this_shape = vgg_obj.layers[style].get_shape().as_list()
        this_Ml = this_shape[1] * this_shape[2]
        reshaped = tf.reshape(vgg_obj.layers[style], (-1, this_shape[3]))
        gram_style[style] = tf.matmul(tf.transpose(reshaped), reshaped) / (this_Ml**2)
    loss_style = tf.constant(np.float32(0.0))
    for style in sty_features:
        loss_style += tf.reduce_sum(
            tf.square(gram_style[style] - gram[style])) * gram_coef[style]

    # content loss function
    loss_content = tf.reduce_mean(
        tf.square(vgg_obj.layers[cont_feature] - cont_ten))

    # punish local pixel noise
    loss_noise = tf.reduce_mean(
        tf.abs(
            tf.nn.max_pool(cont_remix, ksize=[1, 3, 3, 1],
                           strides=[1, 1, 1, 1], padding='VALID') -
            tf.nn.max_pool(-cont_remix, ksize=[1, 3, 3, 1],
                           strides=[1, 1, 1, 1], padding='VALID')))

    # train step
    loss = gamma * loss_style + alpha * loss_content + beta * loss_noise
    err = float('inf')
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for idx in range(num_epoch):
            sess.run(train_step)
            # list all errors
            this_loss_content = alpha * loss_content.eval()
            this_loss_style = gamma * loss_style.eval()
            this_loss_noise = beta * loss_noise.eval()
            this_err = this_loss_content + this_loss_style + this_loss_noise
            print('epoch', idx, ': content loss', this_loss_content,
                  'style loss', this_loss_style, 'noise loss', this_loss_noise)
            if this_err < err:
                err = this_err
                output = cont_remix.eval()[0, :, :, :]

    # save image
    img.save_img(output_file, img.revert_img(output))
# Next, let's instantiate a VGG19 model for the content image:

# In[3]:

import vgg
import keras.backend as K
import keras.layers as kl
import keras.models as km

# Note that we'll be working quite a bit with the TensorFlow objects that underlie Keras
content_model_input = kl.Input(tensor=K.tf.Variable(content_img))
content_base_model = vgg.VGG19(input_tensor=content_model_input)
evaluator = K.function([content_base_model.input], [content_base_model.output])
feature_maps = evaluator([content_img])

# In[4]:

# The function defined above provides the output of the last activation in VGG19.
# However, this is not the layer that we need. Indeed, in the original neural style
# transfer paper, the authors found that good aesthetic properties were obtained by
# matching on the (unactivated) feature maps in the second convolution of the fourth
# block, called 'block4_conv2' (have a look at the VGG file if you're confused by
# what this means). We can generate a new Keras model that does this for us easily:

# In[5]:

# Define the layer outputs that we are interested in
                              dtype='float32')
style_reference_data = fluid.layers.data(name='style_reference_image',
                                         shape=(3, img_nrows, img_ncols),
                                         dtype='float32')
combination_data = fluid.layers.data(name='combination_image',
                                     shape=(3, img_nrows, img_ncols),
                                     dtype='float32',
                                     stop_gradient=False)

# combine the 3 images into a single tensor
input_tensor = fluid.layers.concat(
    [base_data, style_reference_data, combination_data])

# build the VGG19 network with our 3 images as input
# the model will be loaded with pre-trained weights
model = vgg.VGG19()
outputs_dict = model.net(input=input_tensor)

# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    assert len(x.shape) == 3
    features = fluid.layers.reshape(x, (-1, x.shape[0], x.shape[1] * x.shape[2]))
    gram = fluid.layers.matmul(features, features, False, True)
    gram = fluid.layers.squeeze(gram, [0])
    return gram
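# Of the four util functions mentioned above, only gram_matrix appears in this
# excerpt. A rough sketch of the per-layer style loss that typically follows,
# mirroring the Keras neural-style example this port resembles (the channels/size
# normalisation below is an assumption, not taken from this file):
def style_loss(style, combination):
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    # squared Frobenius distance between Gram matrices, normalised as in Gatys et al.
    return fluid.layers.reduce_sum(fluid.layers.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))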
def create_model(input_img, output_layers):
    # Instantiate full VGG model w/ input img
    base_model = vgg.VGG19(input_tensor=kl.Input(tensor=K.tf.Variable(input_img)))
    return km.Model(inputs=base_model.inputs,
                    outputs=[base_model.get_layer(n).output for n in output_layers])
content_img = np.expand_dims(pixel_means(content_img), axis=0)
style_img = np.expand_dims(pixel_means(style_img), axis=0)

# Define the layer outputs that we are interested in
content_layers = ['block4_conv2']

# Create content model
content_model = create_model(content_img, content_layers)

# Create style model
style_layers = ['block1_relu1', 'block2_relu1', 'block3_relu1', 'block4_relu1', 'block5_relu1']
style_model = create_model(style_img, style_layers)

# Instantiate blend model
# Note that the blend model input is same shape/size as content image
blend_base_model = vgg.VGG19(input_tensor=kl.Input(shape=content_img.shape[1:]))

# blend_outputs = content_outputs + style_outputs
blend_outputs = [blend_base_model.get_layer(n).output for n in content_layers] + \
                [blend_base_model.get_layer(n).output for n in style_layers]
blend_model = km.Model(inputs=blend_base_model.inputs, outputs=blend_outputs)

# Separate the model outputs into those intended for comparison with the content layer and the style layer
blend_content_outputs = [blend_model.outputs[0]]
blend_style_outputs = blend_model.outputs[1:]

content_loss = content_layer_loss(content_model.output, blend_content_outputs[0])
content_loss_evaluator = K.function([blend_model.input], [content_loss])

# For a correctly implemented gram_matrix, the following code will produce 113934860.0
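# That check is not included in this excerpt. For reference, a minimal Keras-backend
# gram_matrix sketch (an assumption; not necessarily the implementation behind the
# 113934860.0 figure): flatten the spatial dimensions of a (rows, cols, channels)
# feature map and take the feature-wise outer product.
def gram_matrix(x):
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))  # (channels, rows*cols)
    return K.dot(features, K.transpose(features))                   # (channels, channels)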
        fake_patchs = self.G_net(lr)
        logits_fake = self.D_net(fake_patchs)
        # map generator output from [-1, 1] to [0, 1] before the VGG feature extractor
        feature_fake = self.vgg((fake_patchs + 1) / 2.)
        feature_real = self.vgg((hr + 1) / 2.)
        g_gan_loss = 1e-3 * self.loss_fn1(logits_fake, tlx.ones_like(logits_fake))
        g_gan_loss = tlx.ops.reduce_mean(g_gan_loss)
        mse_loss = self.loss_fn2(fake_patchs, hr)
        vgg_loss = 2e-6 * self.loss_fn2(feature_fake, feature_real)
        g_loss = mse_loss + vgg_loss + g_gan_loss
        return g_loss

G = SRGAN_g()
D = SRGAN_d()
VGG = vgg.VGG19(pretrained=True, end_with='pool4', mode='dynamic')

# Automatically initialize layer weight shapes with an input tensor.
# Calculating and filling 'in_channels' of each layer is troublesome,
# so just use 'init_build' with an input shape; 'in_channels' of each layer will be set automatically.
G.init_build(tlx.nn.Input(shape=(8, 3, 96, 96)))
D.init_build(tlx.nn.Input(shape=(8, 3, 384, 384)))

def train():
    G.set_train()
    D.set_train()
    VGG.set_eval()
    train_ds = TrainData()
    train_ds_img_nums = len(train_ds)
    train_ds = DataLoader(train_ds, batch_size=batch_size,
def train():
    # Seeds
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    # Device
    device = ("cuda" if torch.cuda.is_available() else "cpu")

    # Dataset and Dataloader
    transform = transforms.Compose([
        transforms.Resize(TRAIN_IMAGE_SIZE),
        transforms.CenterCrop(TRAIN_IMAGE_SIZE),
        # transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(DATASET_PATH, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    # Load networks
    TransformerNetwork = transformer.TransformerNetwork().to(device)
    if USE_LATEST_CHECKPOINT is True:
        files = glob.glob("/home/clng/github/fast-neural-style-pytorch/models/checkpoint*")
        if len(files) == 0:
            print("use latest checkpoint but no checkpoint found")
        else:
            files.sort(key=os.path.getmtime, reverse=True)
            latest_checkpoint_path = files[0]
            print("using latest checkpoint %s" % (latest_checkpoint_path))
            params = torch.load(latest_checkpoint_path, map_location=device)
            TransformerNetwork.load_state_dict(params)
    VGG = vgg.VGG19().to(device)

    # Get Style Features
    imagenet_neg_mean = torch.tensor([-103.939, -116.779, -123.68], dtype=torch.float32).reshape(1, 3, 1, 1).to(device)
    style_image = utils.load_image(STYLE_IMAGE_PATH)
    if ADJUST_BRIGHTNESS == "1":
        style_image = cv2.cvtColor(style_image, cv2.COLOR_BGR2GRAY)
        style_image = utils.hist_norm(style_image,
                                      [0, 64, 96, 128, 160, 192, 255],
                                      [0, 0.05, 0.15, 0.5, 0.85, 0.95, 1],
                                      inplace=True)
    elif ADJUST_BRIGHTNESS == "2":
        style_image = cv2.cvtColor(style_image, cv2.COLOR_BGR2GRAY)
        style_image = cv2.equalizeHist(style_image)
    elif ADJUST_BRIGHTNESS == "3":
        a = 1
        # hsv = cv2.cvtColor(style_image, cv2.COLOR_BGR2HSV)
        # hsv = utils.auto_brightness(hsv)
        # style_image = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    style_image = ensure_three_channels(style_image)
    sname = os.path.splitext(os.path.basename(STYLE_IMAGE_PATH))[0] + "_train"
    cv2.imwrite("/home/clng/datasets/bytenow/neural_styles/{s}.jpg".format(s=sname), style_image)
    style_tensor = utils.itot(style_image, max_size=TRAIN_STYLE_SIZE).to(device)
    style_tensor = style_tensor.add(imagenet_neg_mean)
    B, C, H, W = style_tensor.shape
    style_features = VGG(style_tensor.expand([BATCH_SIZE, C, H, W]))
    style_gram = {}
    for key, value in style_features.items():
        style_gram[key] = utils.gram(value)

    # Optimizer settings
    optimizer = optim.Adam(TransformerNetwork.parameters(), lr=ADAM_LR)

    # Loss trackers
    content_loss_history = []
    style_loss_history = []
    total_loss_history = []
    batch_content_loss_sum = 0
    batch_style_loss_sum = 0
    batch_total_loss_sum = 0

    # Optimization/Training Loop
    batch_count = 1
    start_time = time.time()
    for epoch in range(NUM_EPOCHS):
        print("========Epoch {}/{}========".format(epoch + 1, NUM_EPOCHS))
        for content_batch, _ in train_loader:
            # Get current batch size in case of odd batch sizes
            curr_batch_size = content_batch.shape[0]

            # Free-up unneeded cuda memory
            # torch.cuda.empty_cache()

            # Zero-out Gradients
            optimizer.zero_grad()

            # Generate images and get features
            content_batch = content_batch[:, [2, 1, 0]].to(device)
            generated_batch = TransformerNetwork(content_batch)
            content_features = VGG(content_batch.add(imagenet_neg_mean))
            generated_features = VGG(generated_batch.add(imagenet_neg_mean))

            # Content Loss
            MSELoss = nn.MSELoss().to(device)
            content_loss = CONTENT_WEIGHT * MSELoss(generated_features['relu3_4'],
                                                    content_features['relu3_4'])
            batch_content_loss_sum += content_loss

            # Style Loss
            style_loss = 0
            for key, value in generated_features.items():
                s_loss = MSELoss(utils.gram(value), style_gram[key][:curr_batch_size])
                style_loss += s_loss
            style_loss *= STYLE_WEIGHT
            batch_style_loss_sum += style_loss.item()

            # Total Loss
            total_loss = content_loss + style_loss
            batch_total_loss_sum += total_loss.item()

            # Backprop and Weight Update
            total_loss.backward()
            optimizer.step()

            # Save Model and Print Losses
            if (((batch_count - 1) % SAVE_MODEL_EVERY == 0) or (batch_count == NUM_EPOCHS * len(train_loader))):
                # Print Losses
                print("========Iteration {}/{}========".format(batch_count, NUM_EPOCHS * len(train_loader)))
                print("\tContent Loss:\t{:.2f}".format(batch_content_loss_sum / batch_count))
                print("\tStyle Loss:\t{:.2f}".format(batch_style_loss_sum / batch_count))
                print("\tTotal Loss:\t{:.2f}".format(batch_total_loss_sum / batch_count))
                print("Time elapsed:\t{} seconds".format(time.time() - start_time))

                # Save Model
                checkpoint_path = SAVE_MODEL_PATH + "checkpoint_" + str(batch_count - 1) + ".pth"
                torch.save(TransformerNetwork.state_dict(), checkpoint_path)
                print("Saved TransformerNetwork checkpoint file at {}".format(checkpoint_path))

                # Save sample generated image
                sample_tensor = generated_batch[0].clone().detach().unsqueeze(dim=0)
                sample_image = utils.ttoi(sample_tensor.clone().detach())
                sample_image_path = SAVE_IMAGE_PATH + "sample0_" + str(batch_count - 1) + ".png"
                utils.saveimg(sample_image, sample_image_path)
                print("Saved sample transformed image at {}".format(sample_image_path))

                # Save loss histories
                content_loss_history.append(batch_total_loss_sum / batch_count)
                style_loss_history.append(batch_style_loss_sum / batch_count)
                total_loss_history.append(batch_total_loss_sum / batch_count)

            # Iterate Batch Counter
            batch_count += 1

    stop_time = time.time()

    # Print loss histories
    print("Done Training the Transformer Network!")
    print("Training Time: {} seconds".format(stop_time - start_time))
    print("========Content Loss========")
    print(content_loss_history)
    print("========Style Loss========")
    print(style_loss_history)
    print("========Total Loss========")
    print(total_loss_history)

    # Save TransformerNetwork weights
    TransformerNetwork.eval()
    TransformerNetwork.cpu()
    final_path = SAVE_MODEL_PATH + STYLE_NAME + ".pth"
    print("Saving TransformerNetwork weights at {}".format(final_path))
    torch.save(TransformerNetwork.state_dict(), final_path)
    print("Done saving final model")

    # Plot Loss Histories
    if (PLOT_LOSS):
        utils.plot_loss_hist(content_loss_history, style_loss_history, total_loss_history)