def _build_graph(self):
        # build model
        self.net = vgg.VGG19(model_weights=self.model_weights,
                             pooling_type=self.pooling_type,
                             verbose=self.verbose)

        self._initialize_images()
        # build_model returns a mapping of layer names to tensors (e.g. self.net['input'] below)
        self.net = self.net.build_model(self.content_img)

        style_loss = self.sum_style_loss()
        content_loss = self.sum_content_loss()
        # total variation denoising
        tv_loss = tf.image.total_variation(self.net['input'])

        alpha = self.content_weight
        beta = self.style_weight
        theta = self.tv_weight

        # linear combination of the loss components
        self.total_loss = alpha * content_loss + beta * style_loss + theta * tv_loss
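
# The sum_content_loss and sum_style_loss helpers are not part of this excerpt.  A minimal
# sketch of what such helpers typically compute is shown below; the attribute names
# content_targets and style_gram_targets are placeholders, not the original class's fields.
def sum_content_loss(self):
    # mean squared error between generated activations and the precomputed content targets
    loss = tf.constant(0.0)
    for layer, target in self.content_targets.items():  # placeholder attribute
        loss += tf.reduce_mean(tf.square(self.net[layer] - target))
    return loss

def sum_style_loss(self):
    # mean squared error between Gram matrices of generated and target style activations
    loss = tf.constant(0.0)
    for layer, target_gram in self.style_gram_targets.items():  # placeholder attribute
        channels = self.net[layer].get_shape().as_list()[-1]
        feats = tf.reshape(self.net[layer], (-1, channels))
        gram = tf.matmul(feats, feats, transpose_a=True)
        loss += tf.reduce_mean(tf.square(gram - target_gram))
    return loss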
Example No. 2
width      = 224 >> 2   # not in the excerpt; assumed to match height so the snippet runs
height     = 224 >> 2
channel    = 3
n_outputs  = 10
model_name = "models/vgg19/digists"
data_path  = "../data_img/MNIST/train/"

# Step 0: Global Parameters
epochs     = 2
lr_rate    = 0.0001
batch_size = 32

# Step 1: Create Model
# model = vgg.VGG11((height, width, channel), classes = n_outputs, filters = 8)
# model = vgg.VGG13((height, width, channel), classes = n_outputs, filters = 8)
# model = vgg.VGG16((height, width, channel), classes = n_outputs, filters = 8)
model = vgg.VGG19((height, width, channel), classes = n_outputs, filters = 8)

# Step 2: Define Metrics
model.compile(optimizer= tf.keras.optimizers.Adam(learning_rate = lr_rate),
              loss     = tf.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics  = ['accuracy'])
model.summary()

if sys.argv[1] == "train":
    # Step 3: Load data
    X_train, Y_train, X_test, Y_test = loader.load_data(data_path, width, height, True, 0.8, False)
    # Step 4: Training
    # Create a callback that saves the model's weights after every epoch
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath = model_name,
                                                     save_weights_only=True,
                                                     verbose=0, save_freq="epoch")
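    # The excerpt ends here; below is a sketch of how training would typically continue,
    # passing the checkpoint callback to model.fit (the validation data is an assumption,
    # not part of the original snippet):
    history = model.fit(X_train, Y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_data=(X_test, Y_test),
                        callbacks=[cp_callback])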
Example No. 3
The original caffemodel:
	www.robots.ox.ac.uk/~vgg/research/very_deep/
has been translated into a numpy ndarray:
	https://mega.nz/#!xZ8glS6J!MAnE91ND_WyfZ_8mvkuSa2YcA7q-1ehfSm-Q1fxOvvs
This implementation is adapted from:
	https://github.com/machrisaa/tensorflow-vgg.git
'''
import sys
sys.path.append('./utils')
import numpy as np
import tensorflow as tf
import img
import vgg

# load images and vgg19 coefficients
bgr = np.array([
    img.convert_img(img.resize_img(img.load_img('./data/img/tiger.jpg'))),
    img.convert_img(img.resize_img(img.load_img('./data/img/file.jpg'))),
])
vgg19 = vgg.VGG19('./data/vgg19.npy')

# build vgg19
_, height, width, _ = bgr.shape
x_bgr = vgg19.input_bgr(height, width)
vgg19.build_upto(x_bgr, 'prob')

# object classification
with tf.Session() as sess:
    prob = vgg19.layers['prob'].eval(feed_dict={x_bgr: bgr})
vgg19.predict(prob)
def load_and_train(options):
    # unpack parameters
    sty_imgs = options.sty_imgs
    sty_weights = np.array(options.sty_weights)
    cont_img = options.cont_img
    output_file = options.output_file
    output_scale = options.output_scale
    learn_rate = options.learn_rate
    alpha = np.float32(options.alpha)
    beta = np.float32(options.beta)
    num_epoch = options.num_epoch
    vgg19_loc = options.vgg19_loc
    # load images and vgg19 coefficients
    sty_features = load_sty_features()
    cont_feature = load_cont_feature()
    cont = load_cont_img(cont_img, output_scale)
    vgg_obj = vgg.VGG19(vgg19_loc)
    cont_ten = comp_cont_ten(cont, cont_feature, vgg_obj)
    gram, gram_coef = comp_sty_gram(load_sty_imgs(sty_imgs), sty_weights,
                                    sty_features, vgg_obj)
    # model
    cont_remix = tf.Variable(cont)
    vgg_obj.build_upto(cont_remix, 'pool5', False)
    # style loss: compare Gram matrices of the generated image against the style targets
    gamma = np.float32(1.0 / len(sty_features))
    gram_style = {}
    for style in sty_features:
        this_shape = vgg_obj.layers[style].get_shape().as_list()
        this_Ml = this_shape[1] * this_shape[2]
        reshaped = tf.reshape(vgg_obj.layers[style], (-1, this_shape[3]))
        gram_style[style] = tf.matmul(tf.transpose(reshaped),
                                      reshaped) / (this_Ml**2)
    loss_style = tf.constant(np.float32(0.0))
    for style in sty_features:
        loss_style += tf.reduce_sum(
            tf.square(gram_style[style] - gram[style])) * gram_coef[style]
    # content loss function
    loss_content = tf.reduce_mean(
        tf.square(vgg_obj.layers[cont_feature] - cont_ten))
    # penalize local pixel noise
    loss_noise = tf.reduce_mean(
        tf.abs(
            tf.nn.max_pool(cont_remix,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 1, 1, 1],
                           padding='VALID') -
            tf.nn.max_pool(-cont_remix,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 1, 1, 1],
                           padding='VALID')))
    # train step
    loss = gamma * loss_style + alpha * loss_content + beta * loss_noise
    err = float('inf')
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for idx in range(num_epoch):
            sess.run(train_step)
            # evaluate the individual loss terms
            this_loss_content = alpha * loss_content.eval()
            this_loss_style = gamma * loss_style.eval()
            this_loss_noise = beta * loss_noise.eval()
            this_err = this_loss_content + this_loss_style + this_loss_noise
            print('epoch', idx, ': content loss', this_loss_content,
                  'style loss', this_loss_style, 'noise loss', this_loss_noise)
            if this_err < err:
                err = this_err
                output = cont_remix.eval()[0, :, :, :]
    # save image
    img.save_img(output_file, img.revert_img(output))
Example No. 5

# Next, let's instantiate a VGG19 model for the content image:

# In[3]:


import vgg
import keras.backend as K
import keras.layers as kl
import keras.models as km

# Note that we'll be working quite a bit with the TensorFlow objects that underlie Keras
content_model_input = kl.Input(tensor=K.tf.Variable(content_img))

content_base_model = vgg.VGG19(input_tensor=content_model_input)
evaluator = K.function([content_base_model.input], [content_base_model.output])
feature_maps = evaluator([content_img])


# In[4]:



# The function defined above provides the output of the last activation in VGG19.  However, this is not the layer that we need.  Indeed, in the original neural style transfer paper, the authors found that good aesthetic results come from matching the (unactivated) feature maps in the second convolution of the fourth block, called 'block4_conv2' (have a look at the VGG file if you're confused by what this means).  We can generate a new Keras model that does this for us easily:

# In[5]:



# Define the layer outputs that we are interested in
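# (The code cell that followed is not part of this excerpt.  A minimal sketch, reusing the
# names above and the same pattern as the create_model helper in Example No. 7 below:)
content_layers = ['block4_conv2']

# Build a model whose output is the unactivated block4_conv2 feature map of the content image
content_model = km.Model(inputs=content_base_model.inputs,
                         outputs=[content_base_model.get_layer(n).output
                                  for n in content_layers])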
Example No. 6
base_data = fluid.layers.data(name='base_image',   # name assumed; the start of this definition is cut off
                              shape=(3, img_nrows, img_ncols),
                              dtype='float32')
style_reference_data = fluid.layers.data(name='style_reference_image',
                                         shape=(3, img_nrows, img_ncols),
                                         dtype='float32')
combination_data = fluid.layers.data(name='combination_image',
                                     shape=(3, img_nrows, img_ncols),
                                     dtype='float32',
                                     stop_gradient=False)

# combine the 3 images into a single tensor
input_tensor = fluid.layers.concat(
    [base_data, style_reference_data, combination_data])

# build the VGG19 network with our 3 images as input
# the model will be loaded with pre-trained weights
model = vgg.VGG19()
outputs_dict = model.net(input=input_tensor)

# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)


def gram_matrix(x):
    assert len(x.shape) == 3

    features = fluid.layers.reshape(x,
                                    (-1, x.shape[0], x.shape[1] * x.shape[2]))
    gram = fluid.layers.matmul(features, features, False, True)
    gram = fluid.layers.squeeze(gram, [0])
    return gram
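
# The remaining util functions are cut off in this excerpt.  For context, a sketch of the
# style loss that usually accompanies gram_matrix, following the standard neural-style
# formulation (not necessarily the original port's exact code):
def style_loss(style, combination):
    # squared Frobenius distance between the Gram matrices of the style
    # reference and the combination image, normalized by channels and size
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return fluid.layers.reduce_sum(
        fluid.layers.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))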
Example No. 7
def create_model(input_img, output_layers):

    # Instantiate full VGG model w/ input img
    base_model = vgg.VGG19(input_tensor=kl.Input(tensor=K.tf.Variable(input_img)))
    return km.Model(inputs=base_model.inputs, outputs=[base_model.get_layer(n).output for n in output_layers])
Example No. 8
content_img = np.expand_dims(pixel_means(content_img), axis=0)
style_img = np.expand_dims(pixel_means(style_img), axis=0)

# Define the layer outputs that we are interested in
content_layers = ['block4_conv2']

# Create content model
content_model = create_model(content_img, content_layers)

# Create style model
style_layers = ['block1_relu1', 'block2_relu1', 'block3_relu1', 'block4_relu1', 'block5_relu1']
style_model = create_model(style_img, style_layers)

# Instantiate blend model
# Note that the blend model input is same shape/size as content image
blend_base_model = vgg.VGG19(input_tensor=kl.Input(shape=content_img.shape[1:]))

# blend_outputs = content_outputs + style_outputs
blend_outputs = ([blend_base_model.get_layer(n).output for n in content_layers] +
                 [blend_base_model.get_layer(n).output for n in style_layers])

blend_model = km.Model(inputs=blend_base_model.inputs, outputs=blend_outputs)

# Separate the model outputs into those intended for comparison with the content layer and the style layer
blend_content_outputs = [blend_model.outputs[0]]
blend_style_outputs = blend_model.outputs[1:]

content_loss = content_layer_loss(content_model.output, blend_content_outputs[0])

content_loss_evaluator = K.function([blend_model.input], [content_loss])

# For a correctly implemented gram_matrix, the following code will produce 113934860.0
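# (The verification code referenced above is not included in this excerpt.  For reference,
# a minimal Keras-backend Gram matrix is sketched below; this is a common formulation and
# not necessarily the exact implementation the check above expects:)
def gram_matrix(x):
    # x: a single feature-map tensor of shape (H, W, C)
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))  # (C, H*W)
    return K.dot(features, K.transpose(features))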
Example No. 9
        fake_patchs = self.G_net(lr)
        logits_fake = self.D_net(fake_patchs)
        feature_fake = self.vgg((fake_patchs + 1) / 2.)
        feature_real = self.vgg((hr + 1) / 2.)
        g_gan_loss = 1e-3 * self.loss_fn1(logits_fake,
                                          tlx.ones_like(logits_fake))
        g_gan_loss = tlx.ops.reduce_mean(g_gan_loss)
        mse_loss = self.loss_fn2(fake_patchs, hr)
        vgg_loss = 2e-6 * self.loss_fn2(feature_fake, feature_real)
        g_loss = mse_loss + vgg_loss + g_gan_loss
        return g_loss


G = SRGAN_g()
D = SRGAN_d()
VGG = vgg.VGG19(pretrained=True, end_with='pool4', mode='dynamic')
# Automatically initialize each layer's weight shapes from an input tensor.
# Computing and filling in 'in_channels' for every layer by hand is tedious,
# so just call 'init_build' with an input of the right shape and 'in_channels' is set automatically.
G.init_build(tlx.nn.Input(shape=(8, 3, 96, 96)))
D.init_build(tlx.nn.Input(shape=(8, 3, 384, 384)))


def train():
    G.set_train()
    D.set_train()
    VGG.set_eval()
    train_ds = TrainData()
    train_ds_img_nums = len(train_ds)
    train_ds = DataLoader(train_ds,
                          batch_size=batch_size,
Example No. 10
def train():
    # Seeds
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    # Device
    device = ("cuda" if torch.cuda.is_available() else "cpu")

    # Dataset and Dataloader
    transform = transforms.Compose([
        transforms.Resize(TRAIN_IMAGE_SIZE),
        transforms.CenterCrop(TRAIN_IMAGE_SIZE),
        # transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(DATASET_PATH, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

    # Load networks
    TransformerNetwork = transformer.TransformerNetwork().to(device)

    if USE_LATEST_CHECKPOINT:
        files = glob.glob(
            "/home/clng/github/fast-neural-style-pytorch/models/checkpoint*")
        if len(files) == 0:
            print("use latest checkpoint but no checkpoint found")
        else:
            files.sort(key=os.path.getmtime, reverse=True)
            latest_checkpoint_path = files[0]
            print("using latest checkpoint %s" % (latest_checkpoint_path))
            params = torch.load(latest_checkpoint_path, map_location=device)
            TransformerNetwork.load_state_dict(params)

    VGG = vgg.VGG19().to(device)

    # Get Style Features
    imagenet_neg_mean = torch.tensor([-103.939, -116.779, -123.68],
                                     dtype=torch.float32).reshape(1, 3, 1,
                                                                  1).to(device)
    style_image = utils.load_image(STYLE_IMAGE_PATH)
    if ADJUST_BRIGHTNESS == "1":
        style_image = cv2.cvtColor(style_image, cv2.COLOR_BGR2GRAY)
        style_image = utils.hist_norm(style_image,
                                      [0, 64, 96, 128, 160, 192, 255],
                                      [0, 0.05, 0.15, 0.5, 0.85, 0.95, 1],
                                      inplace=True)
    elif ADJUST_BRIGHTNESS == "2":
        style_image = cv2.cvtColor(style_image, cv2.COLOR_BGR2GRAY)
        style_image = cv2.equalizeHist(style_image)
    elif ADJUST_BRIGHTNESS == "3":
        pass  # auto-brightness disabled; see the commented-out HSV approach below
        # hsv = cv2.cvtColor(style_image, cv2.COLOR_BGR2HSV)
        # hsv = utils.auto_brightness(hsv)
        # style_image = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    style_image = ensure_three_channels(style_image)
    sname = os.path.splitext(os.path.basename(STYLE_IMAGE_PATH))[0] + "_train"
    cv2.imwrite(
        "/home/clng/datasets/bytenow/neural_styles/{s}.jpg".format(s=sname),
        style_image)

    style_tensor = utils.itot(style_image,
                              max_size=TRAIN_STYLE_SIZE).to(device)

    style_tensor = style_tensor.add(imagenet_neg_mean)
    B, C, H, W = style_tensor.shape
    style_features = VGG(style_tensor.expand([BATCH_SIZE, C, H, W]))
    style_gram = {}
    for key, value in style_features.items():
        style_gram[key] = utils.gram(value)

    # Optimizer settings
    optimizer = optim.Adam(TransformerNetwork.parameters(), lr=ADAM_LR)

    # Loss trackers
    content_loss_history = []
    style_loss_history = []
    total_loss_history = []
    batch_content_loss_sum = 0
    batch_style_loss_sum = 0
    batch_total_loss_sum = 0

    # Optimization/Training Loop
    batch_count = 1
    start_time = time.time()
    for epoch in range(NUM_EPOCHS):
        print("========Epoch {}/{}========".format(epoch + 1, NUM_EPOCHS))
        for content_batch, _ in train_loader:
            # Get current batch size in case of odd batch sizes
            curr_batch_size = content_batch.shape[0]

            # Free-up unneeded cuda memory
            # torch.cuda.empty_cache()

            # Zero-out Gradients
            optimizer.zero_grad()

            # Generate images and get features (channels flipped RGB -> BGR for VGG)
            content_batch = content_batch[:, [2, 1, 0]].to(device)
            generated_batch = TransformerNetwork(content_batch)
            content_features = VGG(content_batch.add(imagenet_neg_mean))
            generated_features = VGG(generated_batch.add(imagenet_neg_mean))

            # Content Loss
            MSELoss = nn.MSELoss().to(device)
            content_loss = CONTENT_WEIGHT * \
                MSELoss(generated_features['relu3_4'],
                        content_features['relu3_4'])
            batch_content_loss_sum += content_loss.item()  # .item() so the autograd graph isn't retained

            # Style Loss
            style_loss = 0
            for key, value in generated_features.items():
                s_loss = MSELoss(utils.gram(value),
                                 style_gram[key][:curr_batch_size])
                style_loss += s_loss
            style_loss *= STYLE_WEIGHT
            batch_style_loss_sum += style_loss.item()

            # Total Loss
            total_loss = content_loss + style_loss
            batch_total_loss_sum += total_loss.item()

            # Backprop and Weight Update
            total_loss.backward()
            optimizer.step()

            # Save Model and Print Losses
            if (((batch_count - 1) % SAVE_MODEL_EVERY == 0)
                    or (batch_count == NUM_EPOCHS * len(train_loader))):
                # Print Losses
                print("========Iteration {}/{}========".format(
                    batch_count, NUM_EPOCHS * len(train_loader)))
                print("\tContent Loss:\t{:.2f}".format(batch_content_loss_sum /
                                                       batch_count))
                print("\tStyle Loss:\t{:.2f}".format(batch_style_loss_sum /
                                                     batch_count))
                print("\tTotal Loss:\t{:.2f}".format(batch_total_loss_sum /
                                                     batch_count))
                print("Time elapsed:\t{} seconds".format(time.time() -
                                                         start_time))

                # Save Model
                checkpoint_path = SAVE_MODEL_PATH + "checkpoint_" + str(
                    batch_count - 1) + ".pth"
                torch.save(TransformerNetwork.state_dict(), checkpoint_path)
                print("Saved TransformerNetwork checkpoint file at {}".format(
                    checkpoint_path))

                # Save sample generated image
                sample_tensor = generated_batch[0].clone().detach().unsqueeze(
                    dim=0)
                sample_image = utils.ttoi(sample_tensor.clone().detach())
                sample_image_path = SAVE_IMAGE_PATH + "sample0_" + str(
                    batch_count - 1) + ".png"
                utils.saveimg(sample_image, sample_image_path)
                print("Saved sample tranformed image at {}".format(
                    sample_image_path))

                # Save loss histories
                content_loss_history.append(batch_content_loss_sum / batch_count)
                style_loss_history.append(batch_style_loss_sum / batch_count)
                total_loss_history.append(batch_total_loss_sum / batch_count)

            # Iterate Batch Counter
            batch_count += 1

    stop_time = time.time()
    # Print loss histories
    print("Done Training the Transformer Network!")
    print("Training Time: {} seconds".format(stop_time - start_time))
    print("========Content Loss========")
    print(content_loss_history)
    print("========Style Loss========")
    print(style_loss_history)
    print("========Total Loss========")
    print(total_loss_history)

    # Save TransformerNetwork weights
    TransformerNetwork.eval()
    TransformerNetwork.cpu()
    final_path = SAVE_MODEL_PATH + STYLE_NAME + ".pth"
    print("Saving TransformerNetwork weights at {}".format(final_path))
    torch.save(TransformerNetwork.state_dict(), final_path)
    print("Done saving final model")

    # Plot Loss Histories
    if PLOT_LOSS:
        utils.plot_loss_hist(content_loss_history, style_loss_history,
                             total_loss_history)
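
# utils.gram, used above to build style_gram and the style loss, is not shown in this
# excerpt.  A typical batched Gram matrix in PyTorch looks like the sketch below (the
# usual formulation; the repo's exact normalization may differ):
def gram(tensor):
    # tensor: feature maps of shape (B, C, H, W)
    B, C, H, W = tensor.shape
    x = tensor.view(B, C, H * W)
    # (B, C, C) Gram matrices, normalized by the number of elements per channel map
    return torch.bmm(x, x.transpose(1, 2)) / (C * H * W)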