Ejemplo n.º 1
0
def main():
    """Run two sample images through each network once and print the output shapes.

    The glimpse, core, action, location and baseline networks are built and
    called in that order; only the shapes of their outputs are reported.
    """
    # Read both sample pictures and stack them into a single tensor.
    image_files = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    tensors = [
        torch.from_numpy(img2array(f, desired_size=[512, 512], expand=True))
        for f in image_files
    ]
    imgs = torch.cat(tensors)

    # Unpacking doubles as a check that the stacked tensor has four dims.
    B, H, W, C = imgs.shape

    loc = torch.Tensor([[-1., 1.], [-1., 1.]])
    imgs = Variable(imgs)
    loc = Variable(loc)

    # Glimpse network: images + locations -> glimpse features.
    glimpse_net = glimpse_network(h_g=128, h_l=128, g=64, k=3, s=2, c=3)
    g_t = glimpse_net(imgs, loc)

    # Core network: starts from an all-zero hidden state, one row per g_t row.
    core = core_network(input_size=256, hidden_size=256)
    h_0 = Variable(torch.zeros(g_t.shape[0], 256))
    h_t = core(g_t, h_0)

    # The three heads below all read the same hidden state.
    action_head = action_network(256, 10)
    a_t = action_head(h_t)

    locator = location_network(256, 2, 0.11)
    mu, l_t = locator(h_t)

    baseline = baseline_network(256, 1)
    b_t = baseline(h_t)

    print("g_t: {}".format(g_t.shape))
    print("h_t: {}".format(h_t.shape))
    print("l_t: {}".format(l_t.shape))
    print("a_t: {}".format(a_t.shape))
    print("b_t: {}".format(b_t.shape))
Ejemplo n.º 2
0
def loadImages(path, batch_size, extension=".png"):
    """
    Load a random batch of images from a folder into one numpy array.

    Files matching ``path + "*" + extension`` are shuffled, at most
    ``batch_size`` of them are read with ``img2array(..., DIMS, expand=True)``
    and the results are concatenated along axis 0.

    :param string path: folder prefix; it is joined to the pattern by plain
        string concatenation, so it should end with a path separator
    :param int batch_size: maximum number of images to load
    :param string extension: file suffix to match
    :return numpy array: the loaded images concatenated along axis 0
        (presumably samples * height * width * channels -- depends on
        what img2array returns)
    :raises ValueError: if no file ends up selected (nothing matched, or
        ``batch_size`` is 0)
    """
    filenames = glob.glob(path + "*" + extension)
    shuffle(filenames)
    selected = filenames[:batch_size]
    if not selected:
        # np.concatenate([]) would also raise ValueError, but with a message
        # that says nothing about which folder or pattern came up empty.
        raise ValueError(
            "loadImages: no files matching %r were selected (batch_size=%r)"
            % (path + "*" + extension, batch_size)
        )
    imgs = [img2array(filename, DIMS, expand=True) for filename in selected]
    return np.concatenate(imgs, axis=0)
Ejemplo n.º 3
0
def main():
    """Plot retina glimpses and/or glimpse bounding boxes for two sample images.

    Which plots are produced is controlled by the TEST_GLIMPSE and
    TEST_BOUNDING flags; figures are written to ``plot_dir`` when SAVE is set.
    """
    # Read both sample pictures and stack them into one batch.
    image_files = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    stacked = torch.cat([
        torch.from_numpy(img2array(f, desired_size=[512, 512], expand=True))
        for f in image_files
    ])
    imgs = Variable(stacked)

    # 2x2 tensor of locations sampled uniformly between -1 and 1.
    loc = Variable(torch.Tensor(2, 2).uniform_(-1, 1))

    if TEST_GLIMPSE:

        sensor = retina(g=64, k=3, s=2)
        glimpse = sensor.foveate(imgs, loc).data.numpy()
        print("Glimpse: {}".format(glimpse.shape))

        # One subplot per (first axis, second axis) entry of the glimpse array.
        n_rows = glimpse.shape[0]
        n_cols = glimpse.shape[1]
        _, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(5, 2))
        for r in range(n_rows):
            for c in range(n_cols):
                cell = axs[r, c]
                cell.imshow(glimpse[r, c, :])
                cell.get_xaxis().set_visible(False)
                cell.get_yaxis().set_visible(False)
        if SAVE:
            plt.savefig(plot_dir + 'glimpses.png', format='png', dpi=300,
                        bbox_inches='tight')

    if TEST_BOUNDING:

        _, ax = plt.subplots(nrows=1, ncols=2)
        # Convert the locations using the size of axis 1 of the batch;
        # this must happen before imgs is replaced by its numpy version.
        coords = denormalize(imgs.shape[1], loc.data)
        imgs = imgs.data.numpy()
        for i, picture in enumerate(imgs):
            panel = ax[i]
            panel.imshow(picture)
            # Three boxes per image, each twice the size of the previous one.
            for size in (64, 128, 256):
                panel.add_patch(
                    bounding_box(coords[i, 0], coords[i, 1], size, color='r')
                )
            panel.get_xaxis().set_visible(False)
            panel.get_yaxis().set_visible(False)
        if SAVE:
            plt.savefig(plot_dir + 'bbox.png', format='png', dpi=300,
                        bbox_inches='tight')

    plt.show()
def main():
    """Foveate two sample images at location (0, 0) and show the merged patches."""
    # paths
    data_dir = "../data/"
    image_files = [data_dir + "./lenna.jpg", data_dir + "./cat.jpg"]

    # Load both pictures, stack them, then reorder the axes to (0, 3, 1, 2).
    stacked = torch.cat([
        torch.from_numpy(img2array(f, desired_size=[512, 512], expand=True))
        for f in image_files
    ])
    imgs = stacked.permute(0, 3, 1, 2)

    # Fixed all-zero locations; torch.Tensor(2, 2).uniform_(-1, 1) would
    # give random ones instead.
    loc = torch.from_numpy(np.zeros((2, 2)))

    num_patches = 5
    scale = 2
    patch_size = 10

    sensor = Retina(g=patch_size, k=num_patches, s=scale)
    glimpse = sensor.foveate(imgs, loc).data.numpy()

    # Split the flat output into one entry per image and patch, then move the
    # size-3 axis to the end.
    glimpse = glimpse.reshape([2, num_patches, 3, patch_size, patch_size])
    glimpse = glimpse.transpose([0, 1, 3, 4, 2])

    # For every image: turn each patch into an image and fold them into one.
    strips = []
    for sample in glimpse:
        patches = [array2img(patch) for patch in sample]
        strips.append(reduce(merge_images, patches))

    merged = [np.asarray(strip, dtype="float32") / 255.0 for strip in strips]

    _, axs = plt.subplots(nrows=2, ncols=1)
    for i, ax in enumerate(axs.flat):
        ax.imshow(merged[i])
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
Ejemplo n.º 5
0
def main():
    """Run a single RecurrentAttention step on two sample images and print shapes."""
    # Read both sample pictures and stack them into one batch.
    image_files = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    stacked = torch.cat([
        torch.from_numpy(img2array(f, desired_size=[512, 512], expand=True))
        for f in image_files
    ])
    imgs = Variable(stacked)

    # Only the batch size is needed; unpacking still requires four dims.
    B, _, _, _ = imgs.shape

    # Previous location: uniform random between -1 and 1; previous hidden
    # state: all zeros.
    l_t_prev = Variable(torch.Tensor(B, 2).uniform_(-1, 1))
    h_t_prev = Variable(torch.zeros(B, 256))

    ram = RecurrentAttention(64, 3, 2, 3, 128, 128, 256, 10, 0.11)
    h_t, l_t = ram(imgs, l_t_prev, h_t_prev)

    print("h_t: {}".format(h_t.shape))
    print("l_t: {}".format(l_t.shape))
Ejemplo n.º 6
0
def main():
    """Foveate two sample images at location (0, 0) and plot the merged glimpses."""
    # Read both sample pictures and stack them into one batch.
    image_files = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    stacked = torch.cat([
        torch.from_numpy(img2array(f, desired_size=[512, 512], expand=True))
        for f in image_files
    ])
    # Wrap first, then reorder the axes to (0, 3, 1, 2).
    imgs = Variable(stacked).permute(0, 3, 1, 2)

    # Fixed all-zero locations; torch.Tensor(2, 2).uniform_(-1, 1) would
    # give random ones instead.
    loc = Variable(torch.from_numpy(np.array([[0., 0.], [0., 0.]])))

    sensor = retina(g=64, k=3, s=2)
    glimpse = sensor.foveate(imgs, loc).data.numpy()

    # Split the output per image and patch, then move the size-3 axis last.
    glimpse = glimpse.reshape([2, 3, 3, 64, 64])
    glimpse = glimpse.transpose([0, 1, 3, 4, 2])

    # For every image: turn each patch into an image and fold them into one.
    strips = []
    for sample in glimpse:
        patches = [array2img(patch) for patch in sample]
        strips.append(reduce(merge_images, patches))

    merged = [np.asarray(strip, dtype='float32') / 255.0 for strip in strips]

    _, axs = plt.subplots(nrows=2, ncols=1)
    for i, ax in enumerate(axs.flat):
        ax.imshow(merged[i])
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
Ejemplo n.º 7
0
import numpy as np
from PIL import Image
import tensorflow as tf

from utils import img2array, array2img
from stn import spatial_transformer_network as transformer

# Size argument handed to img2array for every image below.
DIMS = (600, 600)
# Folder prefix the four cat images are read from.
data_dir = './data/'

# load 4 cat images
img1 = img2array(data_dir + 'cat1.jpg', DIMS, expand=True)  # , view=True)
img2 = img2array(data_dir + 'cat2.jpg', DIMS, expand=True)
img3 = img2array(data_dir + 'cat3.jpg', DIMS, expand=True)
img4 = img2array(data_dir + 'cat4.jpg', DIMS, expand=True)

# Join the four arrays along axis 0 and unpack the resulting 4-D shape.
input_img = np.concatenate([img1, img2, img3, img4], axis=0)
B, H, W, C = input_img.shape
print("Input Img Shape: {}".format(input_img.shape))

# identity transform
theta = np.array([[1., 0, 0], [0, 1., 0]])

# Float32 input placeholder; the first dimension is left unspecified.
x = tf.placeholder(tf.float32, [None, H, W, C])

with tf.variable_scope('spatial_transformer'):
    # Cast theta to float32 and flatten the 2x3 matrix to 6 values.
    theta = theta.astype('float32')
    theta = theta.flatten()

    # define loc net weight and bias
    # Number of values in one flattened input image.
    loc_in = H * W * C
Ejemplo n.º 8
0
	image = cv2.imread(img_path)
	c0 =  cv2.calcHist([image], [0], None, [256], [0.0,255.0])
	c1 =  cv2.calcHist([image], [1], None, [256], [0.0,255.0])
	c2 =  cv2.calcHist([image], [2], None, [256], [0.0,255.0])
	return np.concatenate((c0,c1,c2)).reshape(768,)


# Convert the images to numeric values:
# Parallel lists: entry i of each list describes the same book cover.
X_img = []
X_colors = []
X_isbns = []
Y = []
# Positive ISBNs are processed first with label 1, then negatives with label 0.
for isbn_group, label in ((pos_list, 1), (neg_list, 0)):
    for isbn in isbn_group:
        try:
            img_path = '../covers/%s.jpg'%isbn
            # Compute BOTH features before appending anything: if the colour
            # histogram fails after the pixel array was already appended, the
            # four lists would no longer line up index-for-index.
            pixels = img2array(img_path,img_h,img_w)
            color_hist = getColorHist(img_path)
        except Exception as e:
            # Best effort: report the cover that could not be processed and
            # carry on with the next one.
            print('Error for picture: %s.jpg'%isbn,e)
            continue
        X_img.append(pixels)
        X_colors.append(color_hist)
        Y.append(label)
        X_isbns.append(isbn)
import sys
sys.path.append("..")

import torch

import model
import utils

if __name__ == "__main__":
    # paths
    plot_dir = "../plots/"
    data_dir = "../data/"

    # Load both sample pictures, stack them, then reorder axes to (0, 3, 1, 2).
    paths = [data_dir + "./lenna.jpg", data_dir + "./cat.jpg"]
    imgs = torch.cat([
        torch.from_numpy(
            utils.img2array(path, desired_size=[512, 512], expand=True)
        )
        for path in paths
    ]).permute((0, 3, 1, 2))

    B, C, H, W = imgs.shape

    # Previous location: uniform random between -1 and 1; previous hidden
    # state: all zeros.
    l_t_prev = torch.FloatTensor(B, 2).uniform_(-1, 1)
    h_t_prev = torch.zeros(B, 256)

    ram = model.RecurrentAttention(64, 3, 2, C, 128, 128, 0.11, 256, 10)
    outputs = ram(imgs, l_t_prev, h_t_prev)
    # The model returns four values; only the first two are checked.
    h_t, l_t, _, _ = outputs

    assert h_t.shape == (B, 256)
    assert l_t.shape == (B, 2)
Ejemplo n.º 10
0
def convert(target_image, input_img, originals, batch_size=32, iterations=300):
    """
    Train a convnet to predict the six transformation parameters fed to the
    spatial transformer, then apply the learned transform to a folder of
    images and save the results.

    Training batches are read with ``loadImages("./mask/", batch_size)`` and
    the loss is the Huber loss between the transformed batch and the target.
    Training stops after ``iterations`` steps or as soon as the loss drops
    below 0.005.

    :param np.array target_image: array holding the target image; only
        ``target_image[0]`` is used (repeated ``batch_size`` times)
    :param input_img: not used by this function; kept so existing callers
        keep working
    :param string originals: folder prefix; every file matching
        ``originals + "*.png"`` is transformed and written to ``Results/``
        under its own base name
    :param int batch_size: number of images requested per training step
    :param int iterations: maximum number of training steps
    """
    import os  # local import: only needed to build the output file name

    H, W, C = (DIMS[0], DIMS[1], 3)
    x = tf.placeholder(tf.float32, [None, H, W, C])
    # NOTE(review): the target batch always has batch_size entries, while
    # loadImages may return fewer when the folder is small -- confirm the
    # mask folder holds at least batch_size images.
    target_batch = [target_image[0] for _ in range(batch_size)]
    food_x = tf.placeholder(tf.float32, [None, H, W, C])
    target = tf.placeholder(tf.float32, [None, H, W, C])
    with tf.variable_scope('spatial_transformer'):
        # Initial value of the localisation bias (six parameters).
        theta = np.array([1.1, .0, .0, .0, 1.1, .0]).astype('float32')

        # Conv layers; every layer uses 64 filters.
        h0 = lrelu(conv2d(target, 64, name='t_h0_conv'))
        h1 = lrelu(instance_norm(conv2d(h0, 64, name='t_h1_conv'), 't_bn1'))
        h2 = lrelu(instance_norm(conv2d(h1, 64, name='t_h2_conv'), 't_bn2'))

        # Fully connected layer: flatten everything except the batch axis.
        shape = h2.get_shape().as_list()
        h2_flat = tf.reshape(h2, [-1, reduce(lambda x, y: x * y, shape[1:])])
        l1 = linear(h2_flat, 512, scope="l1")
        # Zero weights plus the theta bias: the initial output equals theta.
        W_loc = tf.Variable(tf.zeros([l1.get_shape()[-1], 6]), name='W_fc1')
        b_loc = tf.Variable(initial_value=theta, name='b_loc')

        # tie everything together
        fc_loc = tf.matmul(l1, W_loc) + b_loc
        h_trans = transformer(x, fc_loc, [H, W])
        f_trans = transformer(food_x, fc_loc, [H, W])
        loss = tf.losses.huber_loss(target, h_trans)
        optim = tf.train.AdamOptimizer(1e-07).minimize(loss)

    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    # The with-block closes the session on exit; no explicit close is needed.
    with tf.Session(config=tfconfig) as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(iterations):
            # Only the loss value is needed on the Python side.
            _, step_loss = sess.run(
                [optim, loss],
                feed_dict={
                    x: loadImages("./mask/", batch_size),
                    target: target_batch
                })
            if i % 10 == 0:
                print(step_loss)
            if step_loss < 0.005:
                break

        # Transform and save the cropped images:
        for filename in glob.glob(originals + "*.png"):
            [y] = sess.run(
                [f_trans],
                feed_dict={
                    food_x: img2array(filename, DIMS, expand=True),
                    target: [target_batch[0]]
                })
            # Use the base name so the output path does not depend on which
            # folder was passed as `originals`.
            imageio.imwrite("Results/" + os.path.basename(filename),
                            (y[0] * 255).astype(np.uint8))
    tf.reset_default_graph()
Ejemplo n.º 11
0
            _, fc_o, l, w, b, y = sess.run(
                [optim, fc_loc, loss, W_loc, b_loc, h_trans],
                feed_dict={
                    x: loadImages("./mask/", batch_size),
                    target: target_batch
                })
            if i % 10 == 0:
                print(l)
            if l < 0.005:
                break

        # Transform and save the cropped images:
        filenames = glob.glob(originals + "*.png")
        for filename in filenames:
            [y] = sess.run(
                [f_trans],
                feed_dict={
                    food_x: img2array(filename, DIMS, expand=True),
                    target: [target_batch[0]]
                })
            imageio.imwrite("Results/" + filename.replace("./masked/", ""),
                            (y[0] * 255).astype(np.uint8))
        sess.close()
    tf.reset_default_graph()


# Script entry: load the target image, then run convert() on it.
print("Loading images")
target = img2array("./target.jpg", DIMS, expand=True)
print("Building net")
# "./mask/" is passed as the second argument and "./masked/" as the third.
convert(target, "./mask/", "./masked/")
Ejemplo n.º 12
0
        #for i in range (0, S-batch_size, batch_size):
        for i in range(iterations):
            _, fc_o, l, w, b, y = sess.run([optim, fc_loc, loss, W_loc, b_loc, h_trans], feed_dict={x:loadImages("./square/", batch_size), target:target_batch})
            if i%10 == 0:
                print(l)
            if l < 0.05:
                break

        # Transform and save the cropped images:
        filenames = glob.glob(originals+"**/*.jpg")
        print(filenames)
        random.shuffle(filenames)
        print(filenames)
        for filename in filenames[:3]: 
            [y] = sess.run([f_trans], feed_dict={food_x:img2array(filename, DIMS, expand=True), target:[target_batch[0]]})
            imageio.imwrite("Results/"+filename.replace(".jpg", str(time.time()).replace(".", "")+'.jpg'), (y[0]*255).astype(np.uint8))
        sess.close()
    tf.reset_default_graph()

# Script entry: every PNG under ./mask/ is loaded in turn and handed to
# convert() as the target image.
print("Loading images")
#target = img2array("./target.jpg", DIMS, expand=True)
filenames = glob.glob("./mask/*.png")
for filename in filenames:
    target = img2array(filename, DIMS, expand=True)
    print("Building net")
    #folders = ['alc/', 'food/', 'gambling/', 'other/']
    #folders = ['food/']
    #for folder in folders:
    # NOTE(review): the convert() definition matching this two-argument call
    # is not visible here -- confirm its signature accepts (target, folder).
    convert(target, "./ads/")