def main():
    """Show the multi-scale glimpses a ``Retina`` extracts from sample images.

    Loads the images listed in ``paths``, asks ``Retina.foveate`` for
    ``num_patches`` patches per image at location ``(0, 0)``, merges the
    patches of each image with ``merge_images`` and displays one merged
    picture per image in a single-column matplotlib figure.

    The batch size is derived from ``paths`` so that the location tensor,
    the reshape and the subplot grid always agree with the number of images.
    """
    # paths
    data_dir = "../data/"
    paths = [data_dir + "./lenna.jpg", data_dir + "./cat.jpg"]
    batch_size = len(paths)

    # load images, concatenate them along dim 0, then move the last axis to
    # position 1 (channels-last -> channels-first, assuming img2array returns
    # (1, H, W, C) arrays when expand=True -- TODO confirm)
    imgs = [
        torch.from_numpy(img2array(path, desired_size=[512, 512], expand=True))
        for path in paths
    ]
    imgs = torch.cat(imgs).permute(0, 3, 1, 2)

    # One (0, 0) location per image, float64 like the original literal array.
    # Random alternative: torch.Tensor(batch_size, 2).uniform_(-1, 1)
    loc = torch.from_numpy(np.zeros((batch_size, 2)))

    num_patches = 5
    scale = 2
    patch_size = 10
    num_channels = 3  # channel count the reshape below expects (RGB assumed)

    ret = Retina(g=patch_size, k=num_patches, s=scale)
    glimpse = ret.foveate(imgs, loc).data.numpy()

    # split the flat glimpse into per-image, per-patch channel-first arrays,
    # then move channels last so each patch can be turned into an image
    glimpse = np.reshape(
        glimpse,
        [batch_size, num_patches, num_channels, patch_size, patch_size],
    )
    glimpse = np.transpose(glimpse, [0, 1, 3, 4, 2])

    merged = []
    for patches in glimpse:
        patch_imgs = [array2img(patch) for patch in patches]
        merged.append(reduce(merge_images, patch_imgs))

    merged = [np.asarray(m, dtype="float32") / 255.0 for m in merged]

    # squeeze=False keeps axs two-dimensional even for a single image, so
    # axs.flat is always valid
    _, axs = plt.subplots(nrows=batch_size, ncols=1, squeeze=False)
    for ax, image in zip(axs.flat, merged):
        ax.imshow(image)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
Beispiel #2
0
def main():
    """Show the multi-scale glimpses a ``retina`` extracts from sample images.

    Legacy-PyTorch variant: tensors are wrapped in ``Variable`` before use.
    ``data_dir`` is not defined in this function and must be provided at
    module level.

    The patch size, patch count and batch size are named once and reused for
    both the ``retina`` construction and the reshape, so the two cannot drift
    apart.
    """
    paths = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    batch_size = len(paths)

    # load images, concatenate them along dim 0 and permute the last axis to
    # position 1
    imgs = [
        torch.from_numpy(img2array(path, desired_size=[512, 512], expand=True))
        for path in paths
    ]
    imgs = Variable(torch.cat(imgs))
    imgs = imgs.permute(0, 3, 1, 2)

    # One (0, 0) location per image, float64 like the original literal array.
    # Random alternative: torch.Tensor(batch_size, 2).uniform_(-1, 1)
    loc = Variable(torch.from_numpy(np.zeros((batch_size, 2))))

    patch_size = 64
    num_patches = 3
    scale = 2
    num_channels = 3  # channel count the reshape below expects (RGB assumed)

    ret = retina(g=patch_size, k=num_patches, s=scale)
    glimpse = ret.foveate(imgs, loc).data.numpy()

    # split the flat glimpse into per-image, per-patch channel-first arrays,
    # then move channels last so each patch can be turned into an image
    glimpse = np.reshape(
        glimpse,
        [batch_size, num_patches, num_channels, patch_size, patch_size],
    )
    glimpse = np.transpose(glimpse, [0, 1, 3, 4, 2])

    merged = []
    for patches in glimpse:
        patch_imgs = [array2img(patch) for patch in patches]
        merged.append(reduce(merge_images, patch_imgs))

    merged = [np.asarray(m, dtype='float32') / 255.0 for m in merged]

    # squeeze=False keeps axs two-dimensional even for a single image, so
    # axs.flat is always valid
    _, axs = plt.subplots(nrows=batch_size, ncols=1, squeeze=False)
    for ax, image in zip(axs.flat, merged):
        ax.imshow(image)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
Beispiel #3
0
    # Read the training image into an array using the given size, threshold
    # and positive/negative flag values.
    train_data = utils.readImg2array(train_path,
                                     size,
                                     threshold,
                                     pflag=pflag,
                                     nflag=nflag)

    # create a list of test data, e.g. yosukekatada's sunglasses picture.
    test_path = os.path.join(cwd_path, 'data/sunglasses.jpg')
    test_data = utils.readImg2array(test_path,
                                    size,
                                    threshold,
                                    pflag=pflag,
                                    nflag=nflag)

    # build model
    model = dhnn.DHNN(pflag=pflag, nflag=nflag)

    # train() is given a list holding the single flattened training pattern
    print('[START] training.')
    model.train([train_data.flatten()])
    print('[END] training.')

    # predict() is given a list holding the single flattened test pattern
    print('[START] predicting.')
    recovery = model.predict([test_data.flatten()], epochs=50000)
    print('[END] predicting.')

    # Restore the first (only) result to the image shape and write it out.
    # This phase used to be announced as "predicting", which did not match
    # the "[END] saving." message that closes it.
    print('[START] saving.')
    recovery = recovery[0].reshape(size)
    outfile = os.path.join(cwd_path, 'data', 'recovery.jpg')
    utils.array2img(recovery, outFile=outfile, pflag=pflag, nflag=nflag)
    print('[END] saving.')
Beispiel #4
0
# Spatial-transformer smoke test: push a batch of images through
# `transformer` with a fixed identity affine transform and show the result.

# Join the four images along axis 0; the 4-way unpack below requires the
# result to be 4-D (presumably batch, height, width, channels -- the names
# B, H, W, C suggest so; confirm against how img1..img4 are loaded).
input_img = np.concatenate([img1, img2, img3, img4], axis=0)
B, H, W, C = input_img.shape
print("Input Img Shape: {}".format(input_img.shape))

# identity transform
theta = np.array([[1., 0, 0], [0, 1., 0]])

# Input placeholder with a free batch dimension.
x = tf.placeholder(tf.float32, [None, H, W, C])

with tf.variable_scope('spatial_transformer'):
    # Flatten the 2x3 matrix into the 6 values used as the bias below.
    theta = theta.astype('float32')
    theta = theta.flatten()

    # define loc net weight and bias
    loc_in = H * W * C
    loc_out = 6
    W_loc = tf.Variable(tf.zeros([loc_in, loc_out]), name='W_loc')
    b_loc = tf.Variable(initial_value=theta, name='b_loc')

    # tie everything together
    # Both the input and the weights are zeros, so every row of fc_loc is
    # just b_loc, i.e. the flattened theta.
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    h_trans = transformer(x, fc_loc)

# run session
# NOTE(review): the session is never closed in the visible code.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
y = sess.run(h_trans, feed_dict={x: input_img})
print("y: {}".format(y.shape))
# Display the first transformed image of the batch.
array2img(y[0]).show()
            newData = subImgs[:, idx]
            if data == None:
                data = newData
            else:
                data = numpy.hstack((data, newData))
            if i % actualisation == 0:
                U, D, mu, n = skl(data=data,
                                  U0=U,
                                  D0=D,
                                  mu0=mu,
                                  n0=n,
                                  ff=ff,
                                  K=K)
                data = None

            cv.ShowImage("Warped Stream", array2img(subImgs[:, idx], outSize))
            cv.ShowImage("Mean Stream", array2img(mu, outSize))
            cv.ShowImage("1st Eigenface Stream", array2img(U[:, 0], outSize))
            i = i + 1

        # Draw the extraction polygon
        cv.PolyLine(inImgGray, (polygon.corners(), ), True,
                    cv.RGB(255, 255, 255))
        cv.ShowImage("Webcam Stream", inImgGray)

        keyPressed = cv.WaitKey(10)
        if keyPressed == 10:
            extract = not (extract)
        elif keyPressed != -1:
            break
Beispiel #6
0
    # Forward transform: the bias carries the flattened theta.
    b_loc = tf.Variable(initial_value=theta, name='b_loc')

    # tie everything together
    # Zero input times W_loc plus b_loc; if W_loc is zero-initialised like
    # the one defined further down, every row of fc_loc equals b_loc
    # (W_loc's definition is outside this excerpt -- confirm).
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    h_trans = transformer(x, fc_loc)

    # inverse
    # inv_theta is defined outside this excerpt; it is flattened the same way
    # as theta (presumably it holds the inverse affine matrix -- confirm).
    inv_theta = inv_theta.astype('float32')
    inv_theta = inv_theta.flatten()

    # define loc net weight and bias
    # The Python names W_loc / b_loc / fc_loc are rebound here; h_trans was
    # already built from the earlier tensors and is not affected.
    loc_in = H * W * C
    loc_out = 6
    W_loc = tf.Variable(tf.zeros([loc_in, loc_out]), name='W_loc')
    b_loc = tf.Variable(initial_value=inv_theta, name='b_loc')

    # tie everything together
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    inv_h_trans = transformer(x, fc_loc)

# run session
# NOTE(review): the session is never closed in the visible code.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
y = sess.run(h_trans, feed_dict={x: input_img})
print("y: {}".format(y.shape))
# Display the first forward-transformed image.
array2img(y[0]).show()

# Feed the forward output back through the inverse-transform graph.
out_img = sess.run(inv_h_trans, feed_dict={x: y})
print("inv: {}".format(out_img.shape))
# Display the first image after the inverse transform.
array2img(out_img[0]).show()
Beispiel #7
0
                    subImgs = numpy.mat(cv2array(subImgTmp)).ravel().T
                else:
                    subImgs = numpy.hstack((subImgs, numpy.mat(cv2array(subImgTmp)).ravel().T))
            idx = maxLikelihood(subImgs, U, D, mu)
            polygon = polygonTmp[idx]

            # SKL!!!
            newData = subImgs[:, idx]
            if data == None:
                data = newData
            else:
                data = numpy.hstack((data, newData))
            if i % actualisation == 0:
                U, D, mu, n = skl(data=data, U0=U, D0=D, mu0=mu, n0=n, ff=ff, K=K)
                data = None

            cv.ShowImage("Warped Stream", array2img(subImgs[:, idx], outSize))
            cv.ShowImage("Mean Stream", array2img(mu, outSize))
            cv.ShowImage("1st Eigenface Stream", array2img(U[:, 0], outSize))
            i = i + 1

        # Draw the extraction polygon
        cv.PolyLine(inImgGray, (polygon.corners(),), True, cv.RGB(255, 255, 255))
        cv.ShowImage("Webcam Stream", inImgGray)

        keyPressed = cv.WaitKey(10)
        if keyPressed == 10:
            extract = not(extract)
        elif keyPressed != -1:
            break