Example #1
def problem1():
    # Read stereo images and ground truth disparities
    i0 = rgb2gray(plt.imread('i0.png')).squeeze().astype(np.float32)
    i1 = rgb2gray(plt.imread('i1.png')).squeeze().astype(np.float32)
    gt = (255 * plt.imread('gt.png')).astype(np.int32)

    # Set Potts penalty
    lmbda = 3.0
    s = 10.0 / 255.0

    # Create 4 connected edge neighborhood
    edges = edges4connected(i0.shape[0], i0.shape[1])

    # Candidate search range
    candidate_disparities = np.arange(0, gt.max() + 1)

    # Graph cuts with zero initialization
    zero_init = np.zeros(gt.shape, dtype=np.int32)
    estimate_zero_init = alpha_expansion(i0, i1, edges, zero_init,
                                         candidate_disparities, s, lmbda)
    show_stereo(estimate_zero_init, gt)
    perc_correct = evaluate_stereo(estimate_zero_init, gt)
    print("Correct labels (zero init): %3.2f%%" % (perc_correct * 100))

    # Graph cuts with random initialization
    random_init = np.random.randint(low=0, high=gt.max() + 1, size=i0.shape)
    estimate_random_init = alpha_expansion(i0, i1, edges, random_init,
                                           candidate_disparities, s, lmbda)
    show_stereo(estimate_random_init, gt)
    perc_correct = evaluate_stereo(estimate_random_init, gt)
    print("Correct labels (random init): %3.2f%%" % (perc_correct * 100))
Example #2
    def forward(self, im1, im2):
        # grayscale both inputs, then extract x/y gradients with fixed convolutions
        im1g = rgb2gray(im1)
        im2g = rgb2gray(im2)

        im1gx = self.xconv(im1g)
        im1gy = self.yconv(im1g)

        im2gx = self.xconv(im2g)
        im2gy = self.yconv(im2g)

        (batch, channel, height, width) = im1.size()

        # normalize each gradient map into a spatial probability distribution;
        # only im1's distributions are passed through log (KLDivLoss convention)
        im1xd = F.softmax(im1gx.view(-1, height * width), dim=1)
        im2xd = F.softmax(im2gx.view(-1, height * width), dim=1)
        im1xd = torch.log(im1xd)

        im1yd = F.softmax(im1gy.view(-1, height * width), dim=1)
        im2yd = F.softmax(im2gy.view(-1, height * width), dim=1)
        im1yd = torch.log(im1yd)

        # small offset added before comparing the distributions
        self.loss = self.criterion(im1xd + 0.001,
                                   im2xd + 0.001) + self.criterion(
                                       im1yd + 0.001, im2yd + 0.001)
        return self.loss
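The module around this forward pass is not shown. A hedged sketch of how xconv, yconv, and criterion might be defined, assuming fixed Sobel kernels and a KL-divergence criterion (both are assumptions; KLDivLoss expects log-probabilities as input and probabilities as target, which matches the torch.log applied to im1xd/im1yd only):

import torch
import torch.nn as nn

class GradientDistributionLoss(nn.Module):
    # hypothetical container for the forward() above
    def __init__(self):
        super().__init__()
        sobel = torch.tensor([[-1., 0., 1.],
                              [-2., 0., 2.],
                              [-1., 0., 1.]])
        self.xconv = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
        self.yconv = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
        with torch.no_grad():
            self.xconv.weight.copy_(sobel.view(1, 1, 3, 3))
            self.yconv.weight.copy_(sobel.t().view(1, 1, 3, 3))
        self.criterion = nn.KLDivLoss(reduction='batchmean')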
Example #3
def computeLBPs(images, radius, n_points, compression=True):

    hs = {}
    for j in images.keys():  # iterate over the objects

        moon = copy.copy(images[j])

        # one uniform-LBP histogram per image of the object
        h = np.zeros((len(moon), n_points + 2), dtype=np.int64)
        for i in range(len(moon)):  # iterate over all images of the object

            lbp = skimage.feature.local_binary_pattern(rgb2gray(moon[i]),
                                                       n_points, radius,
                                                       'uniform')

            # histogram of LBP codes; keep only the occupied bins
            h1 = np.histogram(lbp.reshape(lbp.size), 255)
            h1 = np.take(h1[0], np.where(h1[0] > 0))[0]

            h[i] = h1
        if compression:
            # temporally smooth: average each histogram with its two
            # neighbors, then keep only every third image
            h = (h[2:] + h[1:-1] + h[:-2]) / 3
            h = h[:-1:3]
        hs[j] = h
    return hs
Example #4
    def import_dataset(self):
        train = load('train_32x32.mat')
        test = load('test_32x32.mat')
        train_data = train['X']
        train_labels = train['y']
        test_data = test['X']
        test_labels = test['y']

        train_data = np.transpose(train_data, [3, 0, 1, 2])
        train_data = utils.rgb2gray(train_data)
        train_data = utils.normalize(train_data, -1, 1)
        train_shape = (train_data.shape[0], train_data.shape[1] * train_data.shape[2])
        train_data = np.reshape(train_data, train_shape)
        train_labels = utils.one_hot_coding(train_labels)

        test_data = np.transpose(test_data, [3, 0, 1, 2])
        test_data = utils.rgb2gray(test_data)
        test_data = utils.normalize(test_data, -1, 1)
        test_shape = (test_data.shape[0], test_data.shape[1] * test_data.shape[2])
        test_data = np.reshape(test_data, test_shape)
        test_labels = utils.one_hot_coding(test_labels)
        self.im_size = train_data.shape[1]
        self.im_size = train_data.shape[1]

        #Create datasets from the above tensors
        self.train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
        self.test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
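utils.normalize and utils.one_hot_coding are not shown. A minimal sketch of plausible implementations for SVHN (the %10 wrap reflects SVHN storing digit 0 as label 10; treat the rest as assumptions):

import numpy as np

def normalize(x, low, high):
    # linearly rescale x into [low, high]
    x = (x - x.min()) / (x.max() - x.min())
    return x * (high - low) + low

def one_hot_coding(labels):
    # SVHN labels run 1..10 with 10 meaning digit 0; map to 0..9 and one-hot
    labels = labels.flatten() % 10
    out = np.zeros((labels.size, 10), dtype=np.float32)
    out[np.arange(labels.size), labels] = 1.0
    return out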
Example #5
def preprocessing(X_train, y_train, X_valid, y_valid, history_length=1):

    # TODO: preprocess your data here.
    # 1. convert the images in X_train/X_valid to gray scale. If you use rgb2gray() from utils.py, the output shape is (96, 96, 1)
    # 2. you can train your model with discrete actions (as you get them from read_data) by discretizing the action space
    #    using action_to_id() from utils.py.
    X_train_gray = np.array(
        [utils.rgb2gray(img).reshape(96, 96, 1) for img in X_train])
    X_valid_gray = np.array(
        [utils.rgb2gray(img).reshape(96, 96, 1) for img in X_valid])
    y_train = np.array([utils.action_to_id(a) for a in y_train])
    y_valid = np.array([utils.action_to_id(a) for a in y_valid])
    if history_length > 1:
        X_history = []
        y_history = []
        X_valid_history = []
        y_valid_history = []
        # iterate over full windows only, so the slice and label index stay in bounds
        for idx in range(0, X_train_gray.shape[0] - history_length + 1,
                         history_length):
            # note: reshape (not transpose) moves frames into the channel axis;
            # run_episode applies the same reshape at test time, so the layout matches
            X_history.append(X_train_gray[idx:idx + history_length].reshape(
                96, 96, history_length))
            y_history.append(y_train[idx + history_length - 1])
        for idx in range(0, X_valid_gray.shape[0] - history_length + 1,
                         history_length):
            X_valid_history.append(X_valid_gray[idx:idx +
                                                history_length].reshape(
                                                    96, 96, history_length))
            y_valid_history.append(y_valid[idx + history_length - 1])
        return np.array(X_history), np.array(y_history), np.array(
            X_valid_history), np.array(y_valid_history)

    # History:
    # At first you should only use the current image as input to your network to learn the next action. Then the input states
    # have shape (96, 96, 1). Later, add a history of the last N images to your state so that a state has shape (96, 96, N).

    return X_train_gray, y_train, X_valid_gray, y_valid
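utils.action_to_id is referenced but not shown. A hedged sketch of the usual discretization of the CarRacing [steering, gas, brake] action (the ids and thresholds are assumptions):

import numpy as np

# hypothetical discrete action ids
STRAIGHT, LEFT, RIGHT, ACCELERATE, BRAKE = 0, 1, 2, 3, 4

def action_to_id(a):
    # map a continuous [steering, gas, brake] action to a discrete id
    if a[0] < 0:
        return LEFT
    if a[0] > 0:
        return RIGHT
    if a[1] > 0:
        return ACCELERATE
    if a[2] > 0:
        return BRAKE
    return STRAIGHT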
Example #6
def load_data(im1_filename, im2_filename, flo_filename):
    """ Loads images and flow ground truth. Returns 4D tensors."""
    # load images as numpy array
    img1 = rgb2gray(read_image(im1_filename))
    img2 = rgb2gray(read_image(im2_filename))
    flo = read_flo(flo_filename)
    # convert to torch 4D tensor
    tensor1 = numpy2torch(img1).unsqueeze_(0)
    tensor2 = numpy2torch(img2).unsqueeze_(0)
    flow_gt = numpy2torch(flo).unsqueeze_(0)
    return tensor1, tensor2, flow_gt
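numpy2torch is assumed here. A minimal sketch that matches its use above (HWC numpy array to CHW float tensor; an assumption, not the source's definition):

import numpy as np
import torch

def numpy2torch(array):
    # convert an (H, W, C) or (H, W) numpy array to a (C, H, W) float tensor
    if array.ndim == 2:
        array = array[:, :, None]
    return torch.from_numpy(array.transpose(2, 0, 1).copy()).float()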
Example #7
def estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2):
	img1_g = rgb2gray(img1)
	img2_g = rgb2gray(img2)

	T = NUM_ITERS_OPTICAL_FLOW # num of optical flow iterations

	H, W, _ = img1.shape

	x, y = startX.copy(), startY.copy()
	valid_idx = np.logical_and(x > -1, y > -1)
	x, y = x[valid_idx], y[valid_idx]
	xy_neigh = getXYNeighbours(x, y, W, H)

	N , win, _ = xy_neigh.shape

	Ix_w = map_coordinates(Ix, [xy_neigh[:,:,1], xy_neigh[:,:,0]], order=1, mode='constant').reshape((N,win))
	Iy_w = map_coordinates(Iy, [xy_neigh[:,:,1],xy_neigh[:,:,0]], order=1, mode='constant').reshape((N,win))


	Apinv = getApinv(Ix_w, Iy_w)

	img1_w = map_coordinates(img1_g, [xy_neigh[:,:,1],xy_neigh[:,:,0]], order=1, 
		mode='constant').reshape((N,win))
	
	sx, sy = startX.copy(), startY.copy()
	valid_idx = np.logical_and(sx > -1, sy > -1)
	sx, sy = sx[valid_idx], sy[valid_idx]

	for t in range(T):
		xy_neigh2 = getXYNeighbours(sx, sy, W, H)

		img2_w = map_coordinates(img2_g, [xy_neigh2[:,:,1], xy_neigh2[:,:,0]], order=1,
			mode='constant').reshape((N, win))
		
		It_w = img1_w - img2_w # N x 100

		uv = getDisplacement(Apinv, It_w) # N x 2

		assert uv.shape[0] == sx.shape[0]

		x_new = sx + uv[:, 0]
		y_new = sy + uv[:, 1]

		sx, sy = x_new.copy(), y_new.copy()


		### Stopping Criterion
		# norm = np.linalg.norm(uv)


	return x_new, y_new
Example #8
def preprocessing(X_train, y_train, X_valid, y_valid, conf):
    
    # TODO: preprocess your data here. For the images:
    
    # 1. convert the images in X_train/X_valid to gray scale. If you use rgb2gray() from utils.py, the output shape is (100, 150, 1)
    X_train = np.array([rgb2gray(img) for img in X_train]).reshape(-1, 1, 100, 150)
    X_valid = np.array([rgb2gray(img) for img in X_valid]).reshape(-1, 1, 100, 150)
    # History:
    # At first you should only use the current image as input to your network to learn the next action. Then the input states
    # have shape (100, 150, 1). Later, add a history of the last N images to your state so that a state has shape (100, 150, N).

    # Hint: you can also implement frame skipping

    return X_train, y_train, X_valid, y_valid
Example #9
def run_episode(env, agent, rendering=True, max_timesteps=1000, history_length=1):
    episode_reward = 0
    step = 0

    state = env.reset()

    # fix bug of corrupted states without rendering in the racing-car gym environment
    env.viewer.window.dispatch_events()


    history = []
    state = utils.rgb2gray(state).reshape(96, 96, 1)
    history.append(state)
    history = history * history_length
    state = np.array(history).reshape(-1,96, 96, history_length)
    while True:

        # preprocess the state in the same way as in train_agent.py
        state = torch.tensor(state).permute(0, 3, 1, 2)

        # TODO: get the action from your agent! You need to transform the discretized actions to continuous
        # actions.
        # hints:
        #       - the action array fed into env.step() needs to have a shape like np.array([0.0, 0.0, 0.0])
        #       - just in case your agent misses the first turn because it is too fast: you are allowed to clip the acceleration in test_agent.py
        #       - you can use the softmax output to calculate the amount of lateral acceleration
        # a = ...
        a = agent.predict(state)
        a = a.argmax(dim=1).item()
        a = id_to_action(a)
        next_state, r, done, info = env.step(a)
        episode_reward += r
        next_state = utils.rgb2gray(next_state).reshape(96, 96, 1)
        history.append(next_state)
        del history[0]
        next_state = np.array(history).reshape(-1,96, 96, history_length)

        state = next_state
        step += 1
        
        if rendering:
            env.render()

        if done or step > max_timesteps: 
            break

    return episode_reward
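id_to_action, the inverse of the discretization used during preprocessing, is not shown either. A plausible sketch (the concrete steering/brake magnitudes are assumptions):

import numpy as np

def id_to_action(action_id):
    # map a discrete action id back to a continuous [steering, gas, brake] array
    actions = {
        0: np.array([0.0, 0.0, 0.0]),   # straight
        1: np.array([-1.0, 0.0, 0.0]),  # left
        2: np.array([1.0, 0.0, 0.0]),   # right
        3: np.array([0.0, 1.0, 0.0]),   # accelerate
        4: np.array([0.0, 0.0, 0.2]),   # brake (softened)
    }
    return actions[action_id]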
Example #10
def prepare_flow(reference, flow_tuple, confidence):

    reference_image = np.array(plt.imread(reference, format='png'), dtype=np.float32)*256
    flow_a = np.array(plt.imread(flow_tuple[0], format='png'), dtype=np.float32)*65536

    if TWO_FLOW:
        flow_b = np.array(plt.imread(flow_tuple[1], format='png'), dtype=np.float32)*65536
        flow = np.stack((flow_a,flow_b),axis=-1)
    else:
        flow = flow_a
    # undo the 16-bit PNG encoding: stored = 256 * flow + 2**15
    flow = (flow - 2**15) / 256.0

    weight = np.array(plt.imread(confidence, format='png'), dtype=np.float32)*256*256
    weight = np.divide(weight, 65536)

    if VERBOSE:
        print(">>> preparing flow data")
    sz = [reference_image.shape[0], reference_image.shape[1]]

    I_x = np.tile(np.floor(np.divide(np.arange(sz[1]), BILATERAL_SIGMA_SPATIAL)), (sz[0], 1))
    I_y = np.tile(np.floor(np.divide(np.arange(sz[0]), BILATERAL_SIGMA_SPATIAL)).reshape(1, -1).T, (1, sz[1]))
    I_luma = np.floor_divide(utils.rgb2gray(reference_image), float(BILATERAL_SIGMA_LUMA))

    X = np.concatenate((I_x[:,:,None],I_y[:,:,None],I_luma[:,:,None]),axis=2).reshape((-1,3),order='F')
    W0 = np.ravel(weight.T)
    if TWO_FLOW:
        X0 = np.reshape(flow,[-1,2],order='F')
    else:
        X0 = np.reshape(flow,[-1,1],order='F')
    return X, W0, X0, flow_a.shape
Example #11
def run_episode(env, agent, config, rendering=True, max_timesteps=10000):

    episode_reward = 0
    step = 0

    state = env.reset()
    state_img = env.render(
        mode="rgb_array")[::4, ::4, :]  # downsampling (every 4th pixel).

    # fix bug of corrupted states without rendering in gym environments
    env.viewer.window.dispatch_events()

    while True:

        # preprocess the frame in the same way as in train_agent.py
        state_img = rgb2gray(state_img).reshape(-1, 1, 100, 150)

        with torch.no_grad():
            a = int(torch.argmax(agent.predict(torch.tensor(state_img).float())))
        next_state, r, done, info = env.step(a)
        next_state_img = env.render(mode="rgb_array")[::4, ::4, :]
        episode_reward += r
        state = next_state
        state_img = next_state_img
        step += 1

        if rendering:
            env.render()

        if done or step > max_timesteps:
            break

    return episode_reward
Example #12
def generatepatchs(img, base_size, factor):
    # Compute the gradients as a proxy of the contextual cues.
    img_gray = rgb2gray(img)
    whole_grad = np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=3)) + \
                 np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3))

    threshold = whole_grad[whole_grad > 0].mean()
    whole_grad[whole_grad < threshold] = 0

    # We use the integral image to speed-up the evaluation of the amount of gradients for each patch.
    gf = whole_grad.sum() / len(whole_grad.reshape(-1))
    grad_integral_image = cv2.integral(whole_grad)

    # Variables are selected such that the initial patch size would be the receptive field size
    # and the stride is set to 1/3 of the receptive field size.
    blsize = int(round(base_size / 2))
    stride = int(round(blsize * 0.75))

    # Get initial Grid
    patch_bound_list = applyGridpatch(blsize, stride, img, [0, 0, 0, 0])

    # Refine initial Grid of patches by discarding the flat (in terms of gradients of the rgb image) ones. Refine
    # each patch size to ensure that there will be enough depth cues for the network to generate a consistent depth map.
    print("Selecting patchs ...")
    patch_bound_list = adaptiveselection(grad_integral_image, patch_bound_list,
                                         gf, factor)

    # Sort the patch list to make sure the merging operation will be done with the correct order: starting from biggest
    # patch
    patchset = sorted(patch_bound_list.items(),
                      key=lambda x: getitem(x[1], 'size'),
                      reverse=True)
    return patchset
Example #13
def estimateAllTranslation(startXs, startYs, img1, img2):

	N, F = startXs.shape

	img1_g = rgb2gray(img1)

	H, W = img1_g.shape

	Ix, Iy = getDerivatives(img1_g)

	newXs, newYs = np.zeros((N, F)), np.zeros((N, F))

	for f in range(F):
		startX, startY = startXs[:, f], startYs[:, f]  # length-N coordinate columns
		valid_idx = np.logical_and(startX > -1, startY > -1)

		newX, newY = estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2)

		x_new, y_new = newX, newY
		valid_idx_n = np.logical_and(np.logical_and(x_new >= 0, y_new >= 0), 
			np.logical_and(x_new < W, y_new < H))

		x_new[np.logical_not(valid_idx_n)] = -1 
		y_new[np.logical_not(valid_idx_n)] = -1
		
		n = len(x_new)
		assert n == len(y_new), "X and Y len differing"

		newXs[valid_idx, f], newYs[valid_idx, f] = x_new, y_new
		newXs[np.logical_not(valid_idx), f], newYs[np.logical_not(valid_idx), f] = [-1,-1]

	return  newXs, newYs
Example #14
def generate_generator_mask(generator, path, batch_size = 8, img_height = IMG_HEIGHT, img_width = IMG_WIDTH):

        gen_img = generator.flow_from_directory(path,
                                              classes = ["images"],
                                              target_size = (img_height,img_width),
                                              batch_size = batch_size,
                                              shuffle=True, 
                                              seed=7)

        gen_mask = generator.flow_from_directory(path,
                                              classes = ["masks"],
                                              target_size = (img_height,img_width),
                                              batch_size = batch_size,
                                              color_mode = 'grayscale',
                                              shuffle=True, 
                                              seed=7)
        while True:
                imgs, _ = gen_img.next()  # pixel values in [0, 255]
                if TRAIN_PREPROC == "hsv":
                    imgs = rgb2hsv(imgs)
                elif TRAIN_PREPROC == "hsv_norm":
                    imgs = rgb2hsv(imgs, normalization = True)
                elif TRAIN_PREPROC == "gray":
                    imgs = np.expand_dims(rgb2gray(imgs),axis = -1)
                masks, _ = gen_mask.next()
                masks = masks.squeeze()
  
                yield imgs, masks  # yield images together with their corresponding masks
Example #15
def estimateAllTranslation(startXs, startYs, img1, img2):
	G = GaussianPDF_2D(0,1, 5, 5)
	dx, dy = np.gradient(G, axis=(1, 0))

	img1_gray = rgb2gray(img1)

	Ix = scipy.signal.convolve(img1_gray, dx, 'same')
	Iy = scipy.signal.convolve(img1_gray, dy, 'same')

	no_of_bounding_box = startXs.shape[1]

	newXs = np.zeros(startXs.shape)
	newYs = np.zeros(startYs.shape)

	newXs[:, :] = -1
	newYs[:, :] = -1

	for bounding_box_index in range(no_of_bounding_box):
		for i, (startX, startY) in enumerate(zip(startXs[:, bounding_box_index], startYs[:, bounding_box_index])):
			if startX != -1:
				newX, newY = estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2)
				if ((newX>=img1.shape[1]) or (newY>=img1.shape[0]) or (newX<0) or (newY<0)):
					newX = -1
					newY = -1
			else:
				newX = -1
				newY = -1

			newXs[i, bounding_box_index] = newX
			newYs[i, bounding_box_index] = newY

	return newXs, newYs
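GaussianPDF_2D is assumed by this example. A minimal sketch built from the outer product of two 1-D Gaussians (an assumption consistent with the call GaussianPDF_2D(0, 1, 5, 5)):

import numpy as np

def GaussianPDF_1D(mu, sigma, length):
    # symmetric 1-D Gaussian kernel of the given length
    half = length // 2
    x = np.arange(-half, half + 1) if length % 2 else np.arange(-half, half) + 0.5
    return np.exp(-(x - mu) ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))

def GaussianPDF_2D(mu, sigma, row, col):
    # separable 2-D kernel: outer product of two 1-D Gaussians
    return np.outer(GaussianPDF_1D(mu, sigma, row), GaussianPDF_1D(mu, sigma, col))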
Example #16
    def detect(self, image):
        clone = image.copy()

        image = rgb2gray(image)

        # list to store the detections
        detections = []
        # current scale of the image
        downscale_power = 0

        # downscale the image and iterate
        for im_scaled in pyramid(image,
                                 downscale=self.downscale,
                                 min_size=self.window_size):
            # if the width or height of the scaled image is less than
            # the width or height of the window, then end the iterations
            if im_scaled.shape[0] < self.window_size[1] or im_scaled.shape[
                    1] < self.window_size[0]:
                break
            for (x, y, im_window) in sliding_window(im_scaled,
                                                    self.window_step_size,
                                                    self.window_size):
                if im_window.shape[0] != self.window_size[
                        1] or im_window.shape[1] != self.window_size[0]:
                    continue

                # calculate the HOG features
                feature_vector = hog(im_window)
                X = np.array([feature_vector])
                prediction = self.clf.predict(X)
                if prediction == 1:
                    x1 = int(x * (self.downscale**downscale_power))
                    y1 = int(y * (self.downscale**downscale_power))
                    detections.append(
                        (x1, y1, x1 + int(self.window_size[0] *
                                          (self.downscale**downscale_power)),
                         y1 + int(self.window_size[1] *
                                  (self.downscale**downscale_power))))

            # Move to the next scale
            downscale_power += 1

        # Display the results before performing NMS
        clone_before_nms = clone.copy()
        for (x1, y1, x2, y2) in detections:
            # Draw the detections
            cv2.rectangle(clone_before_nms, (x1, y1), (x2, y2), (0, 255, 0),
                          thickness=2)

        # Perform Non Maxima Suppression
        detections = non_max_suppression(np.array(detections), self.threshold)

        clone_after_nms = clone
        # Display the results after performing NMS
        for (x1, y1, x2, y2) in detections:
            # Draw the detections
            cv2.rectangle(clone_after_nms, (x1, y1), (x2, y2), (0, 255, 0),
                          thickness=2)

        return clone_before_nms, clone_after_nms
Example #17
    def get_updated_state(self, im):
        # shift the frame history: slot 0 receives the new grayscale frame,
        # older frames move back by one slot
        tmp = np.array(self.state, dtype=np.uint8)
        self.state[0] = utils.rgb2gray(im).T
        for i in range(1, self.frames):
            self.state[i] = np.array(tmp[i - 1], dtype=np.uint8)

        return np.array(self.state, dtype=np.uint8).reshape(
            (1, self.cols, self.rows, self.frames))
Example #18
def getEdgeMapFromParams(I_rgb, threshold, lineMap, colorMap):

	edgeMap = None

	if colorMap == "C":

		r, g, b = I_rgb[:, :, 0], I_rgb[:, :, 1], I_rgb[:, :, 2]
		edgeMap = np.zeros(I_rgb.shape, dtype=float)

		rEdgeMap, rMag = getEdgeMap(r, lineMap, threshold)
		gEdgeMap, gMag = getEdgeMap(g, lineMap, threshold)
		bEdgeMap, bMag = getEdgeMap(b, lineMap, threshold)

		totalMag = rMag + gMag + bMag

		rEdgeMap = rEdgeMap * rMag * 1.0 / totalMag
		gEdgeMap = gEdgeMap * gMag * 1.0 / totalMag
		bEdgeMap = bEdgeMap * bMag * 1.0 / totalMag

		im_gray = utils.rgb2gray(I_rgb)
		normalEdgeMap, mag = getEdgeMap(im_gray, lineMap, threshold)

		edgeMap[:, :, 0] = rEdgeMap + 0.0 * normalEdgeMap
		edgeMap[:, :, 1] = gEdgeMap + 0.0 * normalEdgeMap
		edgeMap[:, :, 2] = bEdgeMap + 0.0 * normalEdgeMap


	else:
		im_gray = utils.rgb2gray(I_rgb)
		edgeMapbinary, mag = getEdgeMap(im_gray, lineMap, threshold)
		edgeMap = np.zeros(I_rgb.shape, dtype=float)
		edgeMap[:,:,0] = edgeMapbinary
		edgeMap[:,:,1] = edgeMapbinary
		edgeMap[:,:,2] = edgeMapbinary



	edgeMap = edgeMap * 255
	edgeMap = edgeMap.astype("uint8")

	# Image.fromarray(edgeMap.astype("uint8")).show()

	return edgeMap
Example #19
def preprocessing(X_train, y_train, X_valid, y_valid, conf):
    # --- preprocessing for state vector ---
    if conf.is_fcn:
        X_train, y_train = skip_frames(input_data=X_train,
                                       input_label=y_train,
                                       skip_no=conf.skip_frames,
                                       history_length=0)
        X_valid, y_valid = skip_frames(input_data=X_valid,
                                       input_label=y_valid,
                                       skip_no=conf.skip_frames,
                                       history_length=0)
        X_train = X_train.squeeze()
        X_valid = X_valid.squeeze()

        return X_train, y_train, X_valid, y_valid

    # --- preprocessing for image data ---
    # 1. convert the images in X_train/X_valid to gray scale. If you use rgb2gray() from utils.py, the output shape is (100, 150, 1)
    # X_train shape: (N_sample, H, W, 3)
    X_train = rgb2gray(X_train)
    X_valid = rgb2gray(X_valid)

    # History:
    # At first you should only use the current image as input to your network to learn the next action. Then the input states
    # have shape (200, 300, 1). Later, add a history of the last N images to your state so that a state has shape (200, 300, N).

    # Hint: you can also implement frame skipping
    # skip_frames: parameter similar to a stride in a CNN
    skip_n = conf.skip_frames
    # history_length: number of past images stacked into each data point
    hist_len = conf.history_length
    # X_train shape after rgb2gray: (N_sample, 200, 300)
    X_train, y_train = skip_frames(input_data=X_train,
                                   input_label=y_train,
                                   skip_no=skip_n,
                                   history_length=hist_len)

    X_valid, y_valid = skip_frames(input_data=X_valid,
                                   input_label=y_valid,
                                   skip_no=skip_n,
                                   history_length=hist_len)

    return X_train, y_train, X_valid, y_valid
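skip_frames is not shown in the source. A hedged sketch of one implementation matching the call signature above, assuming image input with a trailing channel axis of shape (N, H, W, 1) when a history is requested (the stacking convention is an assumption):

import numpy as np

def skip_frames(input_data, input_label, skip_no, history_length):
    # keep every skip_no-th sample; for history_length > 0, stack the
    # previous frames into the trailing (channel) axis
    X, y = [], []
    for idx in range(history_length, len(input_data), skip_no):
        if history_length > 0:
            window = input_data[idx - history_length:idx + 1]
            X.append(np.concatenate(window, axis=-1))
        else:
            X.append(input_data[idx])
        y.append(input_label[idx])
    return np.array(X), np.array(y)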
Example #20
def Domask(I, x1, x2, y1, y2):
    xA = x1
    yA = y1
    xB = x2
    yB = y2
    image = I.copy()

    # pts = detect(path)
    # (xA, yA) = pts[0]
    # (xB, yB) = pts[1]
    h = int(math.fabs(yA - yB))
    w = int(math.fabs(xA - xB))

    #image[x1:(x1 + w),y1:(y1 + h), :] = 0
    #cv2.imshow("contour", image)
    #cv2.waitKey(0)

    # image = cv2.imread(path)
    # image = imutils.resize(image, width=min(400, image.shape[1]))
    mask = np.ones([image.shape[0], image.shape[1]])

    I_choice = image[xA:xB, yA:yB, :]
    # I_choice = image[yA:yB, xA:xB, :]
    # E = cannyEdge(I_choice)
    I_gray = rgb2gray(I_choice)
    mag = findDerivatives(I_gray)
    threshold_low = 0.65  #0.015
    threshold_low = threshold_low * mag.max()
    # for strong edge
    threshold_high = 0.8  #0.115
    threshold_high = threshold_high * mag.max()

    edges = cv2.Canny(I_choice, threshold_low, threshold_high)  # 500, 800
    _, contours0, hierarchy = cv2.findContours(edges, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)
    contours = [cv2.approxPolyDP(cnt, 10, True) for cnt in contours0]
    # cv2.drawContours(mask, contours, -1, (0, 255, 0), 10, offset=(xA, yA))
    #cv2.morphologyEx(image, cv2.MORPH_CLOSE, np.ones((5, 5), dtype = 'uint8')) #filter
    cv2.drawContours(mask, contours, -1, (0, 255, 0), 6, offset=(yA, xA))
    cv2.imshow("contour", mask)
    cv2.waitKey(0)
    #contours=cv2.morphologyEx(image, cv2.MORPH_CLOSE, np.ones((5, 5), dtype = 'uint8'))
    cv2.drawContours(image, contours, -1, (0, 255, 0), 6, offset=(yA, xA))
    cv2.imshow("contour", image)
    cv2.waitKey(0)
    cv2.imshow("choice", I_choice)
    cv2.waitKey(0)

    mask[0:x1, :] = 1
    mask[x2 + 1:, :] = 1
    mask[:, 0:y1] = 1
    mask[:, y2 + 1:] = 1

    return mask, h, w
Example #21
def aryell2(img):
    result, lines, borda = project.teste(img)
    fig, [(ax1, ax2), (ax3, ax4)] = plt.subplots(2, 2, figsize=(20, 10))

    ax1.imshow(utils.rgb2gray(img), cmap='gray')
    ax2.imshow(result,cmap='gray')
    ax3.imshow(result,cmap='gray')
    for line in lines:
        ax3.plot(*zip(*line), c='r')
    ax4.imshow(borda, cmap='gray')
    plt.show()
Example #22
def get_data(opt=Options()):
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    states = np.zeros([opt.data_steps, opt.state_siz], float)
    labels = np.zeros([opt.data_steps], int)

    # Note I am forcing the display to be off here to make data collection fast
    # you can turn it on again for debugging purposes
    # opt.disp_on = False

    # 1. control loop
    if opt.disp_on:
        win_all = None
        win_pob = None
    epi_step = 0  # #steps in current episode
    nepisodes = 1  # total #episodes executed

    state = sim.newGame(opt.tgt_y, opt.tgt_x)
    for step in range(opt.data_steps):
        if state.terminal or epi_step >= opt.early_stop:
            epi_step = 0
            nepisodes += 1
            state = sim.newGame(opt.tgt_y, opt.tgt_x)
        else:
            state = sim.step()  # will perform A* actions

        # save data & label
        states[step, :] = rgb2gray(state.pob).reshape(opt.state_siz)
        labels[step] = state.action

        epi_step += 1

        if step % opt.prog_freq == 0:
            print(step)

        if opt.disp_on:
            if win_all is None:
                import pylab as pl
                pl.figure()
                win_all = pl.imshow(state.screen)
                pl.figure()
                win_pob = pl.imshow(state.pob)
            else:
                win_all.set_data(state.screen)
                win_pob.set_data(state.pob)
            pl.pause(opt.disp_interval)
            pl.draw()

    # 2. save to disk
    print('saving data ...')
    np.savetxt(opt.states_fil, states, delimiter=',')
    np.savetxt(opt.labels_fil, labels, delimiter=',')
    print("states saved to " + opt.states_fil)
    print("labels saved to " + opt.labels_fil)
Example #23
	def forward(self, im1, im2):
		im1g = rgb2gray(im1)
		im2g = rgb2gray(im2)
		
		im1gx = self.xconv(im1g)
		im1gy = self.yconv(im1g)
		
		im2gx = self.xconv(im2g)
		im2gy = self.yconv(im2g)
		
		(batch, channel, height, width) = im1.size()
		
		im1xd = F.softmax(im1gx.view(-1, height*width), dim=1)
		im2xd = F.softmax(im2gx.view(-1, height*width), dim=1)

		im1yd = F.softmax(im1gy.view(-1, height*width), dim=1)
		im2yd = F.softmax(im2gy.view(-1, height*width), dim=1)
		
		self.loss = MMDcompute(im1xd, im2xd) + MMDcompute(im1yd, im2yd)
		
		return self.loss
Example #24
    def preprocess(self, img, imgtime):
        '''
        set up for next image
        @param img: image to detect
        @param imgtime: time of the image
        '''
        self.set_ROI(None)
#        if self.flip_H: cv.Flip(img, self.imgs[0], 1)
#        else:
        cv.Copy(img, self.imgs[0])
#        cv.Smooth(img, self.imgs[0],cv.CV_GAUSSIAN,5)

        self.to_scale(img)
        for scale in range(self.max_scale + 1):
            self.set_scale(scale)
            if self.img.nChannels == 1:
                cv.Copy(self.img, self.gray_img)
            else:
                rgb2gray(self.img, self.gray_img)
            cv.Copy(self.img, self.draw_img)
        self.time = imgtime
Example #25
def extract_hog(img, orientations, pixels_per_cell, cells_per_block):
    gray = rgb2gray(img)

    return hog(
        image=gray,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        visualize=False,
        transform_sqrt=True
    )
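A hedged usage sketch with typical HOG parameters (the parameter values and the random stand-in image are assumptions):

import numpy as np

img = np.random.rand(128, 128, 3)  # stand-in for a real RGB image
features = extract_hog(img, orientations=9,
                       pixels_per_cell=(8, 8), cells_per_block=(2, 2))
print(features.shape)  # one flat HOG descriptor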
Example #27
    def forward(self, x):
        # grayscale -> dense SIFT features -> flatten -> fully connected head
        x = rgb2gray(x).unsqueeze(1)
        x = self.sift(x).cuda()
        x = x.view(-1, 7 * 128)
        x = self.fcS(x)
        return x
Example #28
    def apply_filter(self, original_image, *args):
        # replace all three channels with the grayscale luminance
        gray = rgb2gray(original_image)
        merged = np.stack([gray, gray, gray], axis=2)

        # blur the grayscale image and blend it back in to soften the result
        blurred = gaussian_filter(merged, 0.1)

        final = np.clip(merged + blurred * 0.3, 0, 1.0)
        return final
Example #29
def run_episode(env, agent, config, rendering=True, max_timesteps=1000):

    episode_reward = 0
    step = 0
    is_fcn = config.is_fcn
    buffer = ImageBuffer(capacity=config.history_length + 1)

    state = env.reset()
    # downsampling (every 4th pixel). Copy because torch gives negative stride error
    state_img = env.render(mode="rgb_array")[::4, ::4, :].copy()

    # fix bug of corrupted states without rendering in gym environments
    env.viewer.window.dispatch_events()

    agent.test_mode()
    while True:
        if is_fcn:
            a = agent.predict(X=np.expand_dims(state, axis=0))
        else:
            # preprocessing
            state_img = rgb2gray(state_img)
            state_img = np.expand_dims(a=state_img, axis=-1)
            state_img = np.expand_dims(a=state_img, axis=0)
            buffer.push(state_img)
            if buffer.is_full():
                state_img = buffer.pop()
                a = agent.predict(X=state_img)
            else:
                a = torch.zeros(4)
                a[0] = 1  # no-action action until the buffer holds a full history
        a = np.argmax(a.numpy())

        next_state, r, done, info = env.step(a)
        next_state_img = env.render(mode="rgb_array")[::4, ::4, :].copy()

        episode_reward += r
        state = next_state
        state_img = next_state_img
        step += 1

        if rendering:
            env.render()

        if done or step > max_timesteps:
            break

    return episode_reward
Example #30
def cannyEdge(I):
    # convert RGB image to gray color space
    im_gray = utils.rgb2gray(I)

    Mag, Magx, Magy, Ori = findDerivatives(im_gray)
    M = nonMaxSup(Mag, Ori)
    E = edgeLink(M, Mag, Ori)

    # only show all of the results once the test has passed
    if Test_script(im_gray, E):
        # visualization results
        utils.visDerivatives(im_gray, Mag, Magx, Magy)
        utils.visCannyEdge(I, M, E)

        plt.show()

    return E
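findDerivatives, nonMaxSup, and edgeLink come from the surrounding project. A minimal Sobel-based sketch of what findDerivatives might return, matching the (Mag, Magx, Magy, Ori) unpacking above (an assumption, not the project's implementation):

import numpy as np
from scipy import ndimage

def findDerivatives(im_gray):
    # gradient components, magnitude, and orientation via Sobel filters
    magx = ndimage.sobel(im_gray, axis=1)
    magy = ndimage.sobel(im_gray, axis=0)
    return np.hypot(magx, magy), magx, magy, np.arctan2(magy, magx)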
Example #31
    def detect(self, image):
        clone = image.copy()
        image = rgb2gray(image)
        detections = []  # list of detected bounding boxes
        downscale_power = 0  # current pyramid level
        # iterate over the downscaled image pyramid
        for im_scaled in pyramid(image,
                                 downscale=self.downscale,
                                 min_size=self.window_size):
            if im_scaled.shape[0] < self.window_size[1] or im_scaled.shape[
                    1] < self.window_size[0]:
                # stop once the scaled image is smaller than the window
                break
            for (x, y, im_window) in sliding_window(im_scaled,
                                                    self.window_step_size,
                                                    self.window_size):
                if im_window.shape[0] != self.window_size[
                        1] or im_window.shape[1] != self.window_size[0]:
                    continue
                feature_vector = hog(im_window, block_norm="L1")  # HOG features
                X = np.array([feature_vector])
                prediction = self.clf.predict(X)
                if prediction == 1:
                    x1 = int(x * (self.downscale**downscale_power))
                    y1 = int(y * (self.downscale**downscale_power))
                    detections.append(
                        (x1, y1, x1 + int(self.window_size[0] *
                                          (self.downscale**downscale_power)),
                         y1 + int(self.window_size[1] *
                                  (self.downscale**downscale_power))))
            downscale_power += 1  # move to the next scale
        clone_before_nms = clone.copy()  # results before NMS
        for (x1, y1, x2, y2) in detections:
            cv2.rectangle(clone_before_nms, (x1, y1), (x2, y2), (0, 255, 0),
                          thickness=2)  # draw the bounding box
        detections = non_max_suppression(np.array(detections),
                                         self.threshold)  # apply NMS
        clone_after_nms = clone
        # results after NMS
        for (x1, y1, x2, y2) in detections:
            cv2.rectangle(clone_after_nms, (x1, y1), (x2, y2), (0, 255, 0),
                          thickness=2)  # draw the bounding box
        return clone_before_nms, clone_after_nms
Example #32
def simple_region_growing(img, bbox, threshold=10):
    """
    A (very) simple implementation of region growing.
    Extracts a region of the input image depending on a start position and a stop condition.
    The input should be a single channel 8 bits image and the seed a pixel position (x, y).
    The threshold corresponds to the difference between outside pixel intensity and mean intensity of region.
    In case no new pixel is found, the growing stops.
    Outputs a single channel 8 bits binary (0 or 255) image. Extracted region is highlighted in white.
    """


    img = rgb2gray(img)
    dims = img.shape

    # registration image (region mask)

    reg = np.zeros(shape=img.shape)
    if len(bbox) == 0:
        return reg

    # total image area; the grown region must not exceed it
    pix_area = dims[0] * dims[1]


    # seed is the central cell of a 3x3 grid over the bounding box
    seed = [bbox[0] + bbox[2] // 3, bbox[1] + bbox[3] // 3, bbox[2] // 3, bbox[3] // 3]

    # parameters
    mean_reg = np.mean(img[seed[1]:seed[1] + seed[3], seed[0]:seed[0] + seed[2]])
    # initial region size
    size = img[seed[1]:seed[1]+seed[3], seed[0]:seed[0]+seed[2]].size


    contour = []  # candidate pixel positions [[x1, y1], ..., [xn, yn]]
    contour_val = []  # corresponding gray values
    dist = 0
    # TODO: may be enhanced later with 8th connectivity
    orient = [(1, 0), (0, 1), (-1, 0), (0, -1)] # 4 connectivity

    pixel_to_check = []
    for y in range(seed[1], seed[1]+seed[3]+1):
        for x in range(seed[0], seed[0]+seed[2]):
            pixel_to_check.append([x, y])


    #Spreading
    while dist < threshold and size < pix_area:
        # take the next candidate pixel; stop when none are left
        try:
            cur_pix = pixel_to_check.pop()
        except IndexError:
            break
        for j in range(4):
            #select new candidate
            temp_pix = [cur_pix[0] + orient[j][0], cur_pix[1] + orient[j][1]]

            # check that it lies inside the image (x against width, y against height)
            is_in_img = dims[1] > temp_pix[0] > 0 and dims[0] > temp_pix[1] > 0

            # bounding-box membership (computed but not used below)
            is_in_bbox = bbox[1] < temp_pix[1] and bbox[1] + bbox[3] > temp_pix[1] and \
                         bbox[0] < temp_pix[0] and bbox[0] + bbox[2] > temp_pix[0]

            #candidate is taken if not already selected before
            if is_in_img and reg[temp_pix[1], temp_pix[0]] == 0:
                contour.append(temp_pix)
                contour_val.append(img[temp_pix[1], temp_pix[0]])
                reg[temp_pix[1], temp_pix[0]] = 150

        # take the contour pixel whose intensity is closest to the region mean
        dist_list = [abs(i - mean_reg) for i in contour_val]
        dist = min(dist_list)  # minimum intensity distance
        index = dist_list.index(min(dist_list))  # index of the closest pixel
        size += 1 # updating region size
        reg[cur_pix[1], cur_pix[0]] = 255

        # update the running mean (must stay float)
        mean_reg = (mean_reg * size + float(contour_val[index])) / (size + 1)
        # update the seed to the selected contour pixel
        cur_pix = contour[index]

        # remove the selected pixel from the contour lists
        del contour[index]
        del contour_val[index]

    return reg
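A hedged usage sketch, assuming an (x, y, width, height) bounding-box layout and a random stand-in image:

import numpy as np

img = (np.random.rand(120, 160, 3) * 255).astype(np.uint8)
bbox = [40, 30, 60, 45]  # assumed (x, y, w, h)
mask = simple_region_growing(img, bbox, threshold=10)
print(mask.shape)  # same spatial size as the input; contour 150, region 255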
Example #33
    def convolve2d(self):
        self.roi.play(lambda x: utils.cconv(ker, utils.rgb2gray(x).astype(float) / 255))