def problem1():
    # Read stereo images and ground-truth disparities
    i0 = rgb2gray(plt.imread('i0.png')).squeeze().astype(np.float32)
    i1 = rgb2gray(plt.imread('i1.png')).squeeze().astype(np.float32)
    gt = (255 * plt.imread('gt.png')).astype(np.int32)

    # Potts penalty parameters
    lmbda = 3.0
    s = 10.0 / 255.0

    # 4-connected edge neighborhood
    edges = edges4connected(i0.shape[0], i0.shape[1])

    # Candidate search range
    candidate_disparities = np.arange(0, gt.max() + 1)

    # Graph cuts with zero initialization
    zero_init = np.zeros(gt.shape).astype(np.int32)
    estimate_zero_init = alpha_expansion(i0, i1, edges, zero_init,
                                         candidate_disparities, s, lmbda)
    show_stereo(estimate_zero_init, gt)
    perc_correct = evaluate_stereo(estimate_zero_init, gt)
    print("Correct labels (zero init): %3.2f%%" % (perc_correct * 100))

    # Graph cuts with random initialization
    random_init = np.random.randint(low=0, high=gt.max() + 1, size=i0.shape)
    estimate_random_init = alpha_expansion(i0, i1, edges, random_init,
                                           candidate_disparities, s, lmbda)
    show_stereo(estimate_random_init, gt)
    perc_correct = evaluate_stereo(estimate_random_init, gt)
    print("Correct labels (random init): %3.2f%%" % (perc_correct * 100))
def forward(self, im1, im2):
    im1g = rgb2gray(im1)
    im2g = rgb2gray(im2)

    im1gx = self.xconv(im1g)
    im1gy = self.yconv(im1g)
    im2gx = self.xconv(im2g)
    im2gy = self.yconv(im2g)

    (batch, channel, height, width) = im1.size()

    im1xd = F.softmax(im1gx.view(-1, height * width), dim=1)
    im2xd = F.softmax(im2gx.view(-1, height * width), dim=1)
    im1xd = torch.log(im1xd)

    im1yd = F.softmax(im1gy.view(-1, height * width), dim=1)
    im2yd = F.softmax(im2gy.view(-1, height * width), dim=1)
    im1yd = torch.log(im1yd)

    self.loss = self.criterion(im1xd + 0.001, im2xd + 0.001) + \
        self.criterion(im1yd + 0.001, im2yd + 0.001)
    # print(self.loss)
    return self.loss
def computeLBPs(images, radius, n_points, compression=True):
    hs = {}
    for j in images.keys():  # iterate over the objects
        moon = copy.copy(images[j])
        # first image
        lbp = skimage.feature.local_binary_pattern(rgb2gray(moon[0]),
                                                   n_points, radius, 'uniform')
        h1 = np.histogram(lbp.reshape(lbp.size), 255)  # build the histogram
        h1 = np.take(h1[0], np.where(h1[0] > 0))[0]
        h = np.zeros((len(moon), n_points + 2), dtype=np.int64)
        h[0] = h1
        for i in range(len(moon)):  # iterate over all images
            lbp = skimage.feature.local_binary_pattern(rgb2gray(moon[i]),
                                                       n_points, radius, 'uniform')
            h1 = np.histogram(lbp.reshape(lbp.size), 255)
            h1 = np.take(h1[0], np.where(h1[0] > 0))[0]
            h[i] = h1
        if compression:
            h = (h[2:] + h[1:-1] + h[:-2]) / 3
            h = h[:-1:3]
            # h = sum(h, 0) / len(h)
        hs[j] = h
    return hs
def import_dataset(self):
    train = load('train_32x32.mat')
    test = load('test_32x32.mat')

    train_data = train['X']
    train_labels = train['y']
    test_data = test['X']
    test_labels = test['y']

    train_data = np.transpose(train_data, [3, 0, 1, 2])
    train_data = utils.rgb2gray(train_data)
    train_data = utils.normalize(train_data, -1, 1)
    train_shape = (train_data.shape[0], train_data.shape[1] * train_data.shape[2])
    train_data = np.reshape(train_data, train_shape)
    train_labels = utils.one_hot_coding(train_labels)

    test_data = np.transpose(test_data, [3, 0, 1, 2])
    test_data = utils.rgb2gray(test_data)
    test_data = utils.normalize(test_data, -1, 1)
    test_shape = (test_data.shape[0], test_data.shape[1] * test_data.shape[2])
    test_data = np.reshape(test_data, test_shape)
    test_labels = utils.one_hot_coding(test_labels)

    self.im_size = train_data.shape[1]

    # Create datasets from the above tensors
    self.train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    self.test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
def preprocessing(X_train, y_train, X_valid, y_valid, history_length=1):
    # TODO: preprocess your data here.
    # 1. convert the images in X_train/X_valid to gray scale. If you use rgb2gray() from utils.py,
    #    the output shape is (96, 96, 1).
    # 2. you can train your model with discrete actions (as you get them from read_data) by
    #    discretizing the action space using action_to_id() from utils.py.
    X_train_gray = np.array([utils.rgb2gray(img).reshape(96, 96, 1) for img in X_train])
    X_valid_gray = np.array([utils.rgb2gray(img).reshape(96, 96, 1) for img in X_valid])
    y_train = np.array([utils.action_to_id(a) for a in y_train])
    y_valid = np.array([utils.action_to_id(a) for a in y_valid])

    if history_length > 1:
        X_history = []
        y_history = []
        X_valid_history = []
        y_valid_history = []
        for idx in range(0, X_train_gray.shape[0], history_length):
            X_history.append(X_train_gray[idx:idx + history_length].reshape(96, 96, history_length))
            y_history.append(y_train[idx + history_length - 1])
        for idx in range(0, X_valid_gray.shape[0], history_length):
            X_valid_history.append(X_valid_gray[idx:idx + history_length].reshape(96, 96, history_length))
            y_valid_history.append(y_valid[idx + history_length - 1])
        return np.array(X_history), np.array(y_history), \
            np.array(X_valid_history), np.array(y_valid_history)

    # History:
    # At first you should only use the current image as input to your network to learn the next action.
    # Then the input states have shape (96, 96, 1). Later, add a history of the last N images to your
    # state so that a state has shape (96, 96, N).
    return X_train_gray, y_train, X_valid_gray, y_valid
def load_data(im1_filename, im2_filename, flo_filename):
    """Loads images and flow ground truth. Returns 4D tensors."""
    # load images as numpy arrays
    img1 = rgb2gray(read_image(im1_filename))
    img2 = rgb2gray(read_image(im2_filename))
    flo = read_flo(flo_filename)

    # convert to torch 4D tensors
    tensor1 = numpy2torch(img1).unsqueeze_(0)
    tensor2 = numpy2torch(img2).unsqueeze_(0)
    flow_gt = numpy2torch(flo).unsqueeze_(0)
    return tensor1, tensor2, flow_gt
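# Hypothetical usage sketch for load_data (not from the source): the file names are placeholders,
# and read_image / read_flo / numpy2torch are assumed to be provided by the surrounding utilities.
im1, im2, flow_gt = load_data('frame10.png', 'frame11.png', 'flow10.flo')
print(im1.shape, im2.shape, flow_gt.shape)  # expected: (1, 1, H, W), (1, 1, H, W), (1, 2, H, W)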
def estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2):
    img1_g = rgb2gray(img1)
    img2_g = rgb2gray(img2)
    T = NUM_ITERS_OPTICAL_FLOW  # number of optical-flow iterations
    H, W, _ = img1.shape

    x, y = startX.copy(), startY.copy()
    valid_idx = np.logical_and(x > -1, y > -1)
    x, y = x[valid_idx], y[valid_idx]

    xy_neigh = getXYNeighbours(x, y, W, H)
    N, win, _ = xy_neigh.shape
    Ix_w = map_coordinates(Ix, [xy_neigh[:, :, 1], xy_neigh[:, :, 0]],
                           order=1, mode='constant').reshape((N, win))
    Iy_w = map_coordinates(Iy, [xy_neigh[:, :, 1], xy_neigh[:, :, 0]],
                           order=1, mode='constant').reshape((N, win))
    Apinv = getApinv(Ix_w, Iy_w)
    img1_w = map_coordinates(img1_g, [xy_neigh[:, :, 1], xy_neigh[:, :, 0]],
                             order=1, mode='constant').reshape((N, win))

    sx, sy = startX.copy(), startY.copy()
    valid_idx = np.logical_and(sx > -1, sy > -1)
    sx, sy = sx[valid_idx], sy[valid_idx]

    for t in range(T):
        xy_neigh2 = getXYNeighbours(sx, sy, W, H)
        img2_w = map_coordinates(img2_g, [xy_neigh2[:, :, 1], xy_neigh2[:, :, 0]],
                                 order=1, mode='constant').reshape((N, win))
        It_w = img1_w - img2_w             # N x 100
        uv = getDisplacement(Apinv, It_w)  # N x 2
        assert uv.shape[0] == sx.shape[0]
        x_new = sx + uv[:, 0]
        y_new = sy + uv[:, 1]
        sx, sy = x_new.copy(), y_new.copy()
        # Stopping criterion (unused): norm = np.linalg.norm(uv)

    return x_new, y_new
def preprocessing(X_train, y_train, X_valid, y_valid, conf):
    # TODO: preprocess your data here. For the images:
    # 1. convert the images in X_train/X_valid to gray scale. If you use rgb2gray() from utils.py,
    #    the output shape is (100, 150, 1).
    X_train = np.array([rgb2gray(img) for img in X_train]).reshape(-1, 1, 100, 150)
    X_valid = np.array([rgb2gray(img) for img in X_valid]).reshape(-1, 1, 100, 150)

    # History:
    # At first you should only use the current image as input to your network to learn the next action.
    # Then the input states have shape (100, 150, 1). Later, add a history of the last N images to your
    # state so that a state has shape (100, 150, N).
    # Hint: you can also implement frame skipping.
    return X_train, y_train, X_valid, y_valid
def run_episode(env, agent, rendering=True, max_timesteps=1000, history_length=1):
    episode_reward = 0
    step = 0

    state = env.reset()
    # fix bug of corrupted states without rendering in the racing car gym environment
    env.viewer.window.dispatch_events()

    history = []
    state = utils.rgb2gray(state).reshape(96, 96, 1)
    history.append(state)
    history = history * history_length
    state = np.array(history).reshape(-1, 96, 96, history_length)

    while True:
        # TODO: preprocess the state in the same way as in your preprocessing in train_agent.py
        state = torch.tensor(state).permute(0, 3, 1, 2)

        # TODO: get the action from your agent! You need to transform the discretized actions to
        # continuous actions.
        # hints:
        # - the action array fed into env.step() needs to have a shape like np.array([0.0, 0.0, 0.0])
        # - just in case your agent misses the first turn because it is too fast: you are allowed to
        #   clip the acceleration in test_agent.py
        # - you can use the softmax output to calculate the amount of lateral acceleration
        a = agent.predict(state)
        a = a.argmax(dim=1).item()
        a = id_to_action(a)

        next_state, r, done, info = env.step(a)
        episode_reward += r

        next_state = utils.rgb2gray(next_state).reshape(96, 96, 1)
        history.append(next_state)
        del history[0]
        next_state = np.array(history).reshape(-1, 96, 96, history_length)

        state = next_state
        step += 1

        if rendering:
            env.render()

        if done or step > max_timesteps:
            break

    return episode_reward
def prepare_flow(reference, flow_tuple, confidence):
    reference_image = np.array(plt.imread(reference, format='png'), dtype=np.float32) * 256
    flow_a = np.array(plt.imread(flow_tuple[0], format='png'), dtype=np.float32) * 65536
    if TWO_FLOW:
        flow_b = np.array(plt.imread(flow_tuple[1], format='png'), dtype=np.float32) * 65536
        flow = np.stack((flow_a, flow_b), axis=-1)
    else:
        flow = flow_a
    flow = np.subtract(flow, 2**15)
    flow = np.divide(flow, 256)

    weight = np.array(plt.imread(confidence, format='png'), dtype=np.float32) * 256 * 256
    weight = np.divide(weight, 65536)

    if VERBOSE:
        print(">>> preparing flow data")

    sz = [reference_image.shape[0], reference_image.shape[1]]
    I_x = np.tile(np.floor(np.divide(np.arange(sz[1]), BILATERAL_SIGMA_SPATIAL)), (sz[0], 1))
    I_y = np.tile(np.floor(np.divide(np.arange(sz[0]), BILATERAL_SIGMA_SPATIAL)).reshape(1, -1).T,
                  (1, sz[1]))
    I_luma = np.floor_divide(utils.rgb2gray(reference_image), float(BILATERAL_SIGMA_LUMA))
    X = np.concatenate((I_x[:, :, None], I_y[:, :, None], I_luma[:, :, None]),
                       axis=2).reshape((-1, 3), order='F')

    W0 = np.ravel(weight.T)
    if TWO_FLOW:
        X0 = np.reshape(flow, [-1, 2], order='F')
    else:
        X0 = np.reshape(flow, [-1, 1], order='F')
    return X, W0, X0, flow_a.shape
def run_episode(env, agent, config, rendering=True, max_timesteps=10000):
    episode_reward = 0
    step = 0

    state = env.reset()
    state_img = env.render(mode="rgb_array")[::4, ::4, :]  # downsampling (every 4th pixel)

    # fix bug of corrupted states without rendering in gym environments
    env.viewer.window.dispatch_events()

    while True:
        # TODO: preprocess the state in the same way as in your preprocessing in train_agent.py
        state_img = np.array([rgb2gray(img) for img in state_img]).reshape(-1, 1, 100, 150)

        with torch.no_grad():
            a = int(torch.argmax(agent.predict(torch.tensor(state))))

        next_state, r, done, info = env.step(a)
        next_state_img = env.render(mode="rgb_array")[::4, ::4, :]
        episode_reward += r

        state = next_state
        state_img = next_state_img
        step += 1

        if rendering:
            env.render()

        if done or step > max_timesteps:
            break

    return episode_reward
def generatepatchs(img, base_size, factor):
    # Compute the gradients as a proxy for the contextual cues.
    img_gray = rgb2gray(img)
    whole_grad = np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=3)) + \
        np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3))

    threshold = whole_grad[whole_grad > 0].mean()
    whole_grad[whole_grad < threshold] = 0

    # We use the integral image to speed up the evaluation of the amount of gradients in each patch.
    gf = whole_grad.sum() / len(whole_grad.reshape(-1))
    grad_integral_image = cv2.integral(whole_grad)

    # Variables are selected such that the initial patch size relates to the receptive field size
    # and the stride is a fraction of the patch size.
    blsize = int(round(base_size / 2))
    stride = int(round(blsize * 0.75))

    # Get the initial grid
    patch_bound_list = applyGridpatch(blsize, stride, img, [0, 0, 0, 0])

    # Refine the initial grid of patches by discarding the flat ones (in terms of gradients of the
    # RGB image). Refine each patch size to ensure that there will be enough depth cues for the
    # network to generate a consistent depth map.
    print("Selecting patches ...")
    patch_bound_list = adaptiveselection(grad_integral_image, patch_bound_list, gf, factor)

    # Sort the patch list so that the merging operation is done in the correct order:
    # starting from the biggest patch.
    patchset = sorted(patch_bound_list.items(), key=lambda x: getitem(x[1], 'size'), reverse=True)
    return patchset
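# Hypothetical usage sketch (not from the source): the image path and the base_size / factor values
# are illustrative only; applyGridpatch and adaptiveselection are assumed to be defined elsewhere.
demo_img = cv2.cvtColor(cv2.imread('input.jpg'), cv2.COLOR_BGR2RGB) / 255.0
patches = generatepatchs(demo_img, base_size=384, factor=2)
print(len(patches), "patches selected")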
def estimateAllTranslation(startXs, startYs, img1, img2):
    N, F = startXs.shape
    img1_g = rgb2gray(img1)
    H, W = img1_g.shape
    Ix, Iy = getDerivatives(img1_g)

    newXs, newYs = np.zeros((N, F)), np.zeros((N, F))
    for f in range(F):
        startX, startY = startXs[:, f], startYs[:, f]  # N x 2
        valid_idx = np.logical_and(startX > -1, startY > -1)

        newX, newY = estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2)
        x_new, y_new = newX, newY
        valid_idx_n = np.logical_and(np.logical_and(x_new >= 0, y_new >= 0),
                                     np.logical_and(x_new < W, y_new < H))
        x_new[np.logical_not(valid_idx_n)] = -1
        y_new[np.logical_not(valid_idx_n)] = -1

        n = len(x_new)
        assert n == len(y_new), "X and Y len differing"

        newXs[valid_idx, f], newYs[valid_idx, f] = x_new, y_new
        newXs[np.logical_not(valid_idx), f], newYs[np.logical_not(valid_idx), f] = [-1, -1]

        n = len(startX)
        newXs[n:, f], newYs[n:, f] = -1, -1
    return newXs, newYs
def generate_generator_mask(generator, path, batch_size=8, img_height=IMG_HEIGHT, img_width=IMG_WIDTH):
    gen_img = generator.flow_from_directory(path,
                                            classes=["images"],
                                            target_size=(img_height, img_width),
                                            batch_size=batch_size,
                                            shuffle=True,
                                            seed=7)
    gen_mask = generator.flow_from_directory(path,
                                             classes=["masks"],
                                             target_size=(img_height, img_width),
                                             batch_size=batch_size,
                                             color_mode='grayscale',
                                             shuffle=True,
                                             seed=7)
    while True:
        imgs, _ = gen_img.next()  # values in [0, 255]
        if TRAIN_PREPROC == "hsv":
            imgs = rgb2hsv(imgs)
        elif TRAIN_PREPROC == "hsv_norm":
            imgs = rgb2hsv(imgs, normalization=True)
        elif TRAIN_PREPROC == "gray":
            imgs = np.expand_dims(rgb2gray(imgs), axis=-1)

        masks, _ = gen_mask.next()
        masks = masks.squeeze()

        yield imgs, masks  # yield both the images and their corresponding masks
def estimateAllTranslation(startXs, startYs, img1, img2):
    G = GaussianPDF_2D(0, 1, 5, 5)
    dx, dy = np.gradient(G, axis=(1, 0))

    img1_gray = rgb2gray(img1)
    Ix = scipy.signal.convolve(img1_gray, dx, 'same')
    Iy = scipy.signal.convolve(img1_gray, dy, 'same')

    no_of_bounding_box = startXs.shape[1]
    newXs = np.zeros(startXs.shape)
    newYs = np.zeros(startYs.shape)
    newXs[:, :] = -1
    newYs[:, :] = -1

    for bounding_box_index in range(no_of_bounding_box):
        for i, (startX, startY) in enumerate(zip(startXs[:, bounding_box_index],
                                                 startYs[:, bounding_box_index])):
            if startX != -1:
                newX, newY = estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2)
                if (newX >= img1.shape[1]) or (newY >= img1.shape[0]) or (newX < 0) or (newY < 0):
                    newX = -1
                    newY = -1
            else:
                newX = -1
                newY = -1
            newXs[i, bounding_box_index] = newX
            newYs[i, bounding_box_index] = newY
    return newXs, newYs
def detect(self, image):
    clone = image.copy()
    image = rgb2gray(image)

    # list to store the detections
    detections = []
    # current scale of the image
    downscale_power = 0

    # downscale the image and iterate
    for im_scaled in pyramid(image, downscale=self.downscale, min_size=self.window_size):
        # if the width or height of the scaled image is less than
        # the width or height of the window, then end the iterations
        if im_scaled.shape[0] < self.window_size[1] or im_scaled.shape[1] < self.window_size[0]:
            break
        for (x, y, im_window) in sliding_window(im_scaled, self.window_step_size, self.window_size):
            if im_window.shape[0] != self.window_size[1] or im_window.shape[1] != self.window_size[0]:
                continue

            # calculate the HOG features
            feature_vector = hog(im_window)
            X = np.array([feature_vector])
            prediction = self.clf.predict(X)
            if prediction == 1:
                x1 = int(x * (self.downscale ** downscale_power))
                y1 = int(y * (self.downscale ** downscale_power))
                detections.append((x1, y1,
                                   x1 + int(self.window_size[0] * (self.downscale ** downscale_power)),
                                   y1 + int(self.window_size[1] * (self.downscale ** downscale_power))))
        # Move to the next scale
        downscale_power += 1

    # Display the results before performing NMS
    clone_before_nms = clone.copy()
    for (x1, y1, x2, y2) in detections:
        # Draw the detections
        cv2.rectangle(clone_before_nms, (x1, y1), (x2, y2), (0, 255, 0), thickness=2)

    # Perform non-maximum suppression
    detections = non_max_suppression(np.array(detections), self.threshold)

    clone_after_nms = clone
    # Display the results after performing NMS
    for (x1, y1, x2, y2) in detections:
        # Draw the detections
        cv2.rectangle(clone_after_nms, (x1, y1), (x2, y2), (0, 255, 0), thickness=2)

    return clone_before_nms, clone_after_nms
def get_updated_state(self, im):
    tmp = np.array(self.state, dtype=np.uint8)
    self.state[0] = utils.rgb2gray(im).T
    for i in range(1, self.frames):
        self.state[i] = np.array(tmp[i - 1], dtype=np.uint8)
    return np.array(self.state, dtype=np.uint8).reshape((1, self.cols, self.rows, self.frames))
def getEdgeMapFromParams(I_rgb, threshold, lineMap, colorMap):
    edgeMap = None
    if colorMap == "C":
        r, g, b = I_rgb[:, :, 0], I_rgb[:, :, 1], I_rgb[:, :, 2]
        edgeMap = np.zeros(I_rgb.shape, dtype=float)

        rEdgeMap, rMag = getEdgeMap(r, lineMap, threshold)
        gEdgeMap, gMag = getEdgeMap(g, lineMap, threshold)
        bEdgeMap, bMag = getEdgeMap(b, lineMap, threshold)

        totalMag = rMag + gMag + bMag
        rEdgeMap = rEdgeMap * rMag * 1.0 / totalMag
        gEdgeMap = gEdgeMap * gMag * 1.0 / totalMag
        bEdgeMap = bEdgeMap * bMag * 1.0 / totalMag

        im_gray = utils.rgb2gray(I_rgb)
        normalEdgeMap, mag = getEdgeMap(im_gray, lineMap, threshold)

        edgeMap[:, :, 0] = rEdgeMap + 0.0 * normalEdgeMap
        edgeMap[:, :, 1] = gEdgeMap + 0.0 * normalEdgeMap
        edgeMap[:, :, 2] = bEdgeMap + 0.0 * normalEdgeMap
    else:
        im_gray = utils.rgb2gray(I_rgb)
        edgeMapbinary, mag = getEdgeMap(im_gray, lineMap, threshold)
        edgeMap = np.zeros(I_rgb.shape, dtype=float)
        edgeMap[:, :, 0] = edgeMapbinary
        edgeMap[:, :, 1] = edgeMapbinary
        edgeMap[:, :, 2] = edgeMapbinary

    edgeMap = edgeMap * 255
    edgeMap = edgeMap.astype("uint8")
    # Image.fromarray(edgeMap.astype("uint8")).show()
    return edgeMap
def preprocessing(X_train, y_train, X_valid, y_valid, conf):
    # --- preprocessing for state vector ---
    if conf.is_fcn:
        X_train, y_train = skip_frames(input_data=X_train, input_label=y_train,
                                       skip_no=conf.skip_frames, history_length=0)
        X_valid, y_valid = skip_frames(input_data=X_valid, input_label=y_valid,
                                       skip_no=conf.skip_frames, history_length=0)
        X_train = X_train.squeeze()
        X_valid = X_valid.squeeze()
        return X_train, y_train, X_valid, y_valid

    # --- preprocessing for image data ---
    # 1. convert the images in X_train/X_valid to gray scale. If you use rgb2gray() from utils.py,
    #    the output shape is (100, 150, 1).
    # X_train shape: (N_sample, H, W, 3)
    X_train = rgb2gray(X_train)
    X_valid = rgb2gray(X_valid)

    # History:
    # At first you should only use the current image as input to your network to learn the next action.
    # Then the input states have shape (200, 300, 1). Later, add a history of the last N images to your
    # state so that a state has shape (200, 300, N).
    # Hint: you can also implement frame skipping.

    # skip_frames: parameter similar to stride in a CNN
    skip_n = conf.skip_frames
    # history_length: number of images representing the history in each data point
    hist_len = conf.history_length

    # X_train shape: (2250, 200, 300, 3)
    X_train, y_train = skip_frames(input_data=X_train, input_label=y_train,
                                   skip_no=skip_n, history_length=hist_len)
    X_valid, y_valid = skip_frames(input_data=X_valid, input_label=y_valid,
                                   skip_no=skip_n, history_length=hist_len)
    return X_train, y_train, X_valid, y_valid
def Domask(I, x1, x2, y1, y2):
    xA = x1
    yA = y1
    xB = x2
    yB = y2

    image = I.copy()
    # pts = detect(path)
    # (xA, yA) = pts[0]
    # (xB, yB) = pts[1]
    h = int(math.fabs(yA - yB))
    w = int(math.fabs(xA - xB))

    # image[x1:(x1 + w), y1:(y1 + h), :] = 0
    # cv2.imshow("contour", image)
    # cv2.waitKey(0)

    # image = cv2.imread(path)
    # image = imutils.resize(image, width=min(400, image.shape[1]))

    mask = np.ones([image.shape[0], image.shape[1]])
    I_choice = image[xA:xB, yA:yB, :]
    # I_choice = image[yA:yB, xA:xB, :]
    # E = cannyEdge(I_choice)

    I_gray = rgb2gray(I_choice)
    mag = findDerivatives(I_gray)
    threshold_low = 0.65  # 0.015
    threshold_low = threshold_low * mag.max()  # for strong edges
    threshold_high = 0.8  # 0.115
    threshold_high = threshold_high * mag.max()

    edges = cv2.Canny(I_choice, threshold_low, threshold_high)  # 500, 800
    _, contours0, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = [cv2.approxPolyDP(cnt, 10, True) for cnt in contours0]

    # cv2.drawContours(mask, contours, -1, (0, 255, 0), 10, offset=(xA, yA))
    # cv2.morphologyEx(image, cv2.MORPH_CLOSE, np.ones((5, 5), dtype='uint8'))  # filter
    cv2.drawContours(mask, contours, -1, (0, 255, 0), 6, offset=(yA, xA))
    cv2.imshow("contour", mask)
    cv2.waitKey(0)

    # contours = cv2.morphologyEx(image, cv2.MORPH_CLOSE, np.ones((5, 5), dtype='uint8'))
    cv2.drawContours(image, contours, -1, (0, 255, 0), 6, offset=(yA, xA))
    cv2.imshow("contour", image)
    cv2.waitKey(0)
    cv2.imshow("choice", I_choice)
    cv2.waitKey(0)

    mask[0:x1, :] = 1
    mask[x2 + 1:, :] = 1
    mask[:, 0:y1] = 1
    mask[:, y2 + 1:] = 1
    return mask, h, w
def aryell2(imagem):
    result, lines, borda = project.teste(imagem)

    fig, [(ax1, ax2), (ax3, ax4)] = plt.subplots(2, 2, figsize=(20, 10))
    ax1.imshow(utils.rgb2gray(imagem), cmap='gray')
    ax2.imshow(result, cmap='gray')
    ax3.imshow(result, cmap='gray')
    for line in lines:
        ax3.plot(*zip(*line), c='r')
    ax4.imshow(borda, cmap='gray')
    plt.show()
def get_data(opt=Options()):
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)

    states = np.zeros([opt.data_steps, opt.state_siz], float)
    labels = np.zeros([opt.data_steps], int)

    # Note: the display is forced off here to make data collection fast;
    # you can turn it on again for debugging purposes.
    # opt.disp_on = False

    # 1. control loop
    if opt.disp_on:
        win_all = None
        win_pob = None
    epi_step = 0   # number of steps in the current episode
    nepisodes = 1  # total number of episodes executed

    state = sim.newGame(opt.tgt_y, opt.tgt_x)
    for step in range(opt.data_steps):
        if state.terminal or epi_step >= opt.early_stop:
            epi_step = 0
            nepisodes += 1
            state = sim.newGame(opt.tgt_y, opt.tgt_x)
        else:
            state = sim.step()  # will perform A* actions

        # save data & label
        states[step, :] = rgb2gray(state.pob).reshape(opt.state_siz)
        labels[step] = state.action

        epi_step += 1

        if step % opt.prog_freq == 0:
            print(step)

        if opt.disp_on:
            if win_all is None:
                import pylab as pl
                pl.figure()
                win_all = pl.imshow(state.screen)
                pl.figure()
                win_pob = pl.imshow(state.pob)
            else:
                win_all.set_data(state.screen)
                win_pob.set_data(state.pob)
            pl.pause(opt.disp_interval)
            pl.draw()

    # 2. save to disk
    print('saving data ...')
    np.savetxt(opt.states_fil, states, delimiter=',')
    np.savetxt(opt.labels_fil, labels, delimiter=',')
    print("states saved to " + opt.states_fil)
    print("labels saved to " + opt.labels_fil)
def forward(self, im1, im2):
    im1g = rgb2gray(im1)
    im2g = rgb2gray(im2)

    im1gx = self.xconv(im1g)
    im1gy = self.yconv(im1g)
    im2gx = self.xconv(im2g)
    im2gy = self.yconv(im2g)

    (batch, channel, height, width) = im1.size()

    im1xd = F.softmax(im1gx.view(-1, height * width), dim=1)
    im2xd = F.softmax(im2gx.view(-1, height * width), dim=1)
    im1yd = F.softmax(im1gy.view(-1, height * width), dim=1)
    im2yd = F.softmax(im2gy.view(-1, height * width), dim=1)

    self.loss = MMDcompute(im1xd, im2xd) + MMDcompute(im1yd, im2yd)
    return self.loss
def preprocess(self, img, imgtime):
    '''
    Set up for the next image.
    @param img: image to detect
    @param imgtime: time of the image
    '''
    self.set_ROI(None)
    # if self.flip_H:
    #     cv.Flip(img, self.imgs[0], 1)
    # else:
    #     cv.Copy(img, self.imgs[0])
    # cv.Smooth(img, self.imgs[0], cv.CV_GAUSSIAN, 5)
    self.to_scale(img)
    for scale in range(self.max_scale + 1):
        self.set_scale(scale)
        if self.img.nChannels == 1:
            cv.Copy(self.img, self.gray_img)
        else:
            rgb2gray(self.img, self.gray_img)
        cv.Copy(self.img, self.draw_img)
    self.time = imgtime
def extract_hog(img, orientations, pixels_per_cell, cells_per_block):
    gray = rgb2gray(img)
    return hog(image=gray,
               orientations=orientations,
               pixels_per_cell=pixels_per_cell,
               cells_per_block=cells_per_block,
               block_norm='L2-Hys',
               visualize=False,
               transform_sqrt=True)
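# Hypothetical usage sketch (not from the source): the parameter values below are typical HOG
# settings chosen for illustration only; skimage.feature.hog accepts them as shown.
import numpy as np

demo_img = np.random.rand(128, 64, 3)  # placeholder RGB image
features = extract_hog(demo_img,
                       orientations=9,
                       pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2))
print(features.shape)  # 1-D HOG feature vector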
def forward(self, x):
    # print(x.shape, ' 74')
    x = rgb2gray(x).unsqueeze(1)
    # print(x.shape, ' 76')
    x = self.sift(x).cuda()
    # print(x.shape, ' 78')
    # xc = x.clone()
    # print(xc.requires_grad)
    x = x.view(-1, 7 * 128)
    # print(x.shape, ' 82')
    x = self.fcS(x)
    # print(x.shape, ' 84')
    return x
def apply_filter(self, original_image, *args):
    r = original_image[:, :, 0]
    g = original_image[:, :, 1]
    b = original_image[:, :, 2]

    gray = rgb2gray(original_image)
    r = g = b = gray
    merged = np.stack([r, g, b], axis=2)

    # Apply the Gaussian filter in the frequency domain to average the color values
    blurred = gaussian_filter(merged, 0.1)
    final = np.clip(merged + blurred * 0.3, 0, 1.0)
    return final
def run_episode(env, agent, config, rendering=True, max_timesteps=1000):
    episode_reward = 0
    step = 0
    is_fcn = config.is_fcn
    buffer = ImageBuffer(capacity=config.history_length + 1)

    state = env.reset()
    # downsampling (every 4th pixel); copy because torch raises a negative-stride error otherwise
    state_img = env.render(mode="rgb_array")[::4, ::4, :].copy()

    # fix bug of corrupted states without rendering in gym environments
    env.viewer.window.dispatch_events()

    agent.test_mode()
    while True:
        if is_fcn:
            a = agent.predict(X=np.expand_dims(state, axis=0))
        else:
            # preprocessing
            state_img = rgb2gray(state_img)
            state_img = np.expand_dims(a=state_img, axis=-1)
            state_img = np.expand_dims(a=state_img, axis=0)
            buffer.push(state_img)
            if buffer.is_full():
                state_img = buffer.pop()
                a = agent.predict(X=state_img)
            else:
                a = torch.zeros(4)
                a[0] = 1  # no-op action

        a = np.argmax(a.numpy())
        next_state, r, done, info = env.step(a)
        next_state_img = env.render(mode="rgb_array")[::4, ::4, :].copy()
        episode_reward += r

        state = next_state
        state_img = next_state_img
        step += 1

        if rendering:
            env.render()

        if done or step > max_timesteps:
            break

    return episode_reward
def cannyEdge(I):
    # convert the RGB image to gray scale
    im_gray = utils.rgb2gray(I)

    Mag, Magx, Magy, Ori = findDerivatives(im_gray)
    M = nonMaxSup(Mag, Ori)
    E = edgeLink(M, Mag, Ori)

    # show the results only when the test passes
    if Test_script(im_gray, E):
        # visualization of the results
        utils.visDerivatives(im_gray, Mag, Magx, Magy)
        utils.visCannyEdge(I, M, E)
        plt.show()

    return E
def detect(self, image):
    clone = image.copy()
    image = rgb2gray(image)

    detections = []      # list of detected objects
    downscale_power = 0  # current downscaling level

    # iterate over the downscaled image pyramid
    for im_scaled in pyramid(image, downscale=self.downscale, min_size=self.window_size):
        # stop once the scaled image is smaller than the template window
        if im_scaled.shape[0] < self.window_size[1] or im_scaled.shape[1] < self.window_size[0]:
            break
        for (x, y, im_window) in sliding_window(im_scaled, self.window_step_size, self.window_size):
            if im_window.shape[0] != self.window_size[1] or im_window.shape[1] != self.window_size[0]:
                continue

            feature_vector = hog(im_window, block_norm="L1")  # compute HOG features
            X = np.array([feature_vector])
            prediction = self.clf.predict(X)
            if prediction == 1:
                x1 = int(x * (self.downscale ** downscale_power))
                y1 = int(y * (self.downscale ** downscale_power))
                detections.append((x1, y1,
                                   x1 + int(self.window_size[0] * (self.downscale ** downscale_power)),
                                   y1 + int(self.window_size[1] * (self.downscale ** downscale_power))))
        downscale_power += 1  # move to the next scale

    clone_before_nms = clone.copy()  # results before NMS
    for (x1, y1, x2, y2) in detections:
        cv2.rectangle(clone_before_nms, (x1, y1), (x2, y2), (0, 255, 0), thickness=2)  # draw box

    detections = non_max_suppression(np.array(detections), self.threshold)  # apply NMS
    clone_after_nms = clone  # results after NMS
    for (x1, y1, x2, y2) in detections:
        cv2.rectangle(clone_after_nms, (x1, y1), (x2, y2), (0, 255, 0), thickness=2)  # draw box

    return clone_before_nms, clone_after_nms
def simple_region_growing(img, bbox, threshold=10):
    """
    A (very) simple implementation of region growing.
    Extracts a region of the input image depending on a seed region and a stop condition.
    The input image is converted to grayscale; the seed region is derived from the bounding
    box (x, y, width, height). The threshold corresponds to the difference between a candidate
    pixel's intensity and the mean intensity of the region. In case no new pixel is found, the
    growing stops. Outputs a single-channel binary (0 or 255) image; the extracted region is
    highlighted in white.
    """
    img = rgb2gray(img)  # convert to grayscale if RGB
    dims = img.shape

    # # threshold tests
    # if not isinstance(threshold, int):
    #     raise TypeError("(%s) Int expected!" % sys._getframe().f_code.co_name)
    # elif threshold < 0:
    #     raise ValueError("(%s) Positive value expected!" % sys._getframe().f_code.co_name)
    #
    # # seed tests
    # if not ((isinstance(seed, tuple)) and (len(seed) is 2)):
    #     raise TypeError("(%s) (x, y) variable expected!" % sys._getframe().f_code.co_name)
    # if (seed[0] or seed[1]) < 0:
    #     raise ValueError("(%s) Seed should have positive values!" % sys._getframe().f_code.co_name)
    # elif (seed[0] > dims[0]) or (seed[1] > dims[1]):
    #     raise ValueError("(%s) Seed values greater than img size!" % sys._getframe().f_code.co_name)

    # registration (output) image
    # reg = cv.CreateImage(dims, cv.IPL_DEPTH_8U, 1)
    # cv.Zero(reg)
    reg = np.zeros(shape=img.shape)
    if len(bbox) == 0:
        return reg

    # area of the rectangle we don't want to exceed
    pix_area = dims[0] * dims[1]

    # seed is the central region of a 3x3 grid (integer division so the values can be used as indices)
    seed = [bbox[0] + bbox[2] // 3, bbox[1] + bbox[3] // 3, bbox[2] // 3, bbox[3] // 3]

    # parameters
    mean_reg = np.mean(img[seed[1]:seed[1] + seed[3], seed[0]:seed[0] + seed[2]])
    # initial region size
    size = img[seed[1]:seed[1] + seed[3], seed[0]:seed[0] + seed[2]].size
    contour = []  # will be [[x1, y1], ..., [xn, yn]]
    contour_val = []
    dist = 0
    # TODO: may be enhanced later with 8-connectivity
    orient = [(1, 0), (0, 1), (-1, 0), (0, -1)]  # 4-connectivity

    pixel_to_check = []
    for y in range(seed[1], seed[1] + seed[3] + 1):
        for x in range(seed[0], seed[0] + seed[2]):
            pixel_to_check.append([x, y])
    # cur_pix = [seed[0], seed[1]]

    # spreading
    while dist < threshold and size < pix_area:
        # adding pixels
        try:
            cur_pix = pixel_to_check.pop()
        except IndexError:
            break
        for j in range(4):
            # select a new candidate
            temp_pix = [cur_pix[0] + orient[j][0], cur_pix[1] + orient[j][1]]

            # check if it belongs to the image
            is_in_img = dims[0] > temp_pix[0] > 0 and dims[1] > temp_pix[1] > 0
            is_in_bbox = bbox[1] < temp_pix[1] < bbox[1] + bbox[3] and \
                bbox[0] < temp_pix[0] < bbox[0] + bbox[2]

            # candidate is taken if not already selected before
            if is_in_img and reg[temp_pix[1], temp_pix[0]] == 0:
                contour.append(temp_pix)
                contour_val.append(img[temp_pix[1], temp_pix[0]])
                reg[temp_pix[1], temp_pix[0]] = 150

        # add the nearest pixel of the contour to the region
        # dist = abs(int(numpy.mean(contour_val)) - mean_reg)
        # distance of each contour value (in grayscale) from the region mean
        dist_list = [abs(i - mean_reg) for i in contour_val]
        dist = min(dist_list)                    # get the minimum distance
        index = dist_list.index(min(dist_list))  # index of the closest pixel

        size += 1  # updating region size
        reg[cur_pix[1], cur_pix[0]] = 255

        # updating the mean (must be float)
        mean_reg = (mean_reg * size + float(contour_val[index])) / (size + 1)
        # updating the seed
        cur_pix.append(contour[index])

        # removing the pixel from the neighborhood
        del contour[index]
        del contour_val[index]

    return reg
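# Hypothetical usage sketch (not from the source): the file name and bounding box are illustrative;
# bbox is assumed to be [x, y, width, height] as used by simple_region_growing above.
import matplotlib.pyplot as plt

frame = plt.imread('frame.png')  # placeholder RGB input frame
grown = simple_region_growing(frame, [40, 60, 90, 120], threshold=10)
plt.imshow(grown, cmap='gray')
plt.show()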
def convolve2d(self):
    self.roi.play(lambda x: utils.cconv(ker, utils.rgb2gray(x).astype(float) / 255))