def get_lines(img, rho, theta, threshold, min_line_len, max_line_gap):
    """Detect Hough segments in a BGR image and reduce them to lane lines.

    The image is converted to grayscale, blurred (kernel size 17), passed
    through Canny edge detection (thresholds 50/70) and then through
    ``hough_lines``.  Every detected segment whose absolute slope lies in
    ``[0.5, 2]`` is recorded as a ``{"slope", "bias"}`` candidate.

    Args:
        img: Input image; converted with ``cv2.COLOR_BGR2GRAY``.
        rho: Forwarded to ``hough_lines``.
        theta: Forwarded to ``hough_lines``.
        threshold: Forwarded to ``hough_lines``.
        min_line_len: Forwarded to ``hough_lines``.
        max_line_gap: Forwarded to ``hough_lines``.

    Returns:
        Whatever ``compute_candidates`` returns for the collected candidates
        and the shape of the grayscale image.
    """
    grayscale_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    smoothed = gaussian_blur(grayscale_img, kernel_size=17)
    edge_map = canny(smoothed, low_threshold=50, high_threshold=70)

    segments = hough_lines(
        img=edge_map,
        rho=rho,
        theta=theta,
        threshold=threshold,
        min_line_len=min_line_len,
        max_line_gap=max_line_gap,
    )

    candidates = []
    for segment in segments:
        for x1, y1, x2, y2 in segment:
            slope = get_slope(x1, y1, x2, y2)
            # Discard segments that are too flat or too steep.
            if not 0.5 <= np.abs(slope) <= 2:
                continue
            candidates.append({
                "slope": slope,
                "bias": get_bias(slope, x1, y1),
            })

    return compute_candidates(candidates, grayscale_img.shape)
def fine_lane_pipeline(image):
    """Run the lane pipeline on ``image`` and return the blended result.

    Steps: grayscale -> Gaussian blur (kernel 5) -> Canny (50/150) ->
    ``hough_lines`` -> trapezoidal region of interest -> ``weighted_img``
    blend with the input image.

    Args:
        image: Input image; ``image.shape[0]`` is used as the height and
            ``image.shape[1]`` as the width.

    Returns:
        The result of ``weighted_img(masked_hough_output, image)``.
    """
    height, width = image.shape[0], image.shape[1]

    # Edge detection on a smoothed grayscale copy.
    smoothed = gaussian_blur(grayscale(image), kernel_size=5)
    edge_map = canny(smoothed, 50, 150)

    # Hough parameters: rho=1, theta=1 degree, 25 votes,
    # min line length 10, max line gap 5.
    hough_output = hough_lines(edge_map, 1, np.pi / 180, 25, 10, 5)

    # Trapezoid: full-width base at the bottom edge, narrow top edge at
    # 62.5% of the height, centred horizontally.
    top_y = height * 0.625
    top_left_x = width / 2 - height / 10
    top_right_x = width / 2 + height / 10
    trapezoid = np.array(
        [[(0, height), (top_left_x, top_y), (top_right_x, top_y), (width, height)]],
        dtype=np.int32,
    )

    masked = region_of_interest(hough_output, trapezoid)
    return weighted_img(masked, image)
def lane_detection_pipeline(img):
    """Detect lane lines in an image and overlay them on it.

    Args:
        img: The original, unmodified input image.  Its shape must unpack
            into exactly three values (height, width, channels).

    Returns:
        The result of ``utils.weighted_img(detected_lines, img)``.
    """
    height, width, _ = img.shape

    # Colour masks computed on the original image.
    # NOTE(review): the two masks are combined with `&`, while the masking
    # step below is described as keeping white *or* yellow regions.  Whether
    # `&` is correct depends on whether utils.color_threshold marks pixels to
    # keep or pixels to drop -- confirm against utils.
    white = utils.color_threshold(img, rgb_min=[150, 150, 150])
    yellow = utils.color_threshold(
        img, rgb_min=[150, 150, 0], rgb_max=[255, 255, 165])
    combined_mask = white & yellow

    # Grayscale -> Gaussian smoothing -> Canny edges.
    edge_map = utils.canny(utils.gaussian_blur(utils.grayscale(img), 5), 50, 150)

    # Restrict the edges with the colour mask, then with the region of interest.
    colour_filtered = utils.boolean_mask(edge_map, combined_mask)
    roi = np.array(
        [[
            (0, height),
            (width * 6 / 13, height * 3 / 5),
            (width * 7 / 13, height * 3 / 5),
            (width, height),
        ]],
        dtype=np.int32,
    )
    roi_edges = utils.region_of_interest(colour_filtered, roi)

    hough_kwargs = {
        "rho": 2,                # distance resolution in pixels of the Hough grid
        "theta": np.pi / 180,    # angular resolution in radians of the Hough grid
        "threshold": 15,         # min number of votes (intersections in a grid cell)
        "min_line_len": 40,      # minimum number of pixels making up a line
        "max_line_gap": 20,      # maximum gap in pixels between connectable segments
    }
    line_overlay = utils.hough_lines(img=roi_edges, **hough_kwargs)

    # Overlay the detected lines on the original image.
    return utils.weighted_img(line_overlay, img)
def detect_smear_camer(camera):
    """Detect smear for a camera and write its mask to disk.

    All images found in the module-level ``data_path`` directory are
    histogram-equalised, blurred and averaged.  The average is thresholded
    and cleaned up with median/dilation filters, and the resulting mask is
    written to ``outputs/mask_<camera>.jpg``.

    Args:
        camera (int): Camera number.  Only used to label the plots and to
            build the output file name; the images themselves are read from
            ``data_path``.
    """
    filenames = os.listdir(data_path)

    first = cv2.imread(os.path.join(data_path, filenames[0]), cv2.IMREAD_GRAYSCALE)
    # Accumulate in float64: adding two uint8 frames wraps around modulo 256,
    # which would corrupt the very first averaging step.
    mean = np.asarray(gaussian_blur(cv2.equalizeHist(first), 9), dtype=np.float64)

    n_averaged = 1  # number of frames already folded into `mean`
    for filename in tqdm(filenames[1:]):
        image = cv2.imread(os.path.join(data_path, filename), cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of crashing in equalizeHist.
            continue
        equ = gaussian_blur(cv2.equalizeHist(image), 9)
        mean = (mean * n_averaged + equ) / (n_averaged + 1)
        n_averaged += 1

    mean = mean.astype(np.uint8)
    plot_bounding_box(mean, title_='mean camera ' + str(camera))

    # Inverted adaptive threshold of the mean image.
    image = 255 - cv2.adaptiveThreshold(
        mean, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 75, 3)

    # Subtract the dilated output of the custom median filter.
    lines = custom_median_filter(image, 301, 1)
    lines = dilate(lines, 3)
    image = image - lines

    mask_path = 'outputs/mask_' + str(camera) + '.jpg'
    # The intermediate mask is written and read back as a grayscale JPEG
    # before the final clean-up passes.
    cv2.imwrite(mask_path, image)
    image = cv2.imread(mask_path, 0)

    image = median_blur(image, 75, 1)
    image = dilate(image, 7, 7)
    image = 255 - apply_thresholding_img(image, 2, 255)
    image = dilate(image, 5, 15)
    image = median_blur(image, 51, 5)

    plot_bounding_box(image, title_='mask camera ' + str(camera))
    cv2.imwrite(mask_path, image)
def find_mean(folder, subtract):
    """Return the running mean of the equalised, blurred images in ``folder``.

    The first listed file seeds the mean (equalised twice, then blurred with
    a 9x9 Gaussian).  Every further readable file is equalised, blurred with
    ``gaussian_blur(..., 11, iterations=2)`` and folded into the mean.

    Args:
        folder: Directory whose files are read as grayscale images.
        subtract: Not used by this function; kept for interface
            compatibility.

    Returns:
        The mean image.  It is a float64 array as soon as at least one
        further image was averaged in; with a single image it is the blurred
        first image unchanged.
    """
    filenames = os.listdir(folder)

    mean_img = cv2.imread(os.path.join(folder, filenames[0]), 0)
    mean_img_equ = cv2.equalizeHist(mean_img)
    mean_img_equ = cv2.equalizeHist(mean_img_equ)
    mean_img_gauss = cv2.GaussianBlur(mean_img_equ, (9, 9), 0)

    # Count the images actually averaged so that skipped (unreadable) files
    # do not distort the weights of the running mean.
    n_accumulated = 1
    for filename in tqdm(filenames[1:]):
        img = cv2.imread(os.path.join(folder, filename), 0)
        if img is None:
            continue
        img_equ = cv2.equalizeHist(img)
        img_gauss = gaussian_blur(img_equ, 11, iterations=2)
        # Promote to float64 before accumulating: the seed image is uint8 and
        # uint8 + uint8 wraps around modulo 256.
        mean_img_gauss = (
            mean_img_gauss.astype('float64') * n_accumulated + img_gauss
        ) / (n_accumulated + 1)
        n_accumulated += 1

    return mean_img_gauss
def process_image(image_original):
    """Return ``image_original`` with extrapolated lane lines drawn on it.

    Pipeline: copy -> grayscale -> Gaussian blur (kernel 3) -> Canny
    (50/200) -> polygonal region of interest -> ``draw_hough_lines_extrapolate``.

    Args:
        image_original: Input frame.  A width of 960 pixels selects the
            "small image" mask; any other width selects the "large image"
            mask.

    Returns:
        The image returned by ``draw_hough_lines_extrapolate``; this is the
        frame to return when generating videos.
    """
    # Work on a copy rather than on the caller's array.
    frame = np.copy(image_original)

    edge_map = canny(gaussian_blur(grayscale(frame), 3), 50, 200)

    # Four-sided polygon used to mask the edge image.
    height, width = image_original.shape[0], image_original.shape[1]
    if width == 960:
        # small image mask
        corners = [(150, height), (480, 300), (490, 300), (width - 30, height)]
    else:
        # large image mask
        corners = [(int(width * 6 / 32), 660), (620, 430), (710, 430),
                   (width - 150, 650)]
    polygon = np.array([corners], dtype=np.int32)
    roi_edges = region_of_interest(edge_map, polygon)

    # Hough transform parameters.
    hough_rho = 2                # distance resolution in pixels of the Hough grid
    hough_theta = np.pi / 180    # angular resolution in radians of the Hough grid
    hough_votes = 80             # minimum number of votes per Hough grid cell
    hough_min_length = 10        # minimum number of pixels making up a line
    hough_max_gap = 13           # maximum gap in pixels between connectable segments

    return draw_hough_lines_extrapolate(
        frame, roi_edges, hough_rho, hough_theta, hough_votes,
        hough_min_length, hough_max_gap)
_RECON_OPTIMIZERS = ('Adam', 'SGD', 'Adadelta', 'Adagrad', 'AdamW',
                     'SparseAdam', 'Adamax', 'ASGD', 'RMSprop', 'Rprop')


def _make_recon_optimizer(opt_name, param, lr, momentum):
    """Build the ``torch.optim`` optimizer ``opt_name`` for one tensor."""
    if opt_name == 'SGD':
        # SGD is the only supported optimizer that receives the momentum.
        return optim.SGD([param], lr=lr, momentum=momentum)
    return getattr(optim, opt_name)([param], lr=lr)


def reconstruct_stim(features, net,
                     img_mean=np.array((0, 0, 0)).astype(np.float32),
                     img_std=np.array((1, 1, 1)).astype(np.float32),
                     norm=255,
                     bgr=False,
                     initial_input=None,
                     input_size=(224, 224, 3),
                     feature_masks=None,
                     layer_weight=None, channel=None, mask=None,
                     opt_name='SGD',
                     prehook_dict=None,
                     lr_start=0.02, lr_end=1e-12,
                     momentum_start=0.009, momentum_end=0.009,
                     decay_start=0.02, decay_end=1e-11,
                     grad_normalize=True,
                     image_jitter=False, jitter_size=4,
                     image_blur=True, sigma_start=2, sigma_end=0.5,
                     p=3, lamda=0.5,
                     TVlambda=None,
                     clip_extreme=False, clip_extreme_every=4,
                     e_pct_start=1, e_pct_end=1,
                     clip_small_norm=False, clip_small_norm_every=4,
                     n_pct_start=5., n_pct_end=5.,
                     loss_type='l2', iter_n=200, save_intermediate=False,
                     save_intermediate_every=1, save_intermediate_path=None,
                     disp_every=1,
                     ):
    """Reconstruct an image/video whose CNN features match ``features``.

    Each iteration builds a fresh optimizer for the current stimulus, sums
    the per-layer losses between the network activations and the target
    features, takes one optimizer step and then applies the optional
    clipping, L2 decay and Gaussian blur.

    Parameters
    ----------
    features : dict
        Target features keyed by layer name; each value must support
        ``.clone()`` and ``torch.masked_select``.
    net
        Network passed to ``get_cnn_features``; ``net.zero_grad()`` is called
        every iteration.
    img_mean, img_std, norm
        Forwarded to the pre-/de-processing helpers.
    bgr : bool
        Forwarded to the video/gif savers; for images it swaps the channel
        order of the saved *initial* image only.
    initial_input : ndarray, optional
        Starting stimulus.  If None, random integers in [0, 256) of shape
        ``input_size`` are used; otherwise ``input_size`` is taken from it.
    input_size : tuple
        Length 3 selects the image path, any other length the video path.
    feature_masks : dict, optional
        Per-layer masks; built with ``create_feature_masks(features,
        masks=mask, channels=channel)`` when None.
    layer_weight : dict, optional
        Per-layer loss weights; uniform weights summing to 1 when None.
    opt_name : str
        One of 'Adam', 'SGD', 'Adadelta', 'Adagrad', 'AdamW', 'SparseAdam',
        'Adamax', 'ASGD', 'RMSprop', 'Rprop'.
    prehook_dict : dict, optional
        Forwarded to ``get_cnn_features``; an empty dict when None.
    lr_*, momentum_*, decay_*, sigma_*, e_pct_*, n_pct_* : float
        Start/end values of linear schedules; at iteration ``t`` the value is
        ``start + t * (end - start) / iter_n``.
    grad_normalize : bool
        Divide the gradient by its mean absolute value before the step.
    image_jitter, jitter_size
        Randomly roll the last two axes before the step and undo it after.
    image_blur : bool
        Apply ``gaussian_blur`` with the scheduled sigma every iteration.
    p, lamda, TVlambda
        Only used when ``loss_type == 'L2_with_reg'``; ``TVlambda`` defaults
        to ``[0, 0]``.
    clip_extreme, clip_extreme_every, clip_small_norm, clip_small_norm_every
        Enable and schedule the two clipping helpers.
    loss_type : str
        'l2' (summed MSE) or 'L2_with_reg'.
    iter_n : int
        Number of iterations.
    save_intermediate, save_intermediate_every, save_intermediate_path
        Control saving of the initial and intermediate stimuli.
    disp_every : int
        Print the loss every ``disp_every`` iterations.

    Returns
    -------
    tuple
        ``(stimulus, loss_list)`` where ``stimulus`` is the de-processed
        result and ``loss_list`` a float32 array with one loss per iteration.

    Raises
    ------
    ValueError
        If ``loss_type`` or ``opt_name`` is not supported.
    """
    # Validate up front: a bad name would otherwise only surface later as an
    # unbound local variable.
    if loss_type not in ('l2', 'L2_with_reg'):
        raise ValueError(loss_type + ' is not correct')
    if opt_name not in _RECON_OPTIMIZERS:
        raise ValueError(str(opt_name) + ' is not a supported optimizer')
    if prehook_dict is None:
        prehook_dict = {}
    if TVlambda is None:
        TVlambda = [0, 0]

    if loss_type == 'l2':
        loss_fun = torch.nn.MSELoss(reduction='sum')
    else:
        loss_fun = MSE_with_regulariztion(L_lambda=lamda, alpha=p,
                                          TV_lambda=TVlambda)

    # make save dir
    if save_intermediate:
        if save_intermediate_path is None:
            save_intermediate_path = os.path.join(
                '..',
                'recon_img_by_icnn' + datetime.now().strftime('%Y%m%dT%H%M%S'))
        os.makedirs(save_intermediate_path, exist_ok=True)

    # This draw is not used below; it is kept so that the sequence of values
    # taken from NumPy's global RNG (and hence the default random initial
    # input) stays the same.
    np.random.randint(0, 256, (input_size))

    # initial input
    if initial_input is None:
        initial_input = np.random.randint(0, 256, (input_size))
    else:
        input_size = initial_input.shape
    is_image = len(input_size) == 3

    if save_intermediate:
        if is_image:
            save_name = 'initial_image.jpg'
            to_save = initial_input[..., [2, 1, 0]] if bgr else initial_input
            PIL.Image.fromarray(np.uint8(to_save)).save(
                os.path.join(save_intermediate_path, save_name))
        elif len(input_size) == 4:
            # video: saved both as .avi and as .gif
            save_video(initial_input, 'initial_video.avi',
                       save_intermediate_path, bgr)
            save_gif(initial_input, 'initial_video.gif',
                     save_intermediate_path, bgr, fr_rate=150)
        else:
            # Only reported; the optimization still proceeds.
            print('Input size is not appropriate for save')

    # layers and their weights
    layer_list = list(features.keys())
    num_of_layer = len(layer_list)
    if layer_weight is None:
        weights = np.float32(np.ones(num_of_layer))
        weights = weights / weights.sum()
        layer_weight = {layer: weights[j] for j, layer in enumerate(layer_list)}

    # feature mask
    if feature_masks is None:
        feature_masks = create_feature_masks(features, masks=mask,
                                             channels=channel)

    # iteration for gradient descent
    stim = initial_input.copy().astype(np.float32)
    if is_image:
        stim = img_preprocess(stim, img_mean, img_std, norm)
    else:
        stim = vid_preprocess(stim, img_mean, img_std, norm)

    loss_list = np.zeros(iter_n, dtype='float32')

    for t in range(iter_n):
        # scheduled parameters
        lr = lr_start + t * (lr_end - lr_start) / iter_n
        momentum = momentum_start + t * (momentum_end - momentum_start) / iter_n
        decay = decay_start + t * (decay_end - decay_start) / iter_n
        sigma = sigma_start + t * (sigma_end - sigma_start) / iter_n

        # shift
        if image_jitter:
            ox, oy = np.random.randint(-jitter_size, jitter_size + 1, 2)
            stim = np.roll(np.roll(stim, ox, -1), oy, -2)

        # forward
        stim_t = torch.tensor(stim[np.newaxis], requires_grad=True)
        op = _make_recon_optimizer(opt_name, stim_t, lr, momentum)
        fw = get_cnn_features(net, stim_t, features.keys(), prehook_dict)

        # backward: gradients of all layers accumulate on the stimulus
        loss = 0.
        net.zero_grad()
        op.zero_grad()
        for j in range(num_of_layer):
            # layers are visited from the last one to the first one
            target_layer_id = num_of_layer - 1 - j
            target_layer = layer_list[target_layer_id]

            act_j = fw[target_layer_id].clone()
            feat_j = features[target_layer].clone()
            mask_j = torch.FloatTensor(feature_masks[target_layer]).bool()

            masked_act_j = torch.masked_select(act_j, mask_j)
            masked_feat_j = torch.masked_select(feat_j, mask_j)

            loss_j = loss_fun(masked_act_j, masked_feat_j) * layer_weight[target_layer]
            loss_j.backward(retain_graph=True)
            loss += loss_j.detach().numpy()

        if grad_normalize:
            grad_mean = torch.abs(stim_t.grad).mean()
            if grad_mean > 0:
                stim_t.grad /= grad_mean
        op.step()

        # [0] removes the batch axis added above
        stim = stim_t.detach().numpy()[0]

        err = loss
        loss_list[t] = loss

        # clip pixels with extreme value
        if clip_extreme and (t + 1) % clip_extreme_every == 0:
            e_pct = e_pct_start + t * (e_pct_end - e_pct_start) / iter_n
            stim = clip_extreme_value(stim, e_pct)

        # clip pixels with small norm
        if clip_small_norm and (t + 1) % clip_small_norm_every == 0:
            n_pct = n_pct_start + t * (n_pct_end - n_pct_start) / iter_n
            stim = clip_small_norm_value(stim, n_pct)

        # unshift
        if image_jitter:
            stim = np.roll(np.roll(stim, -ox, -1), -oy, -2)

        # L_2 decay
        stim = (1 - decay) * stim

        # gaussian blur
        if image_blur:
            if is_image:
                stim = gaussian_blur(stim, sigma)
            else:
                for i in range(stim.shape[1]):
                    stim[:, i] = gaussian_blur(stim[:, i], sigma)

        # disp info
        if (t + 1) % disp_every == 0:
            print('iter=%d; err=%g;' % (t + 1, err))

        # save intermediate result
        if save_intermediate and ((t + 1) % save_intermediate_every == 0):
            if is_image:
                save_name = '%05d.jpg' % (t + 1)
                PIL.Image.fromarray(
                    normalise_img(img_deprocess(stim, img_mean, img_std, norm))
                ).save(os.path.join(save_intermediate_path, save_name))
            else:
                save_name = '%05d.avi' % (t + 1)
                save_video(
                    normalise_vid(vid_deprocess(stim, img_mean, img_std, norm)),
                    save_name, save_intermediate_path, bgr, fr_rate=30)
                save_name = '%05d.gif' % (t + 1)
                save_gif(
                    normalise_vid(vid_deprocess(stim, img_mean, img_std, norm)),
                    save_name, save_intermediate_path, bgr, fr_rate=150)

    # return the de-processed stimulus together with the loss history
    if is_image:
        return img_deprocess(stim, img_mean, img_std, norm), loss_list
    return vid_deprocess(stim, img_mean, img_std, norm), loss_list
def process_image(image):
    """Draw one extrapolated left and right lane line onto ``image``.

    Pipeline: grayscale -> Gaussian blur (kernel 5) -> Canny (50/150) ->
    triangular region of interest -> probabilistic Hough transform ->
    ``separate_by_slope`` -> one line per side drawn down to the bottom edge
    of the image -> ``weighted_img`` blend with a copy of the input.

    Args:
        image: Input frame; ``shape[0]`` is used as height and ``shape[1]``
            as width.

    Returns:
        The result of ``weighted_img(line_image, copy_of_input)``.
    """
    untouched = image.copy()
    ysize, xsize = image.shape[0], image.shape[1]

    edge_map = canny(gaussian_blur(grayscale(image), 5), 50, 150)

    # The apex is listed twice, so the four-vertex polygon is a triangle.
    apex = [xsize / 2, ysize / 1.72]
    roi = np.array([[[0, ysize], apex, apex, [xsize, ysize]]], dtype=np.int32)
    roi_edges = region_of_interest(edge_map, roi)

    # Blank canvas with the shape and dtype of the input to draw lines on.
    canvas = np.copy(image) * 0

    # rho=2 px, theta=1 degree, 15 votes, min length 40 px, max gap 20 px.
    # The result holds the endpoints of the detected line segments.
    segments = cv2.HoughLinesP(roi_edges, 2, np.pi / 180, 15, np.array([]),
                               40, 20)

    left_points, right_points = separate_by_slope(segments)

    if left_points:
        slope = slope_from_lin_reg(left_points)
        anchor = max(left_points)
        # x where the line through `anchor` with this slope reaches y == ysize
        bottom_x = int(anchor[0] + (ysize - anchor[1]) / slope)
        draw_line(canvas, anchor, [bottom_x, ysize])

    if right_points:
        slope = slope_from_lin_reg(right_points)
        anchor = max(right_points)
        # x where the line through `anchor` with this slope reaches y == ysize
        bottom_x = int(anchor[0] + (ysize - anchor[1]) / slope)
        # The drawn segment starts at the smallest point, not at `anchor`.
        draw_line(canvas, min(right_points), [bottom_x, ysize])

    # Blend the drawn lines with the copy of the input.
    return weighted_img(canvas, untouched)
def _scale_to_unit_mean_abs(grad):
    """Divide ``grad`` by its mean absolute value unless that mean is zero."""
    grad_mean = np.abs(grad).mean()
    if grad_mean > 0:
        return grad / grad_mean
    return grad


def generate_preferred_tmp(net, exec_code, channel=None,
                           feature_mask=None,
                           img_mean=(0, 0, 0),
                           img_std=(1, 1, 1),
                           norm=255,
                           input_size=(224, 224, 3),
                           bgr=False,
                           feature_weight=1.,
                           initial_input=None,
                           iter_n=200,
                           lr_start=1., lr_end=1.,
                           momentum_start=0.001, momentum_end=0.001,
                           decay_start=0.001, decay_end=0.001,
                           grad_normalize=True,
                           image_jitter=True, jitter_size=32, jitter_size_z=2,
                           image_blur=True,
                           sigma_xy_start=2.5, sigma_xy_end=0.5,
                           sigma_t_start=0.01, sigma_t_end=0.002,
                           use_p_norm_reg=False, p=2,
                           lamda_start=0.5, lamda_end=0.5,
                           use_TV_norm_reg=False,
                           TVbeta1=2, TVbeta2=2,
                           TVlamda_start_sp=0.5, TVlamda_end_sp=0.5,
                           TVlamda_start_tmp=0.5, TVlamda_end_tmp=0.5,
                           clip_extreme=False, clip_extreme_every=4,
                           e_pct_start=1, e_pct_end=1,
                           clip_small_norm=False, clip_small_norm_every=4,
                           n_pct_start=5., n_pct_end=5.,
                           clip_small_contribution=False,
                           clip_small_contribution_every=4,
                           c_pct_start=5., c_pct_end=5.,
                           disp_every=1,
                           save_intermediate=False, save_intermediate_every=1,
                           save_intermediate_path=None):
    '''Generate a preferred image/video for the target units.

    The stimulus is optimized by gradient descent with momentum on the loss
    returned by ``minusLoss`` for the masked features.

    All ``*_start`` / ``*_end`` pairs define a linear schedule: at iteration
    ``t`` the value is ``start + t * (end - start) / iter_n``.

    Parameters
    ----------
    net : torch.nn.Module
        CNN model corresponding to the target CNN features.
    exec_code : list
        The code used to extract the intermediate layer; forwarded to
        ``get_cnn_features``.
    channel
        Forwarded to ``create_feature_mask`` when ``feature_mask`` is None.
    feature_mask : ndarray, optional
        Binary mask selecting the target units (1: target unit; 0: irrelevant
        unit), with the same shape as the CNN features of that layer.  Built
        with ``create_feature_mask`` when None.
    img_mean, img_std, norm
        Forwarded to the pre-/de-processing helpers.
    input_size : tuple
        Shape of the stimulus.  Length 3 selects the image path, any other
        length the video path.  Replaced by ``initial_input.shape`` when an
        initial input is given.
    bgr : bool
        Swap the channel order of saved images; forwarded to the video/gif
        savers.

    Optional Parameters
    ----------
    feature_weight : float or ndarray
        Weight for each target unit.  NOTE(review): this value does not reach
        the loss or the update in the code below -- confirm whether that is
        intended.
    initial_input : ndarray
        Initial stimulus; random integers in [0, 256) when None.
    iter_n : int
        The total number of iterations.
    lr_start, lr_end : float
        Learning-rate schedule.
    momentum_start, momentum_end : float
        Momentum schedule.
    decay_start, decay_end : float
        Schedule of the decay rate applied to the stimulus each iteration.
    grad_normalize : bool
        Normalise each gradient by its mean absolute value.
    image_jitter : bool
        Randomly roll the stimulus before each step and undo it afterwards.
    jitter_size : int
        Maximum roll along the last two axes.
    jitter_size_z : int
        Maximum roll along the third-from-last axis.
    image_blur : bool
        Smooth the stimulus every iteration.
    sigma_xy_start, sigma_xy_end : float
        Schedule of the spatial Gaussian sigma (used for images and videos).
    sigma_t_start, sigma_t_end : float
        Schedule of the temporal Gaussian sigma (videos only).
    use_p_norm_reg : bool
        Add the p-norm regularization gradient.
    p : float
        The order of the p-norm.
    lamda_start, lamda_end : float
        Schedule of the p-norm weight.
    use_TV_norm_reg : bool
        Add the TV-norm regularization gradient.
    TVbeta1, TVbeta2 : float
        Order of the spatial (and image) TV norm and of the temporal TV norm.
    TVlamda_start_sp, TVlamda_end_sp : float
        Schedule of the spatial (and image) TV weight.
    TVlamda_start_tmp, TVlamda_end_tmp : float
        Schedule of the temporal TV weight.
    clip_extreme, clip_extreme_every, e_pct_start, e_pct_end
        Enable, period and percentage schedule of extreme-value clipping.
    clip_small_norm, clip_small_norm_every, n_pct_start, n_pct_end
        Enable, period and percentage schedule of small-norm clipping.
    clip_small_contribution, clip_small_contribution_every, c_pct_start, c_pct_end
        Enable, period and percentage schedule of small-contribution clipping.
    disp_every : int
        Display the optimization information every n iterations.
    save_intermediate : bool
        Save the initial and intermediate stimuli or not.
    save_intermediate_every : int
        Save the intermediate stimulus every n iterations.
    save_intermediate_path : str
        Directory for the saved stimuli; a time-stamped directory under '.'
        is used when None.

    Returns
    -------
    ndarray
        The de-processed preferred image/video.
    '''
    # make save dir
    if save_intermediate:
        if save_intermediate_path is None:
            save_intermediate_path = os.path.join(
                '.', 'preferred_gd_' + datetime.now().strftime('%Y%m%dT%H%M%S'))
        os.makedirs(save_intermediate_path, exist_ok=True)

    # initial input
    if initial_input is None:
        initial_input = np.random.randint(0, 256, (input_size))
    else:
        input_size = initial_input.shape
    is_image = len(input_size) == 3

    # reference norm: half the norm of a random-noise stimulus
    noise_vid = np.random.randint(0, 256, (input_size))
    img_norm0 = np.linalg.norm(noise_vid) / 2.

    if save_intermediate:
        if is_image:
            save_name = 'initial_video.jpg'
            to_save = initial_input[..., [2, 1, 0]] if bgr else initial_input
            PIL.Image.fromarray(np.uint8(to_save)).save(
                os.path.join(save_intermediate_path, save_name))
        elif len(input_size) == 4:
            # video: saved both as .avi and as .gif
            save_video(initial_input, 'initial_video.avi',
                       save_intermediate_path, bgr)
            save_gif(initial_input, 'initial_video.gif',
                     save_intermediate_path, bgr, fr_rate=150)
        else:
            # Only reported; the optimization still proceeds.
            print('Input size is not appropriate for save')

    # create feature mask if not defined
    if feature_mask is None:
        feature_mask = create_feature_mask(net, exec_code, input_size, channel)
    # torch.masked_select expects a boolean mask; build it once.
    mask_t = torch.as_tensor(feature_mask, dtype=torch.bool)

    # iteration for gradient descent
    init_input = initial_input.copy()
    if is_image:
        stim = img_preprocess(init_input, img_mean, img_std, norm)
    else:
        stim = vid_preprocess(init_input, img_mean, img_std, norm)
    delta_input = np.zeros_like(stim)

    # Loss function (minus Loss)
    loss_fun = minusLoss()

    for t in range(iter_n):
        # scheduled parameters
        lr = lr_start + t * (lr_end - lr_start) / iter_n
        momentum = momentum_start + t * (momentum_end - momentum_start) / iter_n
        decay = decay_start + t * (decay_end - decay_start) / iter_n
        sigma_xy = sigma_xy_start + t * (sigma_xy_end - sigma_xy_start) / iter_n
        sigma_t = sigma_t_start + t * (sigma_t_end - sigma_t_start) / iter_n

        # shift
        if image_jitter:
            ox, oy = np.random.randint(-jitter_size, jitter_size + 1, 2)
            oz = np.random.randint(-jitter_size_z, jitter_size_z + 1, 1)
            stim = np.roll(np.roll(np.roll(stim, ox, -1), oy, -2), oz, -3)
            delta_input = np.roll(
                np.roll(np.roll(delta_input, ox, -1), oy, -2), oz, -3)

        # create Tensor (with a leading batch axis)
        stim_t = torch.Tensor(stim[np.newaxis])
        stim_t.requires_grad_()

        # forward
        fw = get_cnn_features(net, stim_t, exec_code)[0]
        feat = torch.masked_select(fw, mask_t)
        feat_abs_mean = np.mean(np.abs(feat[0].detach().numpy()))

        # backward for net
        net.zero_grad()
        loss = loss_fun(feat)
        loss.backward()
        grad = stim_t.grad.numpy()
        stim = stim_t.detach().numpy()

        # normalize gradient
        if grad_normalize:
            grad = _scale_to_unit_mean_abs(grad)

        # gradient with momentum
        delta_input = delta_input * momentum + grad

        # p norm regularization
        if use_p_norm_reg:
            lamda = lamda_start + t * (lamda_end - lamda_start) / iter_n
            _, grad_r = p_norm(stim, p)
            grad_r = grad_r / (img_norm0**2)
            if grad_normalize:
                grad_r = _scale_to_unit_mean_abs(grad_r)
            delta_input = delta_input + lamda * grad_r

        # TV norm regularization
        if use_TV_norm_reg:
            TVlamda_sp = TVlamda_start_sp + t * (
                TVlamda_end_sp - TVlamda_start_sp) / iter_n
            if is_image:
                _, grad_r = TV_norm(stim, TVbeta1)
                grad_r = grad_r / (img_norm0**2)
                if grad_normalize:
                    grad_r = _scale_to_unit_mean_abs(grad_r)
                delta_input = delta_input + TVlamda_sp * grad_r
            else:
                # spatial
                _, grad_r_sp = TV_norm_sp(stim, TVbeta1)
                grad_r_sp = grad_r_sp / (img_norm0**2)
                if grad_normalize:
                    # each gradient is guarded by its own mean, so an
                    # all-zero gradient is never divided by zero
                    grad_r_sp = _scale_to_unit_mean_abs(grad_r_sp)

                # temporal
                TVlamda_tmp = TVlamda_start_tmp + t * (
                    TVlamda_end_tmp - TVlamda_start_tmp) / iter_n
                _, grad_r_tmp = TV_norm_tmp(stim, TVbeta2)
                grad_r_tmp = grad_r_tmp / (img_norm0**2)
                if grad_normalize:
                    grad_r_tmp = _scale_to_unit_mean_abs(grad_r_tmp)

                delta_input = (delta_input + TVlamda_sp * grad_r_sp
                               + TVlamda_tmp * grad_r_tmp)

        # input update; [0] removes the batch axis
        stim = np.add(stim, -lr * delta_input, dtype=np.float32)[0]
        grad = grad[0]
        delta_input = delta_input[0]

        # clip pixels with extreme value
        if clip_extreme and (t + 1) % clip_extreme_every == 0:
            e_pct = e_pct_start + t * (e_pct_end - e_pct_start) / iter_n
            stim = clip_extreme_pixel(stim, e_pct)

        # clip pixels with small norm
        if clip_small_norm and (t + 1) % clip_small_norm_every == 0:
            n_pct = n_pct_start + t * (n_pct_end - n_pct_start) / iter_n
            stim = clip_small_norm_pixel(stim, n_pct)

        # clip pixels with small contribution
        if clip_small_contribution and (
                t + 1) % clip_small_contribution_every == 0:
            c_pct = c_pct_start + t * (c_pct_end - c_pct_start) / iter_n
            stim = clip_small_contribution_pixel(stim, grad, c_pct)

        # unshift
        if image_jitter:
            stim = np.roll(np.roll(np.roll(stim, -ox, -1), -oy, -2), -oz, -3)
            delta_input = delta_input - grad
            delta_input = np.roll(
                np.roll(np.roll(delta_input, -ox, -1), -oy, -2), -oz, -3)
            delta_input = delta_input + grad

        # L_2 decay
        stim = (1 - decay) * stim

        # gaussian blur
        if image_blur:
            if is_image:
                # images only have spatial axes, so the spatial sigma applies
                stim = gaussian_blur(stim, sigma_xy)
            else:
                stim = gaussian_blur_vid(stim, sigma_xy, sigma_t)

        # disp info
        if (t + 1) % disp_every == 0:
            print('iter=%d; mean(abs(feat))=%g;' % (t + 1, feat_abs_mean))

        # save intermediate result
        if save_intermediate and ((t + 1) % save_intermediate_every == 0):
            if is_image:
                save_name = '%05d.jpg' % (t + 1)
                deprocessed = img_deprocess(stim, img_mean, img_std, norm)
                if bgr:
                    deprocessed = deprocessed[..., [2, 1, 0]]
                PIL.Image.fromarray(normalise_img(deprocessed)).save(
                    os.path.join(save_intermediate_path, save_name))
            else:
                save_name = '%05d.avi' % (t + 1)
                save_video(
                    normalise_vid(vid_deprocess(stim, img_mean, img_std, norm)),
                    save_name, save_intermediate_path, bgr, fr_rate=10)
                save_name = '%05d.gif' % (t + 1)
                save_gif(
                    normalise_vid(vid_deprocess(stim, img_mean, img_std, norm)),
                    save_name, save_intermediate_path, bgr, fr_rate=150)

    # return the de-processed stimulus
    if is_image:
        return img_deprocess(stim, img_mean, img_std, norm)
    return vid_deprocess(stim, img_mean, img_std, norm)
images = ['challenge.jpg']
for image_name in images:
    # Load the test image and convert it to grayscale.
    image = mpimg.imread('./test_images_challenge/' + image_name)
    gray = utils.grayscale(image)

    # Gaussian smoothing with a fixed kernel size.
    kernel_size = 5
    blur_gray = utils.gaussian_blur(gray, kernel_size)

    # Canny edge detection with the chosen thresholds.
    low_threshold = 50
    high_threshold = 150
    edges = utils.canny(blur_gray, low_threshold, high_threshold)

    # Top-left panel: the input image.
    plt.subplot(221)
    plt.imshow(image, cmap='gray')
    plt.title(f'Original Image \n{image_name}')

    # Top-right panel: the Canny edge map.
    plt.subplot(222)
    plt.imshow(edges, cmap='gray')
    plt.title(f'Canny Edges \n{image_name}')

    # Blank mask with the shape of the edge image, and the fill value that
    # will be used for it (a masked edges image is built with cv2.fillPoly()).
    mask = np.zeros_like(edges)
    ignore_mask_color = 255
def _sobel_edges(channel, thresh):
    """Return the bitwise OR of the 'x' and 'y' ``utils.abs_sobel_thresh`` masks.

    Args:
        channel: Single-channel image handed to ``utils.abs_sobel_thresh``.
        thresh: ``(low, high)`` tuple forwarded unchanged to the helper.

    Returns:
        The element-wise OR of the two directional masks.
    """
    # The literal 5 is forwarded as the helper's third positional argument
    # (presumably the Sobel kernel size -- confirm against utils).
    edges_x = utils.abs_sobel_thresh(channel, 'x', 5, thresh)
    edges_y = utils.abs_sobel_thresh(channel, 'y', 5, thresh)
    return cv2.bitwise_or(edges_x, edges_y)


def _fit_lane(binary, lane_mask):
    """Fit a 2nd-degree polynomial ``column = f(row)`` to the masked pixels.

    Args:
        binary: Binary lane image; pixels with value > 0.5 count as lane.
        lane_mask: Mask passed to ``cv2.bitwise_and`` to isolate one lane.

    Returns:
        The three ``np.polyfit`` coefficients, highest power first.
    """
    lane_pixels = cv2.bitwise_and(binary, binary, mask=lane_mask)
    # np.argwhere yields (row, col) pairs, so the row index is the
    # independent variable of the fit and the column is the fitted value.
    hits = np.argwhere(lane_pixels > .5)
    return np.polyfit(hits[:, 0], hits[:, 1], 2)


def detect(fname):
    """Detect the two lane boundaries in an image and draw them on it.

    The image is resized to 1280x720, warped with ``utils.warp_image``,
    thresholded by colour (HSV) and gradient (HLS channels 1 and 2), and a
    quadratic is fitted to each lane's pixels. The fitted lane area is drawn
    in the warped view, mapped back with the inverse warp and blended onto
    the resized input.

    Args:
        fname: Path of the image *without* extension; ``'.jpeg'`` is appended.

    Returns:
        A ``(coordinates, result)`` tuple:
            coordinates: float32 array of 22 ``(x, y)`` points in the resized
                image's coordinates -- 11 samples of the left fit followed
                by 11 samples of the right fit.
            result: The resized input blended with the lane overlay.
    """
    frame = cv2.resize(mpimg.imread(fname + '.jpeg'), (1280, 720))[:, :, :3]
    img_size = np.shape(frame)
    height, width = img_size[0], img_size[1]

    # Source trapezoid for the warp: bottom corners at +/-90% of the half
    # width on the last row, top corners at +/-36% on the row at height/1.5.
    top_y = int(height / 1.5)
    center_x = width // 2
    src = np.float32([[center_x - 0.9 * center_x, height],
                      [center_x + 0.9 * center_x, height],
                      [center_x + .36 * center_x, top_y],
                      [center_x - .36 * center_x, top_y]])
    dst = np.float32([[0, height], [width, height], [width, 0], [0, 0]])
    warped, _, Minv_warp = utils.warp_image(frame, src, dst, (width, height))

    # Colour evidence: yellow or white pixels in HSV space.
    image_HSV = cv2.cvtColor(warped, cv2.COLOR_RGB2HSV)
    mask_yellow = utils.color_mask(image_HSV,
                                   np.array([0, 100, 100]),
                                   np.array([80, 255, 255]))
    mask_white = utils.color_mask(image_HSV,
                                  np.array([0, 0, 160]),
                                  np.array([255, 80, 255]))
    mask_lane = cv2.bitwise_or(mask_yellow, mask_white)

    # Gradient evidence from HLS channels 1 (L) and 2 (S). The two upper
    # thresholds differ (225 vs 255) exactly as in the original code.
    image_HLS = cv2.cvtColor(warped, cv2.COLOR_RGB2HLS)
    image_cmb = cv2.bitwise_or(_sobel_edges(image_HLS[:, :, 1], (50, 225)),
                               _sobel_edges(image_HLS[:, :, 2], (50, 255)))

    # A pixel is a lane candidate if either kind of evidence fires.
    lane_binary = np.zeros_like(image_cmb)
    lane_binary[(mask_lane >= .5) | (image_cmb >= .5)] = 1

    # Column-wise mean over the bottom half of the image. Floor division
    # keeps the slice bound an int; "/" yields a float on Python 3, which
    # cannot be used as an index.
    mean_lane = np.mean(lane_binary[height // 2:, :], axis=0)
    mask_L_poly, mask_R_poly = utils.get_initial_mask(lane_binary, 50,
                                                      mean_lane)

    left_fit = _fit_lane(lane_binary, mask_L_poly)
    right_fit = _fit_lane(lane_binary, mask_R_poly)

    # Evaluate both fits at 11 evenly spaced rows from 0 to height.
    plot_y = np.arange(11) * height / 10
    left_fitx = left_fit[0] * plot_y**2 + left_fit[1] * plot_y + left_fit[2]
    right_fitx = (right_fit[0] * plot_y**2 + right_fit[1] * plot_y +
                  right_fit[2])

    # Paint the lane polygon and its two boundaries in the warped view.
    warp_zero = np.zeros_like(lane_binary).astype(np.uint8)
    color_warp = np.dstack((warp_zero, warp_zero, warp_zero))
    pts_left = np.array([np.transpose(np.vstack([left_fitx, plot_y]))])
    # The right boundary is reversed so the polygon outline is traversed in
    # order: down the left side, back up the right side.
    pts_right = np.array(
        [np.flipud(np.transpose(np.vstack([right_fitx, plot_y])))])
    pts = np.hstack((pts_left, pts_right))
    cv2.fillPoly(color_warp, np.int_([pts]), (0, 255, 255))
    utils.draw_pw_lines(color_warp, np.int_(pts_left), (255, 255, 0))
    utils.draw_pw_lines(color_warp, np.int_(pts_right), (255, 255, 255))

    # Un-warp the overlay to the frame's size and blend it onto the frame.
    newwarp = cv2.warpPerspective(color_warp, Minv_warp, (width, height))
    result = cv2.addWeighted(frame, 1, newwarp, 0.5, 0)

    # Map the sampled fit points (left first, then right, both top-down)
    # back into the un-warped image.
    lane_pts = np.concatenate([np.column_stack([left_fitx, plot_y]),
                               np.column_stack([right_fitx, plot_y])])
    coordinates = cv2.perspectiveTransform(
        np.array([lane_pts], dtype='float32'), Minv_warp)[0]
    return coordinates, result