def process_image(img, lanes=None, SAVE=""):
    """Run the lane-finding pipeline on one frame and return the annotated frame.

    Steps, in order: camera calibration lookup, binary thresholding,
    forward perspective warp, lane detection, lane polygon rendering,
    inverse warp, blending with the input and text annotation.

    Args:
        img: input frame; it is blended with the rendered lane at the end.
        lanes: ignored -- it is overwritten by the result of
            ``detect_lanes`` before being read.
        SAVE: path prefix for intermediate images.  When empty and
            ``left_lane.savepath`` is set, the prefix is built from
            ``left_lane.savepath`` / ``left_lane.savename`` instead.

    Returns:
        The blended, annotated output image.
    """
    if SAVE != "" or left_lane.savepath == "":
        savename = SAVE
    else:
        savename = os.path.join(left_lane.savepath, left_lane.savename)

    mtx, dist = cam_calib(SAVE=savename)

    # Threshold, warp to the top-down view, then force every nonzero pixel
    # of the warped image to 255.
    binary = gen_binary_images(img, mtx, dist, savename)
    warp_img = persp_trans_forward(binary)
    warp_img[warp_img > 0] = 255
    save_image(warp_img, savename, "warp")

    lanes = detect_lanes(warp_img, prev_lanes=None, save_path=savename)
    left_fit, right_fit = lanes[0], lanes[1]
    left_fitx, ploty, right_fitx = gen_fit_line(warp_img, left_fit, right_fit)
    lane_overlay = plot_lane(img, left_fitx, ploty, right_fitx)

    # Offset from the lane centre and radius of curvature.
    dist_x = dist_from_center(left_fitx, right_fitx)
    curverad = get_curverad(ploty, left_fitx, right_fitx)

    # Project the rendered lane back into the camera view and blend it
    # 50/50 with the input frame.
    lane_overlay = persp_trans_backward(lane_overlay)
    out_img = cv2.addWeighted(img, .5, lane_overlay, .5, 0.0, dtype=0)

    font = cv2.FONT_HERSHEY_DUPLEX
    white = (255, 255, 255)
    cv2.putText(out_img, "Radius: %.2fm" % curverad, (400, 650), font, 1.0,
                white)

    if dist_x > 0:
        offset_text = "Right from center: %.2fm" % (np.abs(dist_x))
    elif dist_x < 0:
        offset_text = "Left from center: %.2fm" % (np.abs(dist_x))
    else:
        offset_text = "Center"
    cv2.putText(out_img, offset_text, (400, 700), font, 1.0, white)

    save_image(out_img, savename, "final")
    return out_img
def validate_perspective_transform(inputdir, outputdir):
    """Save before/after images of the perspective transform for visual checking.

    For every file returned by ``load_images(inputdir)`` this writes:
      * the undistorted image with the source rectangle drawn
        (suffix ``persp_src``) under ``outputdir``,
      * the warped image with the destination rectangle drawn
        (suffix ``persp_dst``) under ``outputdir``,
      * a side-by-side matplotlib figure named ``<name>_perspective<ext>``.

    NOTE(review): the side-by-side figure is written to the literal
    directory ``'output_images'``, not to ``outputdir`` -- confirm whether
    that is intended.

    Args:
        inputdir: passed to ``load_images`` to obtain the image file names.
        outputdir: directory prefix for the ``persp_src`` / ``persp_dst``
            images.
    """
    dpi = 96
    for fname in load_images(inputdir):
        # OpenCV loads BGR; convert to RGB before undistorting.
        image = undistort(
            cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB))

        basename = os.path.basename(fname)
        savename = os.path.join(outputdir, basename)

        # Draw on a copy so the undistorted image itself stays clean for
        # the warp below.
        src = draw_src_rectangle(np.copy(image))
        save_image(src, savename, 'persp_src')

        dst = draw_dst_rectangle(persp_trans_forward(image))
        save_image(dst, savename, 'persp_dst')

        head, ext = os.path.splitext(basename)
        savename = os.path.join('output_images', head + '_perspective' + ext)

        # Size the figure so that, at `dpi`, it matches the image in pixels.
        height, width = image.shape[0], image.shape[1]
        fig = plt.figure(figsize=(width / dpi, height / dpi))
        fig.suptitle(fname)
        panels = (
            (121, src, 'Undistort Image'),
            (122, dst, 'Perspective Transform'),
        )
        for position, picture, title in panels:
            axes = fig.add_subplot(position)
            axes.imshow(picture)
            axes.set_title(title)
        fig.tight_layout()
        fig.savefig(savename, dpi=dpi)
        plt.close(fig)
def train(args):
    """Train the style network, logging, validating and checkpointing as it goes.

    Attributes read from ``args``: ``log_dir``, ``images_test_dir``,
    ``model_dir``, ``vgg_path``, ``lr``, ``style_image_path``,
    ``batch_size``, ``load_model``, ``train_dir``, ``val_dir``,
    ``test_img_dir``, ``epochs``, ``log_interval``, ``val_interval``,
    ``test_image_interval`` and ``model_interval``.

    Side effects: creates the three output directories if missing, writes
    scalar summaries to ``log_dir``, stylised test images to
    ``images_test_dir`` and checkpoints (model, optimizer, epoch) to
    ``model_dir``; prints progress and the total wall-clock time.

    Fixes relative to the previous version:
      * validation now evaluates the validation batch (it used to re-use
        the current *training* batch for every validation step);
      * validation and test-image inference run under ``torch.no_grad()``;
      * checkpoints are loaded with ``map_location`` so a checkpoint saved
        on another device can be resumed;
      * ``transforms.Resize`` is used when available instead of the
        deprecated ``transforms.Scale``.
    """
    # Make sure every output directory exists before anything is written.
    for directory in (args.log_dir, args.images_test_dir, args.model_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    print('Saving paths checked...')

    writer = SummaryWriter(args.log_dir)
    print('Summary Writer Initialised')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('DEVICE SET TO: ', device.type, '...')

    loss = CalculateLoss(args.vgg_path).to(device)
    print('Loss function loaded...')
    style_net = style_network().to(device)
    print('Style Network loaded...')
    optimizer = Adam(style_net.parameters(), args.lr)
    print('Optimizer set...')

    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    # Style image: [0-255] -> [0-1] tensor, normalised with ImageNet
    # statistics, then repeated to one copy per batch element.
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])
    style_img = style_transform(load_image(args.style_image_path))
    style_img = style_img.repeat(args.batch_size, 1, 1, 1).to(device)
    loss.add_style_img(style_img)

    # Resume training from a checkpoint if one was requested.
    start = 0
    if args.load_model:
        filename = args.model_dir + args.load_model
        # map_location lets a checkpoint saved on one device be resumed on
        # another (e.g. CUDA checkpoint on a CPU-only host).
        checkpoint_dict = torch.load(filename, map_location=device)
        style_net.load_state_dict(checkpoint_dict["model"])
        optimizer.load_state_dict(checkpoint_dict["optimizer"])
        start = checkpoint_dict["epoch"] + 1
        print("Resuming training on model:{} and epoch:{}".format(
            args.load_model, start))

        # Make sure model and optimizer state live on the active device.
        style_net = style_net.to(device)
        for state in optimizer.state.values():
            for key, value in state.items():
                if isinstance(value, torch.Tensor):
                    state[key] = value.to(device)

    # transforms.Scale is the deprecated former name of transforms.Resize;
    # prefer Resize and fall back to Scale only on installs that predate it.
    resize_transform = getattr(transforms, 'Resize', None) or transforms.Scale
    content_transform = transforms.Compose([
        resize_transform(256),        # scale shortest side to 256
        transforms.CenterCrop(256),   # crop the central 256 x 256 region
        transforms.ToTensor(),        # [0-255] -> [0-1]
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])

    train_dataset = datasets.ImageFolder(args.train_dir, content_transform)
    val_dataset = datasets.ImageFolder(args.val_dir, content_transform)

    # Fixed test images, each stored with a leading batch dimension of 1.
    test_images = []
    for name in ['lighthouse.jpg', 'pier.jpg', 'sfbridge.jpg', 'skyline.jpg']:
        test_image = content_transform(
            load_image(args.test_img_dir + '/' + name))
        test_images.append(test_image.repeat(1, 1, 1, 1))

    b = float(args.batch_size)
    t_s = time.time()
    for i in range(start, start + args.epochs):
        content_loader = DataLoader(train_dataset,
                                    batch_size=args.batch_size,
                                    drop_last=True,
                                    shuffle=True)
        N = len(content_loader)

        # One pass over the whole training set per epoch.
        for j, batch in enumerate(content_loader):
            step = i * N + j
            last_batch = j == N - 1

            batch_train_img = batch[0].to(device)
            output = style_net(batch_train_img)

            optimizer.zero_grad()
            total_loss, style_loss_i, content_loss_i, tv_loss_i = loss(
                batch_train_img, output)
            total_loss_i = total_loss.item()
            total_loss.backward()
            optimizer.step()

            # Log per-image training losses.
            if j % args.log_interval == 0:
                writer.add_scalar('train_total_loss', total_loss_i / b, step)
                writer.add_scalar('train_style_loss', style_loss_i / b, step)
                writer.add_scalar('train_content_loss', content_loss_i / b,
                                  step)
                writer.add_scalar('train_tv_loss', tv_loss_i / b, step)
                writer.file_writer.flush()
                print(total_loss_i / b)

            # Evaluate on the full validation set.
            if j % args.val_interval == 0:
                style_net.eval()
                val_loader = DataLoader(val_dataset,
                                        batch_size=args.batch_size,
                                        drop_last=True)
                val_n = len(val_loader)
                val_total_loss_c = 0.0
                val_style_loss_c = 0.0
                val_content_loss_c = 0.0
                val_tv_loss_c = 0.0
                # No gradients are needed for evaluation.
                with torch.no_grad():
                    for batch_val in val_loader:
                        # Use the validation batch, not the training batch.
                        batch_val_img = batch_val[0].to(device)
                        output_val = style_net(batch_val_img)
                        (val_total_loss, val_style_loss_i,
                         val_content_loss_i, val_tv_loss_i) = loss(
                             batch_val_img, output_val)
                        val_total_loss_c += val_total_loss.item()
                        val_style_loss_c += val_style_loss_i
                        val_content_loss_c += val_content_loss_i
                        val_tv_loss_c += val_tv_loss_i
                val_images = val_n * b
                writer.add_scalar('val_total_loss',
                                  val_total_loss_c / val_images, step)
                writer.add_scalar('val_style_loss',
                                  val_style_loss_c / val_images, step)
                writer.add_scalar('val_content_loss',
                                  val_content_loss_c / val_images, step)
                writer.add_scalar('val_tv_loss',
                                  val_tv_loss_c / val_images, step)
                style_net.train()
                print(val_total_loss_c / val_images)

            # Stylise the fixed test images.
            if j % args.test_image_interval == 0 or last_batch:
                style_net.eval()
                with torch.no_grad():
                    for k, img in enumerate(test_images):
                        outputTestImage = style_net(img.to(device)).cpu()
                        path = args.images_test_dir + (
                            "/test_k{}_e{}_i{}.jpg".format(k, i, j))
                        save_image(path, outputTestImage.data[0])
                style_net.train()

            # Checkpoint model and optimizer.
            if j % args.model_interval == 0 or last_batch:
                filename = args.model_dir + "/model_e{}_i{}.pth".format(i, j)
                state = {
                    "model": style_net.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "epoch": i
                }
                torch.save(state, filename)

    writer.close()
    t_e = time.time()
    print(t_e - t_s)
def _calibration_savename(SAVE, image_name):
    """Return the save prefix for ``image_name`` in the directory part of ``SAVE``."""
    head, _ = os.path.split(SAVE)
    return os.path.join(head, os.path.basename(image_name))


def do_camera_calibration(image_names, SAVE=""):
    """Calibrate the camera from 9x6 chessboard images.

    Args:
        image_names: paths of the chessboard images.
        SAVE: when non-empty, the originals, the images with detected
            corners drawn, and the undistorted images are saved via
            ``save_image`` into the directory part of this path.

    Returns:
        ``(mtx, dist)`` as returned by ``cv2.calibrateCamera``.

    Raises:
        ValueError: if no chessboard could be detected in any image (this
            includes an empty ``image_names``); previously this surfaced
            as a ``NameError`` or an OpenCV error.
    """
    # Object points (0,0,0), (1,0,0), ..., (8,5,0) -- the same for every view.
    objp = np.zeros((6 * 9, 3), np.float32)
    objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

    objpoints = []  # 3d points in real world space
    imgpoints = []  # 2d points in image plane
    image_size = None  # (width, height) of the last readable image

    for image_name in image_names:
        img = cv2.imread(image_name)
        if img is None:
            # cv2.imread signals failure by returning None.
            print("can't read image ", image_name)
            continue

        save_enabled = SAVE != ""
        if save_enabled:
            savename = _calibration_savename(SAVE, image_name)
            save_image(img, savename, append="original")

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image_size = gray.shape[::-1]

        ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)
        if ret:
            objpoints.append(objp)
            imgpoints.append(corners)
            if save_enabled:
                chess_img = cv2.drawChessboardCorners(img, (9, 6), corners,
                                                      ret)
                save_image(chess_img, savename, append="chess")
        else:
            print("can't fine chess board ", image_name)

    print("Found", len(imgpoints), "images with chessboard corners from",
          len(image_names), "images.")

    if not objpoints:
        raise ValueError(
            "camera calibration needs at least one image with a detectable "
            "9x6 chessboard")

    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
        objpoints, imgpoints, image_size, None, None)

    if SAVE != "":
        for image_name in image_names:
            img = cv2.imread(image_name)
            if img is None:
                continue
            undist = cv2.undistort(img, mtx, dist, None, mtx)
            save_image(undist, _calibration_savename(SAVE, image_name),
                       append="undistort")
    return mtx, dist
def gen_binary_images(img, mtx, dist, SAVE=""):
    """Undistort ``img`` and build a binary image from gradient and colour thresholds.

    A pixel is set to 255 in the result when either
      * its scaled absolute Sobel-x gradient (on the grayscale image) lies
        in [20, 100], or
      * its HLS saturation lies in [170, 255] and its lightness is >= 30.

    Intermediate images ("original", "undistort", "color_stack",
    "combined") are handed to ``save_image``.

    Args:
        img: input image; converted with the BGR colour codes below.
        mtx, dist: camera matrix and distortion coefficients passed to
            ``cv2.undistort``.
        SAVE: path prefix for saved images.  When empty and
            ``left_lane.savepath`` is set, the prefix is built from
            ``left_lane.savepath`` / ``left_lane.savename`` instead.

    Returns:
        The combined binary image (values 0 or 255).
    """
    use_lane_path = SAVE == "" and left_lane.savepath != ""
    if use_lane_path:
        savename = os.path.join(left_lane.savepath, left_lane.savename)
    else:
        savename = SAVE

    save_image(img, savename, "original")
    undistorted = cv2.undistort(img, mtx, dist, None, mtx)
    save_image(undistorted, savename, "undistort")

    # Lightness and saturation channels of the undistorted image.
    hls = cv2.cvtColor(undistorted, cv2.COLOR_BGR2HLS)
    lightness = hls[:, :, 1]
    saturation = hls[:, :, 2]

    # Absolute x-derivative accentuates lines away from horizontal; scale
    # it to the 0-255 range.
    gray = cv2.cvtColor(undistorted, cv2.COLOR_BGR2GRAY)
    grad_x = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 1, 0))
    scaled_sobel = np.uint8(255 * grad_x / np.max(grad_x))

    # Gradient threshold.
    grad_lo, grad_hi = 20, 100
    edge_mask = (scaled_sobel >= grad_lo) & (scaled_sobel <= grad_hi)
    sxbinary = np.zeros_like(scaled_sobel)
    sxbinary[edge_mask] = 1

    # Saturation threshold, gated by a minimum lightness.
    sat_lo, sat_hi, light_lo = 170, 255, 30
    colour_mask = ((saturation >= sat_lo) & (saturation <= sat_hi) &
                   (lightness >= light_lo))
    s_binary = np.zeros_like(saturation)
    s_binary[colour_mask] = 1

    # Debug view: gradient hits in channel 0, colour hits in channel 1.
    color_binary = np.dstack((sxbinary, s_binary, np.zeros_like(sxbinary)))
    color_binary[edge_mask, 0] = 255
    color_binary[colour_mask, 1] = 255
    save_image(color_binary, savename, "color_stack")

    # Union of both thresholds.
    combined_binary = np.zeros_like(sxbinary)
    combined_binary[colour_mask | edge_mask] = 255
    save_image(combined_binary, savename, "combined")
    return combined_binary
def _window_pixels(nonzerox, nonzeroy, y_low, y_high, x_center, margin,
                   minpix):
    """Return (indices, new_center) for the pixels inside one search window.

    ``new_center`` is the integer mean x of the selected pixels when more
    than ``minpix`` were found, otherwise ``None``.
    """
    inds = ((nonzeroy >= y_low) & (nonzeroy < y_high) &
            (nonzerox >= x_center - margin) &
            (nonzerox < x_center + margin)).nonzero()[0]
    if len(inds) > minpix:
        return inds, int(np.mean(nonzerox[inds]))
    return inds, None


def _sliding_window_lane(nonzerox, nonzeroy, x_base, window_height, nwindows,
                         margin, minpix):
    """Sliding-window search for one lane line, starting at the image centre row.

    The search starts with the centre window and then walks outward,
    alternating one window toward the top and one toward the bottom.
    Returns the concatenated pixel indices and the list of window centres
    (``None`` where a window did not find enough pixels).
    """
    half = nwindows // 2
    center_y_low = half * window_height
    center_y_hi = center_y_low
    lane_inds = []
    center_line = []
    if nwindows % 2 != 0:
        # Odd window count: there is a real centre window, and the
        # histogram base is recorded ahead of its result.
        center_line.append(x_base)
        center_y_hi = (half + 1) * window_height

    x_current = x_base
    inds, found = _window_pixels(nonzerox, nonzeroy, center_y_low,
                                 center_y_hi, x_current, margin, minpix)
    lane_inds.append(inds)
    center_line.append(found)
    if found is not None:
        x_current = found

    top_x = bot_x = x_current
    for window in range(half):
        # Toward the top of the image (smaller y): results are appended.
        inds, found = _window_pixels(
            nonzerox, nonzeroy,
            center_y_low - (window + 1) * window_height,
            center_y_low - window * window_height,
            top_x, margin, minpix)
        lane_inds.append(inds)
        center_line.append(found)
        if found is not None:
            top_x = found

        # Toward the bottom of the image (larger y): results are prepended.
        inds, found = _window_pixels(
            nonzerox, nonzeroy,
            center_y_hi + window * window_height,
            center_y_hi + (window + 1) * window_height,
            bot_x, margin, minpix)
        lane_inds.insert(0, inds)
        center_line.insert(0, found)
        if found is not None:
            bot_x = found

    return np.concatenate(lane_inds), center_line


def _pixels_near_fit(fit, nonzerox, nonzeroy, margin):
    """Boolean mask of pixels within ``margin`` of the polynomial ``x = fit(y)``."""
    center = fit[0] * (nonzeroy ** 2) + fit[1] * nonzeroy + fit[2]
    return (nonzerox > center - margin) & (nonzerox < center + margin)


def detect_lanes(image, prev_lanes=None, save_path=""):
    """Find the left and right lane pixels and fit a 2nd-order polynomial to each.

    When ``left_lane.detected`` is False a sliding-window search seeded by
    ``gen_from_hist`` is used; otherwise pixels within 100 px of each
    lane's ``best_fit`` polynomial are selected.  A visualisation is saved
    with suffix "lane" and both global lane objects are updated with the
    fitted x positions.

    Changes relative to the previous version: ``np.int`` (removed in
    NumPy 1.24) is replaced by ``int``; the fitted lines are clipped to
    the actual image width instead of a hard-coded 1280; the search-area
    overlay that was computed but never saved or returned is no longer
    built.

    Args:
        image: warped binary image; expected to be 2-D (it is indexed as
            ``[row, col]`` and stacked into three channels for drawing).
        prev_lanes: unused; kept for interface compatibility.
        save_path: path prefix for saved images.  When empty and
            ``left_lane.savepath`` is set, the prefix is built from
            ``left_lane.savepath`` / ``left_lane.savename`` instead.

    Returns:
        ``(left_fit, right_fit)`` -- the ``np.polyfit`` coefficients of
        x as a function of y for each lane.
    """
    global left_lane
    global right_lane
    if save_path == "" and left_lane.savepath != "":
        savename = os.path.join(left_lane.savepath, left_lane.savename)
    else:
        savename = save_path

    # Number of sliding windows.
    nwindows = 9

    # Evaluated once so the search and the drawing below agree.  Kept as an
    # explicit comparison with False (rather than ``not ...``) so that a
    # non-boolean ``detected`` value is treated exactly as before.
    full_search = left_lane.detected == False  # noqa: E712

    # x and y positions of all nonzero pixels in the image.
    nonzero = image.nonzero()
    nonzeroy = np.array(nonzero[0])
    nonzerox = np.array(nonzero[1])

    height = image.shape[0]
    width = image.shape[1]

    left_center_line = []
    right_center_line = []
    if full_search:
        # Column histograms of the bottom and top halves seed the search.
        half_height = int(height / 2)
        bot_histogram = np.sum(image[half_height:, :], axis=0)
        top_histogram = np.sum(image[:half_height, :], axis=0)
        save_hist(bot_histogram, savename, "bottom_hist")
        save_hist(top_histogram, savename, "top_hist")
        leftx_base, rightx_base = gen_from_hist(bot_histogram, top_histogram)

        window_height = int(height / nwindows)
        margin = 50   # half-width of each window
        minpix = 50   # pixels needed to recentre the next window

        left_lane_inds, left_center_line = _sliding_window_lane(
            nonzerox, nonzeroy, leftx_base, window_height, nwindows, margin,
            minpix)
        right_lane_inds, right_center_line = _sliding_window_lane(
            nonzerox, nonzeroy, rightx_base, window_height, nwindows, margin,
            minpix)
    else:
        # Track around the previous best fit instead of searching afresh.
        margin = 100
        left_lane_inds = _pixels_near_fit(left_lane.best_fit, nonzerox,
                                          nonzeroy, margin)
        right_lane_inds = _pixels_near_fit(right_lane.best_fit, nonzerox,
                                           nonzeroy, margin)

    # Lane pixel positions.
    leftx = nonzerox[left_lane_inds]
    lefty = nonzeroy[left_lane_inds]
    rightx = nonzerox[right_lane_inds]
    righty = nonzeroy[right_lane_inds]

    # Fit x = a*y^2 + b*y + c to each lane.
    pred_left_fit = np.polyfit(lefty, leftx, 2)
    pred_right_fit = np.polyfit(righty, rightx, 2)

    # Evaluate both fits at every image row.
    ploty = np.linspace(0, height - 1, height)
    left_fitx = pred_left_fit[0] * ploty ** 2 + \
        pred_left_fit[1] * ploty + pred_left_fit[2]
    right_fitx = pred_right_fit[0] * ploty ** 2 + \
        pred_right_fit[1] * ploty + pred_right_fit[2]

    # Visualisation: left pixels in channel 0, right pixels in channel 2.
    out_img = np.dstack((image, image, image))
    out_img[lefty, leftx] = [255, 0, 0]
    out_img[righty, rightx] = [0, 0, 255]

    # Clip the fitted x values into the image in place -- the clipped
    # arrays are also what the lane objects receive below -- and draw them.
    rows = ploty.astype('int32')
    for fitx in (left_fitx, right_fitx):
        fitx[fitx < 0] = 0
        fitx[fitx >= width] = width - 1
        out_img[rows, fitx.astype('int32')] = [0, 255, 255]

    if full_search:
        draw_search_box(left_center_line, right_center_line, height,
                        nwindows, margin, out_img)

    save_image(out_img, savename, "lane")

    left_lane.update(left_fitx, ploty)
    right_lane.update(right_fitx, ploty)

    return pred_left_fit, pred_right_fit