import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

# NICE, plot_images and the hyper-parameters (batch_size, image_shape, device,
# n_test_plot, col_plot, row_plot) are defined elsewhere in the project.


def main():
    transform = transforms.Compose([transforms.ToTensor()])
    trainloader = DataLoader(
        datasets.MNIST(root="./datasets/", train=True, download=True, transform=transform),
        batch_size=batch_size,
        shuffle=True,
    )
    model = NICE(image_shape).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-3, betas=(0.9, 0.99), eps=1e-4, weight_decay=0
    )
    # Fixed latent codes so the sampled images are comparable across epochs
    z_test = torch.randn((n_test_plot, *image_shape)).to(device)

    for i in range(1000):
        # ===== train =====
        model.train()
        with tqdm(total=len(trainloader.dataset)) as progress_bar:
            for x, _ in trainloader:
                x = x.to(device)
                z, sum_log_det_J = model(x)
                loss = model.loss_func(z, sum_log_det_J)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                progress_bar.set_postfix(loss=loss.item())
                progress_bar.update(x.shape[0])

        # ===== test =====
        model.eval()
        with torch.no_grad():
            # Run the flow in reverse to map the fixed latents back to images
            img_for_plot, _ = model(z_test, inverse=True)
        plot_images(img_for_plot, col=col_plot, row=row_plot)
        plt.savefig("img/figure_" + str(i) + ".png")
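# For reference, a minimal sketch of what NICE.loss_func could compute,
# assuming the standard-logistic prior from the NICE paper (Dinh et al., 2015).
# The model's real loss lives in the NICE class, so treat this as illustrative.
import torch.nn.functional as F


def nice_loss(z, sum_log_det_J):
    # Standard-logistic log-density: log p(z) = -softplus(z) - softplus(-z),
    # summed over all non-batch dimensions.
    log_prior = -(F.softplus(z) + F.softplus(-z)).sum(dim=tuple(range(1, z.dim())))
    # Change of variables: log p(x) = log p(z) + log|det J|; minimise the
    # negative mean log-likelihood over the batch.
    return -(log_prior + sum_log_det_J).mean()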
import cv2

# dilation, erosion, sort_contours, compare_contours, in_range, add_padding
# and plot_images are project helpers defined elsewhere.


def character_segmentation(img, plot_flag=False):
    """
    Main function for character segmentation; relies on the helpers
    sort_contours and compare_contours.
    """
    # Ensure the image passed in is not empty
    if img is None:
        print("Image passed in is empty; should not reach here!!!")
        return

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Apply thresholding to get a binary image
    binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilation followed by erosion (morphological closing) to the binary image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # dilate_image = cv2.morphologyEx(binary, cv2.MORPH_DILATE, kernel)
    # closed = cv2.morphologyEx(dilate_image, cv2.MORPH_ERODE, kernel)
    dilate_image = dilation(binary, kernel)
    closed = erosion(dilate_image, kernel)

    # Find contours using the built-in function (OpenCV 3.x returns three values)
    _, cont, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Use copies of the plate image to draw bounding boxes (all boxes and filtered)
    test_roi = img.copy()
    passed_in = img.copy()

    # Initialize an empty list for the detected character images
    crop_characters = []

    # Standard output width and height of a character (tune for different sizes)
    digit_w, digit_h = 30, 60

    # Define the region of interest as a proportion of the plate width and height
    min_height, max_height, min_width, max_width = (0.26 * img.shape[0],
                                                    0.78 * img.shape[0],
                                                    0.014 * img.shape[1],
                                                    0.12 * img.shape[1])

    # Loop over sorted contours
    contour_num, existing = 0, []
    for c in sort_contours(cont):
        contour_num += 1
        (x, y, w, h) = cv2.boundingRect(c)
        box = (x, y, w, h)
        ratio = h / w
        # print("Contour height {} ({}), width {} ({})".format(h, h/img.shape[0], w, w/img.shape[1]))
        cv2.rectangle(passed_in, (x, y), (x + w, y + h), (0, 255, 0), 1)

        # Only keep contours with a qualifying height and width ...
        if not (in_range(min_height, h, max_height) and in_range(min_width, w, max_width)):
            continue
        # ... a height/width ratio between 1 and 9 ...
        if not in_range(1, ratio, 9, exclusive=False):
            continue
        # ... a height-width difference of at least 6 pixels ...
        if abs(h - w) < 6:
            continue
        # ... and no overlap with previously accepted contours
        if compare_contours(existing, box):
            # Draw a rectangle around the detected region
            cv2.rectangle(test_roi, (x, y), (x + w, y + h), (0, 0, 255), 2)
            # Extract the character region, binarise it, and remember the box
            # so future contours can be checked against it
            _, curr_num = cv2.threshold(closed[y:y + h, x:x + w], 230, 255,
                                        cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            existing.append(box)
            # Pad the detected region and resize to the standard size
            added = cv2.resize(add_padding(curr_num, 5), dsize=(digit_w, digit_h))
            crop_characters.append(added)

    print("Detected {} letters out of {} contours...".format(len(crop_characters), contour_num))

    # Plot when requested
    if plot_flag:
        plot_images([passed_in, test_roi], ["All contours", "Detected Letters"])
    return crop_characters
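# Hedged sketches of the project helpers character_segmentation relies on.
# The real implementations live elsewhere in the repo; the behaviour below
# (left-to-right sorting, inclusive/exclusive bounds, rectangle-overlap test,
# constant black padding) is assumed, not confirmed.

def sort_contours(contours):
    # Sort contours left-to-right by the x coordinate of the bounding box,
    # so characters come out in reading order.
    return sorted(contours, key=lambda c: cv2.boundingRect(c)[0])


def in_range(low, value, high, exclusive=True):
    # True when value lies in (low, high), or [low, high] if exclusive=False.
    return low < value < high if exclusive else low <= value <= high


def compare_contours(existing, box):
    # True when box overlaps none of the previously accepted boxes.
    x, y, w, h = box
    for ex, ey, ew, eh in existing:
        if x < ex + ew and ex < x + w and y < ey + eh and ey < y + h:
            return False
    return True


def add_padding(img, pad):
    # Surround the crop with a constant black border so the character
    # is not flush against the edge.
    return cv2.copyMakeBorder(img, pad, pad, pad, pad,
                              cv2.BORDER_CONSTANT, value=0)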
    shear_range=0.12)
validation_data_generator = ImageDataGenerator(rescale=1. / 255)

train_directory_iterator = train_data_generator.flow_from_directory(
    directory=train_path, target_size=image_size, classes=classes,
    batch_size=batch_size)
validation_directory_iterator = validation_data_generator.flow_from_directory(
    directory=validation_path, target_size=image_size, classes=classes,
    batch_size=batch_size)

sample_training_images, _ = next(train_directory_iterator)
plot_images(sample_training_images[:5])

# Transfer learning: copy all VGG19 layers except the final classifier,
# freeze everything but the last few layers, and add a 2-way softmax head.
vgg19_model = keras.applications.vgg19.VGG19()
model = Sequential()
for layer in vgg19_model.layers[:-1]:
    model.add(layer)
for layer in model.layers[:-7]:
    layer.trainable = False
model.add(Dense(2, activation='softmax'))

model.summary()
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss=keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
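# A plausible training call for the compiled model above; the epoch count and
# the steps derived from the iterators are illustrative choices, not values
# taken from the original project.
model.fit(train_directory_iterator,
          steps_per_epoch=len(train_directory_iterator),
          validation_data=validation_directory_iterator,
          validation_steps=len(validation_directory_iterator),
          epochs=10,
          verbose=2)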
import cv2
import matplotlib.pyplot as plt
import numpy as np

# morphology_kernel and plot_images are project helpers defined elsewhere.


def localise_plate(image, image_name, plot=False):
    """
    Plate localisation algorithm to detect the plate region in the image
    """
    print("Image:{}".format(image_name))
    # Keep a copy of the original image for cropping and plotting
    image_copy = image.copy()

    # Convert the RGB image to grayscale
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Blur the image
    image = cv2.GaussianBlur(image_gray, (9, 9), 3)
    if plot:
        plot_images([image_copy, image_gray, image],
                    ["Original Image", "Grayscale Image", "Image Blurring"])

    # Compute the directional gradients and the gradient magnitude
    Gx, Gy = cv2.Sobel(image, cv2.CV_8U, 1, 0), cv2.Sobel(image, cv2.CV_8U, 0, 1)
    M = np.abs(Gx) + np.abs(Gy)
    if plot:
        plot_images([Gx, Gy, M], ["X Gradient", "Y Gradient", "Image Gradient"])

    # Threshold the gradient magnitude
    _, image = cv2.threshold(M, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    kernel = morphology_kernel(5, 23)
    if plot:
        dilate = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)
        close = cv2.morphologyEx(dilate, cv2.MORPH_ERODE, kernel)
        plot_images([image, dilate, close], ["Threshold Image", "Dilation", "Erosion"])

    # Apply morphological closing to the image
    image_closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)

    # Get contours in the image (OpenCV 3.x returns three values)
    _, contours, hierarchy = cv2.findContours(image_closing, cv2.RETR_EXTERNAL,
                                              cv2.CHAIN_APPROX_NONE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # Find plate candidates among the four largest contours
    candidates = []
    for hull in contours[:4]:
        (x, y, w, h) = cv2.boundingRect(hull)
        ratio = h / w
        candidate = image_copy[y:y + h, x:x + w]
        if h in range(35, 110) and 0.1 <= ratio < 0.31:
            candidates.append((candidate, ratio))

    # Filter candidates: keep the one with the largest height/width ratio
    if not candidates:
        return np.zeros((300, 100))
    best_ratio = -np.inf
    res = None
    for curr, ratio in candidates:
        if ratio > best_ratio:
            res = curr
            best_ratio = ratio
    if plot:
        plt.title("Plate")
        plt.imshow(res)
        plt.show()
    return res
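# Hedged sketch of the morphology_kernel helper used above; the real version
# is defined elsewhere, but a rectangular structuring element of the given
# height x width is the assumed behaviour.
def morphology_kernel(height, width):
    # cv2.getStructuringElement takes ksize as (width, height)
    return cv2.getStructuringElement(cv2.MORPH_RECT, (width, height))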
# NOTE: torchvision's Resize expects (height, width), so passing
# (opt.fine_width, opt.fine_height) only works when the two are equal.
tf_list = [transforms.Resize((opt.fine_width, opt.fine_height))] + aug + \
          [transforms.ToTensor(),
           Cutout(4, opt.fine_width * 0.25),
           transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
tfs = transforms.Compose(tf_list)

# Preview a few augmented training images
dataset = CarDataset('train', opt, image_transformer=tfs)
data_loader = DataLoader(opt, dataset)
batch = data_loader.next_batch()
sample_batch = batch['data']
imgs = convert_batch_to_image(sample_batch)
plot_images(imgs[:4])
plt.show()

dataloaders = get_cars_datasets(opt, valid_size=0.1, tfs=tfs)
n_classes = 196
model = RNModel(n_classes)

# Freeze the first layers and train for a few epochs
train(dataloaders, model, opt, num_epochs=10)

# Unfreeze all model parameters and continue training
for param in model.parameters():
    param.requires_grad = True  # was `require_grad`, which silently does nothing
train(dataloaders, model, opt, num_epochs=200)
# optimizer_ft = optim.Adam(classifier.parameters(), lr=0.0000001)
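# Hedged sketch of the Cutout augmentation referenced in tf_list (DeVries &
# Taylor, 2017). The project's own Cutout may differ; this version zeroes
# n_holes square patches on a (C, H, W) tensor, so it must run after ToTensor.
import torch


class Cutout:
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = int(length)

    def __call__(self, img):
        _, h, w = img.shape
        for _ in range(self.n_holes):
            # Pick a random centre and zero out the square around it,
            # clipped to the image borders.
            y = torch.randint(h, (1,)).item()
            x = torch.randint(w, (1,)).item()
            y1, y2 = max(0, y - self.length // 2), min(h, y + self.length // 2)
            x1, x2 = max(0, x - self.length // 2), min(w, x + self.length // 2)
            img[:, y1:y2, x1:x2] = 0.0
        return img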
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

import model

# plot_images and the hyper-parameters (mini_batch_size, n_test_plot, z_dim,
# learning_rate, device, col_plot, row_plot) are defined elsewhere.


def main():
    transform = transforms.Compose([transforms.ToTensor()])
    trainloader = DataLoader(
        datasets.MNIST(root="./datasets/", train=True, download=True, transform=transform),
        batch_size=mini_batch_size,
        shuffle=True,
    )
    testloader = DataLoader(
        datasets.MNIST(root="./datasets/", train=False, download=True, transform=transform),
        batch_size=n_test_plot,
        shuffle=False,
    )
    encoder = model.Encoder(z_dim).to(device)
    decoder = model.Decoder(z_dim).to(device)
    optimizer = torch.optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()), learning_rate)

    for i in range(10):
        # ===== train =====
        encoder.train()
        decoder.train()
        with tqdm(total=len(trainloader.dataset)) as progress_bar:
            for real_img, _ in trainloader:
                real_img = real_img.to(device)
                z_mean, z_log_var = encoder(real_img)
                # KL divergence between q(z|x) and the standard normal prior
                kl_divergence = -0.5 * torch.mean(
                    torch.sum(1 + z_log_var - z_mean**2 - torch.exp(z_log_var), dim=-1))
                # Reparameterisation trick: z = mu + eps * sigma
                epsilon = torch.empty_like(z_mean).normal_(0, 1).to(device)
                z = z_mean + epsilon * torch.exp(z_log_var)**0.5
                fake_img = decoder(z)
                x = real_img.view(real_img.shape[0], -1)
                y = fake_img.view(fake_img.shape[0], -1)
                # Bernoulli log-likelihood; clamp to avoid log(0)
                y = y.clamp(1e-8, 1 - 1e-8)
                reconstruction = torch.mean(
                    torch.sum(x * torch.log(y) + (1 - x) * torch.log(1 - y), dim=-1))
                # Negative ELBO = KL - expected reconstruction log-likelihood
                loss = kl_divergence - reconstruction
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                progress_bar.set_postfix(loss=loss.item())
                progress_bar.update(real_img.shape[0])

        # ===== test =====
        encoder.eval()
        decoder.eval()
        with torch.no_grad():
            real_img_for_plot = next(iter(testloader))[0].to(device)
            z_mean, _ = encoder(real_img_for_plot)
            # Use the posterior mean (no sampling) for deterministic reconstructions
            fake_img_for_plot = decoder(z_mean)
        img_for_plot = torch.cat((real_img_for_plot, fake_img_for_plot))
        plot_images(img_for_plot, col=col_plot, row=row_plot)
        plt.savefig("img/figure_" + str(i) + ".png")
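# Hedged sketch of the Encoder/Decoder pair imported from `model` above; the
# real architectures live in model.py, so the layer sizes here (784-400-z_dim
# for MNIST) are assumptions for illustration only.
import torch.nn as nn


class Encoder(nn.Module):
    def __init__(self, z_dim):
        super().__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(784, 400), nn.ReLU())
        self.mean = nn.Linear(400, z_dim)
        self.log_var = nn.Linear(400, z_dim)

    def forward(self, x):
        # Return the mean and log-variance of q(z|x)
        h = self.net(x)
        return self.mean(h), self.log_var(h)


class Decoder(nn.Module):
    def __init__(self, z_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(z_dim, 400), nn.ReLU(),
            nn.Linear(400, 784), nn.Sigmoid())

    def forward(self, z):
        # Map latents back to MNIST-shaped images for plotting
        return self.net(z).view(-1, 1, 28, 28)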