def stylize(self, im_path):
    """Run the network on one image and save the result as a JPEG.

    The image at ``im_path`` is loaded with ``load_im``, passed through
    ``self.transform`` and ``self.net`` (in eval mode, with autograd
    disabled), and the first element of the network output is written to
    ``self.output_path`` as ``<input stem>_<self.model_name>.jpg``.

    Args:
        im_path: Path of the input image.
    """
    with torch.no_grad():
        self.net.eval()
        im = load_im(im_path)
        x = self.transform(im)
        out = self.net(x)

    # splitext strips only the final extension, so "a.1.png" and "a.2.png"
    # keep distinct stems instead of both collapsing to "a".
    stem = os.path.splitext(os.path.basename(im_path))[0]
    _name = stem + '_' + self.model_name + '.jpg'
    _path = os.path.join(self.output_path, _name)
    save_im(_path, out[0])
def intermediate_res(self, c_loss, s_loss, r_loss, n):
    """Save a preview image from the current network and print the losses.

    ``self.tfm_net`` is switched to eval mode, applied to
    ``self.check_tensor``, and the first element of its output is saved to
    ``tmp/images/check<n>.jpg``. The network is then put back in train mode
    and each loss is printed divided by ``n``.

    Args:
        c_loss: Content loss value (presumably accumulated over ``n``
            batches -- confirm against the caller).
        s_loss: Style loss value, same convention as ``c_loss``.
        r_loss: Regularisation loss value, same convention as ``c_loss``.
        n: Batch counter; used in the file name and as the divisor, so it
            must be non-zero.
    """
    # Local import keeps this block self-contained; torch is already a
    # dependency of the network being evaluated here.
    import torch

    self.tfm_net.eval()
    try:
        # The preview is never back-propagated, so skip building the
        # autograd graph for it.
        with torch.no_grad():
            check = self.tfm_net(self.check_tensor)
        _path = os.path.join('tmp', 'images', f'check{n}.jpg')
        save_im(_path, check[0])
    finally:
        # Restore train mode even if the forward pass or the save fails.
        self.tfm_net.train()

    msg = (f'\nbatch: {n}\t'
           f'content: {c_loss/n}\t'
           f'style: {s_loss/n}\t'
           f'reg: {r_loss/n}\t'
           f'total: {(c_loss + s_loss + r_loss)/n} \n')
    print(msg)
### END YOUR CODE HERE ### return conv_result if __name__ == "__main__": verbose = True # change if you want # Changing this code should not be needed im = skimage.data.camera() im = utils.uint8_to_float(im) # DO NOT CHANGE gaussian_kernel = np.array([ [1, 4, 6, 4, 1], [4, 16, 24, 16, 4], [6, 24, 36, 24, 6], [4, 16, 24, 16, 4], [1, 4, 6, 4, 1], ]) / 256 image_gaussian = convolve_im(im, gaussian_kernel, verbose) # DO NOT CHANGE sobel_horizontal = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) image_sobelx = convolve_im(im, sobel_horizontal, verbose) if verbose: plt.show() utils.save_im("camera_gaussian.png", image_gaussian) utils.save_im("camera_sobelx.png", image_sobelx)
oldMax = betweenClass return thresholdBest ### END YOUR CODE HERE ### if __name__ == "__main__": # DO NOT CHANGE impaths_to_segment = [ pathlib.Path("thumbprint.png"), pathlib.Path("polymercell.png") ] for impath in impaths_to_segment: im = utils.read_image(impath) threshold = otsu_thresholding(im) print("Found optimal threshold:", threshold) # Segment the image by threshold segmented_image = (im >= threshold) assert im.shape == segmented_image.shape, \ "Expected image shape ({}) to be same as thresholded image shape ({})".format( im.shape, segmented_image.shape) assert segmented_image.dtype == np.bool, \ "Expected thresholded image dtype to be np.bool. Was: {}".format( segmented_image.dtype) segmented_image = utils.to_uint8(segmented_image) save_path = "{}-segmented.png".format(impath.stem) utils.save_im(save_path, segmented_image)
(np.ndarray) of shape (H, W). dtype=np.bool """ ### START YOUR CODE HERE ### (You can change anything inside this block) # You can also define other helper functions sh = disk(10) binary_closing(im, selem=sh, out = im) binary_opening(im, selem=sh, out = im) # im = binary_erosion(im, selem=sh) # im = binary_dilation(im, selem=sh) return im ### END YOUR CODE HERE ### if __name__ == "__main__": # DO NOT CHANGE im = utils.read_image("noisy.png") binary_image = (im != 0) noise_free_image = remove_noise(binary_image) assert im.shape == noise_free_image.shape, \ "Expected image shape ({}) to be same as resulting image shape ({})".format( im.shape, noise_free_image.shape) assert noise_free_image.dtype == np.bool, \ "Expected resulting image dtype to be np.bool. Was: {}".format( noise_free_image.dtype) noise_free_image = utils.to_uint8(noise_free_image) utils.save_im("noisy-filtered.png", noise_free_image)
(np.ndarray) of shape (H, W). dtype=np.bool """ # START YOUR CODE HERE ### (You can change anything inside this block) # You can also define other helper functions structuring_element = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype=bool) result = im.copy() return result ### END YOUR CODE HERE ### if __name__ == "__main__": im = utils.read_image("balls-with-reflections.png") binary_image = im != 0 starting_points = [ # (row, column) [51, 64], [44, 180], [35, 365], [156, 94], [141, 264], [138, 467], [198, 180], [229, 413], [294, 103], [302, 230], [368, 388], [352, 489], [454, 57], [457, 236], [469, 400], [489, 506] ] num_iterations = 30 result = fill_holes(binary_image, starting_points, num_iterations) assert im.shape == result.shape, "Expected image shape ({}) to be same as resulting image shape ({})".format( im.shape, result.shape) assert result.dtype == np.bool, "Expected resulting image dtype to be np.bool. Was: {}".format( result.dtype) result = utils.to_uint8(result) utils.save_im("balls-with-reflections-filled.png", result)
def __dispatch_end(self, addr, packets=None):
    """Handle the end message: optionally save the last observation.

    When ``self.save_obs`` is truthy, ``self.last_obs`` is detached,
    converted to a NumPy array, reshaped to 64x64, scaled by 255.0 and
    passed to ``save_im``. ``self.finished`` is set to True afterwards.

    Args:
        addr: Not used in this method.
        packets: Not used in this method.
    """
    if self.save_obs:
        observation = self.last_obs.detach().numpy()
        frame = observation.reshape((64, 64)) * 255.0
        save_im(frame)
    self.finished = True
args: im: np.ndarray of shape (H, W) with boolean values (dtype=np.bool) return: (np.ndarray) of shape (H, W). dtype=np.bool """ ### START YOUR CODE HERE ### (You can change anything inside this block) # You can also define other helper functions structuring_element = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype=bool) eroded_im = binary_erosion(im, selem=structuring_element) boundary = np.bitwise_xor(im, eroded_im) return boundary ### END YOUR CODE HERE ### if __name__ == "__main__": im = utils.read_image("lincoln.png") binary_image = (im != 0) boundary = extract_boundary(binary_image) assert im.shape == boundary.shape, \ "Expected image shape ({}) to be same as resulting image shape ({})".format( im.shape, boundary.shape) assert boundary.dtype == np.bool, \ "Expected resulting image dtype to be np.bool. Was: {}".format( boundary.dtype) boundary = utils.to_uint8(boundary) utils.save_im("lincoln-boundary.png", boundary)
ax[2].imshow(absolute_convoluted_frequency_image, cmap='gray') ax[2].set_title('Frequency Domain Convolution') ax[3].imshow(conv_result, cmap='gray') ax[3].set_title('Convolved Image') ### END YOUR CODE HERE ### return conv_result if __name__ == "__main__": verbose = True # Changing this code should not be needed im = skimage.data.camera() im = utils.uint8_to_float(im) # DO NOT CHANGE frequency_kernel_low_pass = utils.create_low_pass_frequency_kernel(im, radius=50) image_low_pass = convolve_im(im, frequency_kernel_low_pass, verbose=verbose) # DO NOT CHANGE frequency_kernel_high_pass = utils.create_high_pass_frequency_kernel(im, radius=50) image_high_pass = convolve_im(im, frequency_kernel_high_pass, verbose=verbose) if verbose: plt.show() utils.save_im("camera_low_pass.png", image_low_pass) utils.save_im("camera_high_pass.png", image_high_pass)
[np.ndarray, np.bool]: [A binary image] """ # START YOUR CODE HERE ### (You can change anything inside this block) binary_im = np.zeros_like(im, dtype=np.bool) ### END YOUR CODE HERE ### return binary_im if __name__ == "__main__": # NO NEED TO EDIT THE CODE BELOW. verbose = True plt.figure(figsize=(4, 12)) plt.tight_layout() images_to_visualize = [] for i, impath in enumerate(impaths): im = utils.read_im(str(impath)) im_binary = create_binary_image(im) assert im_binary.dtype == np.bool, f"Expected the image to be of dtype np.bool, got {im_binary.dtype}" angles, distances = utils.find_angle(im_binary) angle = 0 if len(angles) > 0: angle = angles[0] * 180 / np.pi print(f"Found angle: {angle:.2f}") hough_im = utils.create_hough_line_image(im, angles, distances) rotated = skimage.transform.rotate(im, angle, cval=im.max()) images_to_visualize.extend([im, im_binary, hough_im, rotated]) image = utils.np_make_image_grid(images_to_visualize, nrow=len(impaths)) utils.save_im("task4d.png", image) plt.imshow(image, cmap="gray")
Args: im ([type]): [np.array of shape [H, W, 3]] Returns: im ([type]): [np.array of shape [H, W]] """ # grey = 0.212R + 0.7152G + 0.0722B return im.dot([0.212, 0.7152, 0.0722]) # Without weights: # return np.sum(a=im, axis=2) im_greyscale = greyscale(im) save_im(output_dir.joinpath("lake_greyscale.jpg"), im_greyscale, cmap="gray") plt.imshow(im_greyscale, cmap="gray") # plt.show() # print("Image range: {}-{} ".format(im.min(), im.max())) def inverse(im): """ Finds the inverse of the greyscale image Args: im ([type]): [np.array of shape [H, W]] Returns: im ([type]): [np.array of shape [H, W]] """
cline.add_argument('-nms_thold', type=float, default=0.4,
                   help='threshold for non max supression')
cline.add_argument('-model_res', type=int, default=416,
                   help='resolution of the model\'s input')
cline.add_argument('-save', action='store_true',
                   help='whether to save result or not')

# Script entry point: build the detector, load its weights, run detection
# on one image, display the result and optionally save it.
if __name__ == '__main__':
    args = cline.parse_args()

    with torch.no_grad():
        bbone = Darknet()
        bbone = bbone.extractor
        model = Yolo3(bbone)

        print(f'Loading weights from {args.weights}')
        # map_location lets a checkpoint saved on a GPU be loaded on a host
        # where that device is unavailable.
        model.load_state_dict(torch.load(args.weights, map_location=device))
        model.to(device)
        # NOTE(review): the model is never put in eval mode here; confirm
        # whether detect() does that itself.

        image = cv2.imread(args.image)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise SystemExit(f'Could not read image: {args.image}')

        res = detect(model, image, device, args.obj_thold, args.nms_thold,
                     args.model_res)

    cv2.imshow('prediction', res)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    if args.save:
        save_im(res, args.image)
kernel = np.fft.fftshift(kernel) plt.figure(figsize=(16, 8)) plt_rows = 5 plt.subplot(1, plt_rows, 1) plt.imshow(im, cmap="gray") plt.title("Original image") plt.subplot(1, plt_rows, 2) plt.imshow(fft_im, cmap="gray") plt.title("FFT image shifted") plt.subplot(1, plt_rows, 3) plt.imshow(kernel, cmap="gray") plt.title("Kernel") plt.subplot(1, plt_rows, 4) plt.imshow(fft_im_filtered, cmap="gray") plt.title("FFT image filtered") plt.subplot(1, plt_rows, 5) plt.imshow(inversed_im, cmap="gray") plt.title("Inversed image") plt.savefig(utils.image_output_dir.joinpath("task4c_full.png")) # plt.show() # END YOUR CODE HERE ### utils.save_im("moon_filtered.png", utils.normalize(inversed_im))
im = utils.read_im(impath) # START YOUR CODE HERE ### (You can change anything inside this block) im_freq = np.fft.fft2(im) im_freq_old = im_freq.copy() neigboor_average = np.ones((5, 5))/25 neigboor_average[2, 2] = 0 neigboor_convolved = convolve_im(np.abs(im_freq), neigboor_average, False) ratio = np.abs(im_freq) / neigboor_convolved noise_arg = np.unravel_index( np.argmax(ratio), im_freq.shape) print(noise_arg) im_freq[noise_arg] = 0 im_freq[-noise_arg[0], -noise_arg[1]] = 0 im_filtered = np.fft.ifft2(im_freq) fig, ax = plt.subplots(2, 2) ax[0, 0].imshow(im, cmap="gray") ax[0, 1].imshow(np.abs(im_filtered), cmap="gray") # ax[0, 2].set_axis_off() ax[1, 0].imshow(np.abs(np.fft.fftshift(im_freq_old)), cmap="gray", norm=SymLogNorm(1)) ax[1, 1].imshow(np.abs(np.fft.fftshift(im_freq)), cmap="gray", norm=SymLogNorm(1)) ### END YOUR CODE HERE ### utils.save_im("moon_filtered.png", utils.normalize(im_filtered)) plt.show()
A function that computes the distance to the closest boundary pixel. args: im: np.ndarray of shape (H, W) with boolean values (dtype=np.bool) return: (np.ndarray) of shape (H, W). dtype=np.int32 """ # START YOUR CODE HERE ### (You can change anything inside this block) # You can also define other helper functions assert im.dtype == np.bool structuring_element = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype=bool) result = im.astype(np.int32) return result ### END YOUR CODE HERE ### if __name__ == "__main__": im = utils.read_image("noisy.png") binary_image = (im != 0) noise_free_image = remove_noise(binary_image) distance = distance_transform(noise_free_image) assert im.shape == distance.shape, "Expected image shape ({}) to be same as resulting image shape ({})".format( im.shape, distance.shape) assert distance.dtype == np.int32, "Expected resulting image dtype to be np.int32. Was: {}".format( distance.dtype) distance = utils.to_uint8(distance) utils.save_im("noisy-distance.png", distance)
Returns: im: np.array of shape [H, W] """ laplacian = np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]) ### START YOUR CODE HERE ### (You can change anything inside this block) # Convolve img with laplacian kernel convolved_im = convolve_im(im, laplacian, verbose=True) # Use equation 6. resulting_im = np.add(im, (im * convolved_im)) # Limit values between [0,1] im = np.add(resulting_im, resulting_im.min()) im = np.multiply(im, 1 / im.max()) ### END YOUR CODE HERE ### return im if __name__ == "__main__": # DO NOT CHANGE im = skimage.data.moon() im = utils.uint8_to_float(im) sharpen_im = sharpen(im) sharpen_im = utils.to_uint8(sharpen_im) im = utils.to_uint8(im) # Concatenate the image, such that we get # the original on the left side, and the sharpened on the right side im = np.concatenate((im, sharpen_im), axis=1) utils.save_im("moon_sharpened.png", im)
""" A function that max pools an image with size kernel size. Assume that the stride is equal to the kernel size, and that the kernel size is even. Args: im: [np.array of shape [H, W, 3]] kernel_size: integer Returns: im: [np.array of shape [H/kernel_size, W/kernel_size, 3]]. """ stride = kernel_size ### START YOUR CODE HERE ### (You can change anything inside this block) return new_im ### END YOUR CODE HERE ### if __name__ == "__main__": # DO NOT CHANGE im = skimage.data.chelsea() im = utils.uint8_to_float(im) max_pooled_image = MaxPool2d(im, 4) utils.save_im("chelsea.png", im) utils.save_im("chelsea_maxpooled.png", max_pooled_image) im = utils.create_checkerboard() im = utils.uint8_to_float(im) utils.save_im("checkerboard.png", im) max_pooled_image = MaxPool2d(im, 2) utils.save_im("checkerboard_maxpooled.png", max_pooled_image)
# Script: filter the clown image with a pre-computed notch filter kernel and
# save the result.
import skimage
import os
import numpy as np
import utils
from task4b import convolve_im
from matplotlib import pyplot as plt

if __name__ == "__main__":
    # DO NOT CHANGE
    impath = os.path.join("images", "clown.jpg")
    # NOTE(review): only `import skimage` is visible here; this relies on
    # `skimage.io` being reachable as an attribute -- confirm for the
    # installed scikit-image version.
    im = skimage.io.imread(impath)
    im = utils.uint8_to_float(im)
    # Kernel is loaded from disk; presumably already in the form that
    # task4b.convolve_im expects -- TODO confirm.
    kernel = np.load("images/notch_filter.npy")
    ### START YOUR CODE HERE ### (You can change anything inside this block)
    im_filtered = convolve_im(im, kernel)
    # Display whatever figures are currently open (convolve_im may create
    # some; not visible from here).
    plt.show()
    ### END YOUR CODE HERE ###
    utils.save_im("clown_filtered.png", im_filtered)
for y in range(len(im)): for x in range(len(im[y])): imCopy[y][x] = convolve(y, x) return imCopy # Define the convolutional kernels h_b = 1 / 256 * np.array([[1, 4, 6, 4, 1], [4, 16, 24, 16, 4], [6, 24, 36, 24, 6], [4, 16, 24, 16, 4], [1, 4, 6, 4, 1]]) sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) # Convolve images im_smoothed = convolve_im(im.copy(), h_b) save_im(output_dir.joinpath("im_smoothed.jpg"), im_smoothed) im_sobel = convolve_im(im, sobel_x) save_im(output_dir.joinpath("im_sobel.jpg"), im_sobel) # DO NOT CHANGE. Checking that your function returns as expected assert isinstance( im_smoothed, np.ndarray ), f"Your convolve function has to return a np.array. " + f"Was: {type(im_smoothed)}" assert im_smoothed.shape == im.shape, f"Expected smoothed im ({im_smoothed.shape}" + \ f"to have same shape as im ({im.shape})" assert im_sobel.shape == im.shape, f"Expected smoothed im ({im_sobel.shape}" + \ f"to have same shape as im ({im.shape})" plt.subplot(1, 2, 1) plt.imshow(normalize(im_smoothed))
for row, col in seed_points: segmented[row, col] = True neighbourhood(im, segmented, row, col, T) return segmented ### END YOUR CODE HERE ### if __name__ == "__main__": # DO NOT CHANGE im = utils.read_image("defective-weld.png") seed_points = [ # (row, column) [254, 138], # Seed point 1 [253, 296], # Seed point 2 [233, 436], # Seed point 3 [232, 417], # Seed point 4 ] intensity_threshold = 50 segmented_image = region_growing(im, seed_points, intensity_threshold) assert im.shape == segmented_image.shape, \ "Expected image shape ({}) to be same as thresholded image shape ({})".format( im.shape, segmented_image.shape) assert segmented_image.dtype == np.bool, \ "Expected thresholded image dtype to be np.bool. Was: {}".format( segmented_image.dtype) segmented_image = utils.to_uint8(segmented_image) utils.save_im("defective-weld-segmented.png", segmented_image)
def _print_batch_summary(example_images):
    """Print the shape and the max/min values of a batch of images."""
    print(
        f"The tensor containing the images has shape: {example_images.shape} (batch size, number of color channels, height, width)",
        f"The maximum value in the image is {example_images.max()}, minimum: {example_images.min()}",
        sep="\n\t")


def _create_linear_model():
    """Build the Flatten + single Linear(28*28, 10) model used in Task A."""
    model = nn.Sequential(
        # Flattens (batch_size, C, height, width) to (batch_size, C*height*width)
        nn.Flatten(),
        nn.Linear(28 * 28 * 1, 10),  # 28*28 input features, 10 outputs
        # No softmax here; the original notes it is combined in the loss
        # function.
    )
    # Transfer the model to GPU memory if a GPU is available
    return utils.to_cuda(model)


def _create_hidden_layer_model():
    """Build the Task D model: Flatten, Linear(28*28, 64), ReLU, Linear(64, 10)."""
    model = nn.Sequential(
        nn.Flatten(),
        nn.Linear(28 * 28, 64),  # 28*28 input features, 64 outputs
        nn.ReLU(),  # activation for the layer above
        nn.Linear(64, 10),  # 64 inputs, 10 outputs
    )
    # Transfer the model to GPU memory if a GPU is available
    return utils.to_cuda(model)


def _check_forward_pass(model, example_images):
    """Run one forward pass and assert the output is (batch_size, 10)."""
    example_images = utils.to_cuda(example_images)
    output = model(example_images)
    print("Output shape:", output.shape)
    # 10 since mnist has 10 different classes
    expected_shape = (batch_size, 10)
    assert output.shape == expected_shape, f"Expected shape: {expected_shape}, but got: {output.shape}"


def _train_with_sgd(model, dataloader_train, dataloader_test):
    """Train ``model`` with a fresh SGD optimizer; return trainer.train()'s result."""
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    trainer = Trainer(model=model,
                      dataloader_train=dataloader_train,
                      dataloader_test=dataloader_test,
                      batch_size=batch_size,
                      loss_function=loss_function,
                      optimizer=optimizer)
    return trainer.train(num_epochs)


def _report_test_metrics(model, dataloader_test):
    """Compute and print the loss and accuracy of ``model`` on the test loader."""
    final_loss, final_acc = utils.compute_loss_and_accuracy(
        dataloader_test, model, loss_function)
    print(f"Final Test loss: {final_loss}. Final Test accuracy: {final_acc}")


def _save_loss_plot(curves, path):
    """Plot each (loss_dict, label) pair, save the figure to ``path``, clear it."""
    for loss_dict, label in curves:
        utils.plot_loss(loss_dict, label=label)
    # Limit the y-axis of the plot (The range should not be increased!)
    plt.ylim([0, 1])
    plt.legend()
    plt.xlabel("Global Training Step")
    plt.ylabel("Cross Entropy Loss")
    plt.savefig(path)
    plt.clf()


def task_abd():
    """Run tasks A, B and D: train three models, plot losses, save weight images.

    Task A trains the single-layer model on un-normalized and then on
    normalized images (re-seeding torch and numpy to 0 before the second
    run) and saves the loss curves to ``image_solutions/task_4a.png``.
    Task B saves each of the normalized model's weight rows as a 28x28 image.
    Task D trains a model with one hidden layer on normalized images and
    saves its loss curves next to the normalized single-layer curves in
    ``image_solutions/task_4d.png``.

    The order of data loading, seeding and model creation follows the
    original statement order, because each of those steps may consume
    random state.
    """
    print("Task A:")
    image_transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])
    dataloader_train, dataloader_test = dataloaders.load_dataset(
        batch_size, image_transform)
    example_images, _ = next(iter(dataloader_train))
    _print_batch_summary(example_images)

    model = _create_linear_model()
    # Test if the model is able to do a single forward pass
    _check_forward_pass(model, example_images)
    train_loss_dict_non_normalized, test_loss_dict_non_normalized = _train_with_sgd(
        model, dataloader_train, dataloader_test)
    _report_test_metrics(model, dataloader_test)

    # Normalize from here on
    image_transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=0.5, std=0.5)
    ])
    # Reset the manual seed to 0, such that the model parameters are
    # initialized with the same random number generator.
    torch.random.manual_seed(0)
    np.random.seed(0)
    dataloader_train, dataloader_test = dataloaders.load_dataset(
        batch_size, image_transform)
    model = _create_linear_model()
    train_loss_dict_normalized, test_loss_dict_normalized = _train_with_sgd(
        model, dataloader_train, dataloader_test)

    _save_loss_plot(
        [
            (train_loss_dict_non_normalized, "Train Loss - Not normalized"),
            (test_loss_dict_non_normalized, "Test Loss - Not normalized"),
            (train_loss_dict_normalized, "Train Loss - Normalized"),
            (test_loss_dict_normalized, "Test Loss - Normalized"),
        ],
        "image_solutions/task_4a.png")
    _report_test_metrics(model, dataloader_test)

    ####################
    # Task B
    ####################
    print("Task B:")
    weight_tensors = list(model.children())[1].weight.cpu().data
    # 10 tensors since we have 0-9 classes
    for tensor_index, tensor in enumerate(weight_tensors):
        # Each row holds 28*28 weights; reshape it in one step rather than
        # copying element by element. float64 matches the np.zeros buffer the
        # values were previously written into.
        weight_image_array = tensor.numpy().reshape(28, 28).astype(np.float64)
        utils.save_im(
            output_dir_images.joinpath("weights{}.jpg".format(tensor_index)),
            weight_image_array)

    ####################
    # Task D
    ####################
    print("Task D:")
    image_transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=0.5, std=0.5)
    ])
    dataloader_train, dataloader_test = dataloaders.load_dataset(
        batch_size, image_transform)
    example_images, _ = next(iter(dataloader_train))
    _print_batch_summary(example_images)

    model = _create_hidden_layer_model()
    # Test if the model is able to do a single forward pass
    _check_forward_pass(model, example_images)
    train_loss_dict_hidden, test_loss_dict_hidden = _train_with_sgd(
        model, dataloader_train, dataloader_test)

    _save_loss_plot(
        [
            (train_loss_dict_normalized, "Train Loss - Normalized"),
            (test_loss_dict_normalized, "Test Loss - Normalized"),
            (train_loss_dict_hidden, "Train Loss - One hidden layer"),
            (test_loss_dict_hidden, "Test Loss - One hidden layer"),
        ],
        "image_solutions/task_4d.png")
    _report_test_metrics(model, dataloader_test)