def train(self):
    print('training model...')
    for epoch in tqdm(range(N_EPOCHS), position=0, leave=True):
        epoch_running_loss = 0.0
        running_loss = 0.0
        for i, data in enumerate(tqdm(self.trainloader, position=0, leave=True), 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs = data[0].to(self.device)
            labels = data[1].to(self.device)

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()
            self.optimizer.step()

            # accumulate statistics
            running_loss += loss.item()
            epoch_running_loss += loss.item()

            # print every 2000 mini-batches
            if i % 2000 == 1999:
                print(inputs[0].size())
                imshow(inputs[0].to('cpu'))
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
        self.train_losses_by_epoch.append(
            epoch_running_loss / len(self.trainloader))
    print('finished training')
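# The attributes used by train() above (model, criterion, optimizer,
# trainloader, device, train_losses_by_epoch) and the N_EPOCHS constant are
# defined elsewhere in the original project. A minimal sketch of a wrapper
# class that would make train() runnable -- the class name, loss, optimizer,
# and hyperparameters below are assumptions, not the original implementation:
import torch
import torch.nn as nn
from tqdm import tqdm

N_EPOCHS = 10  # assumed value for the module-level constant


class Trainer:
    def __init__(self, model, trainloader, criterion=None, lr=1e-3):
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.model = model.to(self.device)
        self.trainloader = trainloader
        self.criterion = criterion or nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=lr,
                                         momentum=0.9)
        self.train_losses_by_epoch = []
    # train() above would be a method of this class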
def predict_image():
    print(request.headers)
    print("Running")
    # the raw request body is the base64-encoded image; decode to raw bytes
    base64Image = base64.b64decode(request.data)
    pilImage = Image.open(io.BytesIO(base64Image))
    print("Still Running")
    cv2Image = cv2.cvtColor(np.array(pilImage), cv2.COLOR_RGB2BGR)
    print("Extracting Logo!")
    extract = ExtractLogo()
    import src.utils as utils

    # Detecting page
    # from src.extraction.detectPage import detectPage
    # cv2Image = detectPage(cv2Image)

    # Instead of detecting page, directly reduce resolution and process
    cv2Image = utils.resize(cv2Image)
    utils.imshow(cv2Image)

    # No need of segmentation
    predictedLogoList, urlList = extract.extract(cv2Image, segment=False)
    # imFileLoc = "C:/Users/Abhishek Bansal/Desktop/img.jpg"
    # cv2.imwrite(imFileLoc, cv2Image)

    response = {}
    response["status"] = 200
    response["answer"] = "Predicted logos are: "
    response["url"] = urlList[0][2:-1]
    print(urlList[0])
    print(response["url"])
    for logo in predictedLogoList:
        response["answer"] += logo
    print(json.dumps(response))
    return json.dumps(response)
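# A hedged client-side sketch for exercising predict_image(). The route
# ('/predict'), host, and input filename are assumptions -- the Flask route
# decorator is not shown above. What the handler does require is that the
# raw request body be the base64-encoded image, matching
# base64.b64decode(request.data):
import base64

import requests

with open('logo_sample.jpg', 'rb') as f:  # hypothetical input image
    payload = base64.b64encode(f.read())

resp = requests.post('http://localhost:5000/predict', data=payload)
answer = resp.json()
print(answer['answer'], answer['url'])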
def extract(self, image, debug=False, segment=True):
    self.document = image
    self.debug = debug
    self.preprocess()
    self.findConnComp()
    # utils.imshow(self.erodedImage)
    sl = SegmentLogo(self.resizeImage, self.mergedComponents.mergedComponents)
    # sl = SegmentLogo(orig_image, comp.components)
    if segment:
        sl.segmentLogoByMean()
    else:
        sl.logos.append(image)

    # Load the recognition models once, not once per logo
    ctx = Context()
    ctx.loadModels()

    predictedLogoList = []
    urlList = []
    for logo in sl.logos:
        utils.imshow(logo)
        # utils.imshow(process_image(logo))
        pred = Predict(ctx)
        predictedClass = pred.predictLabel(logo)
        a = pred.predictedSURFClass
        b = pred.predictedSIFTClass
        print(ctx.stringLabels[a - 1], ctx.stringLabels[b - 1])
        print("Predicted Class: ", ctx.stringLabels[predictedClass - 1])
        # `is not -1` compares object identity; use != for value comparison
        if predictedClass != -1:
            predictedLogoList.append(ctx.stringLabels[predictedClass - 1])
            urlList.append(ctx.urlList[predictedClass - 1])
    return predictedLogoList, urlList
def allMiss(self):
    # Count and display the test images that every classifier got wrong
    count = 0
    for i in range(74):  # 74 images in the test set
        if (self.HOGMis[i] and self.SURFMis[i]
                and self.predFromProbMis[i] and self.SIFTMis[i]):
            # if bestMis[i]:
            count += 1
            utils.imshow(self.images.testImages[i])
    print(count)
def preprocess(self):
    # self.blurredImage = cv2.GaussianBlur(self.document, (5, 5), 0)
    # [10:-10][10:-10] sliced the rows twice; trim a 10-pixel border on
    # both axes instead
    self.removedEdgeImage = self.document[10:-10, 10:-10]
    self.resizeImage = utils.resize(self.removedEdgeImage)
    if self.debug:
        utils.imshow(self.resizeImage)
    self.erodedImage = erodeImage.erode(self.resizeImage, self.debug)
    utils.imshow(self.erodedImage)
    print("Processed Image for Extraction!")
def erode(image, debug=True):
    # Note: despite the function name, this applies cv2.dilate, not erosion
    kernel = constants.kernel_ones
    image = utils.rgb2gray(image)
    if constants.binarizeOriginal:
        image = utils.imbinarize(image)
    dilate_image = cv2.dilate(image, kernel,
                              iterations=constants.numOfDilation)
    binary_dilate_image = utils.imbinarize(dilate_image)
    if debug:
        utils.imshow(dilate_image)
        utils.imshow(binary_dilate_image)
    return binary_dilate_image
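# erode() reads its configuration from a constants module that is not shown.
# A sketch of the values it needs -- the kernel size, flag, and iteration
# count below are assumptions, not the project's actual settings:
import numpy as np


class constants:
    kernel_ones = np.ones((5, 5), np.uint8)  # structuring element for dilation
    binarizeOriginal = True  # binarize before dilating
    numOfDilation = 3  # dilation iterations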
def captch_ex(img):
    # img = cv2.imread(file_name)
    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
    image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
    # for black text, use cv2.THRESH_BINARY_INV
    ret, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY)

    # A cross-shaped kernel controls the orientation of dilation: a larger x
    # dilates more horizontally, a larger y more vertically
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    # dilate; more iterations mean more dilation
    dilated = cv2.dilate(new_img, kernel, iterations=9)

    # This unpacking assumes OpenCV 3.x, where findContours returns 3 values;
    # on OpenCV 2.x/4.x it returns 2, so use the commented line instead
    _, contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
                                              cv2.CHAIN_APPROX_NONE)
    # contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
    #                                        cv2.CHAIN_APPROX_NONE)

    for contour in contours:
        # get the rectangle bounding the contour
        [x, y, w, h] = cv2.boundingRect(contour)

        # don't plot small false positives that aren't text
        if w < 35 and h < 35:
            continue

        # draw a rectangle around the contour on the original image
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)

    # write original image with added contours to disk
    # cv2.imshow('captcha_result', img)
    # cv2.waitKey()
    utils.imshow(img)
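# The comment above notes the cv2.findContours return-value difference:
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
# (contours, hierarchy). A small version-agnostic helper that could replace
# the hard-coded unpacking:
import cv2


def find_contours_compat(binary_image):
    # Return the contour list regardless of the installed OpenCV version
    result = cv2.findContours(binary_image, cv2.RETR_EXTERNAL,
                              cv2.CHAIN_APPROX_NONE)
    return result[0] if len(result) == 2 else result[1]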
                     dtype=torch.float32)
srgb = ciexyzNet.forward_global(xyz, target='srgb')
if tasks[3] != 'none':
    srgb = pp.postprocessing(srgb, tasks[3]).to(device=device,
                                                dtype=torch.float32)
local_t_srgb = ciexyzNet.forward_local(srgb, target='srgb')
if tasks[4] != 'none':
    local_t_srgb = pp.postprocessing(local_t_srgb, tasks[4]).to(
        device=device, dtype=torch.float32)
result = utils.outOfGamutClipping(
    utils.from_tensor_to_image(srgb + local_t_srgb))
if args.show:
    logging.info(
        'Visualizing results for image: {}, close to continue ...'.format(
            filename))
    utils.imshow(in_img, srgb_out=result, task='pp')
if save_output:
    in_dir, fn = os.path.split(filename)
    name, _ = os.path.splitext(fn)
    out_filename = os.path.join(out_dir, name + '_result.png')
    result = result * 255
    cv2.imwrite(out_filename, result.astype(np.uint8))
if task.lower() == 'srgb-2-xyz-2-srgb':
    with torch.no_grad():
        output_XYZ, output_sRGB = ciexyzNet(in_img_tensor)
    output_XYZ = utils.from_tensor_to_image(output_XYZ, device=device)
    output_sRGB = utils.from_tensor_to_image(output_sRGB, device=device)
    output_XYZ = utils.outOfGamutClipping(output_XYZ)
    output_sRGB = utils.outOfGamutClipping(output_sRGB)
    if args.show:
        logging.info(
            'Visualizing results for image: {}, close to continue ...'.format(
                filename))
        utils.imshow(in_img, xyz_out=output_XYZ, srgb_out=output_sRGB,
                     task=task)
    if save_output:
        in_dir, fn = os.path.split(filename)
        name, _ = os.path.splitext(fn)
        outxyz_name = os.path.join(out_dir['xyz-rec'],
                                   name + '_XYZ_reconstructed.png')
        outsrgb_name = os.path.join(out_dir['re-rendered'],
                                    name + '_sRGB_re-rendered.png')
        # scale by 65535 (not 65536) so a clipped value of 1.0 maps to the
        # uint16 maximum instead of wrapping to 0
        output_XYZ = output_XYZ * 65535
        output_sRGB = output_sRGB * 255
        cv2.imwrite(outxyz_name, output_XYZ.astype(np.uint16))
        cv2.imwrite(outsrgb_name, output_sRGB.astype(np.uint8))
elif task.lower() == 'srgb-2-xyz':
                                                sampler=valid_sampler,
                                                num_workers=2)
self.test_loader = torch.utils.data.DataLoader(test_data,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=2)
self.classes = ('piste-cyclable', 'route', 'sentier', 'trottoir',
                'voie-partagee')


if __name__ == '__main__':
    data_loader = Data()

    data_iter = iter(data_loader.train_loader)
    # data_iter.next() was removed in recent PyTorch; use the builtin next()
    images, labels = next(data_iter)
    utils.imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ',
          ' '.join('%5s' % data_loader.classes[labels[j]] for j in range(4)))

    data_iter = iter(data_loader.test_loader)
    images, labels = next(data_iter)
    utils.imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ',
          ' '.join('%5s' % data_loader.classes[labels[j]] for j in range(4)))

    print('Distribution of classes in train dataset:')
    fig, ax = plt.subplots()
    labels = [label for _, label in data_loader.train_loader.dataset.imgs]
    class_labels, counts = np.unique(labels, return_counts=True)
    ax.bar(class_labels, counts)
    ax.set_xticks(class_labels)
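# utils.imshow is not defined in this snippet. For grids of normalized
# tensors it is typically the helper from the PyTorch classifier tutorial;
# a sketch assuming the dataset was normalized with mean 0.5 and std 0.5
# per channel:
import matplotlib.pyplot as plt
import numpy as np


def imshow(img):
    img = img / 2 + 0.5  # unnormalize (assumes mean=0.5, std=0.5)
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  # CHW -> HWC for matplotlib
    plt.show()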
def main():
    plt.ion()
    params = lab.experiment_params(default_params)

    device = params['device']
    experiment_dir = params['experiment_dir']
    experiment_name = params['experiment_name']
    experiment_tags = params['experiment_tags']
    data_dir = params['data_dir']
    train_dir = params['train_dir']
    preprocessing_filters = params['preprocessing_filters']
    weight_save_epochs = params['weight_save_epochs']
    model_settings = params['model_settings']
    train_settings = params['train_settings']
    deformation_settings = params['deformation_settings']
    rng_seeds = params['rng_seeds']

    model_type = model_settings['model_type']
    n_convs_per_down_block = model_settings['n_convs_per_down_block']
    n_convs_per_up_block = model_settings['n_convs_per_up_block']
    data_scale = model_settings['data_scale']
    data_size_z = model_settings['data_size_z']
    data_size_in = model_settings['data_size_in']
    data_size_out = model_settings['data_size_out']
    up_mode = model_settings['up_mode']
    separable = model_settings['separable']
    leaky = model_settings['leaky']
    instance_norm = model_settings['instance_norm']
    unet_depth = model_settings['unet_depth']
    n_init_filters = model_settings['n_init_filters']
    padding = model_settings['padding']
    detect_threshold = model_settings['detect_threshold']

    window_spacing_z = train_settings['window_spacing_z']
    window_spacing = train_settings['window_spacing']
    n_train_slices = train_settings['n_train_slices']
    pretrained_model_pth = train_settings['pretrained_model_pth']
    pretrained_noise_std_scale = train_settings['pretrained_noise_std_scale']
    weight_floor = train_settings['weight_floor']
    pred_error_weight_scale = train_settings['pred_error_weight_scale']
    pred_error_mode = train_settings['pred_error_mode']
    pred_error_blur_std = train_settings['pred_error_blur_std']
    pred_error_min_size = train_settings['pred_error_min_size']
    pred_error_blending = train_settings['pred_error_blending']
    n_epochs = train_settings['n_epochs']
    batch_size = train_settings['batch_size']
    learning_rate = train_settings['learning_rate']
    weight_decay = train_settings['weight_decay']
    adam_epsilon = train_settings['adam_epsilon']
    lr_factor = train_settings['lr_scheduler_settings']['factor']
    lr_patience = train_settings['lr_scheduler_settings']['patience']
    lr_cooldown = train_settings['lr_scheduler_settings']['cooldown']
    lr_min_lr = train_settings['lr_scheduler_settings']['min_lr']
    show_train_report = train_settings['show_train_report']
    its_per_report = train_settings['its_per_report']
    show_train_image = train_settings['show_train_image']
    its_per_image = train_settings['its_per_image']

    assert model_type in ('2d-unet', 'thin-3d-unet')

    # Logger setup
    filt = DuplicateFilter()
    stdout_handler = logging.StreamHandler(sys.stdout)
    logger = logging.getLogger('Logger')
    logger.addHandler(stdout_handler)
    logger.addFilter(filt)
    logger.setLevel(logging.DEBUG)

    # Comet.ML experiment setup
    with open(comet_key_file, 'r') as fd:
        api_key = fd.read().strip()
    experiment = comet_ml.Experiment(api_key=api_key,
                                     project_name=experiment_name,
                                     workspace='lcimb',
                                     auto_metric_logging=True)
    experiment.log_parameters(params)
    for tag in experiment_tags:
        experiment.add_tag(tag)
    experiment.add_tag(model_type)

    # Output directory setup
    output_dir = lab.create_output_dir(experiment_dir)
    media_dir = os.path.join(output_dir, 'media')
    os.makedirs(media_dir, exist_ok=True)
    lab.save_config(os.path.join(output_dir, 'experiment.cfg'), params)
    lab.archive_experiment(os.path.dirname(os.path.realpath(__file__)),
                           output_dir, ['py'])

    # Tag the experiment with the output folder name
    dir_parts = [p for p in output_dir.split('/') if p]
    experiment.add_tag(f'{"/".join(dir_parts[-2:])}')

    # Add a file handler to the logger to save a log in the output dir
    log_file = os.path.join(output_dir, 'log.log')
    logger.addHandler(logging.FileHandler(log_file))

    # 2D vs 3D model settings. For the 2D model, ignore the specified data
    # z size and force it to use z size 1
    if model_type == '2d-unet':
        data_size_z = 1
        window_spacing_z = 1

    # Data windowing configuration for training and eval datasets
    train_windowing_params = {
        'scaled_image_window_shape':
            [data_size_z, data_size_in, data_size_in],
        'scaled_label_window_shape':
            [data_size_z, data_size_out, data_size_out],
        'scaled_window_spacing':
            [window_spacing_z, window_spacing, window_spacing],
        'random_windowing': True
    }
    eval_windowing_params = {
        'scaled_image_window_shape':
            [data_size_z, data_size_in, data_size_in],
        'scaled_label_window_shape':
            [data_size_z, data_size_out, data_size_out],
        'scaled_window_spacing':
            [data_size_z, data_size_out, data_size_out],
        'random_windowing': False
    }
    experiment.log_parameters(train_windowing_params)
    experiment.log_parameters(eval_windowing_params)

    # Inference data windowing configuration
    forward_windowing_params = {
        'shape_in': [data_size_z, data_size_in, data_size_in],
        'shape_out': [data_size_z, data_size_out, data_size_out],
        'data_scale': data_scale
    }
    experiment.log_parameters(forward_windowing_params)

    # Load the data
    train_image_dir = os.path.join(data_dir, train_dir, 'image',
                                   preprocessing_filters)
    train_image = load_image_dir(train_image_dir, n_train_slices)
    n_train_slices = train_image.shape[0]
    experiment.add_tag(f'{n_train_slices}-slices')
    train_label_dir = os.path.join(data_dir, train_dir, 'label', 'corrected')
    train_label = load_label_dir(train_label_dir, n_train_slices)

    if pred_error_blending:
        pred_error_weight_slices = \
            [pred_error_weight_scale *
             mistake_correction(i, 0, pred_error_mode, pred_error_min_size,
                                os.path.join(data_dir, train_dir),
                                os.path.join('label', 'raw'),
                                os.path.join('label', 'corrected'))
             for i in range(n_train_slices)]
        pred_error_weight_stack = np.stack(pred_error_weight_slices, axis=0)
        # Load the prediction error for the 10x800x800 BI 3 training region
        # and use it instead of the BI 4 prediction errors
        old_train_dir = os.path.join(data_dir, 'train')
        old_pred_weight_slices = \
            [pred_error_weight_scale *
             mistake_correction(i, 0, pred_error_mode, pred_error_min_size,
                                old_train_dir, 'label-raw', 'label')
             for i in range(1, 10)]
        old_pred_weight_stack = np.stack(old_pred_weight_slices, axis=0)
        pred_error_weight_stack[3:12, 200:1000, 200:1000] = np.maximum(
            old_pred_weight_stack,
            pred_error_weight_stack[3:12, 200:1000, 200:1000])
        pred_error_weight_stack = gaussian_filter(
            pred_error_weight_stack.astype(float), sigma=pred_error_blur_std)
        pred_error_weight_stack = pred_error_weight_stack / \
            pred_error_weight_stack.max()
    else:
        pred_error_weight_slices = \
            [pred_error_weight_scale *
             mistake_correction(i, pred_error_blur_std, pred_error_mode,
                                pred_error_min_size,
                                os.path.join(data_dir, train_dir),
                                os.path.join('label', 'raw'),
                                os.path.join('label', 'corrected'))
             for i in range(n_train_slices)]
        pred_error_weight_stack = np.stack(pred_error_weight_slices, axis=0)
    pred_error_weight = 1 + pred_error_weight_stack

    weight_sample = pred_error_weight[0]
    weight_sample = (weight_sample - weight_sample.min()) / \
        (weight_sample.max() - weight_sample.min())
    error_viz = np.zeros(list(weight_sample.shape) + [4])
    error_viz[..., 0] = weight_sample
    error_viz[..., 3] = weight_sample**(1 / 3)
    experiment.log_image(error_viz, name='pred_error_weight sample')

    # Generate class frequency balance weights
    balance_weight = class_balance_weights(train_label)
    # Train weight multiplies class balancing and prediction error weighting
    train_weight = np.maximum(weight_floor, pred_error_weight * balance_weight)

    # Eval data setup
    eval_near_image_dir = os.path.join(data_dir, 'eval-near', 'image',
                                       preprocessing_filters)
    eval_near_image = load_image_dir(eval_near_image_dir, -1)
    eval_near_label_dir = os.path.join(data_dir, 'eval-near', 'label',
                                       'corrected')
    eval_near_label = load_label_dir(eval_near_label_dir, -1)
    eval_far_image_dir = os.path.join(data_dir, 'eval-far', 'image',
                                      preprocessing_filters)
    eval_far_image = load_image_dir(eval_far_image_dir, -1)
    eval_far_label_dir = os.path.join(data_dir, 'eval-far', 'label',
                                      'corrected')
    eval_far_label = load_label_dir(eval_far_label_dir, -1)

    # Build the model
    if model_type == '2d-unet':
        model = UNet(n_classes=1,
                     padding=padding,
                     up_mode=up_mode,
                     depth=unet_depth,
                     n_init_filters=n_init_filters,
                     instance_norm=instance_norm,
                     separable=separable,
                     leaky=leaky)
    elif model_type == 'thin-3d-unet':
        model = Thin3DUNet(z_size=data_size_z,
                           n_classes=1,
                           n_convs_per_down_block=n_convs_per_down_block,
                           n_convs_per_up_block=n_convs_per_up_block,
                           depth=unet_depth,
                           n_init_filters=n_init_filters,
                           padding=padding,
                           instance_norm=instance_norm,
                           up_mode=up_mode,
                           separable=separable,
                           leaky=leaky)
    else:
        raise ValueError(f'Model type {model_type} not recognized')

    # Load pretrained weights if specified
    if pretrained_model_pth is not None:
        model.load_state_dict(torch.load(pretrained_model_pth))
        # If the std scaling > 0, add Gaussian noise to the weights
        if pretrained_noise_std_scale > 0:
            with torch.no_grad():
                for param in model.parameters():
                    std = torch.std(param).item()
                    if not np.isnan(std):
                        param.add_(torch.randn(param.size()) *
                                   pretrained_noise_std_scale * std)

    # Create a model settings JSON file to save along with trained weights
    # Currently hardwired to save UNet config info and source file
    if model_type == '2d-unet':
        model_json = {
            'module': 'UNet',
            'init': {
                'in_channels': 1,
                'n_classes': 1,
                'up_mode': up_mode,
                'depth': unet_depth,
                'n_init_filters': n_init_filters,
                'padding': padding,
                'instance_norm': instance_norm,
                'separable': separable,
                'leaky': leaky
            },
            'window': forward_windowing_params
        }
    elif model_type == 'thin-3d-unet':
        model_json = {
            'module': 'Thin3DUNet',
            'init': {
                'z_size': data_size_z,
                'in_channels': 1,
                'n_classes': 1,
                'n_convs_per_down_block': n_convs_per_down_block,
                'n_convs_per_up_block': n_convs_per_up_block,
                'depth': unet_depth,
                'n_init_filters': n_init_filters,
                'padding': padding,
                'instance_norm': instance_norm,
                'up_mode': up_mode,
                'separable': separable,
                'leaky': leaky
            },
            'window': forward_windowing_params
        }
    else:
        raise ValueError(f'Model type {model_type} not recognized')
    with open(os.path.join(output_dir, 'model.json'), 'w') as fd:
        json.dump(model_json, fd)

    # Save the UNet src file to the output directory
    isbi2021_src_dir = os.path.dirname(os.path.realpath(__file__))
    if model_type == '2d-unet':
        src_name = 'unet.py'
    elif model_type == 'thin-3d-unet':
        src_name = 'thin3dunet.py'
    else:
        raise ValueError(f'Model type {model_type} not recognized')
    unet_src_file = os.path.join(isbi2021_src_dir, 'src', 'models', src_name)
    shutil.copyfile(unet_src_file, os.path.join(output_dir, src_name))

    # Move the model to device
    model.to(device)

    # Set random seeds
    # Seed before model weight init, then again before DataLoader shuffling
    np.random.seed((rng_seeds['model'] + 3) % 2**32)
    random.seed((rng_seeds['model'] + 2) % 2**32)
    torch.manual_seed((rng_seeds['model'] + 1) % 2**32)
    torch.cuda.manual_seed_all((rng_seeds['model']) % 2**32)

    # Initialize weights if necessary
    if pretrained_model_pth is None:
        model.apply(init_weights)

    # Optimization setup
    optim = torch.optim.Adam(model.parameters(),
                             weight_decay=weight_decay,
                             lr=learning_rate,
                             eps=adam_epsilon)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optim,
        factor=lr_factor,
        patience=lr_patience,
        verbose=True,
        cooldown=lr_cooldown,
        min_lr=lr_min_lr)

    # Reseed all the torch-related RNGs for deterministic data shuffling
    np.random.seed((rng_seeds['data'] + 3) % 2**32)
    random.seed((rng_seeds['data'] + 2) % 2**32)
    torch.manual_seed((rng_seeds['data'] + 1) % 2**32)
    torch.cuda.manual_seed_all((rng_seeds['data']) % 2**32)

    # Create a fixed eval batch
    # eval_images, eval_labels = scalable_batch_generator(
    #     eval_image,
    #     eval_label,
    #     data_scale,
    #     return_generators=True,
    #     **eval_windowing_params)
    # eval_dataset = PlateletIterableDataset(
    #     eval_images,
    #     eval_labels,
    #     train=False)
    # eval_dataloader = DataLoader(
    #     eval_dataset,
    #     batch_size=batch_size,
    #     shuffle=False,
    #     num_workers=1)

    # Color settings for plots generated during training and eval
    plot_settings = ({
        'cmap': 'gray'
    }, {
        'cmap': 'jet',
        'vmin': 0,
        'vmax': train_label.max()
    }, {
        'cmap': 'jet',
        'vmin': 0,
        'vmax': train_label.max()
    })

    if show_train_image:
        fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 6))
        obj0 = None
        obj1 = None

    # Track best eval MIoU for model saving
    best_miou = 0

    for epoch in range(n_epochs):
        time0 = time.time()
        experiment.set_epoch(epoch)
        with experiment.train():
            deformation_settings['seed'] = (rng_seeds['data'] + epoch) % 2**32
            images, labels, weights = scalable_batch_generator(
                image=train_image,
                label=train_label,
                data_scale=data_scale,
                weight=train_weight,
                do_deformation=True,
                deformation_settings=deformation_settings,
                return_generators=True,
                **train_windowing_params)
            train_dataset = PlateletIterableDataset(images,
                                                    labels,
                                                    weights,
                                                    train=True)
            train_dataloader = DataLoader(train_dataset,
                                          batch_size=batch_size,
                                          num_workers=1)
            model.train()
            n_examples = 0
            for i, (x, y, w) in enumerate(train_dataloader):
                if show_train_image and i % its_per_image == 0:
                    x_img = np.squeeze(x.detach().numpy())[0]
                    cmap_gray = plt.get_cmap('gray')
                    x_img_rgb = cmap_gray(x_img)
                    if i == 0:
                        obj0 = axs[0].imshow(x_img_rgb)
                    else:
                        obj0.set_data(x_img_rgb)
                    axs[0].set_title(f'Train image {i}')
                assert len(torch.unique(y)) == 2
                n_examples += 1
                x = x.to(device)
                y = y.float().to(device)
                w = torch.squeeze(w.to(device))
                prediction = torch.squeeze(model(x), dim=1)
                if show_train_image and i % its_per_image == 0:
                    pred_img = np.squeeze(
                        prediction.cpu().detach().numpy())[0]
                    cmap_pred = plt.get_cmap('Blues')
                    pred_img_rgb = cmap_pred(pred_img)
                    overlay_rgb = np.copy(x_img_rgb)
                    overlay_rgb[pred_img > 0.01, :] = \
                        pred_img_rgb[pred_img > 0.01, :]
                    if i == 0:
                        obj1 = axs[1].imshow(overlay_rgb)
                    else:
                        obj1.set_data(overlay_rgb)
                    axs[1].set_title(f'Prediction overlay {i}')
                    plt.show()
                    plt.pause(0.0001)
                loss = F.binary_cross_entropy_with_logits(
                    prediction, torch.squeeze(y, dim=1), reduction='none')
                loss = torch.sum(torch.mul(loss, w))
                optim.zero_grad()
                loss.backward()
                optim.step()
                experiment.log_metric('loss', loss.item())
                if show_train_report and (i + 1) % its_per_report == 0:
                    time1 = time.time()
                    dtime = time1 - time0
                    batch_per_sec = its_per_report / dtime
                    its_per_sec = batch_per_sec * batch_size
                    time0 = time1
                    print(f'Epoch [{epoch + 1}/{n_epochs}], Step {i + 1} '
                          f'({batch_per_sec:.2g} batch/sec, {its_per_sec:.2g} '
                          f'its/sec), Loss: {loss.item():.3f}')
                    classes = (prediction > detect_threshold).int()
                    if data_size_z > 1:
                        z_half = (data_size_z - 1) // 2
                        x = x[..., z_half, :, :]
                        classes = classes[..., z_half, :, :]
                        y = y[..., z_half, :, :]
                    if batch_size > 1:
                        images = (np.squeeze(x.cpu().numpy())[0],
                                  np.squeeze(classes.cpu().numpy())[0],
                                  np.squeeze(y.cpu().numpy())[0])
                    else:
                        images = (np.squeeze(x.cpu().numpy()),
                                  np.squeeze(classes.cpu().numpy()),
                                  np.squeeze(y.cpu().numpy()))
                    fig = imshow(images, (15, 5), plot_settings)
                    experiment.log_figure(
                        figure_name=f'epoch_{epoch + 1}_step_{i + 1}',
                        figure=fig)
                    plt.close(fig)
            print(f'Epoch {epoch}: {n_examples} samples')

        # Get stats on eval data
        eval_images = [eval_near_image, eval_far_image]
        eval_labels = [eval_near_label, eval_far_label]
        eval_names = ['Near', 'Far']
        for eval_image, eval_label, eval_name in \
                zip(eval_images, eval_labels, eval_names):
            if eval_image.shape[0] > 1:
                save_file = os.path.join(media_dir, f'epoch_{epoch:04}.tif')
            else:
                save_file = os.path.join(media_dir, f'epoch_{epoch:04}.png')
            print('\n==========')
            print(f'Eval Stats {eval_name}, Epoch {epoch}:')
            eval_segmentation = segment_online(eval_image,
                                               model,
                                               forward_windowing_params,
                                               save_file,
                                               window_spacing=None,
                                               device=device)
            z_half = eval_segmentation.shape[0] // 2
            eval_seg_img = np.squeeze(eval_segmentation).astype(float)
            if eval_seg_img.ndim == 3:
                eval_seg_img = eval_seg_img[z_half]
            with experiment.validate():
                experiment.log_image(eval_seg_img,
                                     name=f'Eval {eval_name} {epoch}',
                                     image_colormap='jet')
                # Compute eval MIoU
                miou = jaccard_score(eval_label.flatten(),
                                     eval_segmentation.flatten(),
                                     average='macro')
                print(f' Mean IoU ({eval_name}): {miou}')
                experiment.log_metric(f'eval_{eval_name.lower()}_miou', miou)
                # Compute false positive and false negative rates, using
                # several size thresholds for detection regions
                false_negatives = eval_label.astype(bool) & ~eval_segmentation
                false_positives = ~eval_label.astype(bool) & eval_segmentation
                neg_percs = []
                pos_percs = []
                thresholds = [0, 5, 65, 401]
                for threshold in thresholds:
                    if threshold == 0:
                        thresholded_fn = false_negatives
                        thresholded_fp = false_positives
                    else:
                        thresholded_fn = remove_small_objects(
                            false_negatives, threshold)
                        thresholded_fp = remove_small_objects(
                            false_positives, threshold)
                    neg_perc = thresholded_fn.sum() / thresholded_fn.size * 100
                    neg_percs.append(neg_perc)
                    pos_perc = thresholded_fp.sum() / thresholded_fp.size * 100
                    pos_percs.append(pos_perc)
                    experiment.log_metric(
                        f'eval_{eval_name.lower()}_fn-{threshold}', neg_perc)
                    experiment.log_metric(
                        f'eval_{eval_name.lower()}_fp-{threshold}', pos_perc)
                neg_str = '. '.join(
                    [f'{p:.3f}% ({t})' for p, t in zip(neg_percs, thresholds)])
                pos_str = '. '.join(
                    [f'{p:.3f}% ({t})' for p, t in zip(pos_percs, thresholds)])
                print(' False negatives (min size): ' + neg_str)
                print(' False positives (min size): ' + pos_str)
                print('==========\n')
            if eval_name == 'Near':
                if miou > best_miou:
                    best_weight_path = os.path.join(output_dir,
                                                    'best_weights.pth')
                    print('Saving best model')
                    best_miou = miou
                    torch.save(model.state_dict(), best_weight_path)

        # Save trained weights after specified epochs
        if epoch in weight_save_epochs:
            weight_save_path = os.path.join(output_dir, f'weights_{epoch}.pth')
            torch.save(model.state_dict(), weight_save_path)
        lr_scheduler.step(loss)

    experiment.end()
    return model
                                          bird_view_image)

    # lane marking points in bird view
    left_lane_marking_points = find_lane_marking_points(
        bird_view_image, left_lane_marking_u)
    right_lane_marking_points = find_lane_marking_points(
        bird_view_image, right_lane_marking_u)

    # transform lane marking points back to front view, with the inverse
    # perspective transform matrix
    left_points_front_view = convert_points_to_front_view(
        transform, left_lane_marking_points)
    right_points_front_view = convert_points_to_front_view(
        transform, right_lane_marking_points)

    # third-order polynomial curve fitting
    left_poly_curve = PolynomialCurve(start_v, end_v, left_points_front_view)
    right_poly_curve = PolynomialCurve(start_v, end_v, right_points_front_view)

    # draw polynomial curves on the original image
    bgr_image = draw_polynomial_curve(bgr_image, left_poly_curve)
    bgr_image = draw_polynomial_curve(bgr_image, right_poly_curve)

    # draw drivable area
    bgr_image = draw_drivable_area(bgr_image, left_poly_curve,
                                   right_poly_curve)
    return bgr_image


if __name__ == "__main__":
    test_image_filename = "../data/test.png"
    lane_marking = process_image(test_image_filename)
    imshow(lane_marking)
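# PolynomialCurve is not defined in this snippet. A minimal sketch of a
# third-order fit over v (row) coordinates, matching the comment above --
# the (u, v) point layout and the attribute names are assumptions:
import numpy as np


class PolynomialCurve:
    def __init__(self, start_v, end_v, points):
        points = np.asarray(points, dtype=float)  # assumed (u, v) pixel pairs
        # fit u as a cubic in v, since lane markings are near-vertical
        self.coeffs = np.polyfit(points[:, 1], points[:, 0], deg=3)
        self.v = np.arange(start_v, end_v)
        self.u = np.polyval(self.coeffs, self.v)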
def detectPage(image):
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # convert the image to grayscale, blur it, and find edges in the image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)
    utils.imshow(edged)

    # show the original image and the edge detected image
    # print("STEP 1: Edge Detection")
    # cv2.imshow("Image", image)
    # cv2.imshow("Edged", edged)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    # find the contours in the edged image, keeping only the largest ones,
    # and initialize the screen contour
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST,
                            cv2.CHAIN_APPROX_SIMPLE)
    # grab_contours handles the differing findContours return values across
    # OpenCV versions; the original cnts[0]/cnts[1] switch picked the wrong
    # element on OpenCV 4.x
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    # loop over the contours
    screenCnt = None
    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # if our approximated contour has four points, then we can assume
        # that we have found our screen
        if len(approx) == 4:
            screenCnt = approx
            break

    # show the contour (outline) of the piece of paper
    if screenCnt is None:
        print("Page not detected")
        return orig
    # print("STEP 2: Find contours of paper")
    # cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    # cv2.imshow("Outline", image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    # apply the four point transform to obtain a top-down view of the
    # original image
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

    # convert the warped image to grayscale, then threshold it to give it
    # that 'black and white' paper effect
    # warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    # T = threshold_local(warped, 11, offset=10, method="gaussian")
    # warped = np.uint8(warped > T) * 255

    # show the original and scanned images
    # print("STEP 3: Apply perspective transform")
    # cv2.imshow("Original", imutils.resize(orig, height=650))
    # cv2.imshow("Scanned", imutils.resize(warped, height=650))
    # cv2.waitKey(0)
    return warped
# print("STEP 2: Find contours of paper") # cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2) # cv2.imshow("Outline", image) # cv2.waitKey(0) # cv2.destroyAllWindows() # apply the four point transform to obtain a top-down # view of the original image warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio) # convert the warped image to grayscale, then threshold it # to give it that 'black and white' paper effect # warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY) # T = threshold_local(warped, 11, offset=10, method="gaussian") # warped = np.uint8(warped > T) * 255 # show the original and scanned images # print("STEP 3: Apply perspective transform") # cv2.imshow("Original", imutils.resize(orig, height=650)) # cv2.imshow("Scanned", imutils.resize(warped, height=650)) # cv2.waitKey(0) return warped if __name__ == "__main__": loc = "C:/Users/Abhishek Bansal/Desktop/Image Processing/Logo Identification/Logo2.jpg" image = cv2.imread(loc) utils.imshow(image) page = detectPage(image) page = utils.resize(page) utils.imshow(page)