def train(self):
        print('training model...')
        for epoch in tqdm(range(N_EPOCHS), position=0, leave=True):
            epoch_running_loss = 0.0
            running_loss = 0.0
            for i, data in enumerate(tqdm(self.trainloader, position=0, leave=True), 0):
                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = data[0].to(self.device), data[1].to(self.device)

                # zero the parameter gradients
                self.optimizer.zero_grad()

                # forward + backward + optimize
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                # print statistics
                running_loss += loss.item()
                epoch_running_loss += loss.item()
                # print every 2000 mini-batches
                if i % 2000 == 1999:
                    print(inputs[0].size())
                    imshow(inputs[0].to('cpu'))

                    print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                    running_loss = 0.0
            self.train_losses_by_epoch.append(epoch_running_loss / len(self.trainloader))
        print('finished training')
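Note: every example on this page calls an `imshow` helper defined elsewhere in its project. A minimal sketch of such a helper, assuming matplotlib is available, pixel values are already in a displayable range, and the input is either a CHW torch tensor or an OpenCV-style BGR array (real implementations, including the keyword-argument variants used further down, will differ):

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    # CHW torch tensors -> HWC numpy arrays for matplotlib
    if hasattr(img, 'numpy'):
        img = np.transpose(img.numpy(), (1, 2, 0))
    img = np.squeeze(img)
    if img.ndim == 3:
        # OpenCV stores color images as BGR; matplotlib expects RGB
        img = img[..., ::-1]
        plt.imshow(img)
    else:
        plt.imshow(img, cmap='gray')
    plt.axis('off')
    plt.show()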
Example #2
def predict_image():
    print(request.headers)
    print("Running")
    imageBytes = base64.b64decode(request.data)
    pilImage = Image.open(io.BytesIO(imageBytes))
    print("Still Running")
    cv2Image = cv2.cvtColor(np.array(pilImage), cv2.COLOR_RGB2BGR)
    print("Extracting Logo!")
    extract = ExtractLogo()
    import src.utils as utils
    # Detecting page
    # from src.extraction.detectPage import detectPage
    # cv2Image = detectPage(cv2Image)

    # Instead of detecting page, directly reduce resolution and process
    cv2Image = utils.resize(cv2Image)
    utils.imshow(cv2Image)

    # No need of segmentation
    predictedLogoList, urlList = extract.extract(cv2Image, segment=False)

    # imFileLoc = "C:/Users/Abhishek Bansal/Desktop/img.jpg"
    # cv2.imwrite(imFileLoc, cv2Image)

    response = {}
    response["status"] = 200
    response["answer"] = "Predicted logos are: "
    response["url"] = urlList[0][2:-1]
    print(urlList[0])
    print(response["url"])
    # join the predicted logo names with a separator for readability
    response["answer"] += ", ".join(predictedLogoList)
    print(json.dumps(response))
    return json.dumps(response)
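A minimal client for exercising this route might look like the following sketch; the host, port, and route path are assumptions, since the `@app.route` decorator is not shown:

import base64
import requests

# hypothetical endpoint; match it to the actual Flask route registration
with open('logo_sample.jpg', 'rb') as f:
    payload = base64.b64encode(f.read())

resp = requests.post('http://localhost:5000/predict_image', data=payload)
print(resp.json()['answer'], resp.json()['url'])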
    def extract(self, image, debug=False, segment=True):
        self.document = image
        self.debug = debug

        self.preprocess()
        self.findConnComp()
        # utils.imshow(self.erodedImage)
        sl = SegmentLogo(self.resizeImage,
                         self.mergedComponents.mergedComponents)
        # sl = SegmentLogo(orig_image, comp.components)
        if segment:
            sl.segmentLogoByMean()
        else:
            sl.logos.append(image)
        predictedLogoList = []
        urlList = []
        # load the classification models once rather than once per logo
        ctx = Context()
        ctx.loadModels()
        for logo in sl.logos:
            utils.imshow(logo)
            # utils.imshow(process_image(logo))
            pred = Predict(ctx)
            predictedClass = pred.predictLabel(logo)
            a = pred.predictedSURFClass
            b = pred.predictedSIFTClass
            print(ctx.stringLabels[a - 1], ctx.stringLabels[b - 1])
            print("Predicted Class: ", ctx.stringLabels[predictedClass - 1])
            if predictedClass != -1:
                predictedLogoList.append(ctx.stringLabels[predictedClass - 1])
                urlList.append(ctx.urlList[predictedClass - 1])
        return predictedLogoList, urlList
    def allMiss(self):
        count = 0
        for i in range(74):
            if (self.HOGMis[i] and self.SURFMis[i]
                    and self.predFromProbMis[i] and self.SIFTMis[i]):
                # if bestMis[i]:
                count += 1
                utils.imshow(self.images.testImages[i])
        print(count)
    def preprocess(self):
        # self.blurredImage = cv2.GaussianBlur(self.document, (5, 5), 0)
        # trim a 10-pixel border from rows and columns in one slice;
        # [10:-10][10:-10] would slice the rows twice and never trim columns
        self.removedEdgeImage = self.document[10:-10, 10:-10]
        self.resizeImage = utils.resize(self.removedEdgeImage)
        if self.debug:
            utils.imshow(self.resizeImage)
        self.erodedImage = erodeImage.erode(self.resizeImage, self.debug)
        utils.imshow(self.erodedImage)
        print("Processed Image for Extraction!")
def erode(image, debug=True):
    kernel = constants.kernel_ones
    image = utils.rgb2gray(image)
    if constants.binarizeOriginal:
        image = utils.imbinarize(image)
    dilate_image = cv2.dilate(image,
                              kernel,
                              iterations=constants.numOfDilation)
    binary_dilate_image = utils.imbinarize(dilate_image)
    if debug:
        utils.imshow(dilate_image)
        utils.imshow(binary_dilate_image)
    return binary_dilate_image
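`erode` depends on a `constants` module and two `utils` helpers that are not shown. A plausible sketch, assuming a small all-ones structuring element and Otsu binarization (the real modules may use different values):

import cv2
import numpy as np

# assumed contents of the constants module
kernel_ones = np.ones((3, 3), np.uint8)  # structuring element for dilation
numOfDilation = 2                        # dilation iterations
binarizeOriginal = False                 # binarize before dilating?

def rgb2gray(image):
    # despite the name, OpenCV images are BGR in memory
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def imbinarize(image):
    # Otsu's method selects the threshold automatically
    _, binary = cv2.threshold(image, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary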
def captch_ex(img):

    # img = cv2.imread(file_name)
    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
    image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
    # for dark text on a light background, use cv2.THRESH_BINARY_INV instead
    ret, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY)
    # the kernel shape steers the dilation: a wider kernel dilates more
    # horizontally, a taller one more vertically
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    # more iterations give heavier dilation, merging nearby glyphs into blobs
    dilated = cv2.dilate(new_img, kernel, iterations=9)
    # OpenCV 2.x/4.x: findContours returns (contours, hierarchy)
    contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_NONE)
    # OpenCV 3.x returns three values instead:
    # _, contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    for contour in contours:
        # get rectangle bounding contour
        [x, y, w, h] = cv2.boundingRect(contour)

        # Don't plot small false positives that aren't text
        if w < 35 and h < 35:
            continue

        # draw rectangle around contour on original image
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)

    # write original image with added contours to disk
    # cv2.imshow('captcha_result', img)
    # cv2.waitKey()
    utils.imshow(img)
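A quick way to exercise the function, assuming a local test image (the file name is a placeholder):

import cv2

img = cv2.imread('sample_text.png')  # placeholder path
if img is None:
    raise FileNotFoundError('sample_text.png not found')
captch_ex(img)  # draws magenta boxes around likely text regions and shows the result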
                                                      dtype=torch.float32)

        srgb = ciexyzNet.forward_global(xyz, target='srgb')

        if tasks[3] != 'none':
            srgb = pp.postprocessing(srgb, tasks[3]).to(device=device,
                                                        dtype=torch.float32)

        local_t_srgb = ciexyzNet.forward_local(srgb, target='srgb')

        if tasks[4] != 'none':
            local_t_srgb = pp.postprocessing(local_t_srgb,
                                             tasks[4]).to(device=device,
                                                          dtype=torch.float32)

        result = utils.outOfGamutClipping(
            utils.from_tensor_to_image(srgb + local_t_srgb))

        if args.show:
            logging.info(
                "Visualizing results for image: {}, close to continue ...".
                format(filename))
            utils.imshow(in_img, srgb_out=result, task='pp')

        if save_output:
            in_dir, fn = os.path.split(filename)
            name, _ = os.path.splitext(fn)
            out_filename = os.path.join(out_dir, name + '_result.png')
            result = result * 255
            cv2.imwrite(out_filename, result.astype(np.uint8))
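`utils.outOfGamutClipping` is not shown; for images normalized to [0, 1] it is presumably a straightforward clip, along these lines:

import numpy as np

def outOfGamutClipping(I):
    # clamp out-of-gamut values into the displayable [0, 1] range
    return np.clip(I, 0.0, 1.0)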
Example #9
        if task.lower() == 'srgb-2-xyz-2-srgb':
            with torch.no_grad():
                output_XYZ, output_sRGB = ciexyzNet(in_img_tensor)
            output_XYZ = utils.from_tensor_to_image(output_XYZ, device=device)
            output_sRGB = utils.from_tensor_to_image(output_sRGB,
                                                     device=device)
            output_XYZ = utils.outOfGamutClipping(output_XYZ)
            output_sRGB = utils.outOfGamutClipping(output_sRGB)

            if args.show:
                logging.info(
                    "Visualizing results for image: {}, close to continue ...".
                    format(filename))
                utils.imshow(in_img,
                             xyz_out=output_XYZ,
                             srgb_out=output_sRGB,
                             task=task)

            if save_output:
                in_dir, fn = os.path.split(filename)
                name, _ = os.path.splitext(fn)
                outxyz_name = os.path.join(out_dir['xyz-rec'],
                                           name + '_XYZ_reconstructed.png')
                outsrgb_name = os.path.join(out_dir['re-rendered'],
                                            name + '_sRGB_re-rendered.png')
                output_XYZ = output_XYZ * 65535  # uint16 max; * 65536 would wrap 1.0 to 0
                output_sRGB = output_sRGB * 255
                cv2.imwrite(outxyz_name, output_XYZ.astype(np.uint16))
                cv2.imwrite(outsrgb_name, output_sRGB.astype(np.uint8))

        elif task.lower() == 'srgb-2-xyz':
Example #10
                                                        sampler=valid_sampler,
                                                        num_workers=2)
        self.test_loader = torch.utils.data.DataLoader(test_data,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       num_workers=2)

        self.classes = ('piste-cyclable', 'route', 'sentier', 'trottoir',
                        'voie-partagee')


if __name__ == '__main__':
    data_loader = Data()
    data_iter = iter(data_loader.train_loader)
    images, labels = next(data_iter)  # Python 3: next(iterator), not iterator.next()
    utils.imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ',
          ' '.join('%5s' % data_loader.classes[labels[j]] for j in range(4)))

    data_iter = iter(data_loader.test_loader)
    images, labels = next(data_iter)
    utils.imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ',
          ' '.join('%5s' % data_loader.classes[labels[j]] for j in range(4)))

    print('Distribution of classes in train dataset:')
    fig, ax = plt.subplots()
    labels = [label for _, label in data_loader.train_loader.dataset.imgs]
    class_labels, counts = np.unique(labels, return_counts=True)
    ax.bar(class_labels, counts)
    ax.set_xticks(class_labels)
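The `utils.imshow` paired with `make_grid` here is presumably the tutorial-style helper that undoes a mean-0.5/std-0.5 normalization before plotting (an assumption, since the transform pipeline is not shown):

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    img = img / 2 + 0.5  # undo Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  # CHW -> HWC
    plt.show()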
Example #11
def main():
    plt.ion()
    params = lab.experiment_params(default_params)
    device = params['device']
    experiment_dir = params['experiment_dir']
    experiment_name = params['experiment_name']
    experiment_tags = params['experiment_tags']
    data_dir = params['data_dir']
    train_dir = params['train_dir']
    preprocessing_filters = params['preprocessing_filters']
    weight_save_epochs = params['weight_save_epochs']
    model_settings = params['model_settings']
    train_settings = params['train_settings']
    deformation_settings = params['deformation_settings']
    rng_seeds = params['rng_seeds']

    model_type = model_settings['model_type']
    n_convs_per_down_block = model_settings['n_convs_per_down_block']
    n_convs_per_up_block = model_settings['n_convs_per_up_block']
    data_scale = model_settings['data_scale']
    data_size_z = model_settings['data_size_z']
    data_size_in = model_settings['data_size_in']
    data_size_out = model_settings['data_size_out']
    up_mode = model_settings['up_mode']
    separable = model_settings['separable']
    leaky = model_settings['leaky']
    instance_norm = model_settings['instance_norm']
    unet_depth = model_settings['unet_depth']
    n_init_filters = model_settings['n_init_filters']
    padding = model_settings['padding']
    detect_threshold = model_settings['detect_threshold']

    window_spacing_z = train_settings['window_spacing_z']
    window_spacing = train_settings['window_spacing']
    n_train_slices = train_settings['n_train_slices']
    pretrained_model_pth = train_settings['pretrained_model_pth']
    pretrained_noise_std_scale = train_settings['pretrained_noise_std_scale']
    weight_floor = train_settings['weight_floor']
    pred_error_weight_scale = train_settings['pred_error_weight_scale']
    pred_error_mode = train_settings['pred_error_mode']
    pred_error_blur_std = train_settings['pred_error_blur_std']
    pred_error_min_size = train_settings['pred_error_min_size']
    pred_error_blending = train_settings['pred_error_blending']
    n_epochs = train_settings['n_epochs']
    batch_size = train_settings['batch_size']
    learning_rate = train_settings['learning_rate']
    weight_decay = train_settings['weight_decay']
    adam_epsilon = train_settings['adam_epsilon']
    lr_factor = train_settings['lr_scheduler_settings']['factor']
    lr_patience = train_settings['lr_scheduler_settings']['patience']
    lr_cooldown = train_settings['lr_scheduler_settings']['cooldown']
    lr_min_lr = train_settings['lr_scheduler_settings']['min_lr']
    show_train_report = train_settings['show_train_report']
    its_per_report = train_settings['its_per_report']
    show_train_image = train_settings['show_train_image']
    its_per_image = train_settings['its_per_image']

    assert model_type in ('2d-unet', 'thin-3d-unet')

    # Logger setup
    filt = DuplicateFilter()
    stdout_handler = logging.StreamHandler(sys.stdout)
    logger = logging.getLogger('Logger')
    logger.addHandler(stdout_handler)
    logger.addFilter(filt)
    logger.setLevel(logging.DEBUG)

    # Comet.ML experiment setup
    with open(comet_key_file, 'r') as fd:
        api_key = fd.read().strip()
    experiment = comet_ml.Experiment(api_key=api_key,
                                     project_name=experiment_name,
                                     workspace='lcimb',
                                     auto_metric_logging=True)
    experiment.log_parameters(params)
    for tag in experiment_tags:
        experiment.add_tag(tag)
    experiment.add_tag(model_type)

    # Output directory setup
    output_dir = lab.create_output_dir(experiment_dir)
    media_dir = os.path.join(output_dir, 'media')
    os.makedirs(media_dir, exist_ok=True)
    lab.save_config(os.path.join(output_dir, 'experiment.cfg'), params)
    lab.archive_experiment(os.path.dirname(os.path.realpath(__file__)),
                           output_dir, ['py'])
    # Tag the experiment with the output folder name
    dir_parts = [p for p in output_dir.split('/') if p]
    experiment.add_tag('/'.join(dir_parts[-2:]))

    # Add a filehandler to the logger to save a log in the output dir
    log_file = os.path.join(output_dir, 'log.log')
    logger.addHandler(logging.FileHandler(log_file))

    # 2D vs 3D model settings. For the 2D model, ignore the specified data
    # z size and force it to use z size 1

    if model_type == '2d-unet':
        data_size_z = 1
        window_spacing_z = 1

    # Data windowing configuration for training and eval datasets
    train_windowing_params = {
        'scaled_image_window_shape': [data_size_z, data_size_in, data_size_in],
        'scaled_label_window_shape':
        [data_size_z, data_size_out, data_size_out],
        'scaled_window_spacing':
        [window_spacing_z, window_spacing, window_spacing],
        'random_windowing': True
    }
    eval_windowing_params = {
        'scaled_image_window_shape': [data_size_z, data_size_in, data_size_in],
        'scaled_label_window_shape':
        [data_size_z, data_size_out, data_size_out],
        'scaled_window_spacing': [data_size_z, data_size_out, data_size_out],
        'random_windowing': False
    }
    experiment.log_parameters(train_windowing_params)
    experiment.log_parameters(eval_windowing_params)

    # Inference data windowing configuration
    forward_windowing_params = {
        'shape_in': [data_size_z, data_size_in, data_size_in],
        'shape_out': [data_size_z, data_size_out, data_size_out],
        'data_scale': data_scale
    }
    experiment.log_parameters(forward_windowing_params)

    # Load the data

    train_image_dir = os.path.join(data_dir, train_dir, 'image',
                                   preprocessing_filters)

    train_image = load_image_dir(train_image_dir, n_train_slices)
    n_train_slices = train_image.shape[0]

    experiment.add_tag(f'{n_train_slices}-slices')

    train_label_dir = os.path.join(data_dir, train_dir, 'label', 'corrected')
    train_label = load_label_dir(train_label_dir, n_train_slices)

    if pred_error_blending:
        pred_error_weight_slices = \
            [pred_error_weight_scale *
             mistake_correction(i, 0, pred_error_mode,
                                pred_error_min_size,
                                os.path.join(data_dir, train_dir),
                                os.path.join('label', 'raw'),
                                os.path.join('label', 'corrected'))
             for i in range(n_train_slices)]
        pred_error_weight_stack = np.stack(pred_error_weight_slices, axis=0)

        # Load the prediction error for the 10x800x800 BI 3 training region and
        # use it instead of the BI 4 prediction errors
        old_train_dir = os.path.join(data_dir, 'train')
        old_pred_weight_slices = \
            [pred_error_weight_scale *
             mistake_correction(i, 0, pred_error_mode, pred_error_min_size, old_train_dir, 'label-raw', 'label')
             for i in range(1, 10)]
        old_pred_weight_stack = np.stack(old_pred_weight_slices, axis=0)
        pred_error_weight_stack[3:12, 200:1000, 200:1000] = np.maximum(
            old_pred_weight_stack, pred_error_weight_stack[3:12, 200:1000,
                                                           200:1000])
        pred_error_weight_stack = gaussian_filter(
            pred_error_weight_stack.astype(float), sigma=pred_error_blur_std)
        pred_error_weight_stack = pred_error_weight_stack / \
                                  pred_error_weight_stack.max()
    else:
        pred_error_weight_slices = \
            [pred_error_weight_scale *
             mistake_correction(i, pred_error_blur_std, pred_error_mode,
                                pred_error_min_size,
                                os.path.join(data_dir, train_dir),
                                os.path.join('label', 'raw'),
                                os.path.join('label', 'corrected'))
             for i in range(n_train_slices)]
        pred_error_weight_stack = np.stack(pred_error_weight_slices, axis=0)
    pred_error_weight = 1 + pred_error_weight_stack

    weight_sample = pred_error_weight[0]
    weight_sample = (weight_sample - weight_sample.min()) / \
                    (weight_sample.max() - weight_sample.min())
    error_viz = np.zeros(list(weight_sample.shape) + [4])
    error_viz[..., 0] = weight_sample
    error_viz[..., 3] = weight_sample**(1 / 3)
    experiment.log_image(error_viz, name='pred_error_weight sample')

    # Generate class frequency balance weights
    balance_weight = class_balance_weights(train_label)

    # Train weight multiplies class balancing and prediction error weighting
    train_weight = np.maximum(weight_floor, pred_error_weight * balance_weight)

    # Eval data setup

    eval_near_image_dir = os.path.join(data_dir, 'eval-near', 'image',
                                       preprocessing_filters)
    eval_near_image = load_image_dir(eval_near_image_dir, -1)
    eval_near_label_dir = os.path.join(data_dir, 'eval-near', 'label',
                                       'corrected')
    eval_near_label = load_label_dir(eval_near_label_dir, -1)

    eval_far_image_dir = os.path.join(data_dir, 'eval-far', 'image',
                                      preprocessing_filters)
    eval_far_image = load_image_dir(eval_far_image_dir, -1)
    eval_far_label_dir = os.path.join(data_dir, 'eval-far', 'label',
                                      'corrected')
    eval_far_label = load_label_dir(eval_far_label_dir, -1)

    if model_type == '2d-unet':
        # Build the model
        model = UNet(n_classes=1,
                     padding=padding,
                     up_mode=up_mode,
                     depth=unet_depth,
                     n_init_filters=n_init_filters,
                     instance_norm=instance_norm,
                     separable=separable,
                     leaky=leaky)
    elif model_type == 'thin-3d-unet':
        model = Thin3DUNet(z_size=data_size_z,
                           n_classes=1,
                           n_convs_per_down_block=n_convs_per_down_block,
                           n_convs_per_up_block=n_convs_per_up_block,
                           depth=unet_depth,
                           n_init_filters=n_init_filters,
                           padding=padding,
                           instance_norm=instance_norm,
                           up_mode=up_mode,
                           separable=separable,
                           leaky=leaky)
    else:
        raise ValueError(f'Model type {model_type} not recognized')

    # Load pretrained weights if specified
    if pretrained_model_pth is not None:
        model.load_state_dict(torch.load(pretrained_model_pth))
        # If the std scaling > 0, add Gaussian noise to the weights
        if pretrained_noise_std_scale > 0:
            with torch.no_grad():
                for param in model.parameters():
                    std = torch.std(param).item()
                    if not np.isnan(std):
                        param.add_(
                            torch.randn(param.size()) *
                            pretrained_noise_std_scale * std)

    # Create a model settings JSON file to save along with trained weights
    # Currently hardwired to save UNet config info and source file
    if model_type == '2d-unet':
        model_json = {
            'module': 'UNet',
            'init': {
                'in_channels': 1,
                'n_classes': 1,
                'up_mode': up_mode,
                'depth': unet_depth,
                'n_init_filters': n_init_filters,
                'padding': padding,
                'instance_norm': instance_norm,
                'separable': separable,
                'leaky': leaky
            },
            'window': forward_windowing_params
        }
    elif model_type == 'thin-3d-unet':
        model_json = {
            'module': 'Thin3DUNet',
            'init': {
                'z_size': data_size_z,
                'in_channels': 1,
                'n_classes': 1,
                'n_convs_per_down_block': n_convs_per_down_block,
                'n_convs_per_up_block': n_convs_per_up_block,
                'depth': unet_depth,
                'n_init_filters': n_init_filters,
                'padding': padding,
                'instance_norm': instance_norm,
                'up_mode': up_mode,
                'separable': separable,
                'leaky': leaky
            },
            'window': forward_windowing_params
        }
    else:
        raise ValueError(f'Model type {model_type} not recognized')

    with open(os.path.join(output_dir, 'model.json'), 'w') as fd:
        json.dump(model_json, fd)
    # Save the UNet src file to the output directory
    isbi2021_src_dir = os.path.dirname(os.path.realpath(__file__))

    if model_type == '2d-unet':
        src_name = 'unet.py'
    elif model_type == 'thin-3d-unet':
        src_name = 'thin3dunet.py'
    else:
        raise ValueError(f'Model type {model_type} not recognized')

    unet_src_file = os.path.join(isbi2021_src_dir, 'src', 'models', src_name)
    shutil.copyfile(unet_src_file, os.path.join(output_dir, src_name))

    # Move the model to device
    model.to(device)

    # Set random seeds
    # Seed before model weight init, then again before DataLoader shuffling
    np.random.seed((rng_seeds['model'] + 3) % 2**32)
    random.seed((rng_seeds['model'] + 2) % 2**32)
    torch.manual_seed((rng_seeds['model'] + 1) % 2**32)
    torch.cuda.manual_seed_all((rng_seeds['model']) % 2**32)

    # Initialize weights if necessary
    if pretrained_model_pth is None:
        model.apply(init_weights)

    # Optimization setup

    optim = torch.optim.Adam(model.parameters(),
                             weight_decay=weight_decay,
                             lr=learning_rate,
                             eps=adam_epsilon)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optim,
        factor=lr_factor,
        patience=lr_patience,
        verbose=True,
        cooldown=lr_cooldown,
        min_lr=lr_min_lr)

    # Reseed all the torch-related RNGs for deterministic data shuffling
    np.random.seed((rng_seeds['data'] + 3) % 2**32)
    random.seed((rng_seeds['data'] + 2) % 2**32)
    torch.manual_seed((rng_seeds['data'] + 1) % 2**32)
    torch.cuda.manual_seed_all((rng_seeds['data']) % 2**32)

    # Create a fixed eval batch
    # eval_images, eval_labels = scalable_batch_generator(
    #     eval_image,
    #     eval_label,
    #     data_scale,
    #     return_generators=True,
    #     **eval_windowing_params)
    # eval_dataset = PlateletIterableDataset(
    #     eval_images,
    #     eval_labels,
    #     train=False)
    # eval_dataloader = DataLoader(
    #     eval_dataset,
    #     batch_size=batch_size,
    #     shuffle=False,
    #     num_workers=1)

    # Color settings for plots generated during training and eval
    plot_settings = ({
        'cmap': 'gray'
    }, {
        'cmap': 'jet',
        'vmin': 0,
        'vmax': train_label.max()
    }, {
        'cmap': 'jet',
        'vmin': 0,
        'vmax': train_label.max()
    })

    if show_train_image:
        fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 6))
        obj0 = None
        obj1 = None

    # Track best eval MIoU for model saving
    best_miou = 0
    for epoch in range(n_epochs):
        time0 = time.time()
        experiment.set_epoch(epoch)
        with experiment.train():
            deformation_settings['seed'] = (rng_seeds['data'] + epoch) % 2**32
            images, labels, weights = scalable_batch_generator(
                image=train_image,
                label=train_label,
                data_scale=data_scale,
                weight=train_weight,
                do_deformation=True,
                deformation_settings=deformation_settings,
                return_generators=True,
                **train_windowing_params)

            train_dataset = PlateletIterableDataset(images,
                                                    labels,
                                                    weights,
                                                    train=True)
            train_dataloader = DataLoader(train_dataset,
                                          batch_size=batch_size,
                                          num_workers=1)
            model.train()

            n_examples = 0

            for i, (x, y, w) in enumerate(train_dataloader):
                if show_train_image and i % its_per_image == 0:
                    x_img = np.squeeze(x.detach().numpy())[0]
                    cmap_gray = plt.get_cmap('gray')
                    x_img_rgb = cmap_gray(x_img)
                    if i == 0:
                        obj0 = axs[0].imshow(x_img_rgb)
                    else:
                        obj0.set_data(x_img_rgb)
                    axs[0].set_title(f'Train image {i}')
                assert len(torch.unique(y)) == 2
                n_examples += 1
                x = x.to(device)
                y = y.float().to(device)
                w = torch.squeeze(w.to(device))
                prediction = torch.squeeze(model(x), dim=1)
                if show_train_image and i % its_per_image == 0:
                    pred_img = np.squeeze(prediction.cpu().detach().numpy())[0]
                    cmap_pred = plt.get_cmap('Blues')
                    pred_img_rgb = cmap_pred(pred_img)
                    overlay_rgb = np.copy(x_img_rgb)
                    overlay_rgb[pred_img > 0.01, :] = pred_img_rgb[
                        pred_img > 0.01, :]
                    if i == 0:
                        obj1 = axs[1].imshow(overlay_rgb)
                    else:
                        obj1.set_data(overlay_rgb)
                    axs[1].set_title(f'Prediction overlay {i}')
                    plt.show()
                    plt.pause(0.0001)
                loss = F.binary_cross_entropy_with_logits(prediction,
                                                          torch.squeeze(y,
                                                                        dim=1),
                                                          reduction='none')
                loss = torch.sum(torch.mul(loss, w))

                optim.zero_grad()
                loss.backward()
                optim.step()
                experiment.log_metric('loss', loss.item())
                if show_train_report and (i + 1) % its_per_report == 0:
                    time1 = time.time()
                    dtime = time1 - time0
                    batch_per_sec = its_per_report / dtime
                    its_per_sec = batch_per_sec * batch_size
                    time0 = time1

                    print(f'Epoch [{epoch + 1}/{n_epochs}], Step {i + 1} '
                          f'({batch_per_sec:.2g} batch/sec, {its_per_sec:.2g} '
                          f'its/sec), Loss: {loss.item():.3f}')

                    classes = (prediction > detect_threshold).int()

                    if data_size_z > 1:
                        z_half = (data_size_z - 1) // 2
                        x = x[..., z_half, :, :]
                        classes = classes[..., z_half, :, :]
                        y = y[..., z_half, :, :]

                    if batch_size > 1:

                        images = (np.squeeze(x.cpu().numpy())[0],
                                  np.squeeze(classes.cpu().numpy())[0],
                                  np.squeeze(y.cpu().numpy())[0])
                    else:
                        images = (np.squeeze(x.cpu().numpy()),
                                  np.squeeze(classes.cpu().numpy()),
                                  np.squeeze(y.cpu().numpy()))

                    fig = imshow(images, (15, 5), plot_settings)
                    experiment.log_figure(
                        figure_name=f"epoch_{epoch + 1}_step_{i + 1}",
                        figure=fig)
                    plt.close(fig)

            print(f'Epoch {epoch}: {n_examples} batches')

        # Get stats on eval data
        eval_images = [eval_near_image, eval_far_image]
        eval_labels = [eval_near_label, eval_far_label]
        eval_names = ['Near', 'Far']
        for eval_image, eval_label, eval_name in \
                zip(eval_images, eval_labels, eval_names):
            if eval_image.shape[0] > 1:
                save_file = os.path.join(media_dir, f'epoch_{epoch:04}.tif')
            else:
                save_file = os.path.join(media_dir, f'epoch_{epoch:04}.png')
            print('\n==========')
            print(f'Eval Stats {eval_name}, Epoch {epoch}:')
            eval_segmentation = segment_online(eval_image,
                                               model,
                                               forward_windowing_params,
                                               save_file,
                                               window_spacing=None,
                                               device=device)

            z_half = eval_segmentation.shape[0] // 2

            eval_seg_img = np.squeeze(eval_segmentation).astype(float)
            if eval_seg_img.ndim == 3:
                eval_seg_img = eval_seg_img[z_half]
            with experiment.validate():
                experiment.log_image(eval_seg_img,
                                     name=f'Eval {eval_name} {epoch}',
                                     image_colormap='jet')

                # Compute eval MIoU
                miou = jaccard_score(eval_label.flatten(),
                                     eval_segmentation.flatten(),
                                     average='macro')

                print(f'  Mean IoU ({eval_name}): {miou}')
                experiment.log_metric(f'eval_{eval_name.lower()}_miou', miou)

                # Compute false positive and false negative rates, using several
                # size thresholds for detection regions

                false_negatives = eval_label.astype(bool) & ~eval_segmentation
                false_positives = ~eval_label.astype(bool) & eval_segmentation

                neg_percs = []
                pos_percs = []

                thresholds = [0, 5, 65, 401]
                for threshold in thresholds:
                    if threshold == 0:
                        thresholded_fn = false_negatives
                        thresholded_fp = false_positives
                    else:
                        thresholded_fn = remove_small_objects(
                            false_negatives, threshold)
                        thresholded_fp = remove_small_objects(
                            false_positives, threshold)

                    neg_perc = thresholded_fn.sum() / thresholded_fn.size * 100
                    neg_percs.append(neg_perc)
                    pos_perc = thresholded_fp.sum() / thresholded_fp.size * 100
                    pos_percs.append(pos_perc)

                    experiment.log_metric(
                        f'eval_{eval_name.lower()}_fn-{threshold}', neg_perc)
                    experiment.log_metric(
                        f'eval_{eval_name.lower()}_fp-{threshold}', pos_perc)

                neg_str = '.  '.join(
                    [f'{p:.3f}% ({t})' for p, t in zip(neg_percs, thresholds)])
                pos_str = '.  '.join(
                    [f'{p:.3f}% ({t})' for p, t in zip(pos_percs, thresholds)])

                print('  False negatives (min size): ' + neg_str)
                print('  False positives (min size): ' + pos_str)
                print('==========\n')

                if eval_name == 'Near':
                    if miou > best_miou:
                        best_weight_path = os.path.join(
                            output_dir, 'best_weights.pth')
                        print("Saving best model")
                        best_miou = miou
                        torch.save(model.state_dict(), best_weight_path)

        # Save trained weights after specified epochs
        if epoch in weight_save_epochs:
            weight_save_path = os.path.join(output_dir, f'weights_{epoch}.pth')
            torch.save(model.state_dict(), weight_save_path)

        lr_scheduler.step(loss)

    experiment.end()
    return model
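`DuplicateFilter`, attached to the logger near the top of main(), is not defined in the snippet. A common implementation suppresses consecutive repeated messages; a sketch of that pattern:

import logging

class DuplicateFilter(logging.Filter):
    # drop a record when it repeats the immediately preceding message
    def filter(self, record):
        current = (record.module, record.levelno, record.getMessage())
        if current != getattr(self, 'last_log', None):
            self.last_log = current
            return True
        return False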
        bird_view_image)

    # lane marking points in bird view
    left_lane_marking_points = find_lane_marking_points(
        bird_view_image, left_lane_marking_u)
    right_lane_marking_points = find_lane_marking_points(
        bird_view_image, right_lane_marking_u)

    # transform lane marking points back to front view, with the inverse perspective transform matrix
    left_points_front_view = convert_points_to_front_view(
        transform, left_lane_marking_points)
    right_points_front_view = convert_points_to_front_view(
        transform, right_lane_marking_points)

    # three order polynomial curve fitting
    left_poly_curve = PolynomialCurve(start_v, end_v, left_points_front_view)
    right_poly_curve = PolynomialCurve(start_v, end_v, right_points_front_view)
    # draw polynomial curve on original image
    bgr_image = draw_polynomial_curve(bgr_image, left_poly_curve)
    bgr_image = draw_polynomial_curve(bgr_image, right_poly_curve)
    # draw drivable area
    bgr_image = draw_drivable_area(bgr_image, left_poly_curve,
                                   right_poly_curve)
    return bgr_image


if __name__ == "__main__":
    test_image_filename = "../data/test.png"
    lane_marking = process_image(test_image_filename)
    imshow(lane_marking)
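`convert_points_to_front_view` is not shown; mapping bird-view points back through the inverse homography could look like this sketch, assuming `transform` is the 3x3 front-to-bird perspective matrix:

import cv2
import numpy as np

def convert_points_to_front_view(transform, points):
    # invert the front-view -> bird-view homography and map the points back
    inverse = np.linalg.inv(transform)
    pts = np.asarray(points, dtype=np.float32).reshape(-1, 1, 2)
    return cv2.perspectiveTransform(pts, inverse).reshape(-1, 2)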
def detectPage(image):

    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # convert the image to grayscale, blur it, and find edges
    # in the image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)
    utils.imshow(edged)
    # show the original image and the edge detected image
    # print("STEP 1: Edge Detection")
    # cv2.imshow("Image", image)
    # cv2.imshow("Edged", edged)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    # find the contours in the edged image, keeping only the
    # largest ones, and initialize the screen contour
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)  # handles OpenCV 2/3/4 return signatures
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    # loop over the contours
    screenCnt = None
    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # if our approximated contour has four points, then we
        # can assume that we have found our screen
        if len(approx) == 4:
            screenCnt = approx
            break

    # show the contour (outline) of the piece of paper
    if screenCnt is None:
        print("Page not detected")
        return orig
    # print("STEP 2: Find contours of paper")
    # cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    # cv2.imshow("Outline", image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    # apply the four point transform to obtain a top-down
    # view of the original image
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

    # convert the warped image to grayscale, then threshold it
    # to give it that 'black and white' paper effect
    # warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    # T = threshold_local(warped, 11, offset=10, method="gaussian")
    # warped = np.uint8(warped > T) * 255

    # show the original and scanned images
    # print("STEP 3: Apply perspective transform")
    # cv2.imshow("Original", imutils.resize(orig, height=650))
    # cv2.imshow("Scanned", imutils.resize(warped, height=650))
    # cv2.waitKey(0)
    return warped
    # print("STEP 2: Find contours of paper")
    # cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    # cv2.imshow("Outline", image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    # apply the four point transform to obtain a top-down
    # view of the original image
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

    # convert the warped image to grayscale, then threshold it
    # to give it that 'black and white' paper effect
    # warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    # T = threshold_local(warped, 11, offset=10, method="gaussian")
    # warped = np.uint8(warped > T) * 255

    # show the original and scanned images
    # print("STEP 3: Apply perspective transform")
    # cv2.imshow("Original", imutils.resize(orig, height=650))
    # cv2.imshow("Scanned", imutils.resize(warped, height=650))
    # cv2.waitKey(0)
    return warped


if __name__ == "__main__":
    loc = "C:/Users/Abhishek Bansal/Desktop/Image Processing/Logo Identification/Logo2.jpg"
    image = cv2.imread(loc)
    utils.imshow(image)
    page = detectPage(image)
    page = utils.resize(page)
    utils.imshow(page)
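`utils.resize` appears throughout these examples; an aspect-preserving downscale is the likely intent (the target width is an assumption):

import cv2

def resize(image, width=500):
    # scale to a fixed width while preserving the aspect ratio
    h, w = image.shape[:2]
    scale = width / float(w)
    return cv2.resize(image, (width, int(round(h * scale))))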