Example 1
    def __call__(self, input):
        with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
                # Only backprop this network if trainable
                trainable=True,
                # He (aka MSRA) weight initialization
                weights_initializer=slim.variance_scaling_initializer(),
                activation_fn=LeakyReLU,
                # We will do our own padding to match the original Caffe code
                padding='VALID'):

            weights_regularizer = slim.l2_regularizer(0.0004)
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=weights_regularizer):
                with slim.arg_scope([slim.conv2d], stride=2):
                    conv_a_1 = slim.conv2d(pad(input, 3),
                                           64,
                                           7,
                                           scope='FlowNetC/conv1',
                                           reuse=self.reuse)
                    conv_a_2 = slim.conv2d(pad(conv_a_1, 2),
                                           128,
                                           5,
                                           scope='FlowNetC/conv2',
                                           reuse=self.reuse)
                    conv_a_3 = slim.conv2d(pad(conv_a_2, 2),
                                           256,
                                           5,
                                           scope='FlowNetC/conv3',
                                           reuse=self.reuse)

        return {'conv_1': conv_a_1, 'conv_2': conv_a_2, 'conv_3': conv_a_3}
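The pad() helper used above (and again in Example 12 below) is not shown on this page; judging from calls such as pad(input, 3) before a 7x7 VALID convolution, a minimal sketch of what it is assumed to do is:

import tensorflow as tf

def pad(tensor, p=1):
    # Assumed behaviour: zero-pad only the spatial dimensions of an NHWC
    # tensor by p pixels per side, so the following VALID convolution
    # reproduces Caffe-style explicit padding.
    return tf.pad(tensor, [[0, 0], [p, p], [p, p], [0, 0]])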
Example 2
def patch_based_trimming(image, trimap, minDist, maxDist, windowRadius, K):
    is_fg = trimap > 0.8
    is_bg = trimap < 0.2
    is_known = np.logical_or(is_fg, is_bg)
    is_unknown = np.logical_not(is_known)

    trimap = trimap.copy()
    height, width, depth = image.shape

    eps = 1e-8

    # shape: h w 3
    means = make_windows(pad(image)).mean(axis=2)
    # shape: h w 9 3
    centered_neighbors = make_windows(pad(image)) - means.reshape(height, width, 1, depth)
    # shape: h w 3 3
    covariance = mul_matT_mat(centered_neighbors, centered_neighbors) / (3 * 3) + eps / (3 * 3) * np.eye(3, 3)

    unk_ind, fg_neigh = find_non_local_neighbors(means, K, None, is_unknown, is_fg)
    _, bg_neigh = find_non_local_neighbors(means, K, None, is_unknown, is_bg)

    mean_image = means.reshape(height * width, depth)

    covariance = covariance.reshape(height * width, 3, 3)

    pix_means = mean_image[unk_ind]
    pix_covars = covariance[unk_ind]
    pix_dets = np.linalg.det(pix_covars)
    pix_covars = pix_covars.reshape(unk_ind.shape[0], 1, 3, 3)

    n_means = mean_image[fg_neigh] - pix_means.reshape(unk_ind.shape[0], 1, 3)
    n_covars = covariance[fg_neigh]
    n_dets = np.linalg.det(n_covars)
    n_covars = (pix_covars + n_covars) / 2

    # Bhattacharyya distance between the unknown pixel's local Gaussian and
    # each of its K foreground neighbours' Gaussians
    fg_bhatt = 0.125 * mul_vec_mat_vec(n_means, np.linalg.inv(n_covars), n_means) + 0.5 * np.log(
        np.linalg.det(n_covars) / np.sqrt(pix_dets[:, None] * n_dets))

    n_means = mean_image[bg_neigh] - pix_means.reshape(unk_ind.shape[0], 1, 3)
    n_covars = covariance[bg_neigh]
    n_dets = np.linalg.det(n_covars)
    n_covars = (pix_covars + n_covars) / 2

    # Same Bhattacharyya distance, this time against the K background neighbours
    bg_bhatt = 0.125 * mul_vec_mat_vec(n_means, np.linalg.inv(n_covars), n_means) + 0.5 * np.log(
        np.linalg.det(n_covars) / np.sqrt(pix_dets[:, None] * n_dets))

    shape = trimap.shape

    min_f_gdist = np.min(fg_bhatt, axis=1)
    min_b_gdist = np.min(bg_bhatt, axis=1)

    # Unknown pixels close to a background patch but far from every foreground patch
    mask0 = np.logical_and(min_b_gdist < minDist, min_f_gdist > maxDist)
    # Unknown pixels close to a foreground patch but far from every background patch
    mask1 = np.logical_and(min_f_gdist < minDist, min_b_gdist > maxDist)

    trimap[np.unravel_index(unk_ind[mask0], shape)] = 0
    trimap[np.unravel_index(unk_ind[mask1], shape)] = 1

    return trimap
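The fg_bhatt/bg_bhatt expressions above are the Bhattacharyya distance between the Gaussian fitted around an unknown pixel and the Gaussian of a known neighbour patch; written out for a single pair of patches, the same formula reads (reference sketch only):

import numpy as np

def bhattacharyya_distance(mu1, cov1, mu2, cov2):
    # Same 0.125 * (...) + 0.5 * log(...) expression as fg_bhatt / bg_bhatt,
    # for one pair of 3-channel Gaussians.
    cov = (cov1 + cov2) / 2
    diff = mu1 - mu2
    term1 = 0.125 * diff @ np.linalg.inv(cov) @ diff
    term2 = 0.5 * np.log(np.linalg.det(cov) /
                         np.sqrt(np.linalg.det(cov1) * np.linalg.det(cov2)))
    return term1 + term2

print(bhattacharyya_distance(np.zeros(3), np.eye(3), np.ones(3), 2 * np.eye(3)))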
Example 3
def detect_rnet(im, dets, thresh):
    """Filter candidate boxes with RNet.
        Args:
          im: input image
          dets: boxes selected by PNet, given as absolute coordinates in the original image
        Returns:
          boxes in absolute coordinates
    """
    h, w, c = im.shape
    # Expand the PNet boxes into the squares that contain them, to avoid losing information
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # Clip boxes that extend beyond the image
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
    delete_size = np.ones_like(tmpw) * 20
    ones = np.ones_like(tmpw)
    zeros = np.zeros_like(tmpw)
    num_boxes = np.sum(
        np.where((np.minimum(tmpw, tmph) >= delete_size), ones, zeros))
    cropped_ims = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    if int(num_boxes) == 0:
        print('PNet detection returned no boxes!')
        return None, None
    for i in range(int(num_boxes)):
        # Crop the PNet box out of the original image; out-of-bounds regions are zero-filled
        if tmph[i] < 20 or tmpw[i] < 20:
            continue
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        try:
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1,
                                                            x[i]:ex[i] + 1, :]
            img = cv2.resize(tmp, (24, 24))
            img = img.transpose((2, 0, 1))
            img = (img - 127.5) / 128
            cropped_ims[i, :, :, :] = img
        except Exception:
            continue
    cls_scores, reg = predict_rnet(cropped_ims)
    cls_scores = cls_scores[:, 1]
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    keep = py_nms(boxes, 0.6, mode='Union')
    boxes = boxes[keep]
    # Calibrate the cropped-box coordinates with the RNet regression output, producing absolute face boxes in the original image
    boxes_c = calibrate_box(boxes, reg[keep])
    return boxes, boxes_c
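convert_to_square() is called here and in Example 5 but is not defined on this page; a hypothetical sketch consistent with the "expand the PNet boxes into squares" comment is:

import numpy as np

def convert_to_square(bbox):
    # Hypothetical sketch: grow each (x1, y1, x2, y2, score) box into a square
    # of side max(w, h) centred on the original box.
    square = bbox.copy()
    w = bbox[:, 2] - bbox[:, 0] + 1
    h = bbox[:, 3] - bbox[:, 1] + 1
    side = np.maximum(w, h)
    square[:, 0] = bbox[:, 0] + w * 0.5 - side * 0.5
    square[:, 1] = bbox[:, 1] + h * 0.5 - side * 0.5
    square[:, 2] = square[:, 0] + side - 1
    square[:, 3] = square[:, 1] + side - 1
    return square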
Example 4
def gen_label_samples(np_label, dist_samples, tile_size):
    h, w = np_label.shape
    for row in range(0, h, dist_samples):
        for column in range(0, w, dist_samples):
            target = np_label[row:row + tile_size, column:column + tile_size]
            target_row = target.shape[0]
            target_col = target.shape[1]
            if target_row < tile_size or target_col < tile_size:
                padding = pad_diff(
                    target_row, target_col, tile_size, tile_size
                )  # array, actual height, actual width, desired size
                target = pad(target, padding, fill=-1)
            indices = (row, column)
            yield target, indices
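A quick way to exercise this generator (array size hypothetical; assumes the project's pad and pad_diff helpers are importable) is simply to iterate it:

import numpy as np

label = np.random.randint(0, 3, size=(1000, 1000))  # hypothetical label raster
for tile, (row, col) in gen_label_samples(label, dist_samples=512, tile_size=512):
    # interior tiles are exactly 512x512; edge tiles are padded with -1 above
    print(row, col, tile.shape)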
Example 5
def detect_onet(im, dets, thresh):
    """将onet的选框继续筛选基本和rnet差不多但多返回了landmark"""
    h, w, c = im.shape
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
    num_boxes = dets.shape[0]
    cropped_ims = np.zeros((num_boxes, 3, 48, 48), dtype=np.float32)
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1,
                                                        x[i]:ex[i] + 1, :]
        img = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_LINEAR)
        img = img.transpose((2, 0, 1))
        img = (img - 127.5) / 128
        cropped_ims[i, :, :, :] = img

    cls_scores, reg, landmark = predict_onet(cropped_ims)

    cls_scores = cls_scores[:, 1]
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None

    w = boxes[:, 2] - boxes[:, 0] + 1

    h = boxes[:, 3] - boxes[:, 1] + 1
    landmark[:, 0::2] = (np.tile(w, (5, 1)) * landmark[:, 0::2].T +
                         np.tile(boxes[:, 0], (5, 1)) - 1).T
    landmark[:, 1::2] = (np.tile(h, (5, 1)) * landmark[:, 1::2].T +
                         np.tile(boxes[:, 1], (5, 1)) - 1).T
    boxes_c = calibrate_box(boxes, reg)

    keep = py_nms(boxes_c, 0.6, mode='Minimum')
    boxes_c = boxes_c[keep]
    landmark = landmark[keep]
    return boxes_c, landmark
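py_nms() is used in both the RNet and ONet stages but is not shown here; a hedged sketch of a standard NumPy non-maximum suppression with the two overlap modes used above ('Union' = IoU, 'Minimum' = intersection over the smaller box) is:

import numpy as np

def py_nms(dets, thresh, mode='Union'):
    # Keep the highest-scoring box, drop boxes whose overlap with it exceeds
    # thresh, then repeat on the survivors.
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        if mode == 'Minimum':
            ovr = inter / np.minimum(areas[i], areas[order[1:]])
        else:
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep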
Example 6
def try_tensor_collate_fn(batch):
    """
    Try to batch the inputs and convert them into tensors, falling back to lists when that fails.
    In practice only numeric inputs become tensors; metadata such as SQL strings, database info and questions are kept as lists.
    """
    output = {}

    for example in batch:
        for key in example:
            if key in output:
                output[key] += [example[key]]
            else:
                output[key] = [example[key]]

    for key in output:
        try:
            output[key] = torch.tensor(pad(output[key])[0])
        except Exception:
            # non-numeric fields (e.g. strings) cannot be tensorised; keep them as lists
            pass
    return output
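A minimal sketch of how this collate function might be driven (field names hypothetical; it is normally passed to torch.utils.data.DataLoader via collate_fn):

batch = try_tensor_collate_fn([
    {'input_ids': [1, 2, 3], 'question': 'how many users signed up?'},
    {'input_ids': [4, 5], 'question': 'list all names'},
])
# 'input_ids' comes back as a padded tensor when the project's pad helper is
# importable; string fields such as 'question' always stay as Python lists.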
Example 7
def gen_img_samples(rst_pth, tile_size, dist_samples, *band_order):
    with rasterio.open(rst_pth) as src:
        for row in range(0, src.height, dist_samples):
            for column in range(0, src.width, dist_samples):
                window = Window.from_slices(slice(row, row + tile_size),
                                            slice(column, column + tile_size))
                if band_order:
                    window_array = reshape_as_image(
                        src.read(band_order[0], window=window))
                else:
                    window_array = reshape_as_image(src.read(window=window))

                if window_array.shape[0] < tile_size or window_array.shape[
                        1] < tile_size:
                    padding = pad_diff(window_array.shape[0],
                                       window_array.shape[1], tile_size,
                                       tile_size)
                    window_array = pad(window_array, padding, fill=np.nan)

                yield window_array
Example 8
    def __call__(self, sample):
        """
        Args:
            sample (ndarray): Image to be cropped.

        Returns:
            ndarray: Cropped image.
        """
        sat_img = sample['sat_img']
        map_img = sample['map_img']

        if self.padding is not None:
            sat_img = pad(sat_img, self.padding,
                          np.nan)  # Pad with nan values for sat_img
            map_img = pad(
                map_img, self.padding,
                self.ignore_index)  # Pad with dontcare values for map_img

        # pad the height if needed
        if self.pad_if_needed and sat_img.shape[0] < self.size[0]:
            sat_img = pad(sat_img, (0, self.size[0] - sat_img.shape[0]),
                          np.nan)
        # pad the width if needed
        if self.pad_if_needed and sat_img.shape[1] < self.size[1]:
            sat_img = pad(sat_img, (self.size[1] - sat_img.shape[1], 0), np.nan)

        # pad the height if needed
        if self.pad_if_needed and map_img.shape[0] < self.size[0]:
            map_img = pad(map_img, (0, self.size[0] - map_img.shape[0]),
                          self.ignore_index)
        # pad the width if needed
        if self.pad_if_needed and map_img.shape[1] < self.size[1]:
            map_img = pad(map_img, (self.size[1] - map_img.shape[1], 0),
                          self.ignore_index)

        i, j, h, w = self.get_params(sat_img, self.size)

        sat_img = sat_img[i:i + h, j:j + w]
        map_img = map_img[i:i + h, j:j + w]

        sample['sat_img'] = sat_img
        sample['map_img'] = map_img
        return sample
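self.get_params() is not shown; a hypothetical sketch of a torchvision-style random-crop parameter helper consistent with how it is called above:

import random

def get_params(img, output_size):
    # Hypothetical sketch: choose a random top-left corner (i, j) so that an
    # output_size crop fits inside img (an H x W x C ndarray).
    h, w = img.shape[0], img.shape[1]
    th, tw = output_size
    if h == th and w == tw:
        return 0, 0, h, w
    i = random.randint(0, h - th)
    j = random.randint(0, w - tw)
    return i, j, th, tw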
Example 9
def samples_preparation(in_img_array,
                        label_array,
                        sample_size,
                        overlap,
                        samples_count,
                        num_classes,
                        samples_file,
                        val_percent,
                        val_sample_file,
                        dataset,
                        pixel_classes,
                        image_metadata=None,
                        dontcare=0,
                        min_annot_perc=None,
                        class_prop=None):
    """
    Extract and write samples from input image and reference image
    :param in_img_array: numpy array of the input image
    :param label_array: numpy array of the annotation image
    :param sample_size: (int) Size (in pixel) of the samples to create # TODO: could there be a different sample size for tst dataset? shows results closer to inference
    :param overlap: (int) Desired overlap between samples in %
    :param samples_count: (dict) Current number of samples created (will be updated and returned)
    :param num_classes: (int) Number of classes in reference data (will be updated and returned)
    :param samples_file: (hdf5 dataset) HDF5 file where samples will be written
    :param val_percent: (int) percentage of validation samples
    :param val_sample_file: (hdf5 dataset) HDF5 file where validation samples will be written
    :param dataset: (str) Type of dataset where the samples will be written. Can be 'trn' or 'val' or 'tst'
    :param pixel_classes: (dict) samples pixel statistics
    :param image_metadata: (dict) metadata associated to source raster
    :param dontcare: Value in gpkg features that will be ignored during training
    :param min_annot_perc: optional, minimum annotated percent required for sample to be created
    :param class_prop: optional, minimal proportion of pixels for each class required for sample to be created
    :return: updated samples count and number of classes.
    """

    # read input and reference images as array
    h, w, num_bands = in_img_array.shape
    if dataset == 'trn':
        idx_samples = samples_count['trn']
        append_to_dataset(val_sample_file["metadata"], repr(image_metadata))
    elif dataset == 'tst':
        idx_samples = samples_count['tst']
    else:
        raise ValueError(
            f"Dataset value must be trn or tst. Provided value is {dataset}")

    idx_samples_v = samples_count['val']

    # Adds raster metadata to the dataset. All samples created by tiling below will point to that metadata by index
    metadata_idx = append_to_dataset(samples_file["metadata"],
                                     repr(image_metadata))

    if overlap > 25:
        warnings.warn(
            "high overlap >25%, note that automatic train/val split creates very similar samples in both sets"
        )
    dist_samples = round(sample_size * (1 - (overlap / 100)))
    added_samples = 0
    excl_samples = 0

    with tqdm(
            range(0, h, dist_samples),
            position=1,
            leave=True,
            desc=f'Writing samples. Dataset currently contains {idx_samples} '
            f'samples') as _tqdm:

        for row in _tqdm:
            for column in range(0, w, dist_samples):
                data = (in_img_array[row:row + sample_size,
                                     column:column + sample_size, :])
                target = np.squeeze(
                    label_array[row:row + sample_size,
                                column:column + sample_size, :],
                    axis=2)
                data_row = data.shape[0]
                data_col = data.shape[1]
                if data_row < sample_size or data_col < sample_size:
                    padding = pad_diff(
                        data_row, data_col, sample_size
                    )  # array, actual height, actual width, desired size
                    data = pad(
                        data, padding, fill=np.nan
                    )  # don't fill with 0 if possible. Creates false min value when scaling.

                target_row = target.shape[0]
                target_col = target.shape[1]
                if target_row < sample_size or target_col < sample_size:
                    padding = pad_diff(
                        target_row, target_col, sample_size
                    )  # array, actual height, actual width, desired size
                    target = pad(target, padding, fill=dontcare)
                u, count = np.unique(target, return_counts=True)
                target_background_percent = round(
                    count[0] / np.sum(count) * 100 if 0 in u else 0, 1)

                sample_metadata = {'sample_indices': (row, column)}

                val = False
                if minimum_annotated_percent(target_background_percent, min_annot_perc) and \
                        class_proportion(target, sample_size, class_prop):
                    val = add_to_datasets(dataset=dataset,
                                          samples_file=samples_file,
                                          val_percent=val_percent,
                                          val_sample_file=val_sample_file,
                                          data=data,
                                          target=target,
                                          sample_metadata=sample_metadata,
                                          metadata_idx=metadata_idx,
                                          dict_classes=pixel_classes)
                    if val:
                        idx_samples_v += 1
                    else:
                        idx_samples += 1
                    added_samples += 1
                else:
                    excl_samples += 1

                target_class_num = np.max(u)
                if num_classes < target_class_num:
                    num_classes = target_class_num

                final_dataset = 'val' if val else dataset
                _tqdm.set_postfix(
                    Dataset=final_dataset,
                    Excld_samples=excl_samples,
                    Added_samples=
                    f'{added_samples}/{len(_tqdm) * len(range(0, w, dist_samples))}',
                    Target_annot_perc=100 - target_background_percent)

    assert added_samples > 0, "No sample added for current raster. Problems may occur with use of metadata"
    if dataset == 'tst':
        samples_count['tst'] = idx_samples
    else:
        samples_count['trn'] = idx_samples
        samples_count['val'] = idx_samples_v
    # return the appended samples count and number of classes.
    return samples_count, num_classes
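minimum_annotated_percent() decides whether a tile carries enough annotation to be kept; a hypothetical sketch consistent with how it is called above:

def minimum_annotated_percent(target_background_percent, min_annot_perc):
    # Hypothetical sketch: keep the sample when no threshold is set, or when
    # the annotated (non-background) share of pixels reaches min_annot_perc (%).
    if min_annot_perc is None:
        return True
    return (100 - target_background_percent) >= min_annot_perc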
Example 10
def segmentation_with_smoothing(raster, clip_gpkg, model, sample_size, overlap,
                                num_bands, device):
    # switch to evaluate mode
    model.eval()
    img_array, input_image, dataset_nodata = image_reader_as_array(
        input_image=raster, clip_gpkg=clip_gpkg)
    metadata = add_metadata_from_raster_to_sample(img_array,
                                                  input_image,
                                                  meta_map=None,
                                                  raster_info=None)
    h, w, bands = img_array.shape
    assert num_bands <= bands, f"Num of specified bands is not compatible with image shape {img_array.shape}"
    if num_bands < bands:
        img_array = img_array[:, :, :num_bands]

    padding = int(round(sample_size * (1 - 1.0 / overlap)))
    padded_img = pad(img_array, padding=padding, fill=0)
    WINDOW_SPLINE_2D = _window_2D(window_size=sample_size, power=1)
    WINDOW_SPLINE_2D = np.moveaxis(WINDOW_SPLINE_2D, 2, 0)
    step = int(sample_size / overlap)
    h_, w_ = padded_img.shape[:2]
    pred_img = np.empty((h_, w_), dtype=np.uint8)
    for row in tqdm(range(0, h_ - sample_size + 1, step),
                    position=1,
                    leave=False,
                    desc='Inferring rows'):
        with tqdm(range(0, w_ - sample_size + 1, step),
                  position=2,
                  leave=False,
                  desc='Inferring columns') as _tqdm:
            for col in _tqdm:
                sample = {'sat_img': None, 'metadata': None}
                sample['metadata'] = metadata
                totensor_transform = augmentation.compose_transforms(
                    params, dataset="tst", type='totensor')
                sub_images = padded_img[row:row + sample_size,
                                        col:col + sample_size, :]
                sample['sat_img'] = sub_images
                sample = totensor_transform(sample)
                inputs = sample['sat_img'].unsqueeze_(0)
                inputs = inputs.to(device)

                if inputs.shape[1] == 4 and any(
                        "module.modelNIR" in s
                        for s in model.state_dict().keys()):
                    ############################
                    # Test Implementation of the NIR
                    ############################
                    # Init NIR   TODO: make a proper way to read the NIR channel
                    #                  and add an option to specify the index of the NIR channel
                    # Extract the NIR channel -> [batch size, H, W] since it's only one channel
                    inputs_NIR = inputs[:, -1, ...]
                    # add a channel to get the good size -> [:, 1, :, :]
                    inputs_NIR.unsqueeze_(1)
                    # take out the NIR channel and take only the RGB for the inputs
                    inputs = inputs[:, :-1, ...]
                    # Suggestion of implementation
                    #inputs_NIR = data['NIR'].to(device)
                    inputs = [inputs, inputs_NIR]
                    #outputs = model(inputs, inputs_NIR)
                    ############################
                    # End of the test implementation module
                    ############################

                outputs = model(inputs)
                # torchvision models give output in 'out' key.
                # May cause problems in future versions of torchvision.
                if isinstance(outputs,
                              OrderedDict) and 'out' in outputs.keys():
                    outputs = outputs['out']
                outputs = F.softmax(outputs,
                                    dim=1).squeeze(dim=0).cpu().numpy()
                outputs = WINDOW_SPLINE_2D * outputs
                outputs = outputs.argmax(axis=0)
                pred_img[row:row + sample_size,
                         col:col + sample_size] = outputs
    pred_img = pred_img[padding:-padding, padding:-padding]
    return pred_img[:h, :w]
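_window_2D() builds the weight mask that blends overlapping tile predictions; a hypothetical sketch of such a window (the actual implementation may use a spline rather than a triangular profile) is:

import numpy as np
from scipy.signal.windows import triang

def _window_2D(window_size, power=2):
    # A 1D triangular window raised to `power` and normalised, then
    # outer-multiplied into 2D so tile borders get low weight and tile
    # centres get high weight when overlapping predictions are merged.
    w = triang(window_size) ** power
    w = w / w.mean()
    return np.outer(w, w)[:, :, np.newaxis]  # (H, W, 1); moveaxis'd to (1, H, W) above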
Example 11
def segmentation(raster, clip_gpkg, model, sample_size, num_bands, device):
    # switch to evaluate mode
    model.eval()
    img_array, input_image, dataset_nodata = image_reader_as_array(
        input_image=raster, clip_gpkg=clip_gpkg)
    metadata = add_metadata_from_raster_to_sample(img_array,
                                                  input_image,
                                                  meta_map=None,
                                                  raster_info=None)
    h, w, bands = img_array.shape
    assert num_bands <= bands, f"Num of specified bands is not compatible with image shape {img_array.shape}"
    if num_bands < bands:
        img_array = img_array[:, :, :num_bands]
    h_ = sample_size * math.ceil(h / sample_size)
    w_ = sample_size * math.ceil(w / sample_size)
    pred_img = np.empty((h_, w_), dtype=np.uint8)
    for row in tqdm(range(0, h, sample_size),
                    position=1,
                    leave=False,
                    desc='Inferring rows'):
        with tqdm(range(0, w, sample_size),
                  position=2,
                  leave=False,
                  desc='Inferring columns') as _tqdm:
            for column in _tqdm:
                sample = {'sat_img': None, 'metadata': None}
                sample['metadata'] = metadata
                totensor_transform = augmentation.compose_transforms(
                    params, dataset="tst", type='totensor')
                sub_images = img_array[row:row + sample_size,
                                       column:column + sample_size, :]
                sub_images_row = sub_images.shape[0]
                sub_images_col = sub_images.shape[1]

                if sub_images_row < sample_size or sub_images_col < sample_size:
                    padding = pad_diff(actual_height=sub_images_row,
                                       actual_width=sub_images_col,
                                       desired_shape=sample_size)
                    sub_images = pad(
                        sub_images, padding, fill=0
                    )  # FIXME combine pad and pad_diff into one function
                sample['sat_img'] = sub_images
                sample = totensor_transform(sample)
                inputs = sample['sat_img'].unsqueeze_(0)
                inputs = inputs.to(device)

                if inputs.shape[1] == 4 and any(
                        "module.modelNIR" in s
                        for s in model.state_dict().keys()):
                    ############################
                    # Test Implementation of the NIR
                    ############################
                    # Init NIR   TODO: make a proper way to read the NIR channel
                    #                  and add an option to specify the index of the NIR channel
                    # Extract the NIR channel -> [batch size, H, W] since it's only one channel
                    inputs_NIR = inputs[:, -1, ...]
                    # add a channel to get the good size -> [:, 1, :, :]
                    inputs_NIR.unsqueeze_(1)
                    # take out the NIR channel and take only the RGB for the inputs
                    inputs = inputs[:, :-1, ...]
                    # Suggestion of implementation
                    #inputs_NIR = data['NIR'].to(device)
                    inputs = [inputs, inputs_NIR]
                    #outputs = model(inputs, inputs_NIR)
                    ############################
                    # End of the test implementation module
                    ############################

                outputs = model(inputs)
                # torchvision models give output in 'out' key. May cause problems in future versions of torchvision.
                if isinstance(outputs,
                              OrderedDict) and 'out' in outputs.keys():
                    outputs = outputs['out']
                outputs = F.softmax(
                    outputs, dim=1).argmax(dim=1).squeeze(dim=0).cpu().numpy()

                pred_img[row:row + sample_size,
                         column:column + sample_size] = outputs

    return pred_img[:h, :w]
Example 12
    def __call__(self, featureA, featureB):
        conv_a_2 = featureA['conv_2']
        conv_a_3 = featureA['conv_3']
        conv_b_3 = featureB['conv_3']
        # print(conv_a_3.get_shape().as_list())
        # print(conv_b_3.get_shape().as_list())
        with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
                # Only backprop this network if trainable
                trainable=True,
                # He (aka MSRA) weight initialization
                weights_initializer=slim.variance_scaling_initializer(),
                activation_fn=LeakyReLU,
                # We will do our own padding to match the original Caffe code
                padding='VALID'):

            cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20)
            cc_relu = LeakyReLU(cc)

            # Combine cross correlation results with convolution of feature map A
            net_conv = slim.conv2d(conv_a_3,
                                   32,
                                   1,
                                   scope='FlowNetC/conv_redir',
                                   reuse=self.reuse)
            # Concatenate along the channels axis
            net = tf.concat([net_conv, cc_relu], axis=3)

            conv3_1 = slim.conv2d(pad(net),
                                  256,
                                  3,
                                  scope='FlowNetC/conv3_1',
                                  reuse=self.reuse)
            # print(conv3_1.get_shape().as_list())
            with slim.arg_scope([slim.conv2d],
                                num_outputs=512,
                                kernel_size=3,
                                reuse=self.reuse):
                conv4 = slim.conv2d(pad(conv3_1),
                                    stride=2,
                                    scope='FlowNetC/conv4')

                conv4_1 = slim.conv2d(pad(conv4), scope='FlowNetC/conv4_1')

                conv5 = slim.conv2d(pad(conv4_1),
                                    stride=2,
                                    scope='FlowNetC/conv5')
                conv5_1 = slim.conv2d(pad(conv5), scope='FlowNetC/conv5_1')

            conv6 = slim.conv2d(pad(conv5_1),
                                1024,
                                3,
                                stride=2,
                                scope='FlowNetC/conv6',
                                reuse=self.reuse)
            # print(conv6.get_shape().as_list())
            conv6_1 = slim.conv2d(pad(conv6),
                                  1024,
                                  3,
                                  scope='FlowNetC/conv6_1',
                                  reuse=self.reuse)
            """ START: Refinement Network """
            with slim.arg_scope([slim.conv2d], reuse=self.reuse):
                with slim.arg_scope([slim.conv2d_transpose],
                                    biases_initializer=None,
                                    reuse=self.reuse):
                    predict_flow6 = slim.conv2d(pad(conv6_1),
                                                2,
                                                3,
                                                scope='FlowNetC/predict_flow6',
                                                activation_fn=None)
                    deconv5 = antipad(
                        slim.conv2d_transpose(conv6,
                                              512,
                                              4,
                                              stride=2,
                                              scope='FlowNetC/deconv5'))
                    upsample_flow6to5 = antipad(
                        slim.conv2d_transpose(
                            predict_flow6,
                            2,
                            4,
                            stride=2,
                            scope='FlowNetC/upsample_flow6to5',
                            activation_fn=None))
                    concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5],
                                        axis=3)

                    predict_flow5 = slim.conv2d(pad(concat5),
                                                2,
                                                3,
                                                scope='FlowNetC/predict_flow5',
                                                activation_fn=None)
                    deconv4 = antipad(
                        slim.conv2d_transpose(concat5,
                                              256,
                                              4,
                                              stride=2,
                                              scope='FlowNetC/deconv4'))
                    upsample_flow5to4 = antipad(
                        slim.conv2d_transpose(
                            predict_flow5,
                            2,
                            4,
                            stride=2,
                            scope='FlowNetC/upsample_flow5to4',
                            activation_fn=None))
                    concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4],
                                        axis=3)

                    predict_flow4 = slim.conv2d(pad(concat4),
                                                2,
                                                3,
                                                scope='FlowNetC/predict_flow4',
                                                activation_fn=None)
                    deconv3 = antipad(
                        slim.conv2d_transpose(concat4,
                                              128,
                                              4,
                                              stride=2,
                                              scope='FlowNetC/deconv3'))
                    upsample_flow4to3 = antipad(
                        slim.conv2d_transpose(
                            predict_flow4,
                            2,
                            4,
                            stride=2,
                            scope='FlowNetC/upsample_flow4to3',
                            activation_fn=None))
                    concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3],
                                        axis=3)

                    predict_flow3 = slim.conv2d(pad(concat3),
                                                2,
                                                3,
                                                scope='FlowNetC/predict_flow3',
                                                activation_fn=None)
                    deconv2 = antipad(
                        slim.conv2d_transpose(concat3,
                                              64,
                                              4,
                                              stride=2,
                                              scope='FlowNetC/deconv2'))
                    upsample_flow3to2 = antipad(
                        slim.conv2d_transpose(
                            predict_flow3,
                            2,
                            4,
                            stride=2,
                            scope='FlowNetC/upsample_flow3to2',
                            activation_fn=None))
                    concat2 = tf.concat([conv_a_2, deconv2, upsample_flow3to2],
                                        axis=3)

                    predict_flow2 = slim.conv2d(pad(concat2),
                                                2,
                                                3,
                                                scope='FlowNetC/predict_flow2',
                                                activation_fn=None)
            """ END: Refinement Network """

            flow = predict_flow2
            # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
            flow = tf.image.resize_bilinear(flow,
                                            tf.stack([self.height,
                                                      self.width]),
                                            align_corners=True)

        return {
            'predict_flows': [
                predict_flow6, predict_flow5, predict_flow4, predict_flow3,
                predict_flow2
            ],
            'flow':
            flow,
            'feature':
            conv3_1
        }
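antipad() is the counterpart of pad() in this code; a minimal sketch of its assumed behaviour (cropping the border added by the VALID transposed convolutions, assuming static spatial dimensions):

import tensorflow as tf

def antipad(tensor, num=1):
    # Assumed behaviour: crop `num` pixels from each spatial border of an
    # NHWC tensor, undoing the extra border produced by a VALID deconvolution.
    h, w = tensor.shape.as_list()[1:3]
    return tf.slice(tensor, begin=[0, num, num, 0],
                    size=[-1, h - 2 * num, w - 2 * num, -1])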
Example 13
def samples_preparation(in_img_array,
                        label_array,
                        sample_size,
                        overlap,
                        samples_count,
                        num_classes,
                        samples_file,
                        val_percent,
                        val_sample_file,
                        dataset,
                        pixel_classes,
                        dontcare,
                        image_metadata=None,
                        min_annot_perc=None,
                        class_prop=None,
                        stratd=None):
    """
    Extract and write samples from input image and reference image
    :param in_img_array: numpy array of the input image
    :param label_array: numpy array of the annotation image
    :param sample_size: (int) Size (in pixel) of the samples to create # TODO: could there be a different sample size for tst dataset? shows results closer to inference
    :param overlap: (int) Desired overlap between samples in %
    :param samples_count: (dict) Current number of samples created (will be updated and returned)
    :param num_classes: (int) Number of classes in reference data (will be updated and returned)
    :param samples_file: (hdf5 dataset) HDF5 file where samples will be written
    :param val_percent: (int) percentage of validation samples
    :param val_sample_file: (hdf5 dataset) HDF5 file where validation samples will be written
    :param dataset: (str) Type of dataset where the samples will be written. Can be 'trn' or 'val' or 'tst'
    :param pixel_classes: (dict) samples pixel statistics
    :param image_metadata: (dict) metadata associated to source raster
    :param dontcare: Value in gpkg features that will be ignored during training
    :param min_annot_perc: optional, minimum annotated percent required for sample to be created
    :param class_prop: optional, minimal proportion of pixels for each class required for sample to be created
    :return: updated samples count and number of classes.
    """

    # read input and reference images as array
    h, w, num_bands = in_img_array.shape
    if dataset == 'trn':
        idx_samples = samples_count['trn']
        append_to_dataset(val_sample_file["metadata"], repr(image_metadata))
    elif dataset == 'tst':
        idx_samples = samples_count['tst']
    else:
        raise ValueError(
            f"Dataset value must be trn or tst. Provided value is {dataset}")

    idx_samples_v = samples_count['val']

    # Adds raster metadata to the dataset. All samples created by tiling below will point to that metadata by index
    metadata_idx = append_to_dataset(samples_file["metadata"],
                                     repr(image_metadata))

    if overlap > 25:
        logging.warning(
            "\nhigh overlap >25%, note that automatic train/val split creates very similar samples in both sets"
        )
    dist_samples = round(sample_size * (1 - (overlap / 100)))
    added_samples = 0
    excl_samples = 0

    # with tqdm(range(0, h, dist_samples), position=1, leave=True,
    #           desc=f'Writing samples. Dataset currently contains {idx_samples} '
    #                f'samples') as _tqdm:
    with tqdm(range(0, h, dist_samples), position=1, leave=True) as _tqdm:
        for row in _tqdm:
            for column in range(0, w, dist_samples):
                data = (in_img_array[row:row + sample_size,
                                     column:column + sample_size, :])
                target = np.squeeze(
                    label_array[row:row + sample_size,
                                column:column + sample_size, :],
                    axis=2)
                data_row = data.shape[0]
                data_col = data.shape[1]
                if data_row < sample_size or data_col < sample_size:
                    padding = pad_diff(
                        data_row,
                        data_col,
                        sample_size,
                        sample_size  # array, actual height, actual width, desired size
                    )
                    # don't fill with 0 if possible. Creates false min value when scaling.
                    data = pad(data, padding, fill=np.nan)

                target_row = target.shape[0]
                target_col = target.shape[1]
                if target_row < sample_size or target_col < sample_size:
                    padding = pad_diff(
                        target_row, target_col, sample_size, sample_size
                    )  # array, actual height, actual width, desired size
                    target = pad(target, padding, fill=dontcare)
                backgr_ct = np.sum(target == 0)
                backgr_ct += np.sum(target == dontcare)
                target_background_percent = round(
                    backgr_ct / target.size * 100, 1)

                sample_metadata = {'sample_indices': (row, column)}

                # Stratification bias
                if (stratd is not None) and (dataset == 'trn'):
                    tile_size = target.size
                    u, count = np.unique(target, return_counts=True)
                    tile_counts = {x: y for x, y in zip(u, count)}
                    tile_props = {x: y / tile_size for x, y in zip(u, count)}
                    for key in tile_props.keys():
                        if key not in stratd['trn']['total_counts']:
                            stratd['trn']['total_counts'][key] = 0
                        if key not in stratd['val']['total_counts']:
                            stratd['val']['total_counts'][key] = 0
                    if stratd['trn']['total_pixels'] == 0:
                        stratd['trn']['total_props'] = {
                            key: 0.0
                            for key in stratd['trn']['total_counts'].keys()
                        }
                    else:
                        stratd['trn']['total_props'] = {
                            key: val / stratd['trn']['total_pixels']
                            for key, val in stratd['trn']
                            ['total_counts'].items()
                        }
                    if stratd['val']['total_pixels'] == 0:
                        stratd['val']['total_props'] = {
                            key: 0.0
                            for key in stratd['val']['total_counts'].keys()
                        }
                    else:
                        stratd['val']['total_props'] = {
                            key: val / stratd['val']['total_pixels']
                            for key, val in stratd['val']
                            ['total_counts'].items()
                        }
                    distances_trn = {
                        key: np.abs(val - stratd['trn']['total_props'][key])
                        for key, val in tile_props.items()
                    }
                    distances_val = {
                        key: np.abs(val - stratd['val']['total_props'][key])
                        for key, val in tile_props.items()
                    }
                    dist_trn = np.mean(
                        np.array(list(distances_trn.values()))**2)
                    dist_val = np.mean(
                        np.array(list(distances_val.values()))**2)
                    dist = dist_val - dist_trn
                    stratification_bias = stratd['strat_factor'] * np.sign(
                        dist)
                else:
                    stratification_bias = 0.0

                val = False
                if minimum_annotated_percent(target_background_percent, min_annot_perc) and \
                        class_proportion(target, sample_size, class_prop):
                    val = add_to_datasets(
                        dataset=dataset,
                        samples_file=samples_file,
                        val_percent=val_percent,
                        val_sample_file=val_sample_file,
                        data=data,
                        target=target,
                        sample_metadata=sample_metadata,
                        metadata_idx=metadata_idx,
                        dict_classes=pixel_classes,
                        stratification_bias=stratification_bias,
                        stratification_dict=stratd)
                    if val:
                        idx_samples_v += 1
                    else:
                        idx_samples += 1
                    added_samples += 1

                    # Stratification update
                    if (stratd is not None) and (dataset == 'trn'):
                        for key, val in tile_counts.items():
                            stratd[stratd['latest_assignment']][
                                'total_counts'][key] += val
                        stratd[stratd['latest_assignment']][
                            'total_pixels'] += tile_size

                else:
                    excl_samples += 1

                target_class_num = np.max(target)
                if num_classes < target_class_num:
                    num_classes = target_class_num

                final_dataset = 'val' if val else dataset
                logging.debug(
                    f'Dset={final_dataset}, '
                    f'Added samps={added_samples}/{len(_tqdm) * len(range(0, w, dist_samples))}, '
                    f'Excld samps={excl_samples}/{len(_tqdm) * len(range(0, w, dist_samples))}, '
                    f'Target annot perc={100 - target_background_percent:.1f}')

    if added_samples == 0:
        logging.warning(
            f"No sample added for current raster. Problems may occur with use of metadata"
        )
    if dataset == 'tst':
        samples_count['tst'] = idx_samples
    else:
        samples_count['trn'] = idx_samples
        samples_count['val'] = idx_samples_v
    # return the appended samples count and number of classes.
    return samples_count, num_classes
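class_proportion() is the second filter applied before a tile is written; a hypothetical sketch consistent with how it is called above (class_prop assumed to map class ids to a minimum percentage of the tile):

import numpy as np

def class_proportion(target, sample_size, class_prop):
    # Hypothetical sketch: reject the tile if any listed class covers less
    # than its required percentage of the sample_size x sample_size tile.
    if not class_prop:
        return True
    for cls, min_prop in class_prop.items():
        prop = np.sum(target == int(cls)) / (sample_size ** 2) * 100
        if prop < min_prop:
            return False
    return True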
Example 14
def local(image, trimap, window_radius=1, epsilon=1e-7):
    height, width, depth = image.shape
    n = height * width
    is_fg = trimap > 0.8
    is_bg = trimap < 0.2
    is_known = np.logical_or(is_fg, is_bg)
    is_unknown = np.logical_not(is_known)

    dil_unk = imdilate(is_unknown, window_radius)

    window_size = (2 * window_radius + 1)**2

    # shape: h w 3
    means = make_windows(pad(image)).mean(axis=2)
    # shape: h w 9 3
    centered_neighbors = make_windows(pad(image)) - means.reshape(
        height, width, 1, depth)
    # shape: h w 3 3
    covariance = mul_matT_mat(centered_neighbors,
                              centered_neighbors) / window_size

    inv_cov = np.linalg.inv(covariance + epsilon / window_size * np.eye(3, 3))

    indices = np.arange(width * height).reshape(height, width)
    neigh_ind = make_windows(indices)

    in_map = dil_unk[window_radius:-window_radius,
                     window_radius:-window_radius]

    neigh_ind = neigh_ind.reshape(-1, window_size)

    neigh_ind = neigh_ind[in_map.flatten()]

    in_ind = neigh_ind[:, window_size // 2]

    image = image.reshape(-1, 3)
    means = means.reshape(-1, 3)
    inv_cov = inv_cov.reshape(-1, 3, 3)

    centered_neighbors = image[neigh_ind] - means[in_ind].reshape(-1, 1, 3)

    weights = mul_mat_mat_matT(centered_neighbors, inv_cov[in_ind],
                               centered_neighbors)

    flow_cols = np.repeat(neigh_ind, window_size,
                          axis=1).reshape(-1, window_size, window_size)
    flow_rows = flow_cols.transpose(0, 2, 1)

    weights = (weights + 1) / window_size

    flow_rows = flow_rows.flatten()
    flow_cols = flow_cols.flatten()
    weights = weights.flatten()

    W = csc_matrix((weights, (flow_rows, flow_cols)), shape=(n, n))

    W = W + W.T

    W_row_sum = np.array(W.sum(axis=1)).flatten()
    W_row_sum[W_row_sum < 0.05] = 1.0

    return diags(1 / W_row_sum).dot(W)
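make_windows() appears in Examples 2 and 14 but is not defined on this page; a hedged NumPy sketch consistent with the shape comments ('h w 9 3' for an image padded by one pixel) is:

import numpy as np

def make_windows(image, radius=1):
    # For every interior pixel of an array already padded by `radius`, gather
    # its (2*radius+1)**2 neighbourhood: a padded (H+2, W+2, 3) image gives
    # shape (H, W, 9, 3), while an unpadded 2D index grid gives (H-2, W-2, 9).
    h = image.shape[0] - 2 * radius
    w = image.shape[1] - 2 * radius
    shifts = [
        image[dy:dy + h, dx:dx + w]
        for dy in range(2 * radius + 1)
        for dx in range(2 * radius + 1)
    ]
    return np.stack(shifts, axis=2)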