Example #1
# Assumed imports for this example; imread/imsave presumably come from
# skimage.io, while resample and the bare get_lung_mask/normalize/resize
# calls further below are project-local helpers.
import os
import pickle
import sys

import numpy as np
from PIL import Image, ImageEnhance
from skimage.io import imread, imsave

import preprocess_helpers


def preprocess_train(datapath, processedpath):
    """Preprocess training slices: mask the lung field, resize, normalize,
    enhance contrast, and save each image alongside its resized label."""
    os.mkdir(processedpath)
    os.mkdir(f'{processedpath}/image')
    os.mkdir(f'{processedpath}/label')

    idxs = range(len(os.listdir(f'{datapath}/image/')))
    n = len(idxs)
    for i, idx in enumerate(idxs):
        sys.stdout.write(f'\rProcessing...{i+1}/{n}')
        sys.stdout.flush()
        img = imread(f'{datapath}/image/{idx}.tif')

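        # Mask out everything outside the lung field; if a hand-drawn mask
        # exists in data/special_train_masks it is unioned with the automatic one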
        lung_mask = preprocess_helpers.get_lung_mask(img).astype('float')
        if str(idx) + '.tif' in os.listdir('data/special_train_masks'):
            lung_mask += imread(f'data/special_train_masks/{idx}.tif').astype(
                'float')
            lung_mask = np.clip(lung_mask, 0, 1)
        lung_mask = preprocess_helpers.resize(lung_mask)
        if lung_mask.sum() == 0:
            sys.stdout.write(
                f'\rEmpty lung field returned for image {idx}. Skipping\n')
            continue
        img = preprocess_helpers.normalize(img)
        img = preprocess_helpers.resize(img)
        img = img * lung_mask
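        # Enhance contrast with Pillow before saving the masked slice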
        pil_im = Image.fromarray(img)
        enhancer = ImageEnhance.Contrast(pil_im)
        enhanced_im = enhancer.enhance(2.0)
        np_im = np.array(enhanced_im)
        imsave(f'{processedpath}/image/{idx}.tif', np_im)

        mask = imread(f'{datapath}/label/{idx}.tif')
        mask = preprocess_helpers.resize(mask)
        imsave(f'{processedpath}/label/{idx}.tif', mask)
    print('\nComplete.')


def preprocess(datapath, processedpath):
    os.mkdir(processedpath)
    for i in range(8):
        os.mkdir(f'{processedpath}/image{i}')
        os.mkdir(f'{processedpath}/label{i}')

    idxs = os.listdir(datapath)
    n = len(idxs)
    for i, idx in enumerate(idxs):
        sys.stdout.write(f'\rProcessing...{i+1}/{n}')
        sys.stdout.flush()
        empty_found = False
        with open(f'{datapath}/{idx}', 'rb') as infile:
            image, mask, spacing, thickness = pickle.load(infile)

        processed_lungmasks = []
        processed_image = []
        processed_mask = []

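        # Compute a lung mask for every slice; labels are scaled by 100 here
        # and clipped back to [0, 1] after resampling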
        for j in range(len(image)):
            if empty_found:
                continue
            img = image[j]
            processed_lungmasks.append(preprocess_helpers.get_lung_mask(img))
            processed_image.append(img)
            processed_mask.append(mask[j]*100)

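        # Use the largest per-slice lung mask as the mask for the whole scan,
        # then resample image and labels to a uniform spacing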
        lung_mask = max(processed_lungmasks, key=lambda x: x.sum())
        image = resample(
            np.array(processed_image), (spacing, spacing), thickness
        )
        mask = resample(
            np.array(processed_mask), (spacing, spacing), thickness
        )
        mask = np.clip(mask, 0, 1)

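        # Normalize, mask and contrast-enhance the first eight resampled
        # slices, writing slice k to image{k}/ and its label to label{k}/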
        for k in range(8):
            im = preprocess_helpers.normalize(image[k])
            im = preprocess_helpers.resize(im)
            im = im*preprocess_helpers.resize(lung_mask)
            pil_im = Image.fromarray(im)
            enhancer = ImageEnhance.Contrast(pil_im)
            enhanced_im = enhancer.enhance(2.0)
            np_im = np.array(enhanced_im)

            mk = preprocess_helpers.resize(mask[k])
            imsave(f'{processedpath}/image{k}/{i}.tif', np_im)
            imsave(f'{processedpath}/label{k}/{i}.tif', mk.astype(np.int32))

    print('\nComplete.')


def preprocess_img(img, special=False, manual_lung_mask=None):
    """Preprocess a single slice: apply the lung field mask (optionally
    unioned with a manually drawn mask), resize, normalize and enhance
    contrast."""
    lung_mask = get_lung_mask(img).astype('float')
    if special:
        lung_mask += manual_lung_mask
        lung_mask = np.clip(lung_mask, 0, 1)

    lung_mask = resize(lung_mask)
    img = normalize(img)
    img = resize(img)
    img = img * lung_mask
    pil_im = Image.fromarray(img)
    enhancer = ImageEnhance.Contrast(pil_im)
    enhanced_im = enhancer.enhance(2.0)
    return np.array(enhanced_im)
Example #4
# Assumed imports for this example (cv is presumed to be cv2, pkl to be pickle,
# and imread/imsave to come from skimage.io; get_lung_mask, normalize, resize,
# get_most_activated_roi and get_most_activated_roi_mask are project-local
# helpers imported elsewhere):
import os
import pickle as pkl
import sys

import cv2 as cv
import numpy as np
from PIL import Image, ImageEnhance
from skimage.io import imread, imsave


def preprocess_img(img, special=0, custom_lung_mask=None):
    """
    Preprocessing pipeline for individual images. Applies lung field
    segmentation, resizes, normalizes and enhances contrast.

    :param img: input image
    :param special: 0 for the standard mask, 1 to union the computed lung mask
        with a custom mask, 2 to use the custom mask alone
    :param custom_lung_mask: custom lung mask (required if special != 0)
    """
    lung_mask = get_lung_mask(img).astype('float')
    if special == 1:
        lung_mask += custom_lung_mask
        lung_mask = np.clip(lung_mask, 0, 1)
    if special == 2:
        lung_mask = custom_lung_mask

    lung_mask = resize(lung_mask)
    img = normalize(img)
    img = resize(img)
    img = img * lung_mask
    pil_im = Image.fromarray(img)
    enhancer = ImageEnhance.Contrast(pil_im)
    enhanced_im = enhancer.enhance(2.0)
    return np.array(enhanced_im)
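

# A minimal usage sketch; the file paths below are hypothetical, and imread
# relies on the skimage.io import assumed above.
def _demo_preprocess_img():
    img = imread('data/train/image/0.tif')              # hypothetical slice
    standard = preprocess_img(img)                       # computed mask only
    manual = imread('data/special_train_masks/0.tif').astype('float')
    merged = preprocess_img(img, special=1, custom_lung_mask=manual)
    replaced = preprocess_img(img, special=2, custom_lung_mask=manual)
    return standard, merged, replaced
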
def get_rois(extracted_path, processed_path, roi_2d_path, roi_3d_path, model):
    """
    Given a model and original images, predicts and saves the 2D (50 x 50) and
    3D (50 x 50 x 50) ROI for the nodule

    :param extracted_path: path to the original (unprocessed) CT image slice
    :param processed_path: path to the processed CT image slice
    :param roi_path: path to save predicted ROI output
    :param model: pretrained model for nodule segementation
    """
    os.mkdir(roi_2d_path)
    os.mkdir(roi_3d_path)
    pids = os.listdir(processed_path)
    n = len(pids)
    Extracted = {}
    for i, pid in enumerate(pids):
        max_area = 0
        perimeter = 0
        diameter = 0
        atten = 0
        sys.stdout.write(f"\rGetting ROIs...{i+1}/{n}")
        sys.stdout.flush()
        for im_path in os.listdir(processed_path + '/' + str(pid)):
            x = imread(os.path.join(processed_path + '/' + str(pid),
                                    im_path)).reshape(1, 256, 256, 1) / 255

            # flatten the model prediction back to a (256, 256) nodule mask
            nodule_pred = model.predict(x).reshape(256, 256)
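            # Track the largest predicted nodule area seen for this patient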
            max_area = max(max_area, nodule_pred.sum())
            try:
                # 2d
                mins, maxs = get_most_activated_roi(nodule_pred)
                xmin, ymin = mins
                xmax, ymax = maxs
                predicted_roi = resize(
                    normalize(
                        imread(
                            os.path.join(extracted_path + '/' + str(pid),
                                         im_path))))[xmin:xmax][:, ymin:ymax]
                if not os.path.isdir(roi_2d_path + '/' + str(pid)):
                    os.mkdir(roi_2d_path + '/' + str(pid))
                imsave(f"{roi_2d_path}/{pid}/{im_path}", predicted_roi)

                # 3d
                inpath = f'data/nlst_extracted_3d/{pid}/{im_path[:-4]}.pkl'
                with open(inpath, "rb") as input_file:
                    cube = pkl.load(input_file)
                new_dims = cube.shape[1]
                nodule_pred = cv.resize(nodule_pred,
                                        dsize=(new_dims, new_dims))
                mins, maxs = get_most_activated_roi(nodule_pred)
                xmin, ymin = mins
                xmax, ymax = maxs
                predicted_roi = np.array(
                    [normalize(slc)[xmin:xmax][:, ymin:ymax] for slc in cube])
                if not os.path.isdir(roi_3d_path + '/' + str(pid)):
                    os.mkdir(roi_3d_path + '/' + str(pid))
                outpath = f"{roi_3d_path}/{pid}/{im_path[:-4]}.pkl"
                pkl.dump(predicted_roi, open(outpath, "wb"))

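                # Threshold the ROI mask and derive simple geometric features:
                # area, edge (perimeter) pixel count, maximum pairwise diameter
                # and the mean masked intensity of the central cube slice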
                new_pred = get_most_activated_roi_mask(nodule_pred, new_dims)
                new_pred[new_pred > .1] = 255
                new_pred[new_pred < .1] = 0
                new_pred = new_pred.reshape(new_dims, new_dims).astype('uint8')
                if max_area < new_pred.sum():
                    max_area = new_pred.sum() / 255
                    perimeter = cv.Canny(new_pred, 100, 200).sum() / 255
                    coords = np.argwhere(new_pred > 0)
                    diameter = 0
                    # use a local index to avoid shadowing the patient loop's i
                    for j, (x1, y1) in enumerate(coords):
                        for x2, y2 in coords[j + 1:]:
                            diameter = max(
                                diameter, np.sqrt((x1 - x2)**2 + (y1 - y2)**2))
                    atten = (cube[int(len(cube) / 2)] * new_pred).mean()

            except (ValueError, IndexError):
                sys.stdout.write(f"\nNo predicted ROI for {pid} {im_path}\n")
        Extracted[pid] = max_area, perimeter, diameter, atten
    pkl.dump(Extracted, open('data/geometric_data.pkl', "wb"))
    print(f"\nComplete.")