Example 1
def create_chunks():
    """
    Gets unaugmented positive chunks and saves them to positive_no_aug.

    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every positive array on GCS -- no need to loop through
    #   negatives, as those are fine in their current state
    for in_blob in bucket.list_blobs(prefix='chunk_data/normal/positive'):

        # get the file id
        file_id = in_blob.name.split('/')[3]
        file_id = file_id.split('.')[0]

        logging.info(f'getting {file_id}')

        # copy region if it's the original image, not a rotation/reflection
        if file_id.endswith('_1'):
            logging.info(f'downloading {file_id}')
            arr = cloud.download_array(in_blob)
            cloud.save_chunks_to_cloud(arr, 'normal', 'positive_no_aug',
                                       file_id)
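A note on the naming convention this relies on: augmented copies of each positive chunk share the original's id plus a numeric suffix, with '_1' marking the unaugmented original. A minimal sketch of the id parsing, using a hypothetical blob name:

# Hypothetical blob name following the chunk_data layout used above.
name = 'chunk_data/normal/positive/ABCD1234EFGH_1.npy'
file_id = name.split('/')[3].split('.')[0]  # -> 'ABCD1234EFGH_1'
assert file_id.endswith('_1')  # only the unaugmented original is copied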
Example 2
def inspect_rois(annotations_df):
    """
    Sanity-check function to make sure that the ROIs we're getting actually
    contain occlusions.

    :param annotations_df: annotations
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # uncomment to inspect a single scan:
        # if in_blob.name != 'airflow/npy/ZZX0ZNWG6Q9I18GK.npy':
        #     continue
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        logging.info(f'inspecting {file_id}')
        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[annotations_df['patient_id'].str.match(
            file_id)]
        # if it's empty, this brain is ELVO negative
        if roi_df.empty:
            elvo_positive = False
        else:
            elvo_positive = True

        arr = cloud.download_array(in_blob)

        # if it's elvo positive
        if elvo_positive:
            chunks = []

            # get the ROI corner; the blue (z) annotation is measured from
            #   the opposite end of the stack, so it gets flipped
            blue = int(len(arr) - roi_df['blue2'].iloc[0])
            green = int(roi_df['green1'].iloc[0])
            red = int(roi_df['red1'].iloc[0])
            # append the chunk with both in-plane axis orderings so the
            #   views can be compared by eye
            chunks.append(arr[blue:blue + 32, green:green + 50, red:red + 50])
            chunks.append(arr[blue:blue + 32, red:red + 50, green:green + 50])

            # Loop through all relevant chunks and show the axial, coronal,
            #   and sagittal views to make sure there's an occlusion
            for chunk in chunks:
                axial = np.max(chunk, axis=0)
                coronal = np.max(chunk, axis=1)
                sag = np.max(chunk, axis=2)
                fig, ax = plt.subplots(1, 3, figsize=(6, 4))
                ax[0].imshow(axial, interpolation='none')
                ax[1].imshow(coronal, interpolation='none')
                ax[2].imshow(sag, interpolation='none')
                plt.show()
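The three views are maximum-intensity projections: collapsing the chunk along each axis with np.max yields the axial, coronal, and sagittal views. A self-contained sketch with a synthetic chunk of the same shape as above:

import numpy as np

chunk = np.random.randn(32, 50, 50)  # (depth, height, width)
axial = np.max(chunk, axis=0)        # collapse depth  -> (50, 50)
coronal = np.max(chunk, axis=1)      # collapse height -> (32, 50)
sagittal = np.max(chunk, axis=2)     # collapse width  -> (32, 50)
assert axial.shape == (50, 50)
assert coronal.shape == (32, 50) and sagittal.shape == (32, 50)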
Example 3
def normal_mip():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # iterate through every source directory...
    for location in WHENCE:
        prefix = location + '/'
        logging.info(f"MIPing images from {prefix}")

        # get every blob
        for in_blob in bucket.list_blobs(prefix=prefix):
            # blacklist
            if in_blob.name == prefix + 'LAUIHISOEZIM5ILF.npy':
                continue

            file_id = in_blob.name.split('/')[2]
            file_id = file_id.split('.')[0]

            # perform the normal MIPing procedure
            logging.info(f'downloading {in_blob.name}')
            input_arr = cloud.download_array(in_blob)
            logging.info(f"blob shape: {input_arr.shape}")

            # failure-analysis scans in the axial direction get the
            #   failure-analysis crop; everything else is cropped normally,
            #   so cropped_arr is always bound before use
            if file_id in FAILURE_ANALYSIS and location == 'numpy/axial':
                cropped_arr = transforms.crop_normal_axial_fa(input_arr,
                                                              location)
            elif location == 'numpy/axial':
                cropped_arr = transforms.crop_normal_axial(input_arr,
                                                           location)
            else:
                cropped_arr = transforms.crop_normal_coronal(input_arr,
                                                             location)

            # remove extremes
            not_extreme_arr = transforms.remove_extremes(cropped_arr)
            logging.info('removed array extremes')

            # MIP array
            mip_arr = transforms.mip_normal(not_extreme_arr)

            # OPTIONAL: visualize MIP
            # plt.figure(figsize=(6, 6))
            # plt.imshow(mip_arr, interpolation='none')
            # plt.show()

            # save to cloud
            cloud.save_npy_to_cloud(mip_arr, file_id, location, 'normal')
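WHENCE and FAILURE_ANALYSIS are module-level constants defined elsewhere; judging only from how they are used here, they look roughly like the following (the values below are illustrative placeholders, not the real ones):

# Assumed shapes of the module-level constants used above.
WHENCE = ['numpy/axial', 'numpy/coronal']  # source directories to MIP
FAILURE_ANALYSIS = ['ABCD1234EFGH']        # scan ids needing the FA crop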
Example 4
def clean_data():
    """
    Deletes everything in chunk_data/normal/positive_no_aug

    :return:
    """
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/normal/positive_no_aug"
    logging.info(f"cleaning: deleting positive chunks from {prefix}")

    # delete everything
    for in_blob in bucket.list_blobs(prefix=prefix):
        in_blob.delete()
Example 5
def multichannel_mip():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # iterate through every source directory...
    for location in WHENCE:
        prefix = location + '/'
        logging.info(f"MIPing images from {prefix}")

        for in_blob in bucket.list_blobs(prefix=prefix):
            # blacklist
            if in_blob.name == prefix + 'LAUIHISOEZIM5ILF.npy':
                continue

            file_id = in_blob.name.split('/')[2]
            file_id = file_id.split('.')[0]

            # perform the normal MIPing procedure
            logging.info(f'downloading {in_blob.name}')
            input_arr = cloud.download_array(in_blob)
            logging.info(f"blob shape: {input_arr.shape}")
            # failure-analysis scans in the axial direction get the
            #   failure-analysis crop; everything else is cropped normally,
            #   so cropped_arr is always bound before use
            if file_id in FAILURE_ANALYSIS and location == 'numpy/axial':
                cropped_arr = \
                    transforms.crop_multichannel_axial_fa(input_arr,
                                                          location)
            elif location == 'numpy/axial':
                cropped_arr = transforms.crop_multichannel_axial(
                    input_arr, location)
            else:
                cropped_arr = transforms.crop_multichannel_coronal(
                    input_arr)
            # segment the vessels and MIP the result
            segmented_arr = transforms.segment_vessels(cropped_arr)
            logging.info('segmented vessels')
            mip_arr = transforms.mip_multichannel(segmented_arr)

            # OPTIONAL: visualize MIP
            # plt.figure(figsize=(6, 6))
            # plt.imshow(mip_arr[1], interpolation='none')
            # plt.show()

            # save to the numpy generator source directory
            cloud.save_segmented_npy_to_cloud(mip_arr, file_id, location,
                                              'multichannel')
Example 6
def axial_to_coronal_and_sagittal():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # for every axial scan
    for in_blob in bucket.list_blobs(prefix='numpy/axial'):

        # blacklist (these blobs live under numpy/axial/)
        if in_blob.name in ('numpy/axial/LAUIHISOEZIM5ILF.npy',
                            'numpy/axial/ALOUY4SF3BQKXQCZ.npy',
                            'numpy/axial/ABPO2BORDNF3OVL3.npy'):
            continue

        # download, then transpose, then flip it to orient it correctly
        logging.info(f'downloading {in_blob.name}')
        axial = cloud.download_array(in_blob)
        coronal = np.transpose(axial, (1, 0, 2))
        coronal = np.fliplr(coronal)
        sagittal = np.transpose(axial, (2, 0, 1))
        sagittal = np.fliplr(sagittal)

        # get the file id (blob names look like numpy/axial/<id>.npy)
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        try:
            # save files to GCS (np.save needs a binary-mode handle)
            coronal_io = file_io.FileIO(
                f'gs://elvos/numpy/coronal/'
                f'{file_id}.npy', 'wb')
            np.save(coronal_io, coronal)
            sagittal_io = file_io.FileIO(
                f'gs://elvos/numpy/sagittal/'
                f'{file_id}.npy', 'wb')
            np.save(sagittal_io, sagittal)
            coronal_io.close()
            sagittal_io.close()

        except Exception as e:
            logging.error(f'for patient ID: {file_id} {e}')
            break
        logging.info(f'saved {file_id} coronal and sagittal .npy files')
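The axis juggling above is easier to see on shapes alone. An axial stack is (slice, row, col); np.transpose reorders the axes, and np.fliplr flips the new second axis to fix orientation. A quick sketch with an arbitrary synthetic shape:

import numpy as np

axial = np.zeros((100, 512, 512))                     # (slice, row, col)
coronal = np.fliplr(np.transpose(axial, (1, 0, 2)))   # -> (512, 100, 512)
sagittal = np.fliplr(np.transpose(axial, (2, 0, 1)))  # -> (512, 100, 512)
assert coronal.shape == (512, 100, 512)
assert sagittal.shape == (512, 100, 512)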
Example 7
def clean_old_data():
    """
    Removes old upsampled positives.

    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/normal/positive"

    # iterate through all blobs in the bucket
    for in_blob in bucket.list_blobs(prefix=prefix):
        logging.info(f'checking {in_blob.name}')
        file_id = in_blob.name.split('/')[-1]
        file_id = file_id.split('.')[0]

        # delete it if it has an underscore in it
        if '_' in file_id:
            in_blob.delete()
Example 8
def clean_new_data():
    """
    Removes non-upsampled positives.

    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/filtered/positive"
    logging.info(f"cleaning: deleting non-upsampled positives from {prefix}")

    # iterate through all blobs in the bucket
    for in_blob in bucket.list_blobs(prefix=prefix):
        logging.info(f'checking {in_blob.name}')
        file_id = in_blob.name.split('/')[-1]
        file_id = file_id.split('.')[0]

        # if there's no underscore, delete the blob
        if '_' in file_id:
            continue
        in_blob.delete()
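clean_old_data and clean_new_data apply complementary filters over the same naming convention: ids containing an underscore are upsampled copies, ids without one are originals. A quick illustration with hypothetical ids:

ids = ['ABCD1234EFGH', 'ABCD1234EFGH_1', 'ABCD1234EFGH_2']
old_deletes = [i for i in ids if '_' in i]      # clean_old_data removes these
new_deletes = [i for i in ids if '_' not in i]  # clean_new_data removes these
assert set(old_deletes) | set(new_deletes) == set(ids)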
Example 9
def transform_positives():
    """
    Transforms and upsamples all of the positive chunks.

    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/filtered/positive"
    logging.info(f"transforming positive chunks from {prefix}")

    # for each blob in chunk_data/filtered/positive
    for in_blob in bucket.list_blobs(prefix=prefix):
        file_id = in_blob.name.split('/')[3]
        file_id = file_id.split('.')[0]

        # download chunk
        logging.info(f'downloading {in_blob.name}')
        input_arr = cloud.download_array(in_blob)
        logging.info(f"blob shape: {input_arr.shape}")

        # upsample chunk
        transform_one(input_arr, file_id)
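transform_one is defined elsewhere in the repo. Going only by the suffix convention seen in the other examples ('_1' is the original, higher suffixes are rotations/reflections), a hedged sketch of what such an upsampler might look like -- the real implementation may differ:

import numpy as np

def transform_one(chunk: np.ndarray, file_id: str):
    # Hypothetical sketch: save the original plus simple in-plane rotations
    # and a flip as '<id>_1', '<id>_2', ... via the same cloud helper used
    # in the examples above.
    variants = [
        chunk,                              # original
        np.rot90(chunk, k=1, axes=(1, 2)),  # 90-degree rotation
        np.rot90(chunk, k=2, axes=(1, 2)),  # 180-degree rotation
        np.flip(chunk, axis=2),             # left-right reflection
    ]
    for n, variant in enumerate(variants, start=1):
        cloud.save_chunks_to_cloud(np.asarray(variant), 'normal',
                                   'positive', f'{file_id}_{n}')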
Example 10
def create_chunks(annotations_df: pd.DataFrame):
    """
    Processes and saves the actual chunks based on the previously derived
    annotations.

    :param annotations_df: annotations indicating where each occlusion is
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        logging.info(f'chunking {file_id}')
        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[annotations_df['patient_id'].str.match(
            file_id)]
        # if it's empty, this brain is ELVO negative
        if roi_df.empty:
            elvo_positive = False
        else:
            elvo_positive = True

        arr = cloud.download_array(in_blob)
        rois = []
        centers = []

        # if it's elvo positive
        if elvo_positive:
            # iterate through every occlusion this patient has
            for row in roi_df.itertuples():
                """
                row[0] = index
                row[1] = patient ID
                row[2] = red1
                row[3] = red2
                row[4] = green1
                row[5] = green2
                row[6] = blue1
                row[7] = blue2
                """
                # append the lowest-valued corner of the ROI to rois
                rois.append((int(len(arr) - row[7]), int(row[4]), int(row[2])))

                # append the center of the ROI to centers
                centers.append(
                    (int(((len(arr) - row[6]) + (len(arr) - row[7])) / 2),
                     int((row[4] + row[5]) / 2), int((row[2] + row[3]) / 2)))
            logging.info(f'rois: {rois} centers: {centers}')

        h = 0
        # loop through every chunk
        for i in range(0, len(arr), 32):
            for j in range(0, len(arr[0]), 32):
                for k in range(0, len(arr[0][0]), 32):
                    found_positive = False

                    # loop through the available ROIs and centers
                    for roi, center in zip(rois, centers):

                        # if the center lies within this chunk
                        if i <= center[0] <= i + 32 \
                                and j <= center[1] <= j + 32 \
                                and k <= center[2] <= k + 32:
                            # save the ROI and skip this block
                            chunk = arr[roi[0]:roi[0] + 32, roi[1]:roi[1] + 32,
                                        roi[2]:roi[2] + 32]
                            cloud.save_chunks_to_cloud(np.asarray(chunk),
                                                       'normal', 'positive',
                                                       file_id + str(h))
                            h += 1
                            found_positive = True

                    if found_positive:
                        continue

                    # copy the chunk
                    chunk = arr[i:(i + 32), j:(j + 32), k:(k + 32)]
                    # calculate the airspace
                    airspace = np.where(chunk < -300)
                    # if it's less than 90% airspace
                    if (airspace[0].size / chunk.size) < 0.9:
                        # save the chunk to the cloud as a negative
                        cloud.save_chunks_to_cloud(np.asarray(chunk), 'normal',
                                                   'negative',
                                                   file_id + str(h))

                    h += 1
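The negative-chunk filter is worth a closer look: np.where(chunk < -300) indexes every voxel below -300 (roughly air in Hounsfield units), so airspace[0].size / chunk.size is the fraction of air voxels. A synthetic check:

import numpy as np

chunk = np.full((32, 32, 32), -1000.0)  # all air
chunk[:8] = 40.0                        # pretend a quarter is soft tissue
airspace = np.where(chunk < -300)
fraction = airspace[0].size / chunk.size
assert abs(fraction - 0.75) < 1e-9      # 75% air -> chunk would be saved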
Example 11
def create_labels(annotations_df: pd.DataFrame):
    """
    Processes and saves labels for the chunks based on the previously
    derived annotations. Very similar to create_chunks in methodology.

    :param annotations_df: annotations to get labels from
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    label_dict = {}

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        logging.info(f'labeling {file_id}')

        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[annotations_df['patient_id'].str.match(
            file_id)]
        # if it's empty, this brain is ELVO negative
        if roi_df.empty:
            elvo_positive = False
        else:
            elvo_positive = True

        arr = cloud.download_array(in_blob)
        rois = []
        centers = []

        # if it's elvo positive
        if elvo_positive:
            # go through each occlusion this patient has
            for row in roi_df.itertuples():
                """
                row[0] = index
                row[1] = patient ID
                row[2] = red1
                row[3] = red2
                row[4] = green1
                row[5] = green2
                row[6] = blue1
                row[7] = blue2
                """
                # append ROI to rois
                rois.append((int(len(arr) - row[7]), int(row[4]), int(row[2])))
                # append center to centers
                centers.append(
                    (int(((len(arr) - row[6]) + (len(arr) - row[7])) / 2),
                     int((row[4] + row[5]) / 2), int((row[2] + row[3]) / 2)))

        # (if it's ELVO negative, rois and centers stay empty)
        h = 0
        # loop through every chunk
        for i in range(0, len(arr), 32):
            for j in range(0, len(arr[0]), 32):
                for k in range(0, len(arr[0][0]), 32):
                    found_positive = False

                    # loop through the available ROIs and centers
                    for roi, center in zip(rois, centers):

                        # if the center lies within this chunk
                        if i <= center[0] <= i + 32 \
                                and j <= center[1] <= j + 32 \
                                and k <= center[2] <= k + 32:
                            # label the chunk as positive and skip this block
                            label_dict[file_id + str(h)] = 1
                            h += 1
                            found_positive = True

                    if found_positive:
                        continue

                    # copy the chunk
                    chunk = arr[i:(i + 32), j:(j + 32), k:(k + 32)]
                    # calculate the airspace
                    airspace = np.where(chunk < -300)
                    # if it's less than 90% airspace
                    if (airspace[0].size / chunk.size) < 0.9:
                        # label the chunk as negative
                        label_dict[file_id + str(h)] = 0
                    h += 1

    # convert the labels to a df
    labels_df = pd.DataFrame.from_dict(label_dict,
                                       orient='index',
                                       columns=['label'])
    labels_df.to_csv('annotated_labels.csv')
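The resulting CSV keys each label by chunk id (the scan id plus the running chunk counter), so it can be read straight back into pandas:

import pandas as pd

labels_df = pd.read_csv('annotated_labels.csv', index_col=0)
positives = labels_df[labels_df['label'] == 1]  # chunks with an ROI center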