Example #1
def create_chunks():
    """
    Gets unaugmented positive chunks and saves them to positive_no_aug.

    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every positive array on GCS -- no need to loop through
    #   negatives, as those are fine in their current state
    for in_blob in bucket.list_blobs(prefix='chunk_data/normal/positive'):

        # get the file id
        file_id = in_blob.name.split('/')[3]
        file_id = file_id.split('.')[0]

        logging.info(f'getting {file_id}')

        # copy region if it's the original image, not a rotation/reflection
        if file_id.endswith('_1'):
            logging.info(f'downloading {file_id}')
            arr = cloud.download_array(in_blob)
            cloud.save_chunks_to_cloud(arr, 'normal', 'positive_no_aug',
                                       file_id)
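The endswith('_1') filter leans on the augmented-ID convention used elsewhere on this page (variants _1 through _24, with _1 being the unaugmented original). A quick illustration with made-up IDs:

# Made-up IDs, purely to illustrate the suffix convention.
ids = ['ABC0XYZW_1', 'ABC0XYZW_2', 'ABC0XYZW_12', 'ABC0XYZW_24']
originals = [i for i in ids if i.endswith('_1')]
print(originals)  # ['ABC0XYZW_1']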
Example #2
def inspect_rois(annotations_df):
    """
    Sanity-check function to make sure that the ROIs we're getting actually
    contain occlusions.

    :param annotations_df: annotations
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # if in_blob.name != 'airflow/npy/ZZX0ZNWG6Q9I18GK.npy':
        #     continue
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        logging.info(f'chunking {file_id}')
        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[annotations_df['patient_id'].str.match(
            file_id)]
        # if roi_df is empty, this brain is ELVO negative
        elvo_positive = not roi_df.empty

        arr = cloud.download_array(in_blob)

        # if it's elvo positive
        if elvo_positive:
            chunks = []

            # get ROI location
            blue = int(len(arr) - roi_df['blue2'].iloc[0])
            green = int(roi_df['green1'].iloc[0])
            red = int(roi_df['red1'].iloc[0])
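            # grab the chunk twice, with green and red swapped the second
            #   time, to eyeball both possible axis orderings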
            chunks.append(arr[blue:blue + 32, green:green + 50, red:red + 50])
            chunks.append(arr[blue:blue + 32, red:red + 50, green:green + 50])

            # Loop through all relevant chunks and show the axial, coronal,
            #   and sagittal views to make sure there's an occlusion
            for chunk in chunks:
                axial = np.max(chunk, axis=0)
                coronal = np.max(chunk, axis=1)
                sag = np.max(chunk, axis=2)
                fig, ax = plt.subplots(1, 3, figsize=(6, 4))
                ax[0].imshow(axial, interpolation='none')
                ax[1].imshow(coronal, interpolation='none')
                ax[2].imshow(sag, interpolation='none')
                plt.show()
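A hedged usage sketch: inspect_rois expects annotations with at least patient_id, red1, green1, and blue2 columns (column names taken from the code above; the CSV path here is hypothetical).

import logging
import pandas as pd

logging.basicConfig(level=logging.INFO)
annotations_df = pd.read_csv('annotations.csv')  # hypothetical path
inspect_rois(annotations_df)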
Example #3
def normal_mip():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # iterate through every source directory...
    for location in WHENCE:
        prefix = location + '/'
        logging.info(f"MIPing images from {prefix}")

        # get every blob
        for in_blob in bucket.list_blobs(prefix=prefix):
            # blacklist
            if in_blob.name == prefix + 'LAUIHISOEZIM5ILF.npy':
                continue

            file_id = in_blob.name.split('/')[2]
            file_id = file_id.split('.')[0]

            # perform the normal MIPing procedure
            logging.info(f'downloading {in_blob.name}')
            input_arr = cloud.download_array(in_blob)
            logging.info(f"blob shape: {input_arr.shape}")

            # if it's a failure analysis scan, do the failure analysis MIP
            if file_id in FAILURE_ANALYSIS:
                if location == 'numpy/axial':
                    cropped_arr = transforms.crop_normal_axial_fa(input_arr,
                                                                  location)
                else:
                    # non-axial FA scans fall back to the standard coronal
                    #   crop, so cropped_arr is always assigned
                    cropped_arr = transforms.crop_normal_coronal(input_arr,
                                                                 location)
            # otherwise just do a normal MIP
            else:
                if location == 'numpy/axial':
                    cropped_arr = transforms.crop_normal_axial(input_arr,
                                                               location)
                else:
                    cropped_arr = transforms.crop_normal_coronal(input_arr,
                                                                 location)

            # remove extremes
            not_extreme_arr = transforms.remove_extremes(cropped_arr)
            logging.info('removed array extremes')

            # MIP array
            mip_arr = transforms.mip_normal(not_extreme_arr)

            # OPTIONAL: visualize MIP
            # plt.figure(figsize=(6, 6))
            # plt.imshow(mip_arr, interpolation='none')
            # plt.show()

            # save to cloud
            cloud.save_npy_to_cloud(mip_arr, file_id, location, 'normal')
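transforms.mip_normal is project-internal and not shown on this page; a minimal sketch, assuming it is a plain maximum-intensity projection along the first (axial) axis:

import numpy as np

def mip_normal_sketch(arr: np.ndarray) -> np.ndarray:
    # collapse the 3D volume to 2D by taking the max along axis 0
    return np.max(arr, axis=0)

mip = mip_normal_sketch(np.random.randint(-1000, 1000, (64, 128, 128)))
print(mip.shape)  # (128, 128)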
Example #4
def multichannel_mip():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # iterate through every source directory...
    for location in WHENCE:
        prefix = location + '/'
        logging.info(f"MIPing images from {prefix}")

        for in_blob in bucket.list_blobs(prefix=prefix):
            # blacklist
            if in_blob.name == prefix + 'LAUIHISOEZIM5ILF.npy':
                continue

            file_id = in_blob.name.split('/')[2]
            file_id = file_id.split('.')[0]

            # perform the normal MIPing procedure
            logging.info(f'downloading {in_blob.name}')
            input_arr = cloud.download_array(in_blob)
            logging.info(f"blob shape: {input_arr.shape}")
            # if it's a failure analysis scan, do the failure analysis crop
            if file_id in FAILURE_ANALYSIS:
                if location == 'numpy/axial':
                    cropped_arr = \
                        transforms.crop_multichannel_axial_fa(input_arr,
                                                              location)
                else:
                    # non-axial FA scans fall back to the standard coronal
                    #   crop, so cropped_arr is always assigned
                    cropped_arr = transforms.crop_multichannel_coronal(
                        input_arr)
            else:
                if location == 'numpy/axial':
                    cropped_arr = transforms.crop_multichannel_axial(
                        input_arr, location)
                else:
                    cropped_arr = transforms.crop_multichannel_coronal(
                        input_arr)
            segmented_arr = transforms.segment_vessels(cropped_arr)
            logging.info('segmented vessels')
            mip_arr = transforms.mip_multichannel(segmented_arr)
            # plt.figure(figsize=(6, 6))
            # plt.imshow(mip_arr[1], interpolation='none')
            # plt.show()

            # save to the numpy generator source directory
            cloud.save_segmented_npy_to_cloud(mip_arr, file_id, location,
                                              'multichannel')
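mip_multichannel is also project-internal; since mip_arr[1] is displayable above, it evidently returns a channel-first stack. A purely illustrative sketch that MIPs fixed-depth slabs into separate channels (the slab count is an assumption):

import numpy as np

def mip_multichannel_sketch(arr: np.ndarray, n_channels: int = 3) -> np.ndarray:
    # split the volume into n_channels depth slabs and MIP each one
    slabs = np.array_split(arr, n_channels, axis=0)
    return np.stack([np.max(slab, axis=0) for slab in slabs])

mip = mip_multichannel_sketch(np.zeros((60, 128, 128)))
print(mip.shape)  # (3, 128, 128)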
Example #5
def axial_to_coronal_and_sagittal():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # for every axial scan
    for in_blob in bucket.list_blobs(prefix='numpy/axial'):

        # blacklist (blobs under this prefix are named numpy/axial/<id>.npy)
        if in_blob.name in ('numpy/axial/LAUIHISOEZIM5ILF.npy',
                            'numpy/axial/ALOUY4SF3BQKXQCZ.npy',
                            'numpy/axial/ABPO2BORDNF3OVL3.npy'):
            continue

        # download, then transpose, then flip it to orient it correctly
        logging.info(f'downloading {in_blob.name}')
        axial = cloud.download_array(in_blob)
        coronal = np.transpose(axial, (1, 0, 2))
        coronal = np.fliplr(coronal)
        sagittal = np.transpose(axial, (2, 0, 1))
        sagittal = np.fliplr(sagittal)

        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        try:
            # save files to GCS
            coronal_io = file_io.FileIO(
                f'gs://elvos/numpy/coronal/'
                f'{file_id}.npy', 'wb')
            np.save(coronal_io, coronal)
            sagittal_io = file_io.FileIO(
                f'gs://elvos/numpy/sagittal/'
                f'{file_id}.npy', 'wb')
            np.save(sagittal_io, sagittal)
            coronal_io.close()
            sagittal_io.close()

        except Exception as e:
            logging.error(f'for patient ID: {file_id} {e}')
            break
        logging.info('saved .npy files to cloud')
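The transpose/flip reorientation above is easy to sanity-check locally on a dummy volume (the shape below is arbitrary):

import numpy as np

axial = np.zeros((64, 512, 256))  # (z, y, x)
coronal = np.fliplr(np.transpose(axial, (1, 0, 2)))
sagittal = np.fliplr(np.transpose(axial, (2, 0, 1)))
print(coronal.shape)   # (512, 64, 256)
print(sagittal.shape)  # (256, 64, 512)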
Example #6
def transform_positives():
    """
    Transforms and upsamples all of the positive chunks.

    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/filtered/positive"
    logging.info(f"transforming positive chunks from {prefix}")

    # for each blob in chunk_data/filtered/positive
    for in_blob in bucket.list_blobs(prefix=prefix):
        file_id = in_blob.name.split('/')[3]
        file_id = file_id.split('.')[0]

        # download chunk
        logging.info(f'downloading {in_blob.name}')
        input_arr = cloud.download_array(in_blob)
        logging.info(f"blob shape: {input_arr.shape}")

        # upsample chunk
        transform_one(input_arr, file_id)
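transform_one is defined elsewhere in the project; a hypothetical sketch, assuming (from the _1 through _24 chunk IDs used elsewhere on this page) that it upsamples one chunk into rotated and flipped variants, saving each under a numbered suffix. The exact set of transforms is a guess:

import numpy as np

def transform_one_sketch(chunk: np.ndarray, file_id: str):
    # rotate in the axial plane and flip to generate augmented variants
    variants = []
    for k in range(4):
        rot = np.rot90(chunk, k, axes=(1, 2))
        variants.extend([rot, np.flip(rot, axis=1), np.flip(rot, axis=2)])
    for n, variant in enumerate(variants, start=1):
        cloud.save_chunks_to_cloud(variant, 'normal', 'positive',
                                   f'{file_id}_{n}')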
Example #7
train_chunks = []
train_labels = []
val_chunks = []
val_labels = []
test_chunks = []
test_labels = []

# get positive train chunks and labels
i = 1
for id_, label in list(positive_train_label_data.items()):
    if i % 500 == 0:
        logging.info(f'got chunk {i}')
    i += 1
    blob = bucket.get_blob('chunk_data/normal/positive/' + id_ + '.npy')
    arr = cloud_management.download_array(blob)
    if arr.shape == (32, 32, 32):
        arr = np.expand_dims(arr, axis=-1)
        train_chunks.append(arr)
        train_labels.append(label)
logging.info(f'{i} total positive training chunks')

# get negative train chunks and labels
i = 1
for id_, label in list(negative_train_label_data.items()):
    if i % 500 == 0:
        logging.info(f'got chunk {i}')
    i += 1
    blob = bucket.get_blob('chunk_data/normal/negative/' + id_ + '.npy')
    arr = cloud_management.download_array(blob)
    if arr.shape == (32, 32, 32):
        arr = np.expand_dims(arr, axis=-1)
        train_chunks.append(arr)
        train_labels.append(label)
logging.info(f'{i} total negative training chunks')
Example #8
def main():
    configure_logger()
    # Access Google Cloud Storage
    gcs_client = storage.Client.from_service_account_json(
        '/home/harold_triedman/elvo-analysis/credentials/client_secret.json'
        # 'credentials/client_secret.json'
    )
    bucket = gcs_client.get_bucket('elvos')

    # Get label data from Google Cloud Storage
    blob = storage.Blob('augmented_annotated_labels.csv', bucket)
    blob.download_to_filename('tmp/augmented_annotated_labels.csv')
    prelim_label_data = {}

    # load labels from augmented_annotated_labels.csv
    with open('tmp/augmented_annotated_labels.csv', 'r') as pos_file:
        reader = csv.reader(pos_file, delimiter=',')
        for row in reader:
            if row[1] != 'Unnamed: 0.1':
                prelim_label_data[row[1]] = int(row[2])
                # prelim_label_data[row[2]] = int(row[3])

    # Get all of the positives from the label data
    positive_label_data = {}
    logging.info('getting 12168 positive labels')
    for id_, label in list(prelim_label_data.items()):
        if label == 1 and '_' in id_:
            positive_label_data[id_] = label

    positive_train_label_data = {}
    positive_val_label_data = {}
    train = {}
    val = {}

    # Loop through positives
    for i, id_ in enumerate(list(positive_label_data.keys())):
        if i % 24 == 0:

            # Split into train/val sets based off of random flips
            seed = random.randint(1, 100)
            stripped_id = id_[:-1]
            meta_id = id_[:16]

            # add ID to positive train data and train metadata
            if seed > 10:
                positive_train_label_data[id_] = 1
                train[meta_id] = ''
                for j in range(2, 25):
                    positive_train_label_data[stripped_id + str(j)] = 1

            # add ID to positive val data and val metadata
            else:
                positive_val_label_data[id_] = 1
                val[meta_id] = ''
                for j in range(2, 25):
                    positive_val_label_data[stripped_id + str(j)] = 1

    # Get 14500 random negatives from the label data to feed into our generator
    negative_counter = 0
    negative_train_label_data = {}
    negative_val_label_data = {}
    logging.info("getting 14500 random negative labels")
    while negative_counter < 14500:

        # Get random chunk
        id_, label = random.choice(list(prelim_label_data.items()))

        # if it's a negative
        if label == 0:
            if negative_counter % 500 == 0:
                logging.info(f'got {negative_counter} labels so far')

            meta_id = id_[:16]
            # if another chunk in this brain is in train metadata dict
            if meta_id in train:
                negative_train_label_data[id_] = label

            # else if another chunk in this brain is in val metadata dict
            elif meta_id in val:
                negative_val_label_data[id_] = label

            # otherwise flip a coin to see where it's going to end up
            else:
                seed = random.randint(1, 100)
                if seed > 10:
                    negative_train_label_data[id_] = label
                    train[meta_id] = ''
                else:
                    negative_val_label_data[id_] = label
                    val[meta_id] = ''

            # delete it from prelim_label_data to ensure no re-picks
            del prelim_label_data[id_]
            negative_counter += 1

    # save train/val metadata
    train_df = pd.DataFrame.from_dict(train, orient='index')
    val_df = pd.DataFrame.from_dict(val, orient='index')
    train_df.to_csv('train_ids.csv')
    val_df.to_csv('val_ids.csv')

    train_chunks = []
    train_labels = []
    val_chunks = []
    val_labels = []

    # Get positive train chunks
    i = 1
    for id_, label in list(positive_train_label_data.items()):
        if i % 500 == 0:
            logging.info(f'got chunk {i}')
        i += 1
        blob = bucket.get_blob('chunk_data/normal/positive/' + id_ + '.npy')
        arr = cloud_management.download_array(blob)
        if arr.shape == (32, 32, 32):
            arr = np.expand_dims(arr, axis=-1)
            train_chunks.append(arr)
            train_labels.append(label)
    logging.info(f'{i} total positive training chunks')

    # Get positive val chunks
    i = 1
    for id_, label in list(positive_val_label_data.items()):
        if i % 500 == 0:
            logging.info(f'got chunk {i}')
        i += 1
        blob = bucket.get_blob('chunk_data/normal/positive/' + id_ + '.npy')
        arr = cloud_management.download_array(blob)
        if arr.shape == (32, 32, 32):
            arr = np.expand_dims(arr, axis=-1)
            val_chunks.append(arr)
            val_labels.append(label)
    logging.info(f'{i} total positive validation chunks')

    # Get negative train chunks
    i = 1
    for id_, label in list(negative_train_label_data.items()):
        if i % 500 == 0:
            logging.info(f'got chunk {i}')
        i += 1
        blob = bucket.get_blob('chunk_data/normal/negative/' + id_ + '.npy')
        arr = cloud_management.download_array(blob)
        if arr.shape == (32, 32, 32):
            arr = np.expand_dims(arr, axis=-1)
            train_chunks.append(arr)
            train_labels.append(label)
    logging.info(f'{i} total negative training chunks')

    # Get negative val chunks
    i = 1
    for id_, label in list(negative_val_label_data.items()):
        if i % 500 == 0:
            logging.info(f'got chunk {i}')
        i += 1
        blob = bucket.get_blob('chunk_data/normal/negative/' + id_ + '.npy')
        arr = cloud_management.download_array(blob)
        if arr.shape == (32, 32, 32):
            arr = np.expand_dims(arr, axis=-1)
            val_chunks.append(arr)
            val_labels.append(label)
    logging.info(f'{i} total negative validation chunks')

    # shuffle order of training data
    tmp = list(zip(train_chunks, train_labels))
    random.shuffle(tmp)
    train_chunks, train_labels = zip(*tmp)

    # shuffle order of validation data
    tmp = list(zip(val_chunks, val_labels))
    random.shuffle(tmp)
    val_chunks, val_labels = zip(*tmp)

    # Turn into numpy arrays
    logging.info('splitting based on validation split')
    full_x_train = np.asarray(train_chunks)
    full_y_train = np.asarray(train_labels)
    x_val = np.asarray(val_chunks)
    y_val = np.asarray(val_labels)

    logging.info(f'{len(train_chunks)} total chunks to train with')
    logging.info(f'full training data: {full_x_train.shape},'
                 f'{full_y_train.shape}')
    logging.info(f'full validation data: {x_val.shape}, {y_val.shape}')

    # dtype=object, since the four arrays have different shapes
    full_arr = np.array([full_x_train, full_y_train, x_val, y_val],
                        dtype=object)

    # Save to a pickle to preserve the shuffled ordering
    with open('chunk_data_separated_ids.pkl', 'wb') as outfile:
        pickle.dump(full_arr, outfile, pickle.HIGHEST_PROTOCOL)
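Loading the pickle back preserves the shuffled ordering; a short usage sketch:

import pickle

with open('chunk_data_separated_ids.pkl', 'rb') as infile:
    full_x_train, full_y_train, x_val, y_val = pickle.load(infile)
print(full_x_train.shape, full_y_train.shape, x_val.shape, y_val.shape)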
Example #9
def create_chunks(annotations_df: pd.DataFrame):
    """
    Process and save actual chunks based on the previously derived
    annotations.

    :param annotations_df: annotations marking where each occlusion is
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        logging.info(f'chunking {file_id}')
        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[annotations_df['patient_id'].str.match(
            file_id)]
        # if roi_df is empty, this brain is ELVO negative
        elvo_positive = not roi_df.empty

        arr = cloud.download_array(in_blob)
        rois = []
        centers = []

        # if it's elvo positive
        if elvo_positive:
            # iterate through every occlusion this patient has
            for row in roi_df.itertuples():
                """
                row[0] = index
                row[1] = patient ID
                row[2] = red1
                row[3] = red2
                row[4] = green1
                row[5] = green2
                row[6] = blue1
                row[7] = blue2
                """
                # append the lowest-valued corner of the ROI to rois
                rois.append((int(len(arr) - row[7]), int(row[4]), int(row[2])))

                # append the center of the ROI to centers
                centers.append(
                    (int(((len(arr) - row[6]) + (len(arr) - row[7])) / 2),
                     int((row[4] + row[5]) / 2), int((row[2] + row[3]) / 2)))
            logging.info(f'ROIs: {rois}, centers: {centers}')

        h = 0
        # loop through every chunk
        for i in range(0, len(arr), 32):
            for j in range(0, len(arr[0]), 32):
                for k in range(0, len(arr[0][0]), 32):
                    found_positive = False

                    # loop through the available ROIs and centers
                    for roi, center in zip(rois, centers):

                        # if the center lies within this chunk
                        if i <= center[0] <= i + 32 \
                                and j <= center[1] <= j + 32 \
                                and k <= center[2] <= k + 32:
                            # save the ROI and skip this block
                            chunk = arr[roi[0]:roi[0] + 32, roi[1]:roi[1] + 32,
                                        roi[2]:roi[2] + 32]
                            cloud.save_chunks_to_cloud(np.asarray(chunk),
                                                       'normal', 'positive',
                                                       file_id + str(h))
                            h += 1
                            found_positive = True

                    if found_positive:
                        continue

                    # copy the chunk
                    chunk = arr[i:(i + 32), j:(j + 32), k:(k + 32)]
                    # calculate the airspace
                    airspace = np.where(chunk < -300)
                    # if it's less than 90% airspace
                    if (airspace[0].size / chunk.size) < 0.9:
                        # save it to the cloud as a negative chunk
                        cloud.save_chunks_to_cloud(np.asarray(chunk), 'normal',
                                                   'negative',
                                                   file_id + str(h))

                    h += 1
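The airspace filter discards chunks that are mostly air (below -300 HU); a self-contained check on a synthetic chunk:

import numpy as np

chunk = np.full((32, 32, 32), -1000.0)  # all air, in Hounsfield units
chunk[:8] = 40.0                        # fill the top quarter with soft tissue
airspace = np.where(chunk < -300)
print(airspace[0].size / chunk.size)    # 0.75 -> kept, below the 0.9 cutoff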
Example #10
def create_labels(annotations_df: pd.DataFrame):
    """
    Process and save labels for the chunks based on the previously derived
    annotations. Very similar to create_chunks in methodology.

    :param annotations_df: annotations to get labels from
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    label_dict = {}

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        logging.info(f'labeling {file_id}')

        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[annotations_df['patient_id'].str.match(
            file_id)]
        # if roi_df is empty, this brain is ELVO negative
        elvo_positive = not roi_df.empty

        arr = cloud.download_array(in_blob)
        rois = []
        centers = []

        # if it's elvo positive
        if elvo_positive:
            # go through each occlusion this patient has
            for row in roi_df.itertuples():
                """
                row[0] = index
                row[1] = patient ID
                row[2] = red1
                row[3] = red2
                row[4] = green1
                row[5] = green2
                row[6] = blue1
                row[7] = blue2
                """
                # append ROI to rois
                rois.append((int(len(arr) - row[7]), int(row[4]), int(row[2])))
                # append center to centers
                centers.append(
                    (int(((len(arr) - row[6]) + (len(arr) - row[7])) / 2),
                     int((row[4] + row[5]) / 2), int((row[2] + row[3]) / 2)))

        # if it's ELVO negative, rois and centers simply stay empty
        h = 0
        # loop through every chunk
        for i in range(0, len(arr), 32):
            for j in range(0, len(arr[0]), 32):
                for k in range(0, len(arr[0][0]), 32):
                    found_positive = False

                    # loop through the available ROIs and centers
                    for roi, center in zip(rois, centers):

                        # if the center lies within this chunk
                        if i <= center[0] <= i + 32 \
                                and j <= center[1] <= j + 32 \
                                and k <= center[2] <= k + 32:
                            # save the ROI and skip this block
                            label_dict[file_id + str(h)] = 1
                            h += 1
                            found_positive = True

                    if found_positive:
                        continue

                    # copy the chunk
                    chunk = arr[i:(i + 32), j:(j + 32), k:(k + 32)]
                    # calculate the airspace
                    airspace = np.where(chunk < -300)
                    # if it's less than 90% airspace
                    if (airspace[0].size / chunk.size) < 0.9:
                        # label it as 0
                        label_dict[file_id + str(h)] = 0
                    h += 1

    # convert the labels to a df
    labels_df = pd.DataFrame.from_dict(label_dict,
                                       orient='index',
                                       columns=['label'])
    labels_df.to_csv('annotated_labels.csv')
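The resulting annotated_labels.csv maps chunk IDs to 0/1 labels and can be read straight back into a dict:

import pandas as pd

labels_df = pd.read_csv('annotated_labels.csv', index_col=0)
label_dict = labels_df['label'].to_dict()
print(len(label_dict))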