Example #1
def compute_ROIs(
        generate_csv=False,
        version=0,
        patientTxtPath='/media/shared/datasets/LUNA/CSVFILES/patients_train.txt',
        mode='train'):
    if generate_csv:
        with open('ROIs_v{}_{}.csv'.format(version, mode), 'w') as f:
            f.write('patient,nodules,detected_regions\n')
    # Patients file list  (TODO: not really useful, remove)
    filter_annotated = False
    patients_with_annotations = pd.read_csv(
        NODULES_PATH
    )  # filter patients with no annotations to avoid having to read them
    patients_with_annotations = list(
        set(patients_with_annotations['seriesuid']))
    patients_with_annotations = [
        "luna_%s.npz" % p.split('.')[-1] for p in patients_with_annotations
    ]

    filenames = []
    with open(patientTxtPath, 'r') as f:
        for line in f:
            filenames.append(line.strip())
    filenames = [
        os.path.join(INPUT_PATH, fp) for fp in filenames
        if fp in patients_with_annotations or not filter_annotated
    ]
    filenames = list(filter(os.path.isfile, filenames))  # keep only files that actually exist

    def __load_and_store(filename):
        patient_data = np.load(filename)['arr_0']
        patient_data = patient_data.astype(np.int16)
        X, y, rois, stats = common.load_patient(patient_data,
                                                discard_empty_nodules=True,
                                                output_rois=True,
                                                debug=True,
                                                include_ground_truth=True,
                                                thickness=1)
        if not stats:
            stats = {'tp': 0, 'fn': 0, 'fp': 0}

        logging.info("Patient: %s, stats: %s" %
                     (filename.split('/')[-1], stats))
        if generate_csv:
            with open('ROIs_v{}_{}.csv'.format(version, mode), 'a') as f:
                f.write('{},{},{}\n'.format(
                    filename.split('/')[-1][:-4], stats['tp'],
                    sum(stats.values())))
        return X, y, stats

    common.multiproc_crop_generator(
        filenames,
        os.path.join(PATCHES_PATH, 'dl1_v{}_x_{}.npz'.format(version, mode)),
        os.path.join(PATCHES_PATH, 'dl1_v{}_y_{}.npz'.format(version, mode)),
        __load_and_store)
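
For reference, common.multiproc_crop_generator is used here only as a sink that calls __load_and_store for every patient file and stores the concatenated crops in the two .npz paths. A minimal single-process stand-in under that assumption (the real implementation is not shown in these examples) might look like:

import numpy as np

def single_process_crop_generator(filenames, out_x_path, out_y_path, load_fn):
    # Hypothetical stand-in for common.multiproc_crop_generator (no multiprocessing).
    # load_fn follows the same provider contract as __load_and_store: it returns (X, y, stats).
    all_x, all_y = [], []
    for filename in filenames:
        X, y, _stats = load_fn(filename)
        all_x.extend(X)
        all_y.extend(y)
    np.savez_compressed(out_x_path, np.asarray(all_x))
    np.savez_compressed(out_y_path, np.asarray(all_y))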
Example #2
def compute_ROIs(generate_csv=False,
                 version_dl2=0,
                 SCORE_TH=0.5,
                 patientTxtPath='/media/shared/datasets/LUNA/CSVFILES/patients_train.txt',
                 mode='train',
                 dataset='luna'):
    """Load the output of DL-I and keep only the 1's (TPs or FNs) and the FPs above a given score (SCORE_TH) to train DL-II."""
    if generate_csv:
        with open('/home/shared/output/ROIs_dl2_v{}_{}_{}.csv'.format(version_dl2, mode, dataset), 'w') as f:
            f.write('patient,nodules,detected_regions\n')
    # Patients file list  (TODO: not really useful, remove)
    if mode == 'train':
        nodules_df = pd.read_csv(OUTPUT_DL1)
    else:
        nodules_df = pd.read_csv(OUTPUT_DL1_TEST)
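    # Keep all labelled nodules (label == 1) plus candidate regions whose DL-I score exceeds SCORE_TH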
    nodules_df = nodules_df[(nodules_df['score'] > SCORE_TH) | (nodules_df['label']==1)]

    nodules_df['nslice'] = nodules_df['nslice'].astype(int)
    logging.info("Shape nodules df: %s" % str(nodules_df.shape))

    patients = [ p + '.npz' for p in set(nodules_df['patientid'])]

    files = []
    with open(patientTxtPath, 'r') as f:
        for line in f:
            files.append(line.strip())

    if dataset == 'isbi':
        patients = ['/media/shared/datasets/ISBI/preprocessedNew/' + p for p in patients] 
        filenames = [fp for fp in files if fp in patients]
    else: 
        filenames = [os.path.join(INPUT_PATH, fp) for fp in files if fp in patients]

    def __load_and_store(filename):
        patient_data = np.load(filename)['arr_0'].astype(np.int16)
        ndf = nodules_df[nodules_df['patientid']==filename.split('/')[-1].split('.')[0]]
        X, y, rois, stats = common.load_patient(patient_data, ndf, output_rois=True, thickness=1)
        if not stats:
            stats = {'tp' : 0, 'fn' : 0, 'fp' : 0}

        logging.info("Patient: %s, stats: %s" % (filename.split('/')[-1], stats))
        if generate_csv:
            with open('/home/shared/output/ROIs_dl2_v{}_{}_{}.csv'.format(version_dl2, mode, dataset), 'a') as f:
                f.write('{},{},{}\n'.format(filename.split('/')[-1][:-4], stats['tp'], sum(stats.values())))

        return X, y, stats
    
    common.multiproc_crop_generator(filenames,
                                    os.path.join(PATCHES_PATH,'dl2_v{}_x_{}_{}.npz'.format(version_dl2, mode, dataset)),
                                    os.path.join(PATCHES_PATH,'dl2_v{}_y_{}_{}.npz'.format(version_dl2, mode, dataset)),
                                    __load_and_store)
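
A typical invocation for the default LUNA training split (the version number below is illustrative, not taken from the examples above) would be:

compute_ROIs(generate_csv=True, version_dl2=1, SCORE_TH=0.5,
             mode='train', dataset='luna')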
Example #3
filenames_train = [os.path.join(INPUT_PATH, fp) for fp in filenames if fp in patients_with_annotations]
filenames_test = [os.path.join(INPUT_PATH, fp) for fp in filenames_t if fp in patients_with_annotations]


def __load_and_store(filename):
    patient_data = np.load(filename)['arr_0']
    patient_data = patient_data.astype(np.int16)

    X, y, rois, stats = common.load_patient(patient_data, discard_empty_nodules=True, output_rois=True, debug=True, include_ground_truth=True, thickness=1)
    logging.info("Patient: %s, stats: %s" % (filename.split('/')[-1], stats))
    return X, y, stats



common.multiproc_crop_generator(filenames_train,
                                os.path.join(PATCHES_PATH, 'dl1_v2_x_train.npz'),
                                os.path.join(PATCHES_PATH, 'dl1_v2_y_train.npz'),
                                __load_and_store)

common.multiproc_crop_generator(filenames_test,
                                os.path.join(PATCHES_PATH, 'dl1_v2_x_test.npz'),
                                os.path.join(PATCHES_PATH, 'dl1_v2_y_test.npz'),
                                __load_and_store)
### PATCHES GENERATION -----------------------------------------------------------------
#
# ## PATIENTS FILE LIST
# patients_with_annotations = pd.read_csv(NODULES_PATH)  # filter patients with no annotations to avoid having to read them
# patients_with_annotations = list(set(patients_with_annotations['seriesuid']))
# patients_with_annotations = ["luna_%s.npz" % p.split('.')[-1] for p in patients_with_annotations]
#
# filenames = os.listdir(INPUT_PATH)
Example #4
def __load_and_store(filename):
    patient_data = np.load(filename)['arr_0'].astype(np.int16)
    patid = filename.split('/')[-1]
    ndf = nodules_df[nodules_df['patientid'] == patid]
    X, y, rois, stats = common.load_patient(patient_data,
                                            ndf,
                                            output_rois=True,
                                            thickness=1)
    label = int(label_df[label_df['id'] == patid]['cancer'])
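    # Broadcast the patient-level cancer label to every ROI extracted from this patient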
    y = [label] * len(X)
    logging.info("Patient: %s, cancer:%d, stats: %s" % (patid, label, stats))
    return X, y, stats


common.multiproc_crop_generator(filenames_train,
                                os.path.join(PATCHES_PATH,
                                             'dl3_v1_x_train.npz'),
                                os.path.join(PATCHES_PATH,
                                             'dl3_v1_y_train.npz'),
                                __load_and_store,
                                parallel=True)

common.multiproc_crop_generator(filenames_test,
                                os.path.join(PATCHES_PATH,
                                             'dl3_v1_x_test.npz'),
                                os.path.join(PATCHES_PATH,
                                             'dl3_v1_y_test.npz'),
                                __load_and_store,
                                parallel=True)

### TRAINING -------------------------------------------------------------------------------------------------------

# Data augmentation generator
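
The training code itself is cut off in this excerpt. Assuming the project trains its networks with Keras (an assumption, not confirmed by the snippet), the data augmentation generator announced above could be a standard ImageDataGenerator along these lines:

from keras.preprocessing.image import ImageDataGenerator

# Illustrative augmentation settings only; the parameters actually used for training are not shown here.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True)

# Hypothetical usage with the patch arrays generated above:
# model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=32),
#                     steps_per_epoch=len(x_train) // 32)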