Example #1
def make_crops(bids_folder, metadata_file, out_dir, out_file, new_size):
    """
    Create a new dataset of crops from an existing dataset.

    Given a folder of images, and a csv file containing the info
    about the dataset, this function makes random crops of the images and
    save them to disk
    bids_folder: folder where the images are stored (in BIDS format)
    metadata_file: path to csv file where the info about images is stored.
    out_dir: directory where to save the cropped images
    out_file: file where to store the info about the crops
    resample_size: size to resample the images.
    """
    df_metadata = pd.read_csv(metadata_file)
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # For each entry in the metadata file
    rows_list = []
    for subj in df_metadata.itertuples():
        # locate the corresponding MRI scan
        ptid = subj.PTID
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids,
                              extensions='.nii.gz',
                              modality='anat',
                              session='M00',
                              return_type='file')[0]
        except IndexError:
            print('Ignoring subject ' + ptid)
            continue
        # Actually perform the cropping
        new_crops = slice_generator(file, out_dir, new_size)
        # Iterate over all the new crops
        for crop in new_crops:
            row = {"path": crop, "PTID": subj.PTID, "DX": subj.DX}
            rows_list.append(row)
    # Save the new info about the image in df_crop
    df_crop = pd.DataFrame(rows_list)
    df_crop.to_csv(out_file)
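# A minimal usage sketch for make_crops. The paths and crop size below are
# hypothetical; the metadata CSV is assumed to have PTID and DX columns, as
# used inside the function:
#
# make_crops(bids_folder='/data/ADNI_BIDS',
#            metadata_file='/data/adni_metadata.csv',
#            out_dir='/data/crops/',
#            out_file='/data/crops_info.csv',
#            new_size=(96, 96, 96))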
Example #2
# Check platform
if platform == 'darwin':
    is_hpc = False
else:
    is_hpc = True

#
# Initial checks
#

# Check that the BIDS directory is not empty (TODO)
project_root = args.in_dir[0]
print(project_root)
layout = bids.layout.BIDSLayout([(project_root, 'bids')],
                                exclude='derivatives/')
assert len(layout.get_subjects()) > 0, "No subjects in directory!"

# Create img list
files = layout.get(extensions='.nii.gz', modality='anat')

n_total_jobs = int(args.num_threads[0])

if not args.histmatch:
    assert args.template_file is None and not args.template_norm, "Unnecessary template if not histmatch"

if args.histmatch:
    assert args.template_file is not None, "Need template for histogram matching"

if args.template_file is not None:
    assert os.path.exists(args.template_file[0]), "Template file not found"

# Get list of input images.
# metadata = pd.read_csv(args.in_metadata[0]).dropna()
# img_list = metadata["MRI_PATH"].values
def main(config_file, out_dir_name):
    """
    Execute Main function for the classifier.

    Trains the model with a given dataset.
    """

    t0 = time.time()
    # Load the configuration of the given experiment.
    config = configparser.ConfigParser()
    config.read(config_file)

    # Create output directory to store results
    out_dir = (config["folders"]["EXPERIMENTS"] +
               out_dir_name + os.sep)
    # Create out directory
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load training parameters
    batch_size = int(config["model"]["batch_size"])
    epochs = int(config["model"]["epochs"])
    weights_file = config["model"]["weights"]
    mean_file = config["model"]["mean"]
    metadata_file = config["data"]["metadata"]
    bids_folder = config["data"]["bids_folder"]

    # Load BIDS layout
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # Divide between train and test
    df_metadata = pd.read_csv(metadata_file)

    # add a new column with the path of each file
    paths = []
    for subj in df_metadata.itertuples():
        # locate the corresponding MRI scan
        ptid = subj.PTID
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids, extensions='.nii.gz',
                              modality='anat', session='M00',
                              return_type='file')[0]
            paths.append(file)
        except IndexError:
            print('Ignoring subject ' + ptid)
            paths.append(np.nan)

    # remove subjects with missing entries
    df_metadata['path'] = paths
    df_metadata = df_metadata.dropna()
    print(len(df_metadata))

    # Get list of unique subjects
    subj = df_metadata.PTID.values
    dx = df_metadata.DX.values
    s = list(set(zip(subj, dx)))
    # DEBUGGING: select a small subset here if needed
    x, y = zip(*s)
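    # (x, y) now hold one (PTID, DX) pair per unique subject, so the split
    # below is done per subject and no subject ends up in both train and test.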

    # Get train/test split
    # (these are subject IDs, not file paths)
    rd_seed = 1714
    S_train, S_test, DX_train, DX_test = train_test_split(x, y, test_size=.2, random_state=rd_seed)

    # Preprocess labels
    label_dict = dict(zip(["NL", "MCI", "AD"], range(0, 3)))

    # GET CORRESPONDING DX AND PATHS OF SAID SUBJECTS
    X_train = df_metadata[df_metadata["PTID"].isin(S_train)].path.values
    Y_train = df_metadata[df_metadata["PTID"].isin(S_train)].DX.map(label_dict, na_action='ignore').values

    X_test = df_metadata[df_metadata["PTID"].isin(S_test)].path.values
    Y_test = df_metadata[df_metadata["PTID"].isin(S_test)].DX.map(label_dict, na_action='ignore').values

    # Create sequences for train/test (no real need for validation here)
    BrainSeq = BrainSequence(X_train, to_categorical(Y_train), batch_size, norm='hist',
                             norm_param=mean_file, train=True, crop=True)
    BrainSeq_test = BrainSequence(X_test, to_categorical(Y_test), batch_size, norm='hist',
                                  norm_param=mean_file, train=False, crop=True)

    # Load model

    model = HighRes3DNet_base(input_shape=(96, 96, 96, 1), weights=True, summary=True,
                              weights_dir=config['model']['weights'])

    # Extract representations and train the simple model
    img_train = extractRepresentation(model, BrainSeq)
    img_test = extractRepresentation(model, BrainSeq_test)

    ad_svm = SVC()

    ad_lr = LogisticRegression()
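    # The example is truncated here. A hedged sketch of how the extracted
    # representations might be used to fit and score the two classifiers
    # (assuming extractRepresentation returns 2-D feature arrays and that
    # Y_train / Y_test are the integer labels mapped above):
    #
    # ad_svm.fit(img_train, Y_train)
    # ad_lr.fit(img_train, Y_train)
    # print('SVM accuracy:', ad_svm.score(img_test, Y_test))
    # print('LR accuracy:', ad_lr.score(img_test, Y_test))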
if platform == 'darwin':
    is_hpc = False
else:
    is_hpc = True
#
# Initial checks
#
os.environ["ANTSPATH"] = "/homedtic/gmarti/LIB/ANTsbin/bin"
os.environ["ANTSSCRIPTS"] = "/homedtic/gmarti/LIB/ANTs/Scripts"


# Check that the BIDS directory is not empty (TODO)
project_root = args.in_dir[0]
layout = bids.layout.BIDSLayout([(project_root, 'bids')], exclude='derivatives/')

assert len(layout.get_subjects()) > 0, "No subjects in directory!"

# Create img list
files = layout.get(extensions='.nii.gz', modality='anat', session='M00')

# Keep only the baselines and the files from the subject_file
df_subjects = pd.read_csv(args.subject_file[0])
files_true = [x for x in layout.get_subjects() if str(x[4:7] + "_S_" + x[8:12]) in df_subjects.PTID.values]
print(len(files))
print(len(files_true))

# create output directory
out_dir = args.output_path[0]
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Main loop
#
wait_jobs = [os.path.join(os.environ['ANTSSCRIPTS'], "waitForSlurmJobs.pl"), '0', '10']
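# A hedged sketch of how the wait_jobs command above might be used once all
# Slurm jobs have been submitted (job_ids is a hypothetical list of the
# submitted job IDs; waitForSlurmJobs.pl blocks until they finish):
#
# import subprocess
# subprocess.call(wait_jobs + job_ids)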
Example #5
def train(config_file, out_dir_name):
    """
    Execute Main function for training.

    Trains the model with a given dataset.
    """
    t0 = time.time()
    # Load the configuration of the given experiment.
    config = configparser.ConfigParser()
    config.read(config_file)

    # Create output directory to store results
    out_dir = (config["folders"]["EXPERIMENTS"] +
               out_dir_name + os.sep)
    # Create out directory
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load training parameters
    batch_size = int(config["model"]["batch_size"])
    epochs = int(config["model"]["epochs"])
    weights_file = config["model"]["weights"]
    mean_file = config["model"]["mean"]
    metadata_file = config["data"]["metadata"]
    bids_folder = config["data"]["bids_folder"]

    # Load BIDS layout
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # Divide between test, train and validation
    df_metadata = pd.read_csv(metadata_file)

    # add a new column with the path of each file
    paths = []
    for subj in df_metadata.itertuples():
        # locate the corresponding MRI scan
        ptid = subj.PTID
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids, extensions='.nii.gz',
                              modality='anat', session='M00',
                              return_type='file')[0]
            paths.append(file)
        except IndexError:
            print('Ignoring subject ' + ptid)
            paths.append(np.nan)

    # remove subjects with missing entries
    df_metadata['path'] = paths
    df_metadata = df_metadata.dropna()
    print(len(df_metadata))

    # Get list of unique subjects
    subj = df_metadata.PTID.values
    dx = df_metadata.DX.values
    s = list(set(zip(subj, dx)))
    # DEBUGGING: select a small subset here if needed
    x, y = zip(*s)

    # Get train/test/val
    # (these are subject IDs, not file paths)
    rd_seed = 1714
    S_train, S_test, DX_train, DX_test = train_test_split(x, y, test_size=.2, random_state=rd_seed)
    S_train, S_val, DX_train, DX_val = train_test_split(S_train, DX_train, test_size=.2, random_state=rd_seed)

    # Preprocess labels
    label_dict = dict(zip(["NL", "MCI", "AD"], range(0, 3)))

    # GET CORRESPONDING DX AND PATHS OF SAID SUBJECTS
    X_train = df_metadata[df_metadata["PTID"].isin(S_train)].path.values
    Y_train = df_metadata[df_metadata["PTID"].isin(S_train)].DX.map(label_dict, na_action='ignore').values

    X_valid = df_metadata[df_metadata["PTID"].isin(S_val)].path.values
    Y_valid = df_metadata[df_metadata["PTID"].isin(S_val)].DX.map(label_dict, na_action='ignore').values

    X_test = df_metadata[df_metadata["PTID"].isin(S_test)].path.values
    Y_test = df_metadata[df_metadata["PTID"].isin(S_test)].DX.map(label_dict, na_action='ignore').values

    # Test: create list of images
    # X_train_img = [load_img(x) for x in X_train]
    # X_train_val = [load_img(x) for x in X_valid]
    # Create the data generators
    BrainSeq = BrainSequence(X_train, to_categorical(Y_train), batch_size, norm='hist',
                             norm_param=mean_file, train=True, crop=True)
    BrainSeq_val = BrainSequence(X_valid, to_categorical(Y_valid), batch_size, norm='hist',
                                 norm_param=mean_file, train=False, crop=True)

    # Initialize model
    model = HighRes3DNet_cs(input_shape=(96, 96, 96, 1), weights=True, summary=True,
                            weights_dir=config['model']['weights'])
    # sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    adam = Adam(lr=0.001, amsgrad=False)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    # Train
    callb = TensorBoard(log_dir=out_dir + 'logs/', histogram_freq=0, batch_size=batch_size,
                        write_graph=True, write_grads=False, write_images=False,
                        embeddings_freq=0, embeddings_layer_names=None,
                        embeddings_metadata=None, embeddings_data=None)

    model.fit_generator(BrainSeq,
                        steps_per_epoch=None,
                        epochs=epochs,
                        shuffle=True,
                        callbacks=[callb],
                        verbose=1,
                        validation_data=BrainSeq_val)

    # TODO: Validate the model with a custom predictive function
    BrainSeq_test = BrainSequence(X_test, to_categorical(Y_test), batch_size, norm='hist',
                                  norm_param=mean_file, train=False, crop=True)

    # evaluate
    score = model.evaluate_generator(BrainSeq_test)
    print(score)

    print('Process finished.')
    t1 = time.time()
    print('Time to compute the script: ', t1 - t0)
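# A hedged sketch of the experiment configuration file that train() reads via
# configparser. Section and key names are taken from the code above; the
# values are hypothetical:
#
# [folders]
# EXPERIMENTS = /path/to/experiments/
#
# [model]
# batch_size = 4
# epochs = 40
# weights = /path/to/pretrained_weights.h5
# mean = /path/to/histogram_landmarks.npy
#
# [data]
# metadata = /path/to/adni_metadata.csv
# bids_folder = /path/to/ADNI_BIDS/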
out_dir = args.in_dir[0] + 'derivatives/' + args.out_name[0]
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Main loop
#

antsregistration_path = os.path.join(os.environ['ANTSPATH'],
                                     'antsRegistration')
wait_jobs = [
    os.path.join(os.environ['ANTSSCRIPTS'], "waitForSlurmJobs.pl"), '0', '10'
]

for s in subjects:
    # Get baseline subject
    files = layout.get(subject=s, session='M00', extensions='.nii.gz')
    try:
        baseline = files[0]
    except IndexError:
        print('Error in ' + s + ', no baseline.')
        continue
    baseline_path = baseline.filename
    # Get all images for this subject
    files = layout.get(subject=s, extensions='.nii.gz')
    for img in files:
        if img.session == baseline.session:
            # Skip the baseline session itself
            continue

        img_path = img.filename
        img_file = os.path.basename(img_path)
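        # The snippet is truncated here. A hedged sketch of how each follow-up
        # image might be registered to its baseline and submitted as a Slurm
        # job; the antsRegistration flags and output prefix are illustrative
        # assumptions, not the author's actual pipeline:
        #
        # import subprocess
        # out_prefix = os.path.join(out_dir, img_file.replace('.nii.gz', '_to_baseline_'))
        # cmd = [antsregistration_path, '--dimensionality', '3',
        #        '--output', out_prefix,
        #        '--transform', 'Rigid[0.1]',
        #        '--metric', 'MI[' + baseline_path + ',' + img_path + ',1,32]',
        #        '--convergence', '[1000x500x250,1e-6,10]',
        #        '--shrink-factors', '4x2x1',
        #        '--smoothing-sigmas', '2x1x0vox']
        # subprocess.call(['sbatch', '--wrap', ' '.join(cmd)])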
Example #7
# Check platform

if platform == 'darwin':
    is_hpc = False
else:
    is_hpc = True

# Check that the BIDS directory is not empty (TODO)
project_root = args.input_dir[0]
print(project_root)
layout = bids.layout.BIDSLayout([(project_root, 'bids')])
assert len(layout.get_subjects()) > 0, "No subjects in directory!"

# Create img list
files = layout.get(extensions=args.input_suffix[0], modality='anat')

# create output directory
# output directory is of the form:
out_dir = args.out_dir[0]
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

wait_jobs = [
    os.path.join(os.environ['ANTSSCRIPTS'], "waitForSGEQJobs.pl"), '0', '30'
]

for img in files:
    img_path = img.filename
    img_file = os.path.basename(img_path)
    img_name = img_file.split(args.input_suffix[0])[0]
        MRI_BIDS.append("No")
        continue
    f = glob.glob(ADNI_DIR + subj + "/*/*/*/*I" + imageid + ".nii")
    # if found, add information to columns
    if f:
        MRI_ADNI.append("Yes")
    # If not, add missing data
    else:
        MRI_ADNI.append("No")

    # Test for BIDS
    # Get session name
    session = ''
    if row.VISCODE == 'bl':
        session = 'M00'
    else:
        session = 'M' + row.VISCODE[1:]
    patient_id = 'ADNI' + subj[0:3] + 'S' + subj[6:]
    imgs = layout.get(subject=patient_id, modality='anat', session=session)
    # If exists
    if imgs:
        MRI_BIDS.append("Yes")
    else:
        MRI_BIDS.append("No")

df_metadata.loc[:, "MRI_ADNI"] = MRI_ADNI
df_metadata.loc[:, "PET_ADNI"] = PET_ADNI
df_metadata.loc[:, "MRI_BIDS"] = MRI_BIDS
df_metadata.loc[:, "PET_BIDS"] = PET_BIDS
df_metadata.to_csv("summary_files.csv")
Example #9
def main(config_file, out_dir_name):
    """
    Execute Main function for training.

    Trains the model with a given dataset.
    """
    t0 = time.time()
    rd_seed = 1714
    np.random.seed(rd_seed)
    # Load the configuration of the given experiment.
    config = configparser.ConfigParser()
    config.read(config_file)

    # Create output directory to store results
    out_dir = (config["folders"]["EXPERIMENTS"] + out_dir_name + os.sep)
    # Create out directory
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load training parameters
    batch_size = int(config["model"]["batch_size"])
    epochs = int(config["model"]["epochs"])
    metadata_file = config["data"]["metadata"]
    bids_folder = config["data"]["bids_folder"]

    # Load BIDS layout
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # Load data (THIS NEEDS TO BE CHANGED)
    # ALL THE DATA LOADING MUST BE CHANGED
    # Divide between train and test
    df_metadata = pd.read_csv(metadata_file)

    # add a new column with the path of each file
    paths = []
    for subj in df_metadata.itertuples():
        # locate the corresponding MRI scan
        ptid = subj.PTID

        # If it is not NL or AD, ignore
        if subj.DX not in ['NL', 'AD']:
            paths.append(np.nan)
            continue
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids,
                              extensions='.nii.gz',
                              modality='anat',
                              session='M00',
                              return_type='file')[0]
            paths.append(file)
        except IndexError:
            print('Ignoring subject ' + ptid)
            paths.append(np.nan)

    # remove subjects with missing entries
    df_metadata['path'] = paths
    df_metadata = df_metadata.dropna()
    print(len(df_metadata))

    # Get list of unique subjects
    subj = df_metadata.PTID.values
    dx = df_metadata.DX.values
    s = list(set(zip(subj, dx)))
    x, y = zip(*s)

    # Get train/test split
    # (these are subject IDs, not file paths)
    S_train, S_test, DX_train, DX_test = train_test_split(x,
                                                          y,
                                                          test_size=.1,
                                                          random_state=rd_seed)

    # Preprocess labels
    label_dict = dict(zip(["NL", "AD"], range(0, 2)))

    # GET CORRESPONDING DX AND PATHS OF SAID SUBJECTS
    X_train = df_metadata[df_metadata["PTID"].isin(S_train)].path.values
    Y_train = df_metadata[df_metadata["PTID"].isin(S_train)].DX.map(
        label_dict, na_action='ignore').values

    X_test = df_metadata[df_metadata["PTID"].isin(S_test)].path.values
    Y_test = df_metadata[df_metadata["PTID"].isin(S_test)].DX.map(
        label_dict, na_action='ignore').values

    # Create sequences for train/test (no real need for validation here)
    BrainSeq = BrainSequence(X_train,
                             to_categorical(Y_train),
                             batch_size,
                             norm='none',
                             train=True,
                             crop=False,
                             new_size=(193, 229, 193))
    BrainSeq_val = BrainSequence(X_test,
                                 to_categorical(Y_test),
                                 batch_size,
                                 norm='none',
                                 train=False,
                                 crop=False,
                                 new_size=(193, 229, 193))

    # Load data (THIS NEEDS TO BE CHANGED)

    # Create model
    model = CNN3D(input_shape=(193, 229, 193, 1))

    opt = Adam(lr=0.0001)
    # Compile model

    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Create callbacks

    # Model checkpoint to save the training results
    checkpointer = ModelCheckpoint(filepath=out_dir + "model_trained.h5",
                                   verbose=0,
                                   save_best_only=True,
                                   save_weights_only=True)

    # CSVLogger to save the training results in a csv file
    csv_logger = CSVLogger(out_dir + 'csv_log.csv', separator=';')

    # Callback to reduce learning rate
    def lr_scheduler(epoch, lr):
        if epoch == 15:
            return lr
        elif epoch == 25:
            return lr * .1
        elif epoch == 35:
            return lr * .1
        else:
            return lr

    lrs = LearningRateScheduler(lr_scheduler)
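    # Note: with the initial Adam learning rate of 1e-4 set above, this
    # schedule keeps lr at 1e-4 until epoch 25, drops it to 1e-5 there and to
    # 1e-6 at epoch 35; the epoch-15 branch leaves lr unchanged.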

    # Callback to stop training if the loss becomes NaN
    NanLoss = TerminateOnNaN()

    callbacks = [checkpointer, csv_logger, NanLoss, lrs]

    # Train model
    model.fit_generator(BrainSeq,
                        steps_per_epoch=None,
                        epochs=epochs,
                        shuffle=True,
                        callbacks=callbacks,
                        verbose=1,
                        validation_data=BrainSeq_val)

    # The model is saved by the ModelCheckpoint callback

    print('The end.')
    t1 = time.time()
    print('Time to compute the script: ', t1 - t0)
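# A minimal, hypothetical entry point for the example above; the original
# snippet does not show how main() is invoked, so the argument handling here
# is an assumption:
#
# if __name__ == '__main__':
#     import sys
#     main(sys.argv[1], sys.argv[2])  # config_file, out_dir_name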