def make_crops(bids_folder, metadata_file, out_dir, out_file, new_size):
    """
    Create a new dataset of crops from an existing dataset.

    Given a folder of images and a csv file containing the info about the
    dataset, this function makes random crops of the images and saves them
    to disk.

    bids_folder: folder where the images are stored (in BIDS format).
    metadata_file: path to csv file where the info about images is stored.
    out_dir: directory where to save the cropped images.
    out_file: file where to store the info about the crops.
    new_size: size of the crops.
    """
    df_metadata = pd.read_csv(metadata_file)
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # For each entry in the metadata file
    rows_list = []
    for subj in df_metadata.itertuples():
        # Locate the corresponding MRI scan
        ptid = subj.PTID
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids, extensions='.nii.gz',
                              modality='anat', session='M00',
                              return_type='file')[0]
        except IndexError:
            print('Ignoring subject ' + ptid)
            continue

        # Actually perform the cropping
        new_crops = slice_generator(file, out_dir, new_size)
        # Iterate over all the new crops
        for crop in new_crops:
            row = {"path": crop, "PTID": subj.PTID, "DX": subj.DX}
            rows_list.append(row)

    # Save the new info about the crops in df_crop
    df_crop = pd.DataFrame(rows_list)
    df_crop.to_csv(out_file)
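
# Example usage of make_crops (illustrative only; the paths and crop size
# below are hypothetical, not taken from the project configuration):
# make_crops(bids_folder='/data/ADNI/BIDS',
#            metadata_file='/data/ADNI/metadata.csv',
#            out_dir='/data/ADNI/crops/',
#            out_file='/data/ADNI/crops_info.csv',
#            new_size=(96, 96, 96))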
else:
    is_hpc = True

#
# Initial checks
#

# Check that bids directory is not empty (TODO)
project_root = args.in_dir[0]
print(project_root)
layout = bids.layout.BIDSLayout([(project_root, 'bids')],
                                exclude='derivatives/')
assert len(layout.get_subjects()) > 0, "No subjects in directory!"

# Create img list
files = layout.get(extensions='.nii.gz', modality='anat')
n_total_jobs = int(args.num_threads[0])

if not args.histmatch:
    assert args.template_file is None and not args.template_norm, \
        "Unnecessary template if not histmatch"
if args.histmatch:
    assert args.template_file is not None, \
        "Need template for histogram matching"
if args.template_file is not None:
    assert os.path.exists(args.template_file[0]), "Template file not found"

# Get list of input images.
# metadata = pd.read_csv(args.in_metadata[0]).dropna()
# img_list = metadata["MRI_PATH"].values
def main(config_file, out_dir_name):
    """
    Execute main function for the classifier.

    Trains the model with a given dataset.
    """
    t0 = time.time()

    # Load the configuration of a given experiment.
    config = configparser.ConfigParser()
    config.read(config_file)

    # Create output directory to store results
    out_dir = (config["folders"]["EXPERIMENTS"] + out_dir_name + os.sep)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load training parameters
    batch_size = int(config["model"]["batch_size"])
    epochs = int(config["model"]["epochs"])
    weights_file = config["model"]["weights"]
    mean_file = config["model"]["mean"]
    metadata_file = config["data"]["metadata"]
    bids_folder = config["data"]["bids_folder"]

    # Load BIDS layout
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # Divide between test, train and validation
    df_metadata = pd.read_csv(metadata_file)

    # Add a new column with the path of each file
    paths = []
    for subj in df_metadata.itertuples():
        # Locate the corresponding MRI scan
        ptid = subj.PTID
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids, extensions='.nii.gz',
                              modality='anat', session='M00',
                              return_type='file')[0]
            paths.append(file)
        except IndexError:
            print('Ignoring subject ' + ptid)
            paths.append(np.nan)

    # Remove subjects with missing entries
    df_metadata['path'] = paths
    df_metadata = df_metadata.dropna()
    print(len(df_metadata))

    # Get list of unique subjects
    subj = df_metadata.PTID.values
    dx = df_metadata.DX.values
    s = list(set(zip(subj, dx)))
    # DEBUGGING: SELECT SMALL AMOUNT OF THINGS
    x, y = zip(*s)

    # Get train/test split (these are subject names)
    rd_seed = 1714
    S_train, S_test, DX_train, DX_test = train_test_split(
        x, y, test_size=.2, random_state=rd_seed)

    # Preprocess labels
    label_dict = dict(zip(["NL", "MCI", "AD"], range(0, 3)))

    # Get corresponding DX and paths of said subjects
    X_train = df_metadata[df_metadata["PTID"].isin(S_train)].path.values
    Y_train = df_metadata[df_metadata["PTID"].isin(S_train)].DX.map(
        label_dict, na_action='ignore').values
    X_test = df_metadata[df_metadata["PTID"].isin(S_test)].path.values
    Y_test = df_metadata[df_metadata["PTID"].isin(S_test)].DX.map(
        label_dict, na_action='ignore').values

    # Create sequences of train/test (no real need for validation here)
    BrainSeq = BrainSequence(X_train, to_categorical(Y_train), batch_size,
                             norm='hist', norm_param=mean_file,
                             train=True, crop=True)
    BrainSeq_test = BrainSequence(X_test, to_categorical(Y_test), batch_size,
                                  norm='hist', norm_param=mean_file,
                                  train=False, crop=True)

    # Load model
    model = HighRes3DNet_base(input_shape=(96, 96, 96, 1), weights=True,
                              summary=True,
                              weights_dir=config['model']['weights'])

    # Extract representations and train the simple model
    img_train = extractRepresentation(model, BrainSeq)
    img_test = extractRepresentation(model, BrainSeq_test)

    ad_svm = SVC()
    ad_lr = LogisticRegression()
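
    # A minimal sketch of how the extracted representations could be used to
    # fit the two classifiers above. It assumes extractRepresentation returns
    # one feature vector per entry of X_train / X_test, in the original order;
    # if BrainSequence shuffles or crops change the ordering, the labels would
    # have to be realigned first.
    # ad_svm.fit(img_train, Y_train)
    # ad_lr.fit(img_train, Y_train)
    # print('SVM test accuracy:', ad_svm.score(img_test, Y_test))
    # print('LogReg test accuracy:', ad_lr.score(img_test, Y_test))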
    is_hpc = True

#
# Initial checks
#
os.environ["ANTSPATH"] = "/homedtic/gmarti/LIB/ANTsbin/bin"
os.environ["ANTSSCRIPTS"] = "/homedtic/gmarti/LIB/ANTs/Scripts"

# Check that bids directory is not empty (TODO)
project_root = args.in_dir[0]
layout = bids.layout.BIDSLayout([(project_root, 'bids')],
                                exclude='derivatives/')
assert len(layout.get_subjects()) > 0, "No subjects in directory!"

# Create img list
files = layout.get(extensions='.nii.gz', modality='anat', session='M00')

# Keep only the baselines and the files from the subject_file
df_subjects = pd.read_csv(args.subject_file[0])
files_true = [x for x in layout.get_subjects()
              if str(x[4:7] + "_S_" + x[8:12]) in df_subjects.PTID.values]
print(len(files))
print(len(files_true))

# Create output directory
out_dir = args.output_path[0]
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Main loop
#
wait_jobs = [os.path.join(os.environ['ANTSSCRIPTS'], "waitForSlurmJobs.pl"),
             '0', '10']
def train(config_file, out_dir_name):
    """
    Execute main function for training.

    Trains the model with a given dataset.
    """
    t0 = time.time()

    # Load the configuration of a given experiment.
    config = configparser.ConfigParser()
    config.read(config_file)

    # Create output directory to store results
    out_dir = (config["folders"]["EXPERIMENTS"] + out_dir_name + os.sep)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load training parameters
    batch_size = int(config["model"]["batch_size"])
    epochs = int(config["model"]["epochs"])
    weights_file = config["model"]["weights"]
    mean_file = config["model"]["mean"]
    metadata_file = config["data"]["metadata"]
    bids_folder = config["data"]["bids_folder"]

    # Load BIDS layout
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    # Divide between test, train and validation
    df_metadata = pd.read_csv(metadata_file)

    # Add a new column with the path of each file
    paths = []
    for subj in df_metadata.itertuples():
        # Locate the corresponding MRI scan
        ptid = subj.PTID
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids, extensions='.nii.gz',
                              modality='anat', session='M00',
                              return_type='file')[0]
            paths.append(file)
        except IndexError:
            print('Ignoring subject ' + ptid)
            paths.append(np.nan)

    # Remove subjects with missing entries
    df_metadata['path'] = paths
    df_metadata = df_metadata.dropna()
    print(len(df_metadata))

    # Get list of unique subjects
    subj = df_metadata.PTID.values
    dx = df_metadata.DX.values
    s = list(set(zip(subj, dx)))
    # DEBUGGING: SELECT SMALL AMOUNT OF THINGS
    x, y = zip(*s)

    # Get train/test/val splits (these are subject names)
    rd_seed = 1714
    S_train, S_test, DX_train, DX_test = train_test_split(
        x, y, test_size=.2, random_state=rd_seed)
    S_train, S_val, DX_train, DX_val = train_test_split(
        S_train, DX_train, test_size=.2, random_state=rd_seed)

    # Preprocess labels
    label_dict = dict(zip(["NL", "MCI", "AD"], range(0, 3)))

    # Get corresponding DX and paths of said subjects
    X_train = df_metadata[df_metadata["PTID"].isin(S_train)].path.values
    Y_train = df_metadata[df_metadata["PTID"].isin(S_train)].DX.map(
        label_dict, na_action='ignore').values
    X_valid = df_metadata[df_metadata["PTID"].isin(S_val)].path.values
    Y_valid = df_metadata[df_metadata["PTID"].isin(S_val)].DX.map(
        label_dict, na_action='ignore').values
    X_test = df_metadata[df_metadata["PTID"].isin(S_test)].path.values
    Y_test = df_metadata[df_metadata["PTID"].isin(S_test)].DX.map(
        label_dict, na_action='ignore').values

    # Test: create list of images
    # X_train_img = [load_img(x) for x in X_train]
    # X_train_val = [load_img(x) for x in X_valid]

    # Create generators
    BrainSeq = BrainSequence(X_train, to_categorical(Y_train), batch_size,
                             norm='hist', norm_param=mean_file,
                             train=True, crop=True)
    BrainSeq_val = BrainSequence(X_valid, to_categorical(Y_valid), batch_size,
                                 norm='hist', norm_param=mean_file,
                                 train=False, crop=True)

    # Initialize model
    model = HighRes3DNet_cs(input_shape=(96, 96, 96, 1), weights=True,
                            summary=True,
                            weights_dir=config['model']['weights'])

    # sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    adam = Adam(lr=0.001, amsgrad=False)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train
    callb = TensorBoard(log_dir=out_dir + 'logs/', histogram_freq=0,
                        batch_size=batch_size, write_graph=True,
                        write_grads=False, write_images=False,
                        embeddings_freq=0, embeddings_layer_names=None,
                        embeddings_metadata=None, embeddings_data=None)

    model.fit_generator(BrainSeq, steps_per_epoch=None, epochs=epochs,
                        shuffle=True, callbacks=[callb], verbose=1,
                        validation_data=BrainSeq_val)

    # TODO: Validate the model with a custom predictive function
    BrainSeq_test = BrainSequence(X_test, to_categorical(Y_test), batch_size,
                                  norm='hist', norm_param=mean_file,
                                  train=False, crop=True)

    # Evaluate
    score = model.evaluate_generator(BrainSeq_test)
    print(score)

    print('Process finished.')
    t1 = time.time()
    print('Time to compute the script: ', t1 - t0)
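
    # A minimal sketch for the TODO above: per-class metrics from explicit
    # predictions instead of evaluate_generator. It assumes BrainSequence
    # yields the samples of X_test in order and without shuffling when
    # train=False, and it would require
    # `from sklearn.metrics import classification_report`.
    # y_prob = model.predict_generator(BrainSeq_test)
    # y_pred = np.argmax(y_prob, axis=1)
    # print(classification_report(Y_test, y_pred))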
out_dir = args.in_dir[0] + 'derivatives/' + args.out_name[0]
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Main loop
#
antsregistration_path = os.path.join(os.environ['ANTSPATH'],
                                     'antsRegistration')
wait_jobs = [os.path.join(os.environ['ANTSSCRIPTS'], "waitForSlurmJobs.pl"),
             '0', '10']

for s in subjects:
    # Get baseline image of the subject
    files = layout.get(subject=s, session='M00', extensions='.nii.gz')
    try:
        baseline = files[0]
    except IndexError:
        print('Error in ' + s + ', no baseline.')
        continue
    baseline_path = baseline.filename

    # Get list of all images of the subject
    files = layout.get(subject=s, extensions='.nii.gz')
    for img in files:
        if img.session == baseline.session:
            # Skip the baseline itself
            continue
        img_path = img.filename
        img_file = os.path.basename(img_path)
#
# Check platform
if platform == 'darwin':
    is_hpc = False
else:
    is_hpc = True

# Check that bids directory is not empty (TODO)
project_root = args.input_dir[0]
print(project_root)
layout = bids.layout.BIDSLayout([(project_root, 'bids')])
assert len(layout.get_subjects()) > 0, "No subjects in directory!"

# Create img list
files = layout.get(extensions=args.input_suffix[0], modality='anat')

# Create output directory
# output directory is of the form:
out_dir = args.out_dir[0]
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

wait_jobs = [os.path.join(os.environ['ANTSSCRIPTS'], "waitForSGEQJobs.pl"),
             '0', '30']

for img in files:
    img_path = img.filename
    img_file = os.path.basename(img_path)
    img_name = img_file.split(args.input_suffix[0])[0]
MRI_BIDS.append("No") continue f = glob.glob(ADNI_DIR + subj + "/*/*/*/*I" + imageid + ".nii") # if found, add information to columns if f: MRI_ADNI.append("Yes") # If not, add missing data else: MRI_ADNI.append("No") # Test for BIDS # Get session name session = '' if row.VISCODE == 'bl': session = 'M00' else: session = 'M' + row.VISCODE[1:] patient_id = 'ADNI' + subj[0:3] + 'S' + subj[6:] imgs = layout.get(subject=patient_id, modality='anat', session=session) # If exists if imgs: MRI_BIDS.append("Yes") else: MRI_BIDS.append("No") df_metadata.loc[:, "MRI_ADNI"] = MRI_ADNI df_metadata.loc[:, "PET_ADNI"] = PET_ADNI df_metadata.loc[:, "MRI_BIDS"] = MRI_BIDS df_metadata.loc[:, "PET_BIDS"] = PET_BIDS df_metadata.to_csv("summary_files.csv")
def main(config_file, out_dir_name):
    """
    Execute main function for training.

    Trains the model with a given dataset.
    """
    t0 = time.time()
    rd_seed = 1714
    np.random.seed(rd_seed)

    # Load the configuration of a given experiment.
    config = configparser.ConfigParser()
    config.read(config_file)

    # Create output directory to store results
    out_dir = (config["folders"]["EXPERIMENTS"] + out_dir_name + os.sep)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load training parameters
    batch_size = int(config["model"]["batch_size"])
    epochs = int(config["model"]["epochs"])
    metadata_file = config["data"]["metadata"]
    bids_folder = config["data"]["bids_folder"]

    # Load BIDS layout
    layout = bids.layout.BIDSLayout([(bids_folder, 'bids')])

    ## Load data (THIS NEEDS TO BE CHANGED)
    # ALL THE DATA LOADING MUST BE CHANGED
    # Divide between test, train and validation
    df_metadata = pd.read_csv(metadata_file)

    # Add a new column with the path of each file
    paths = []
    for subj in df_metadata.itertuples():
        # Locate the corresponding MRI scan
        ptid = subj.PTID
        # If it is not NL or AD, ignore
        if subj.DX not in ['NL', 'AD']:
            paths.append(np.nan)
            continue
        ptid_bids = 'ADNI' + ptid[0:3] + 'S' + ptid[6:]
        # Hardcoded baselines
        try:
            file = layout.get(subject=ptid_bids, extensions='.nii.gz',
                              modality='anat', session='M00',
                              return_type='file')[0]
            paths.append(file)
        except IndexError:
            print('Ignoring subject ' + ptid)
            paths.append(np.nan)

    # Remove subjects with missing entries
    df_metadata['path'] = paths
    df_metadata = df_metadata.dropna()
    print(len(df_metadata))

    # Get list of unique subjects
    subj = df_metadata.PTID.values
    dx = df_metadata.DX.values
    s = list(set(zip(subj, dx)))
    x, y = zip(*s)

    # Get train/test split (these are subject names)
    S_train, S_test, DX_train, DX_test = train_test_split(
        x, y, test_size=.1, random_state=rd_seed)

    # Preprocess labels
    label_dict = dict(zip(["NL", "AD"], range(0, 2)))

    # Get corresponding DX and paths of said subjects
    X_train = df_metadata[df_metadata["PTID"].isin(S_train)].path.values
    Y_train = df_metadata[df_metadata["PTID"].isin(S_train)].DX.map(
        label_dict, na_action='ignore').values
    X_test = df_metadata[df_metadata["PTID"].isin(S_test)].path.values
    Y_test = df_metadata[df_metadata["PTID"].isin(S_test)].DX.map(
        label_dict, na_action='ignore').values

    # Create sequences of train/test (no real need for validation here)
    BrainSeq = BrainSequence(X_train, to_categorical(Y_train), batch_size,
                             norm='none', train=True, crop=False,
                             new_size=(193, 229, 193))
    BrainSeq_val = BrainSequence(X_test, to_categorical(Y_test), batch_size,
                                 norm='none', train=False, crop=False,
                                 new_size=(193, 229, 193))

    # Load data (THIS NEEDS TO BE CHANGED)
    # Create model
    model = CNN3D(input_shape=(193, 229, 193, 1))
    opt = Adam(lr=0.0001)

    # Compile model
    model.compile(optimizer=opt, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Create callbacks
    # Model checkpoint to save the training results
    checkpointer = ModelCheckpoint(filepath=out_dir + "model_trained.h5",
                                   verbose=0, save_best_only=True,
                                   save_weights_only=True)

    # CSVLogger to save the training results in a csv file
    csv_logger = CSVLogger(out_dir + 'csv_log.csv', separator=';')

    # Callback to reduce the learning rate
    def lr_scheduler(epoch, lr):
        if epoch == 15:
            return lr
        elif epoch == 25:
            return lr * .1
        elif epoch == 35:
            return lr * .1
        else:
            return lr

    lrs = LearningRateScheduler(lr_scheduler)

    # Callback to terminate on NaN loss (so terminate on error)
    NanLoss = TerminateOnNaN()

    callbacks = [checkpointer, csv_logger, NanLoss, lrs]

    # Train model
    model.fit_generator(BrainSeq, steps_per_epoch=None, epochs=epochs,
                        shuffle=True, callbacks=callbacks, verbose=1,
                        validation_data=BrainSeq_val)

    # Model is saved due to callbacks
    print('The end.')
    t1 = time.time()
    print('Time to compute the script: ', t1 - t0)
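
    # A minimal sketch of how the best checkpoint could be restored later.
    # ModelCheckpoint above saves weights only, so load_weights applies;
    # this is illustrative and not part of the original script.
    # model.load_weights(out_dir + "model_trained.h5")
    # print(model.evaluate_generator(BrainSeq_val))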