Python ModelCheckpointParallel Exemples, neuron.callbacks.ModelCheckpointParallel Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : train.py Projet : Untribium/agemorph

def train(csv_path,
          tag,
          gpu_id,
          epochs,
          steps_per_epoch,
          batch_size,
          int_steps,
          vel_resize,
          lr,
          prior_lambda,
          image_sigma,
          enc_nf,
          dec_nf,
          vol_shape,
          loss_weights,
          split_col,
          split_train,
          split_eval):
    """
    Train the MICCAI 2018 style VAE registration model on pairs listed in a csv.

    A run directory is created under ./runs/; its name encodes the start time
    and the main hyper-parameters. The directory receives the pickled call
    configuration (config.pkl), the initial model (gen_000.h5), the
    checkpoints written by the save callback, and an (initially empty)
    'eval' sub-folder. It is also passed as log_dir to TensorBoardExt.

    :param csv_path: path to an existing csv file; it must contain a 'delta_t' column
        (the keys 'img_path_0', 'img_path_1', 'pat_dx_1' and 'weight' are forwarded to
        datagenerators.csv_gen, presumably as column names -- confirm against that helper)
    :param tag: free-form suffix appended to the run directory name ('' for none)
    :param gpu_id: gpu id(s) to expose through CUDA_VISIBLE_DEVICES; an integer or a
        comma separated string such as '0,1' (one entry per gpu)
    :param epochs: number of training epochs
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: batch size; must be a multiple of the number of gpus
    :param int_steps: forwarded to datagenerators.vae_generator and TensorBoardExt
        (presumably the number of integration steps -- confirm)
    :param vel_resize: scale factor applied to vol_shape to obtain the flow shape
    :param lr: learning rate of the Adam optimizer
    :param prior_lambda: the prior_lambda, the scalar in front of the smoothing laplacian, in MICCAI paper
    :param image_sigma: the image sigma in MICCAI paper
    :param enc_nf: encoder filter specification passed to networks.miccai2018_net
    :param dec_nf: decoder filter specification passed to networks.miccai2018_net
    :param vol_shape: shape of the input volumes (any iterable, converted to a tuple)
    :param loss_weights: weights of the [reconstruction, kl] losses
    :param split_col: first element of the split tuple given to datagenerators.csv_gen
    :param split_train: split value selecting the training rows
    :param split_eval: split value selecting the validation / tensorboard rows
    """

    # Snapshot of the call arguments, pickled below. A copy is taken because
    # the mapping returned by locals() is not guaranteed to stay frozen.
    model_config = dict(locals())

    vol_shape = tuple(vol_shape)
    model_config['vol_shape'] = vol_shape
    print('input vol_shape is {}'.format(vol_shape))

    assert os.path.isfile(csv_path), 'csv not found at {}'.format(csv_path)

    # the docstring historically advertised an integer gpu id, but both the
    # environment variable and the split(',') below need a string
    gpu_id = str(gpu_id)

    # run directory name encodes the start time and the hyper-parameters
    name_parts = [
        './runs/',
        'vae_{:%Y%m%d_%H%M}'.format(datetime.now()),
        '_gpu={}'.format(gpu_id),
        '_bs={}'.format(batch_size),
        '_enc={}'.format(enc_nf),
        '_dec={}'.format(dec_nf),
        '_lr={}'.format(lr),
        '_pl={}'.format(prior_lambda),
        '_is={}'.format(image_sigma),
        '_vr={}'.format(vel_resize),
        '_lw={}'.format(loss_weights),
        '_tag={}'.format(tag) if tag != '' else '',
    ]
    # list-like values print with spaces and commas; make the name path friendly
    model_dir = ''.join(name_parts).replace(' ', '').replace(',', '_')

    print('model_dir is {}'.format(model_dir))

    flow_shape = tuple(int(d * vel_resize) for d in vol_shape)

    valid_dir = os.path.join(model_dir, 'eval')

    # prepare model folder; makedirs also creates ./runs and model_dir when
    # they are missing (os.mkdir failed on a fresh checkout without ./runs)
    if not os.path.isdir(valid_dir):
        os.makedirs(valid_dir)

    # gpu handling: once CUDA_VISIBLE_DEVICES is set, the visible devices are
    # renumbered from 0, hence the fixed ordinal
    gpu = '/gpu:0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # prepare the model
    with tf.device(gpu):

        model = networks.miccai2018_net(vol_shape, enc_nf, dec_nf)

        save_file_name = os.path.join(model_dir, 'gen_{epoch:03d}.h5')

        # save first iteration
        model.save(save_file_name.format(epoch=0))

        # losses, in the order of the model outputs
        loss_class = losses.Miccai2018(image_sigma, prior_lambda, flow_shape=flow_shape)
        model_losses = [loss_class.recon_loss, loss_class.kl_loss]

    # data generator
    nb_gpus = len(gpu_id.split(','))
    assert np.mod(batch_size, nb_gpus) == 0, \
        'batch_size should be a multiple of the nr. of gpus. ' + \
        'Got batch_size %d, %d gpus' % (batch_size, nb_gpus)

    csv = pd.read_csv(csv_path)

    img_keys = ['img_path_0', 'img_path_1']
    lbl_keys = ['delta_t', 'pat_dx_1']

    max_delta = csv['delta_t'].max()

    model_config['max_delta'] = max_delta

    train_csv_gen = datagenerators.csv_gen(csv_path, img_keys=img_keys,
                            lbl_keys=lbl_keys, batch_size=batch_size,
                            sample=True, weights='weight',
                            split=(split_col, split_train))

    # NOTE: unlike the two other generators, no weights are passed here
    valid_csv_gen = datagenerators.csv_gen(csv_path, img_keys=img_keys,
                            lbl_keys=lbl_keys, batch_size=batch_size,
                            sample=True, split=(split_col, split_eval))

    board_csv_gen = datagenerators.csv_gen(csv_path, img_keys=img_keys,
                            lbl_keys=lbl_keys, batch_size=batch_size,
                            sample=True, weights='weight',
                            split=(split_col, split_eval))

    train_data = datagenerators.vae_generator(train_csv_gen, flow_shape, max_delta, int_steps)
    valid_data = datagenerators.vae_generator(valid_csv_gen, flow_shape, max_delta, int_steps)
    board_data = datagenerators.vae_generator(board_csv_gen, flow_shape, max_delta, int_steps)

    # write model_config; the context manager guarantees the file is flushed
    # and closed even if pickling fails
    config_path = os.path.join(model_dir, 'config.pkl')
    with open(config_path, 'wb') as config_file:
        pickle.dump(model_config, config_file)

    # prepare callbacks
    tboard_callback = TensorBoardExt(log_dir=model_dir, valid_data=board_data,
                                                        int_steps=int_steps)

    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(model, gpus=nb_gpus)

        # single gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = model

        mg_model.compile(optimizer=Adam(lr=lr), loss=model_losses, loss_weights=loss_weights)

        tboard_callback.set_model(mg_model)

        callbacks = [save_callback, tboard_callback]

        mg_model.fit_generator(train_data,
                               epochs=epochs,
                               callbacks=callbacks,
                               steps_per_epoch=steps_per_epoch,
                               verbose=1,
                               validation_data=valid_data,
                               validation_steps=25)

Exemple #2

0

Afficher le fichier

def train(data_dir,
          atlas_file,
          model_dir,
          gpu_id,
          lr,
          nb_epochs,
          prior_lambda,
          image_sigma,
          steps_per_epoch,
          batch_size,
          load_model_file,
          bidir,
          initial_epoch=0):
    """
    Train the MICCAI 2018 diffeomorphic network, registering volumes to an atlas.

    :param data_dir: folder with npz files for each subject (all '*.npz' files directly inside it are used)
    :param atlas_file: atlas filename. So far we support npz file with a 'vol' variable
    :param model_dir: model folder to save to (created if missing)
    :param gpu_id: gpu id(s) to expose through CUDA_VISIBLE_DEVICES; an integer or a
        comma separated string such as '0,1' (one entry per gpu)
    :param lr: learning rate
    :param nb_epochs: number of training epochs
    :param prior_lambda: the prior_lambda, the scalar in front of the smoothing laplacian, in MICCAI paper
    :param image_sigma: the image sigma in MICCAI paper
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: batch size; must be a multiple of the number of gpus
    :param load_model_file: optional h5 model file to initialize with (None or '' to skip)
    :param bidir: logical whether to use bidirectional cost function
    :param initial_epoch: epoch to resume from; also used to name the first saved model
    """

    # load atlas from provided files. The atlas we used is 160x192x224.
    # np.load keeps the npz archive open until it is closed, so scope it;
    # indexing the archive materialises the array before the file is closed.
    with np.load(atlas_file) as atlas_npz:
        atlas_vol = atlas_npz['vol'][np.newaxis, ..., np.newaxis]
    # leading batch axis and trailing channel axis were just added; drop them
    vol_size = atlas_vol.shape[1:-1]

    # prepare data files
    # for the CVPR and MICCAI papers, we have data arranged in train/validate/test folders
    # inside each folder is a /vols/ and a /asegs/ folder with the volumes
    # and segmentations. All of our papers use npz formated data.
    train_vol_names = glob.glob(os.path.join(data_dir, '*.npz'))
    random.shuffle(train_vol_names)  # shuffle volume list
    assert len(train_vol_names) > 0, "Could not find any training data"

    # Diffeomorphic network architecture used in MICCAI 2018 paper
    nf_enc = [16, 32, 32, 32]
    nf_dec = [32, 32, 32, 32, 16, 3]

    # prepare model folder (makedirs also creates missing parent folders)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # gpu handling: the docstring historically advertised an integer gpu id,
    # but the environment variable and split(',') below need a string
    gpu_id = str(gpu_id)
    # visible devices are renumbered from 0 once CUDA_VISIBLE_DEVICES is set
    gpu = '/gpu:0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # prepare the model
    with tf.device(gpu):
        # the MICCAI201 model takes in [image_1, image_2] and outputs [warped_image_1, velocity_stats]
        # in these experiments, we use image_2 as atlas
        model = networks.miccai2018_net(vol_size, nf_enc, nf_dec, bidir=bidir)

        # load initial weights
        if load_model_file is not None and load_model_file != "":
            model.load_weights(load_model_file)

        # save first iteration
        model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch))

        # compile
        # note: best to supply vol_shape here than to let tf figure it out.
        flow_vol_shape = model.outputs[-1].shape[1:-1]
        loss_class = losses.Miccai2018(image_sigma,
                                       prior_lambda,
                                       flow_vol_shape=flow_vol_shape)
        if bidir:
            # two reconstruction terms (one per direction) share the weight
            model_losses = [
                loss_class.recon_loss, loss_class.recon_loss,
                loss_class.kl_loss
            ]
            loss_weights = [0.5, 0.5, 1]
        else:
            model_losses = [loss_class.recon_loss, loss_class.kl_loss]
            loss_weights = [1, 1]

    # data generator
    nb_gpus = len(gpu_id.split(','))
    assert np.mod(batch_size, nb_gpus) == 0, \
        'batch_size should be a multiple of the nr. of gpus. ' + \
        'Got batch_size %d, %d gpus' % (batch_size, nb_gpus)

    train_example_gen = datagenerators.example_gen(train_vol_names,
                                                   batch_size=batch_size)
    # the same atlas is paired with every element of a batch
    atlas_vol_bs = np.repeat(atlas_vol, batch_size, axis=0)
    miccai2018_gen = datagenerators.miccai2018_gen(train_example_gen,
                                                   atlas_vol_bs,
                                                   batch_size=batch_size,
                                                   bidir=bidir)

    # prepare callbacks
    save_file_name = os.path.join(model_dir, '{epoch:02d}.h5')

    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(model, gpus=nb_gpus)

        # single gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = model

        mg_model.compile(optimizer=Adam(lr=lr),
                         loss=model_losses,
                         loss_weights=loss_weights)
        mg_model.fit_generator(miccai2018_gen,
                               initial_epoch=initial_epoch,
                               epochs=nb_epochs,
                               callbacks=[save_callback],
                               steps_per_epoch=steps_per_epoch,
                               verbose=1)

Exemple #3

0

Afficher le fichier

def train(data_dir,
          model_dir,
          gpu_id,
          lr,
          nb_epochs,
          prior_lambda,
          image_sigma,
          steps_per_epoch,
          batch_size,
          load_model_file,
          bidir,
          initial_epoch=0):
    """
    Train the LPBA40 variant of the MICCAI 2018 network on '.hdr' volumes.

    :param data_dir: folder (walked recursively) holding the 'l*.hdr' volumes
    :param model_dir: model folder to save to (created if missing)
    :param gpu_id: gpu id(s) to expose through CUDA_VISIBLE_DEVICES; an integer or a
        comma separated string such as '0,1' (one entry per gpu)
    :param lr: learning rate
    :param nb_epochs: number of training epochs
    :param prior_lambda: the prior_lambda, the scalar in front of the smoothing laplacian, in MICCAI paper
    :param image_sigma: the image sigma in MICCAI paper
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: batch size; must be a multiple of the number of gpus
    :param load_model_file: optional h5 model file to initialize with (None or '' to skip)
    :param bidir: logical whether to use bidirectional cost function
    :param initial_epoch: epoch to resume from; also used to name the first saved model
    """

    vol_size = (160, 192, 224)

    # First 30 images are used as training set, and the rest 10 images are testing set:
    # any file whose name references one of the ids 31..40 is held out.
    held_out_ids = list(range(31, 41, 1))
    moving_path = []
    for root, dirs, files in os.walk(data_dir):
        for f in files:
            if not (f.endswith(".hdr") and f.startswith("l")):
                continue
            references_held_out = any(
                "l{}_".format(c) in f or "_l{}.".format(c) in f
                for c in held_out_ids)
            if not references_held_out:
                # NOTE(review): the path is built from data_dir, not from the
                # walked `root`, so files found in sub-folders get a path that
                # does not exist -- kept as is because the generator below
                # also receives image_path=data_dir; confirm the intent.
                moving_path.append(os.path.join(data_dir, f))

    train_vol_names = moving_path
    random.shuffle(train_vol_names)  # shuffle volume list
    assert len(train_vol_names) > 0, "Could not find any training data"

    nf_enc = [16, 32, 32, 32]
    nf_dec = [32, 32, 32, 32, 16, 3]

    # prepare model folder (makedirs also creates missing parent folders)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # gpu handling: the docstring historically advertised an integer gpu id,
    # but the environment variable and split(',') below need a string
    gpu_id = str(gpu_id)
    # visible devices are renumbered from 0 once CUDA_VISIBLE_DEVICES is set
    gpu = '/gpu:0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # prepare the model
    with tf.device(gpu):
        model = networks_lpba40.miccai2018_net(vol_size,
                                               nf_enc,
                                               nf_dec,
                                               bidir=bidir)

        # load initial weights
        if load_model_file is not None and load_model_file != "":
            print("load file from {}".format(load_model_file))
            model.load_weights(load_model_file)

        # save first iteration
        model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch))

        # compile: the flow shape is read from the second model output
        flow_vol_shape = model.outputs[1].shape[1:-1]
        loss_class = losses.Miccai2018(image_sigma,
                                       prior_lambda,
                                       flow_vol_shape=flow_vol_shape)
        if bidir:
            model_losses = [
                loss_class.recon_loss, loss_class.recon_loss,
                loss_class.kl_loss
            ]
            loss_weights = [0.5, 0.5, 1]
        else:
            # one loss per model output (eight in this configuration)
            model_losses = [
                loss_class.recon_loss, loss_class.kl_loss,
                loss_class.kl_loss_1, loss_class.kl_loss_2,
                loss_class.kl_loss_3, loss_class.mse_loss, loss_class.mse_loss,
                loss_class.mse_loss
            ]
            loss_weights = [1, 1, 0.5, 0.5, 0.5, 1, 1, 1]

    # data generator
    nb_gpus = len(gpu_id.split(','))
    assert np.mod(batch_size, nb_gpus) == 0, \
        'batch_size should be a multiple of the nr. of gpus. ' + \
        'Got batch_size %d, %d gpus' % (batch_size, nb_gpus)

    train_example_gen = datagenerators_lpba40.example_gen_lpba40(
        train_vol_names, batch_size=batch_size, image_path=data_dir)
    miccai2018_gen_lpba40 = datagenerators_lpba40.miccai2018_gen_lpba40(
        train_example_gen, vol_size, batch_size=batch_size, bidir=bidir)

    # prepare callbacks
    save_file_name = os.path.join(model_dir, '{epoch:02d}.h5')

    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(model, gpus=nb_gpus)

        # single gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = model

        mg_model.compile(optimizer=Adam(lr=lr),
                         loss=model_losses,
                         loss_weights=loss_weights)
        mg_model.fit_generator(miccai2018_gen_lpba40,
                               initial_epoch=initial_epoch,
                               epochs=nb_epochs,
                               callbacks=[save_callback],
                               steps_per_epoch=steps_per_epoch,
                               verbose=1)

Exemple #4

0

Afficher le fichier

def train(data_dir,
          atlas_file,
          model,
          model_name,
          gpu_id,
          lr,
          nb_epochs,
          reg_param,
          steps_per_epoch,
          batch_size,
          load_model_file,
          data_loss,
          initial_epoch=0):
    """
    Train a CVPR 2018 voxelmorph network, registering volumes to an atlas.

    Models are written to '../models/<model_name>' (created if missing).

    :param data_dir: folder with npz files for each subject (all '*.npz' files directly inside it are used)
    :param atlas_file: atlas filename, in a format readable by nibabel
    :param model: 'vm1' or 'vm2' (based on CVPR 2018 paper); any other value selects
        the doubled 'vm2double' filter layout
    :param model_name: name of the sub-folder of '../models/' to save to
    :param gpu_id: id of the single gpu to expose through CUDA_VISIBLE_DEVICES
    :param lr: learning rate
    :param nb_epochs: number of training epochs
    :param reg_param: the smoothness/reconstruction tradeoff parameter (lambda in CVPR paper)
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: Optional, default of 1. can be larger, depends on GPU memory and volume size
    :param load_model_file: optional h5 model file to initialize with (None or '' to skip)
    :param data_loss: 'mse', 'cc' or 'ncc' ('cc' and 'ncc' both select losses.NCC)
    :param initial_epoch: epoch to resume from; also used to name the first saved model
    """

    # load atlas from provided files. The atlas we used is 160x192x224.
    # np.asanyarray(img.dataobj) is nibabel's documented drop-in replacement
    # for the deprecated (and, from nibabel 5, removed) img.get_data().
    atlas_img = nib.load(atlas_file)
    atlas_vol = np.asanyarray(atlas_img.dataobj)[np.newaxis, ..., np.newaxis]
    # leading batch axis and trailing channel axis were just added; drop them
    vol_size = atlas_vol.shape[1:-1]

    # prepare data files
    # for the CVPR and MICCAI papers, we have data arranged in train/validate/test folders
    # inside each folder is a /vols/ and a /asegs/ folder with the volumes
    # and segmentations. All of our papers use npz formated data.
    train_vol_names = glob.glob(os.path.join(data_dir, '*.npz'))
    random.shuffle(train_vol_names)  # shuffle volume list
    assert len(train_vol_names) > 0, "Could not find any training data"

    # UNET filters for voxelmorph-1 and voxelmorph-2,
    # these are architectures presented in CVPR 2018
    nf_enc = [16, 32, 32, 32]
    if model == 'vm1':
        nf_dec = [32, 32, 32, 32, 8, 8]
    elif model == 'vm2':
        nf_dec = [32, 32, 32, 32, 32, 16, 16]
    else:  # 'vm2double':
        nf_enc = [f * 2 for f in nf_enc]
        nf_dec = [f * 2 for f in [32, 32, 32, 32, 32, 16, 16]]

    assert data_loss in [
        'mse', 'cc', 'ncc'
    ], 'Loss should be one of mse or cc, found %s' % data_loss
    if data_loss in ['ncc', 'cc']:
        data_loss = losses.NCC().loss
    # otherwise data_loss stays the string 'mse', which compile() accepts

    model_dir = "../models/" + model_name
    # prepare model folder (makedirs also creates ../models when missing)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # GPU handling: once CUDA_VISIBLE_DEVICES is restricted to gpu_id, that
    # card is enumerated as device 0 inside the process, so the device string
    # must use ordinal 0 rather than the physical id.
    gpu = '/gpu:0'
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # prepare the model
    with tf.device(gpu):
        # in the CVPR layout, the model takes in [image_1, image_2] and outputs [warped_image_1, flow]
        # in the experiments, we use image_2 as atlas
        # (from here on `model` is the network, no longer the architecture name)
        model = networks.cvpr2018_net(vol_size, nf_enc, nf_dec)

        # load initial weights
        if load_model_file is not None and load_model_file != '':
            print('loading', load_model_file)
            model.load_weights(load_model_file)

        # save first iteration
        model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch))

    # data generator
    # this variant is pinned to a single gpu, so the multi-gpu branch below
    # is never taken
    nb_gpus = 1

    train_example_gen = datagenerators.example_gen(train_vol_names,
                                                   batch_size=batch_size)
    # the same atlas is paired with every element of a batch
    atlas_vol_bs = np.repeat(atlas_vol, batch_size, axis=0)
    cvpr2018_gen = datagenerators.cvpr2018_gen(train_example_gen,
                                               atlas_vol_bs,
                                               batch_size=batch_size)

    # prepare callbacks
    save_file_name = os.path.join(model_dir, '{epoch:02d}.h5')

    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(model, gpus=nb_gpus)

        # single-gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = model

        # compile
        mg_model.compile(optimizer=Adam(lr=lr),
                         loss=[data_loss, losses.Grad('l2').loss],
                         loss_weights=[1.0, reg_param])

        # fit
        mg_model.fit_generator(cvpr2018_gen,
                               initial_epoch=initial_epoch,
                               epochs=nb_epochs,
                               callbacks=[save_callback],
                               steps_per_epoch=steps_per_epoch,
                               verbose=1)

Exemple #5

0

Afficher le fichier

Fichier : train.py Projet : XimenaDai/ACTA-Reg-Net

def train(model_dir,
          gpu_id,
          lr,
          nb_epochs,
          prior_lambda,
          image_sigma,
          steps_per_epoch,
          batch_size,
          load_model_file,
          bidir,
          initial_epoch=0):
    """
    Train networks.Reg_Net with the integrated flow exposed as an extra output.

    NOTE(review): `vol_size` and `train_vol_names` are read below but are
    neither parameters nor assigned in this function; they are presumably
    module-level globals -- confirm they are defined before calling.

    :param model_dir: model folder to save to (created if missing)
    :param gpu_id: gpu id(s) to expose through CUDA_VISIBLE_DEVICES; an integer or a
        comma separated string such as '0,1' (one entry per gpu)
    :param lr: learning rate
    :param nb_epochs: number of training epochs
    :param prior_lambda: the prior_lambda, the scalar in front of the smoothing laplacian, in MICCAI paper
    :param image_sigma: the image sigma in MICCAI paper
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: batch size; must be a multiple of the number of gpus
    :param load_model_file: optional h5 model file to initialize with (None or '' to skip)
    :param bidir: logical whether to use bidirectional cost function
    :param initial_epoch: epoch to resume from; also used to name the first saved model
    """

    # Diffeomorphic network architecture used in VoxelMorph MICCAI 2018 paper
    nf_enc = [16, 32, 32, 32]
    nf_dec = [32, 32, 32, 32, 16, 3]

    # prepare model folder (makedirs also creates missing parent folders)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # gpu handling: the docstring historically advertised an integer gpu id,
    # but the environment variable and split(',') below need a string
    gpu_id = str(gpu_id)
    # visible devices are renumbered from 0 once CUDA_VISIBLE_DEVICES is set
    gpu = '/gpu:0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # prepare the model
    with tf.device(gpu):
        model = networks.Reg_Net(vol_size, nf_enc, nf_dec, bidir=bidir)
        # re-wrap the network so that the output of the 'flow-int' layer
        # becomes a third model output next to [y, flow_parameters]
        flow = model.get_layer('flow-int').output
        [src, tgt] = model.inputs
        [y, flow_parameters] = model.outputs
        model = Model(inputs=[src, tgt], outputs=[y, flow_parameters, flow])
        # load initial weights
        if load_model_file is not None and load_model_file != "":
            model.load_weights(load_model_file)

        # save first iteration
        model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch))

        # compile
        # note: best to supply vol_shape here than to let tf figure it out.
        # outputs[-2] is flow_parameters in the re-wrapped model
        flow_vol_shape = model.outputs[-2].shape[1:-1]
        loss_class = losses.Miccai2018(image_sigma,
                                       prior_lambda,
                                       flow_vol_shape=flow_vol_shape)
        if bidir:
            model_losses = [
                loss_class.recon_loss, loss_class.recon_loss,
                loss_class.kl_loss
            ]
            loss_weights = [0.5, 0.5, 1]
        else:
            # third loss is attached to the extra integrated-flow output
            model_losses = [
                loss_class.recon_loss, loss_class.kl_loss,
                losses_user.Sparse_Loss(
                    tissue=src,
                    loss_weights=1,
                ).loss
            ]
            loss_weights = [1, 1, 1]

    # data generator
    nb_gpus = len(gpu_id.split(','))
    assert np.mod(batch_size, nb_gpus) == 0, \
        'batch_size should be a multiple of the nr. of gpus. ' + \
        'Got batch_size %d, %d gpus' % (batch_size, nb_gpus)

    data_gen = datagenerators.data_generator_vertices(train_vol_names,
                                                      vol_size)

    # prepare callbacks
    save_file_name = os.path.join(model_dir, '{epoch:02d}.h5')
    # NOTE(review): model_dir[9:] drops the first nine characters of the path,
    # presumably a fixed prefix such as a leading folder -- confirm.
    log_dir = "./logs/" + model_dir[9:] + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir,
                                                       histogram_freq=0)
    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(model, gpus=nb_gpus)

        # single gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = model

        mg_model.compile(optimizer=Adam(lr=lr),
                         loss=model_losses,
                         loss_weights=loss_weights)
        mg_model.fit_generator(data_gen,
                               initial_epoch=initial_epoch,
                               epochs=nb_epochs,
                               callbacks=[save_callback, tensorboard_callback],
                               steps_per_epoch=steps_per_epoch,
                               verbose=1)

Exemple #6

0

Afficher le fichier

def train(
        data_dir,
        val_data_dir,
        atlas_file,
        val_atlas_file,
        model,
        model_dir,
        gpu_id,
        lr,
        nb_epochs,
        reg_param,
        gama_param,
        steps_per_epoch,
        batch_size,
        load_model_file,
        data_loss,
        seg_dir=None,  # one file
        val_seg_dir=None,
        Sf_file=None,  # one file
        val_Sf_file=None,
        auxi_label=None,
        initial_epoch=0):
    """
    Train a 2D CVPR 2018 voxelmorph network with a segmentation-aware Grad loss.

    After training, the training loss curve is plotted with matplotlib.

    :param data_dir: training volume names, forwarded as is to datagenerators.example_gen
        (must be non-empty)
    :param val_data_dir: validation volume names, forwarded as is to datagenerators.example_gen
        (must be non-empty)
    :param atlas_file: training atlas image file, opened with PIL
    :param val_atlas_file: validation atlas image file, opened with PIL
    :param model: either vm1 or vm2 (based on CVPR 2018 paper); any other value selects
        the doubled 'vm2double' filter layout
    :param model_dir: the model directory to save to (created if missing)
    :param gpu_id: gpu id(s) to expose through CUDA_VISIBLE_DEVICES; an integer or a
        comma separated string such as '0,1' (one entry per gpu)
    :param lr: learning rate
    :param nb_epochs: number of training epochs
    :param reg_param: the smoothness/reconstruction tradeoff parameter (lambda in CVPR paper)
    :param gama_param: first argument of losses.Grad (the original comment reads
        "real gama: reg_param*gama_param")
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: batch size; must be a multiple of the number of gpus
    :param load_model_file: optional h5 model file to initialize with (None or '' to skip)
    :param data_loss: 'mse', 'cc' or 'ncc' ('cc' and 'ncc' both select losses.NCC)
    :param seg_dir: segmentation image file passed to the training Grad loss; required
        despite the None default
    :param val_seg_dir: segmentation image file passed to the validation metric; required
    :param Sf_file: second segmentation image file passed to the training Grad loss; required
    :param val_Sf_file: second segmentation image file passed to the validation metric; required
    :param auxi_label: currently unused
    :param initial_epoch: epoch to resume from; also used to name the first saved model
    :raises ValueError: if one of seg_dir, val_seg_dir, Sf_file or val_Sf_file is None
    """

    def _load_as_batch(path):
        """Read an image file and return it with batch and channel axes added."""
        # the context manager closes the underlying file once the pixel data
        # has been copied into the array
        with Image.open(path) as img:
            return np.array(img)[np.newaxis, ..., np.newaxis]

    # The four segmentation files default to None but every one of them is
    # consumed unconditionally by the losses below; fail early with a clear
    # message instead of an AttributeError / UnboundLocalError much later.
    missing = [
        name for name, value in (('seg_dir', seg_dir),
                                 ('val_seg_dir', val_seg_dir),
                                 ('Sf_file', Sf_file),
                                 ('val_Sf_file', val_Sf_file))
        if value is None
    ]
    if missing:
        raise ValueError('missing required segmentation file(s): %s' %
                         ', '.join(missing))

    # load atlas from provided files; the arrays get a leading batch axis and
    # a trailing channel axis, which are dropped again for the spatial size
    atlas_vol = _load_as_batch(atlas_file)
    vol_size = atlas_vol.shape[1:-1]
    print(vol_size)

    val_atlas_vol = _load_as_batch(val_atlas_file)
    val_vol_size = val_atlas_vol.shape[1:-1]
    print(val_vol_size)

    Sm_ = _load_as_batch(seg_dir)
    val_Sm_ = _load_as_batch(val_seg_dir)

    # prepare data files: the names are used exactly as supplied by the caller
    train_vol_names = data_dir
    assert len(train_vol_names) > 0, "Could not find any training data"

    val_vol_names = val_data_dir
    assert len(val_vol_names) > 0, "Could not find any validation data"

    # UNET filters for voxelmorph-1 and voxelmorph-2,
    # these are architectures presented in CVPR 2018
    nf_enc = [16, 32, 32, 32]
    if model == 'vm1':
        nf_dec = [32, 32, 32, 32, 8, 8]
    elif model == 'vm2':
        nf_dec = [32, 32, 32, 32, 32, 16, 16]
    else:  # 'vm2double':
        nf_enc = [f * 2 for f in nf_enc]
        nf_dec = [f * 2 for f in [32, 32, 32, 32, 32, 16, 16]]

    assert data_loss in [
        'mse', 'cc', 'ncc'
    ], 'Loss should be one of mse or cc, found %s' % data_loss
    if data_loss in ['ncc', 'cc']:
        data_loss = losses.NCC().loss
    # otherwise data_loss stays the string 'mse', which compile() accepts

    Sf_ = _load_as_batch(Sf_file)
    val_Sf_ = _load_as_batch(val_Sf_file)

    # prepare model folder (makedirs also creates missing parent folders)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # GPU handling: the environment variable and split(',') below need a
    # string, so accept integer ids as well
    gpu_id = str(gpu_id)
    # visible devices are renumbered from 0 once CUDA_VISIBLE_DEVICES is set
    gpu = '/gpu:0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # data generator
    nb_gpus = len(gpu_id.split(','))
    assert np.mod(batch_size, nb_gpus) == 0, \
        'batch_size should be a multiple of the nr. of gpus. ' + \
        'Got batch_size %d, %d gpus' % (batch_size, nb_gpus)

    train_example_gen = datagenerators.example_gen(train_vol_names,
                                                   batch_size=batch_size)
    # the same atlas is paired with every element of a batch
    atlas_vol_bs = np.repeat(atlas_vol, batch_size, axis=0)
    cvpr2018_gen = datagenerators.cvpr2018_gen(train_example_gen,
                                               atlas_vol_bs,
                                               batch_size=batch_size)

    val_example_gen = datagenerators.example_gen(val_vol_names,
                                                 batch_size=batch_size)
    val_atlas_vol_bs = np.repeat(val_atlas_vol, batch_size, axis=0)
    val_cvpr2018_gen = datagenerators.cvpr2018_gen(val_example_gen,
                                                   val_atlas_vol_bs,
                                                   batch_size=batch_size)

    # prepare the model
    # (an extra tf.Session used to be created here and never used; Keras runs
    # on the session registered through set_session above)
    with tf.device(gpu):
        # in the CVPR layout, the model takes in [image_1, image_2] and outputs [warped_image_1, flow]
        # in the experiments, we use image_2 as atlas
        # (from here on `model` is the network, no longer the architecture name)
        model = networks.cvpr2018_net(vol_size, nf_enc, nf_dec)

        # load initial weights
        if load_model_file is not None and load_model_file != '':
            print('loading', load_model_file)
            model.load_weights(load_model_file)

        # save first iteration
        model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch))

        loss_model = [
            data_loss,
            losses.Grad(gama_param, Sf_, Sm_, penalty='l2').loss
        ]  # real gama: reg_param*gama_param
        loss_weight = [1.0, reg_param]

        # same Grad term evaluated on the validation segmentations, reported
        # as a metric on the 'flow' output
        metrics_2 = losses.Grad(gama_param,
                                val_Sf_,
                                val_Sm_,
                                penalty='l2',
                                flag_vali=True).loss

    # prepare callbacks
    save_file_name = os.path.join(model_dir, '{epoch:02d}.h5')

    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(model, gpus=nb_gpus)

        # single-gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = model

        # compile
        mg_model.compile(optimizer=Adam(lr=lr),
                         loss=loss_model,
                         loss_weights=loss_weight,
                         metrics={'flow': metrics_2})

        # fit
        history = mg_model.fit_generator(cvpr2018_gen,
                                         initial_epoch=initial_epoch,
                                         epochs=nb_epochs,
                                         callbacks=[save_callback],
                                         steps_per_epoch=steps_per_epoch,
                                         validation_data=val_cvpr2018_gen,
                                         validation_steps=1,
                                         verbose=2)

        # plot
        print('model', mg_model.metrics_names)
        print('keys()', history.history.keys())

        plt.plot(history.history['loss'])
        plt.title('cvpr_auxi_loss')
        plt.ylabel('loss')
        plt.xlabel('Epoch')
        # only the training curve is drawn, so only label that one
        plt.legend(['Train'])
        plt.show()

Exemple #7

0

Afficher le fichier

Fichier : train_unsupervised_segmentation.py Projet : YangNe/voxelmorph-1

def train_unsupervised_segmentation(data_dir,
                                    atlas_file,
                                    mapping_file,
                                    model,
                                    model_dir,
                                    gpu_id,
                                    lr,
                                    nb_epochs,
                                    init_stats,
                                    reg_param,
                                    steps_per_epoch,
                                    batch_size,
                                    stat_post_warp,
                                    warp_method,
                                    load_model_file,
                                    initial_epoch=0):
    """
    Train a probabilistic-atlas voxelmorph model for unsupervised segmentation.

    :param data_dir: folder with npz files (coregistered, intensity normalized)
    :param atlas_file: npz file with the probabilistic atlas under the key
        'vol_data' (coregistered to images)
    :param mapping_file: npz file with the mapping from atlas labels to tissue
        types under the key 'mapping', or None to use the atlas labels as-is
    :param model: registration (voxelmorph) architecture name: 'vm1', 'vm2',
        or 'vm2double'. Any value other than 'vm1'/'vm2' selects the
        'vm2double' filter configuration.
    :param model_dir: the model directory to save to (created if missing)
    :param gpu_id: GPU id(s) exported as CUDA_VISIBLE_DEVICES; a
        comma-separated string such as '0' or '0,1'. A plain int is accepted
        and converted to its string form.
    :param lr: learning rate
    :param nb_epochs: number of epochs
    :param init_stats: npz file with guesses for the Gaussian parameters,
        stored under the keys 'init_mu' and 'init_std'
    :param reg_param: smoothness/reconstruction tradeoff parameter (lambda in
        the paper); forced to 0 when warp_method is not 'WARP'
    :param steps_per_epoch: number of generator batches per epoch
    :param batch_size: batch size; must be a multiple of the number of GPUs
    :param stat_post_warp: set to 1 to use warped atlas to estimate Gaussian
        parameters
    :param warp_method: set to 'WARP' if you want to warp the atlas
    :param load_model_file: optional h5 model file to initialize with
    :param initial_epoch: initial epoch
    """

    # The id is used both as an environment variable value and via
    # str.split below, so normalise an integer id to its string form.
    if isinstance(gpu_id, int):
        gpu_id = str(gpu_id)

    # Load the atlas (we used size 160x192x224) and add a leading batch axis.
    # The archive is closed as soon as the array has been read.
    with np.load(atlas_file) as atlas_data:
        atlas_full = atlas_data['vol_data'][np.newaxis, ...]

    # Optionally group atlas labels into tissue types.
    if mapping_file is None:
        atlas_vol = atlas_full
        nb_labels = atlas_vol.shape[-1]
    else:
        with np.load(mapping_file) as mapping_data:
            mapping = mapping_data['mapping'].astype('int').flatten()
        assert len(mapping) == atlas_full.shape[-1], \
            'mapping shape %d is inconsistent with atlas shape %d' % (len(mapping), atlas_full.shape[-1])

        # Plain Python int, matching the type used in the no-mapping branch.
        nb_labels = int(np.max(mapping)) + 1
        atlas_vol = np.zeros([1, *atlas_full.shape[1:-1], nb_labels])
        # Accumulate every source label channel into its tissue channel.
        for label_idx, tissue_idx in enumerate(mapping):
            atlas_vol[0, ..., tissue_idx] += atlas_full[0, ..., label_idx]

    vol_size = atlas_vol.shape[1:-1]

    # load guesses for means and variances (single open, closed afterwards)
    with np.load(init_stats) as stats:
        init_mu = stats['init_mu']
        init_sigma = stats['init_std']

    # prepare data files
    train_vol_names = glob.glob(os.path.join(data_dir, '*.npz'))
    random.shuffle(train_vol_names)
    assert len(train_vol_names) > 0, "Could not find any training data"

    # UNET filters for voxelmorph-1 and voxelmorph-2,
    # these are architectures presented in CVPR 2018
    nf_enc = [16, 32, 32, 32]
    if model == 'vm1':
        nf_dec = [32, 32, 32, 32, 8, 8]
    elif model == 'vm2':
        nf_dec = [32, 32, 32, 32, 32, 16, 16]
    else:  # 'vm2double':
        nf_enc = [f * 2 for f in nf_enc]
        nf_dec = [f * 2 for f in [32, 32, 32, 32, 32, 16, 16]]

    # prepare model and log folders; makedirs also creates missing parents
    # and does not fail if the folder already exists
    os.makedirs(model_dir, exist_ok=True)
    log_dir = os.path.join(model_dir, 'logs')
    os.makedirs(log_dir, exist_ok=True)

    # GPU handling: the device string always names index 0, while the
    # requested id(s) are exported through CUDA_VISIBLE_DEVICES.
    gpu = '/gpu:%d' % 0  # gpu_id
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    set_session(tf.Session(config=config))

    # prepare the network (kept in its own name so the `model` argument,
    # which is the architecture name, is not overwritten)
    with tf.device(gpu):
        net = networks.cvpr2018_net_probatlas(vol_size,
                                              nf_enc,
                                              nf_dec,
                                              nb_labels,
                                              diffeomorphic=True,
                                              full_size=False,
                                              stat_post_warp=stat_post_warp,
                                              warp_method=warp_method,
                                              init_mu=init_mu,
                                              init_sigma=init_sigma)

        # load initial weights
        if load_model_file is not None:
            print('loading', load_model_file)
            net.load_weights(load_model_file)

        # save first iteration
        net.save(os.path.join(model_dir, '%02d.h5' % initial_epoch))

    # data generator
    nb_gpus = len(gpu_id.split(','))
    assert batch_size % nb_gpus == 0, \
        'batch_size should be a multiple of the nr. of gpus. ' + \
        'Got batch_size %d, %d gpus' % (batch_size, nb_gpus)

    train_example_gen = datagenerators.example_gen(train_vol_names,
                                                   batch_size=batch_size)
    # one copy of the atlas per sample in the batch
    atlas_vol_bs = np.repeat(atlas_vol, batch_size, axis=0)
    cvpr2018_gen = datagenerators.cvpr2018_gen(train_example_gen,
                                               atlas_vol_bs,
                                               batch_size=batch_size)

    # prepare callbacks
    save_file_name = os.path.join(model_dir, '{epoch:02d}.h5')

    # fit generator
    with tf.device(gpu):

        # multi-gpu support
        if nb_gpus > 1:
            save_callback = nrn_gen.ModelCheckpointParallel(save_file_name)
            mg_model = multi_gpu_model(net, gpus=nb_gpus)

        # single-gpu
        else:
            save_callback = ModelCheckpoint(save_file_name)
            mg_model = net

        # tensorBoard callback
        tensorboard = TensorBoard(log_dir=log_dir,
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=False)

        # data term: negated mean of the prediction over the positions where
        # the network's first input is strictly positive
        def data_loss(_, yp):
            m = tf.cast(net.inputs[0] > 0, tf.float32)
            return -K.sum(yp * m) / K.sum(m)

        # the smoothness term is switched off unless the atlas is warped
        if warp_method != 'WARP':
            reg_param = 0

        # compile
        mg_model.compile(optimizer=Adam(lr=lr),
                         loss=[data_loss, losses.Grad('l2').loss],
                         loss_weights=[1.0, reg_param])

        # fit
        mg_model.fit_generator(cvpr2018_gen,
                               initial_epoch=initial_epoch,
                               epochs=nb_epochs,
                               callbacks=[save_callback, tensorboard],
                               steps_per_epoch=steps_per_epoch,
                               verbose=1)