Example #1
def getModel(net_settings, num_classes=1):
    '''
    Builds and returns the requested model based on net_settings['model_type'].
    '''
    if net_settings['model_type'] == 'resnet':
        base_model = resnet50.ResNet50(include_top=True, weights='imagenet')
        finetuning = Dense(1, activation='sigmoid',
                           name='predictions')(base_model.layers[-2].output)
        model = Model(input=base_model.input, output=finetuning)

        ## Adjust learning rate based on number of GPUs
        hv_lr = net_settings['lr'] * hvd.size()
        opt = optimizers.SGD(lr=hv_lr, momentum=0.9, decay=1e-6, nesterov=True)
        ## Adding Horovod DistributedOptimizer
        opt = hvd.DistributedOptimizer(opt)

        model.compile(loss=net_settings['loss'],
                      optimizer=opt,
                      metrics=['accuracy'])
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        if hvd.rank() == 0:
            callbacks.append(
                keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
        return model
    elif net_settings['model_type'] == 'resnet101':
        model = resnet101_model(224, 224, 3, 1)
        ## Adjust learning rate based on number of GPUs
        hv_lr = net_settings['lr'] * hvd.size()
        opt = optimizers.SGD(lr=hv_lr, momentum=0.9, decay=1e-6, nesterov=True)
        ## Adding Horovod DistributedOptimizer
        opt = hvd.DistributedOptimizer(opt)

        model.compile(loss=net_settings['loss'],
                      optimizer=opt,
                      metrics=['accuracy'])
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        if hvd.rank() == 0:
            callbacks.append(
                keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
        return model
    else:
        print('[models] Unsupported model_type: {}'.format(net_settings['model_type']))
        exit(0)
        return None
Example #2
def _get_hooks(is_distributed=_DISTRIBUTED, verbose=1):
    logger = _get_logger()
    if is_distributed:
        logger.info("Rank: {} Cluster Size {}".format(hvd.local_rank(),
                                                      hvd.size()))
        return [
            # Horovod: broadcast initial variable states from rank 0 to all other processes.
            # This is necessary to ensure consistent initialization of all workers when
            # training is started with random weights or restored from a checkpoint.
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
            # Horovod: average metrics among workers at the end of every epoch.
            #
            # Note: This callback must be in the list before the ReduceLROnPlateau,
            # TensorBoard, or other metrics-based callbacks.
            hvd.callbacks.MetricAverageCallback(),
            # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
            # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
            # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
            hvd.callbacks.LearningRateWarmupCallback(
                warmup_epochs=_WARMUP_EPOCHS, verbose=verbose),
            # Horovod: after the warmup reduce learning rate by 10 on the 30th, 60th and 80th epochs.
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=_WARMUP_EPOCHS, end_epoch=30, multiplier=1.0),
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=30,
                                                       end_epoch=60,
                                                       multiplier=1e-1),
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=60,
                                                       end_epoch=80,
                                                       multiplier=1e-2),
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=80,
                                                       multiplier=1e-3),
        ]
    else:
        return []
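A minimal sketch of how these hooks are typically consumed; the `model`, `train_gen`, and `_EPOCHS` names here are assumptions for illustration, not part of the original source:

import horovod.keras as hvd
import keras

callbacks = _get_hooks()
if hvd.rank() == 0:
    # Only the root rank writes checkpoints, so workers do not clobber each other's files.
    callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))

model.fit_generator(train_gen,
                    steps_per_epoch=len(train_gen) // hvd.size(),
                    epochs=_EPOCHS,
                    callbacks=callbacks,
                    verbose=1 if hvd.rank() == 0 else 0)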
Example #3
def setup_callbacks(params, callbacks, encoder, decoder, prop_pred):
    import horovod.keras as hvd

    # model checkpointing
    if params.checkpoint_period and hvd.rank() == 0:
        model_checkpoint_callback = model_checkpoint(
            encoder,
            decoder,
            prop_pred,
            params.checkpoint_path,
            nepochs=params.checkpoint_period,
            overwrite=params.overwrite_checkpoint)
        callbacks.append(model_checkpoint_callback)

    # LR scheduler
    if params.lr_schedule_patience:
        lr_callback = ReduceLROnPlateau(monitor=params.lr_schedule_prop,
                                        factor=0.5,
                                        patience=params.lr_schedule_patience,
                                        min_lr=params.lr_schedule_min *
                                        hvd.size(),
                                        cooldown=params.lr_schedule_cooldown,
                                        verbose=(hvd.rank() == 0))
        callbacks.append(lr_callback)

    if hvd.rank() == 0:
        callbacks.append(print_loss())
        if params.enable_tensorboard:
            callbacks.append(TensorBoard(params.checkpoint_path))
Example #4
    def load_data(self, data_fn, test_size=0.3, random=True):
        if not self.distributed_training:
            self.logger.info(
                'Loading the full dataset since distributed training is disabled ...'
            )
            # X, Y = self.data_io.load_all(data_fn, dset_name_pattern=dset_name_pattern, camera_pos=camera_pos)
            X, Y = self.data_io.load_all(data_fn)
        else:
            self.logger.info(
                'Loading part of the dataset since distributed training is enabled ...'
            )
            X, Y = self.data_io.load_partial(data_fn, hvd.size(), hvd.rank())
        self.logger.debug('Shape of X: %s' % str(X.shape))
        self.logger.debug('Shape of Y: %s' % str(Y.shape))

        # update the input_shape setting according to the loaded data
        self.input_shape = X.shape[1:]

        if test_size > 0:
            x_train, x_test, y_train, y_test = train_test_split(
                X, Y, test_size=test_size, random_state=42)
            self.x_train = x_train
            self.x_test = x_test
            self.y_train = y_train
            self.y_test = y_test
        else:
            self.x_train = X
            self.y_train = Y
        self.num_classes = np.unique(Y).shape[0]
        if test_size > 0:
            print("shapes:", self.x_train.shape, self.x_test.shape,
                  self.y_train.shape, self.y_test.shape)
        else:
            print("shapes:", self.x_train.shape, self.y_train.shape)
        self.logger.debug('Number of classes: %d' % self.num_classes)
Example #5
 def save_model(self):
     if self.distributed_training is True:
         if hvd.rank() == 0:
             if self.use_noise is True:
                 self.model.save('model_hvd_bw_%d_B0_with_noise_n_p_%d.h5' %
                                 (self.input_shape[0], hvd.size()))
             else:
                 self.model.save('model_hvd_bw_%d_B0_no_noise_%d_nodes.h5' %
                                 (self.input_shape[0], hvd.size()))
     else:
         if self.use_noise is True:
             self.model.save('model_bw_%d_B0_with_noise.h5' %
                             (self.input_shape[0]))
         else:
             self.model.save('model_bw_%d_B0_no_noise.h5' %
                             (self.input_shape[0]))
Example #6
    def test_elastic_state(self):
        with self.test_session(config=self.config) as sess:
            K.set_session(sess)

            v = 1.0 if hvd.rank() == 0 else 2.0
            model1 = keras.models.Sequential([
                keras.layers.Dense(2, activation='softmax')
            ])
            model1.build((2, 2))
            model1.set_weights(
                [np.array([[v,  v], [v, v]], dtype=np.float32),
                 np.array([v, v], dtype=np.float32)])

            model2 = keras.models.Sequential([
                keras.layers.Dense(2, activation='softmax')
            ])
            model2.build((2, 2))
            model2.set_weights(
                [np.array([[1.0,  2.0], [3.0, 4.0]], dtype=np.float32),
                 np.array([0.0, 0.0], dtype=np.float32)])

            optimizer = keras.optimizers.Adam(0.001 * hvd.size())

            state = hvd.elastic.KerasState(model1, optimizer, batch=20 + hvd.rank(), epoch=10 + hvd.rank())
            state.sync()

            model1_weights = model1.get_weights()
            model2_weights = model2.get_weights()

            # After sync, all values should match the root rank
            for w in state.model.get_weights():
                self.assertAllClose(w, np.ones_like(w))
            assert state.batch == 20
            assert state.epoch == 10

            # Partially modify then restore
            model1.set_weights(model2_weights)
            state.batch = 21
            state.epoch = 11

            state.restore()

            for w1, w2 in zip(model1.get_weights(), model1_weights):
                self.assertAllClose(w1, w2)
            assert state.batch == 20
            assert state.epoch == 10

            # Partially modify then commit
            model1.set_weights(model2_weights)
            state.batch = 21
            state.epoch = 11

            state.commit()
            state.restore()

            for w1, w2 in zip(model1.get_weights(), model2_weights):
                self.assertAllClose(w1, w2)
            assert state.batch == 21
            assert state.epoch == 11
Example #7
    def get_batch_sharing_solution(self, batch_patch_info):
        n_workers = hvd.size()
        n_imgs_per_worker = batch_patch_info.shape[1] // hvd.size()

        worker_batch_delta = np.zeros((hvd.size(), 2), np.int32)
        for wi in range(n_workers):
            worker_batch_delta[wi, 0] = np.sum(
                batch_patch_info[0, :] == wi) - n_imgs_per_worker
            worker_batch_delta[wi, 1] = wi

        # Sort so that the workers missing the most patches come first.
        sw = worker_batch_delta[worker_batch_delta[:, 0].argsort(), :]
        # print("***SHARING SOLUTION***")
        # print("INITIAL OFFERINGS")
        # print(sw)
        transfers = np.zeros((n_workers, n_workers), np.int32)
        i = 0
        j = n_workers - 1
        while i < j and sw[i, 0] < 0:  # while some worker is still missing patches
            if sw[i, 0] < 0 and sw[j, 0] > 0:  # worker i is missing patches and worker j has extras
                init_i = sw[i, 0]
                init_j = sw[j, 0]
                if -sw[i, 0] < sw[j, 0]:  # worker j can fulfill i's request and still has images left
                    transfers[sw[i, 1], sw[j, 1]] = sw[i, 0]
                    transfers[sw[j, 1], sw[i, 1]] = -sw[i, 0]
                    sw[j, 0] += sw[i, 0]
                    sw[i, 0] = 0
                    i += 1
                else:  # worker i takes all of worker j's extra images and may still need more
                    transfers[sw[i, 1], sw[j, 1]] = -sw[j, 0]
                    transfers[sw[j, 1], sw[i, 1]] = sw[j, 0]
                    sw[i, 0] += sw[j, 0]
                    sw[j, 0] = 0
                    if -init_i == init_j:  # both are satisfied, move on to the next receiver
                        i += 1
                    j -= 1

        if np.sum(sw[:, 0]) != 0:
            raise Exception("Error in sharing solution, check the source code!")
        return transfers
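For orientation, a worked example of the transfers matrix this returns, assuming hvd.size() == 2 (the concrete values are illustrative, not from the original source):

# batch_patch_info[0, :] == [0, 0, 0, 1]  ->  each worker should hold 2 patches
# worker 0 holds 3 (one too many), worker 1 holds 1 (one too few), so:
#   transfers == [[ 0,  1],
#                 [-1,  0]]
# transfers[a, b] > 0 means worker a sends that many patches to worker b;
# the mirrored negative entry records how many patches the other worker receives.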
Example #8
 def save_model(self):
     if self.distributed_training is True:
         if hvd.rank() == 0:
             if self.noise_stddev > 0:
                 self.model.save('model_%d_%s_noise_np_%d.h5' %
                                 (self.input_shape[0], self.base_model_name,
                                  hvd.size()))
             else:
                 self.model.save('model_%d_%s_np_%d.h5' %
                                 (self.input_shape[0], self.base_model_name,
                                  hvd.size()))
     else:
         if self.noise_stddev > 0:
             self.model.save('model_%d_%s_noise.h5' %
                             (self.input_shape[0], self.base_model_name))
         else:
             self.model.save('model_%d_%s.h5' %
                             (self.input_shape[0], self.base_model_name))
Example #9
 def build(self):
     from keras.optimizers import deserialize
     opt_config = {'class_name': self.name, 'config': self.config}
     opt = deserialize(opt_config)
     if self.horovod_wrapper:
         import horovod.keras as hvd
         if hasattr(opt, 'lr'):
             opt.lr *= hvd.size()
         opt = hvd.DistributedOptimizer(opt)
     return opt
Example #10
def _get_optimizer(params, is_distributed=_DISTRIBUTED):
    if is_distributed:
        # Horovod: adjust learning rate based on number of GPUs.
        opt = keras.optimizers.SGD(lr=params["learning_rate"] * hvd.size(),
                                   momentum=params["momentum"])
        # Horovod: add Horovod Distributed Optimizer.
        return hvd.DistributedOptimizer(opt)
    else:
        return keras.optimizers.SGD(lr=params["learning_rate"],
                                    momentum=params["momentum"])
Example #11
def train_evaluate():

    # Generate training and validation data generators
    def get_image_list(data_dir):
        dataset = []
        for folder in os.listdir(data_dir):
            for image in os.listdir(os.path.join(data_dir, folder)):
                dataset.append((os.path.join(data_dir, folder, image), folder))
        return dataset

    training_data = ImageSequence(get_image_list(os.path.join(FLAGS.data_dir, 'train')), FLAGS.batch_size, True)
    validation_data = ImageSequence(get_image_list(os.path.join(FLAGS.data_dir, 'test')), FLAGS.batch_size, False)

    # Horovod: Initialize Horovod
    hvd.init()

    # Horovod: Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    tf.keras.backend.set_session(tf.Session(config=config))

    # Create a model
    model = network_model(FLAGS.hidden_units)
    loss = 'categorical_crossentropy'

    # Horovod: Adjust learning rate based on number of GPUs
    optimizer = Adadelta(lr=1.0 * hvd.size())
    # Horovod: add Horovod Distributed Optimizer
    optimizer = hvd.DistributedOptimizer(optimizer)

    metrics = ['acc']
    model.compile(optimizer, loss, metrics)
  
    # Set up callbacks
    callbacks = [
        # Broadcast initial variable states from rank 0 to all other processes
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    ]
    
    # Horovod: save  logs only on worker 0
    if hvd.rank() == 0:
        callbacks.append(tf.keras.callbacks.TensorBoard(log_dir=FLAGS.log_dir))

    # Start training
    model.fit_generator(generator = training_data,
                        validation_data = validation_data,
                        epochs = FLAGS.epochs,
                        use_multiprocessing = True,
                        workers = 4,
                        callbacks = callbacks,
                        verbose = 1)

    # Save the model
    model.save(FLAGS.save_model_path)
Example #12
 def __init__(self, filename, batch_size):
     self.f_array = h5py.File(filename, "r")
     x = self.f_array["images"]
     y = self.f_array["masks"]
     self.batch_size = batch_size
     node_array_size = int(np.ceil(len(x) / hvd.size()))
     self.init_array = hvd.rank() * node_array_size
     self.end_array = self.init_array + node_array_size
     self.x = x
     self.y = y
     print("calculating size")
     print("size", len(self))
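The snippet above only shows the constructor; a hedged sketch of the remaining keras.utils.Sequence methods that would consume this per-rank shard (these method bodies are an assumption, not from the original source):

 def __len__(self):
     # Number of batches in this rank's slice of the HDF5 arrays.
     return int(np.ceil((self.end_array - self.init_array) / self.batch_size))

 def __getitem__(self, idx):
     # Serve batch `idx` from this rank's [init_array, end_array) window.
     start = self.init_array + idx * self.batch_size
     end = min(start + self.batch_size, self.end_array)
     return self.x[start:end], self.y[start:end]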
Example #13
def lr_schedule(epoch):
    """Learning Rate Schedule

    The learning rate is reduced after 20, 40, 60 and 70 epochs (scaled by
    hvd.size()) and is left to the warmup callback during the first 20 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): the current epoch index

    # Returns
        lr (float32): learning rate
    """
    if epoch <= 20:  #was 5
        # bypass to the warmup callback
        return K.get_value(model.optimizer.lr)
    if epoch <= 40:
        return 0.08 * hvd.size()  #was 80, 0.01
    if epoch <= 60:
        return 0.01 * hvd.size()  #was 120 0.002
    if epoch <= 70:
        return 0.002 * hvd.size()  #was 160 0.0004
    return 0.0004 * hvd.size()
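A hedged sketch of how a schedule like this is usually wired into training alongside the Horovod callbacks; the 20-epoch warmup mirrors the bypass above, and `model` is assumed to be a compiled Keras model:

callbacks = [
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    hvd.callbacks.MetricAverageCallback(),
    # The warmup callback owns the learning rate for the first epochs,
    # which is why lr_schedule() simply returns the current value early on.
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=20, verbose=1),
    keras.callbacks.LearningRateScheduler(lr_schedule),
]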
Example #14
        def create_model():
            opt = keras.optimizers.SGD(lr=0.01 * hvd.size(), momentum=0.9)
            opt = hvd.DistributedOptimizer(opt)

            model = keras.models.Sequential()
            model.add(keras.layers.Dense(2, input_shape=(3,)))
            model.add(keras.layers.RepeatVector(3))
            model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
            model.compile(loss=keras.losses.MSE,
                          optimizer=opt,
                          metrics=[keras.metrics.categorical_accuracy],
                          sample_weight_mode='temporal')

            return model
Example #15
        def create_model():
            opt = keras.optimizers.SGD(lr=0.01 * hvd.size(), momentum=0.9)
            opt = hvd.DistributedOptimizer(opt)

            model = keras.models.Sequential()
            model.add(keras.layers.Dense(2, input_shape=(3,)))
            model.add(keras.layers.RepeatVector(3))
            model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
            model.compile(loss=keras.losses.MSE,
                          optimizer=opt,
                          metrics=[keras.metrics.categorical_accuracy],
                          sample_weight_mode='temporal')

            return model
Example #16
def data_generator(file_path, batch_size, seq_len=512, predict=False):
    # Trick the code into thinking we're only running 1 process for prediction when running `Metrics`.
    if predict:
        size = 1
    else:
        size = hvd.size()
    total_batch_size = batch_size * size
    print(total_batch_size)
    rank = hvd.rank()
    print(rank)
    range_start = batch_size * rank
    range_end = range_start + batch_size
    print(range_start, range_end)
    while True:
        with xopen(file_path, "rt") as f:
            _, label_dim = json.loads(f.readline())
            text = []
            labels = []
            for line in f:
                if len(text) == total_batch_size:
                    text = text[range_start:range_end]
                    labels = labels[range_start:range_end]
                    print(text[0])
                    # Note: the two model inputs must be passed in a list, not a tuple.
                    yield ([np.asarray(text), np.zeros_like(text)], np.asarray(labels))
                    text = []
                    labels = []
                line = json.loads(line)
                # First sublist is token ids.
                text.append(np.asarray(line[0])[0:seq_len])

                # Second sublist is positive label indices.
                label_line = np.zeros(label_dim, dtype='b')
                label_line[line[1]] = 1
                labels.append(label_line)
            # Yield what is left as the last batch when file has been read to its end.
            # Split the remaining examples, duplicating with `ceil()` if they don't split evenly.
            leftover_batch_start = ceil(len(text) / size) * rank
            leftover_batch_end = leftover_batch_start + ceil(len(text) / size)
            text = text[leftover_batch_start:leftover_batch_end]
            labels = labels[leftover_batch_start:leftover_batch_end]
            yield ([np.asarray(text), np.zeros_like(text)], np.asarray(labels))
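For clarity, a small worked example of the per-rank slicing above (the numbers are illustrative, not from the original source):

# With batch_size=4 and hvd.size()=2: total_batch_size = 8, so 8 examples are
# buffered per step; rank 0 yields text[0:4] and rank 1 yields text[4:8].
# For the leftover partial batch, say 5 examples remain: ceil(5 / 2) = 3, so
# rank 0 yields text[0:3] and rank 1 yields text[3:6] (the slice clips at 5).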
Example #17
    def create_inception_model(self, number_categories, dense_layer_sizes, dropout_fraction, unfrozen_layers, focal_loss=False):
        hvd.init()
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.visible_device_list = str(hvd.local_rank())
        opt = hvd.DistributedOptimizer(tf.keras.optimizers.Adam(learning_rate=0.001*hvd.size()))
        model = InceptionV3(include_top=False, pooling='avg')
        output = model.outputs[0]

        for layer_size in dense_layer_sizes:
            dense = Dense(layer_size, activation='relu')(output)
            dropout = Dropout(dropout_fraction)(dense)
            output = BatchNormalization()(dropout)
        
        if number_categories == 1:
            output = Dense(1, activation='sigmoid')(output)
        else:
            output = Dense(number_categories, activation='softmax')(output)
        model = Model(inputs=model.inputs, outputs=output)

        for index in range(len(model.layers) - unfrozen_layers):
            model.layers[index].trainable = False

        if number_categories == 1:
            the_metrics = [metrics.binary_accuracy]
            if focal_loss:
                loss = customlosses.focal_binary_crossentropy
            else:
                loss = 'binary_crossentropy'
        else:
            the_metrics = [metrics.categorical_accuracy]
            if focal_loss:
                loss = customlosses.focal_categorical_crossentropy
            else:
                loss = 'categorical_crossentropy'

        model.compile(optimizer=opt, loss=loss, metrics=the_metrics)
        model.save(self.model_filename)
        self.model = model
Example #18
def batch_generator(full_sequences, fragment_length, batch_size,
                    fragment_stride, nb_output_bins, randomize_batch_order,
                    _rnd):
    indices = list(
        fragment_indices(full_sequences, fragment_length, batch_size,
                         fragment_stride, nb_output_bins))
    global g_multi_gpu
    if g_multi_gpu:
        import horovod.keras as hvd
        gpu_count = hvd.size()
        current_gpu = hvd.rank()
    else:
        gpu_count = 1
        current_gpu = 0

    if randomize_batch_order:
        _rnd.shuffle(indices)

    batches_parted = [batch for batch in partition_all(batch_size, indices)]
    start_index = len(batches_parted) // gpu_count * current_gpu
    batches_gpu = batches_parted[start_index:]

    batches = cycle(batches_gpu)
    for batch in batches:
        if len(batch) < batch_size:
            continue
        inputs = np.array(
            [one_hot(full_sequences[e[0]][e[1]:e[1] + fragment_length])
             for e in batch],
            dtype='uint8')
        targets = np.array(
            [one_hot(full_sequences[e[0]][e[1] + 1:e[1] + fragment_length + 1])
             for e in batch],
            dtype='uint8')
        yield inputs, targets
Example #19
# Load the data files
train_file = os.path.join(input_dir, 'train.h5')
valid_file = os.path.join(input_dir, 'val.h5')
test_file = os.path.join(input_dir, 'test.h5')
train_input, train_labels, train_weights = load_file(train_file, n_train)
valid_input, valid_labels, valid_weights = load_file(valid_file, n_valid)
test_input, test_labels, test_weights = load_file(test_file, n_test)
print('train shape:', train_input.shape, 'Mean label:', train_labels.mean())
print('valid shape:', valid_input.shape, 'Mean label:', valid_labels.mean())
print('test shape: ', test_input.shape, 'Mean label:', test_labels.mean())

# Model config
conv_sizes = [8, 16, 32]
fc_sizes = [64]
optimizer = 'Adam'
lr = 0.01 * hvd.size()
dropout = 0.5

# Training config
batch_size = 32  #128
n_epochs = 8

# Build the model
model = build_model(train_input.shape[1:],
                    conv_sizes=conv_sizes,
                    fc_sizes=fc_sizes,
                    dropout=dropout,
                    optimizer=optimizer,
                    lr=lr)
if hvd.rank() == 0:
    model.summary()
Example #20
def main(args):
    if 'sourcedir.tar.gz' in args.tensorboard_dir:
        tensorboard_dir = re.sub('source/sourcedir.tar.gz', 'model',
                                 args.tensorboard_dir)
    else:
        tensorboard_dir = args.tensorboard_dir
    logging.info("Writing TensorBoard logs to {}".format(tensorboard_dir))

    if os.path.isdir(args.checkpoint_path):
        logging.info("Checkpointing directory {} exists".format(
            args.checkpoint_path))
    else:
        logging.info("Creating Checkpointing directory {}".format(
            args.checkpoint_path))
        os.mkdir(args.checkpoint_path)

    mpi = False
    hvd = None
    if 'sagemaker_mpi_enabled' in args.fw_params:
        if args.fw_params['sagemaker_mpi_enabled']:
            import horovod.keras as hvd
            mpi = True
            # Horovod: initialize Horovod.
            hvd.init()

            # Horovod: pin GPU to be used to process local rank (one GPU per process)
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.gpu_options.visible_device_list = str(hvd.local_rank())
            K.set_session(tf.Session(config=config))
    logging.info("Running with MPI={}".format(mpi))

    logging.info("getting data")
    train_dataset = train_input_fn()
    eval_dataset = eval_input_fn()
    validation_dataset = validation_input_fn()

    logging.info("configuring model")

    # Load model
    if not os.listdir(args.checkpoint_path):
        model = keras_model_fn(args.learning_rate, args.weight_decay,
                               args.optimizer, args.momentum, mpi, hvd)
        epoch_number = 0
    else:
        model, epoch_number = load_checkpoint_model(args.checkpoint_path)

    logging.info("Checkpointing to: {}".format(args.checkpoint_path))

    callbacks = []
    if mpi:
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
        callbacks.append(hvd.callbacks.MetricAverageCallback())
        callbacks.append(
            hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5,
                                                     verbose=1))
        callbacks.append(
            keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1))
        if hvd.rank() == 0:
            callbacks.append(
                ModelCheckpoint(args.checkpoint_path +
                                '/checkpoint-{epoch}.h5'))
            callbacks.append(
                TensorBoard(log_dir=tensorboard_dir, update_freq='epoch'))
    else:
        callbacks.append(
            keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1))
        callbacks.append(
            ModelCheckpoint(args.checkpoint_path + '/checkpoint-{epoch}.h5'))
        callbacks.append(
            TensorBoard(log_dir=tensorboard_dir, update_freq='epoch'))

    logging.info("Starting training")
    size = 1
    if mpi:
        size = hvd.size()

    model.fit(
        x=train_dataset[0],
        y=train_dataset[1],
        steps_per_epoch=(num_examples_per_epoch('train') // args.batch_size) //
        size,
        epochs=args.epochs,
        initial_epoch=epoch_number,
        validation_data=validation_dataset,
        validation_steps=(num_examples_per_epoch('validation') //
                          args.batch_size) // size,
        callbacks=callbacks)

    score = model.evaluate(eval_dataset[0],
                           eval_dataset[1],
                           steps=num_examples_per_epoch('eval') //
                           args.batch_size,
                           verbose=0)

    logging.info('Test loss:{}'.format(score[0]))
    logging.info('Test accuracy:{}'.format(score[1]))

    # Horovod: Save model only on worker 0 (i.e. master)
    if mpi:
        if hvd.rank() == 0:
            save_model(model, args.model_output_dir)
    else:
        save_model(model, args.model_output_dir)
Example #21
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(lr=args.lr * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.
Example #22
def _run():
    import keras
    import models
    logger = tk.log.get(__name__)
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', help='Number of epochs.', default=300, type=int)
    parser.add_argument('--batch-size', help='Batch size.', default=16, type=int)
    parser.add_argument('--warm',
                        help='Load models/model.fold{cv_index}.h5.',
                        action='store_true',
                        default=False)
    parser.add_argument('--cv-index', help='Which CV fold to run.', type=int)
    parser.add_argument('--cv-size', help='Number of CV folds.', default=5, type=int)
    parser.add_argument('--split-seed', help='Random seed for the split.', default=123, type=int)
    args = parser.parse_args()
    assert args.cv_index in range(args.cv_size)
    model_path = _MODELS_DIR / 'model.fold{}.h5'.format(args.cv_index)

    (X_train,
     y_train), (X_val, y_val), _ = data.load_data(args.cv_index, args.cv_size,
                                                  args.split_seed)
    num_classes = len(np.unique(y_train))
    y_train = tk.ml.to_categorical(num_classes)(y_train)
    y_val = tk.ml.to_categorical(num_classes)(y_val)
    logger.info('len(X_train) = {} len(X_val) = {}'.format(
        len(X_train), len(X_val)))

    model = models.create_network(num_classes)

    # Learning rate:
    # - lr 0.5 with batch size 256 is a common baseline, so start from there
    # - reportedly it works well to scale the learning rate in proportion to the batch size
    lr = 0.5 * args.batch_size / 256 * hvd.size()
    opt = keras.optimizers.SGD(lr=lr, momentum=0.9, nesterov=True)
    opt = hvd.DistributedOptimizer(opt)
    model.compile(opt, 'categorical_crossentropy', ['acc'])

    if hvd.rank() == 0 and args.cv_index == 0:
        model.summary(print_fn=logger.info)
        logger.info('network depth: %d', tk.dl.count_network_depth(model))

    if args.warm:
        model.load_weights(str(model_path))
        logger.info('{} loaded'.format(model_path))
    else:
        assert not model_path.exists()  # guard against accidentally overwriting an existing model

    callbacks = []
    if args.warm and args.epochs < 300:  # shortened training mode
        callbacks.append(tk.dl.learning_rate_callback((0, 0.5)))
    else:
        callbacks.append(tk.dl.learning_rate_callback())
    callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
    callbacks.append(hvd.callbacks.MetricAverageCallback())
    callbacks.append(
        hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1))
    if hvd.rank() == 0:
        callbacks.append(tk.dl.tsv_log_callback(_MODELS_DIR / 'history.tsv'))
    callbacks.append(tk.dl.freeze_bn_callback(0.95))

    gen = models.create_generator((299, 299), mixup=True)
    model.fit_generator(
        gen.flow(X_train,
                 y_train,
                 batch_size=args.batch_size,
                 data_augmentation=True,
                 shuffle=True),
        steps_per_epoch=gen.steps_per_epoch(len(X_train), args.batch_size) //
        hvd.size(),
        epochs=args.epochs,
        verbose=1 if hvd.rank() == 0 else 0,
        validation_data=gen.flow(X_val,
                                 y_val,
                                 batch_size=args.batch_size,
                                 shuffle=True),
        validation_steps=gen.steps_per_epoch(len(X_val), args.batch_size) //
        hvd.size(),  # the "* 3" factor is omitted here
        callbacks=callbacks)

    if hvd.rank() == 0:
        model.save(str(model_path))

        proba_val = model.predict_generator(
            gen.flow(X_val, y_val, batch_size=args.batch_size),
            gen.steps_per_epoch(len(X_val), args.batch_size),
            verbose=1)
        joblib.dump(proba_val,
                    _MODELS_DIR / 'proba_val.fold{}.pkl'.format(args.cv_index))

        pred_val = proba_val.argmax(axis=-1)
        logger.info('val_acc: {:.1f}%'.format(
            sklearn.metrics.accuracy_score(y_val.argmax(axis=-1), pred_val) *
            100))
Example #23
    def __init__(self, config_file, resume_training=True, resume_epoch=None, predict_length=None, multi_gpu=False):
        self.config = ConfigParser.ConfigParser(allow_no_value=True)
        try:
            self.config.readfp(open(config_file))
        except:
            print('Could not read configuration file {} - exiting.'.format(config_file))
            sys.exit(1)
        # Get General Configuration
        self.train_multi_gpu = multi_gpu
        self.resume_training = resume_training
        self.resume_epoch = resume_epoch
        self.keras_verbose = self.config.getint('general', 'keras_verbose')
        self.seed = self.config.getint('general', 'seed')
        if self.seed is None:
            self.seed = 42
        # Get Model Configuration
        self.data_dir = self.config.get('model', 'data_dir')
        self.data_dir_structure = self.config.get('model', 'data_dir_structure')
        self.model_dir = self.config.get('model', 'model_dir')
        if len(self.model_dir) == 0:
            self.model_dir = None
        self.sample_rate = self.config.getint('model', 'sample_rate')
        self.debug = self.config.getint('model', 'debug')
        # Training Configuration
        self.max_epoch = self.config.getint('training', 'max_epoch')
        self.test_factor = self.config.getfloat('training', 'test_factor')
        self.batch_size = self.config.getint('training', 'batch_size')
        self.output_bins = self.config.getint('training', 'output_bins')
        self.filters = self.config.getint('training', 'filters')
        self.dilation_depth = self.config.getint('training', 'dilation_depth')
        self.stacks = self.config.getint('training', 'stacks')
        self.use_bias = self.config.getboolean('training', 'use_bias')
        self.use_ulaw = self.config.getboolean('training', 'use_ulaw')
        self.res_l2 = self.config.getfloat('training', 'res_l2')
        self.final_l2 = self.config.getfloat('training', 'final_l2')
        self.initial_fragment_length = self.config.getint('training', 'initial_fragment_length')
        self.fragment_stride = self.config.getint('training', 'fragment_stride')
        self.use_skip_connections = self.config.getboolean('training', 'use_skip_connections')
        self.learn_all_outputs = self.config.getboolean('training', 'learn_all_outputs')
        self.random_train_batches = self.config.getboolean('training', 'random_train_batches')
        self.randomize_batch_order = self.config.getboolean('training', 'randomize_batch_order')
        self.train_only_in_receptive_field = self.config.getboolean('training', 'train_only_in_receptive_field')
        self.train_with_soft_targets = self.config.getboolean('training', 'train_with_soft_targets')
        self.soft_target_stdev = self.config.getfloat('training', 'soft_target_stdev')
        self.optimizer = self.config.get('training', 'optimizer')
        self.early_stopping_patience = self.config.getint('training', 'early_stopping_patience')
        # Prediction Configuration
        self.predict_length = self.config.getfloat('prediction', 'predict_length')
        # Let's allow the user to overwrite the length via cmd-line, it is more practical :-)
        if predict_length is not None:
            self.predict_length = predict_length
        self.sample_argmax = self.config.getboolean('prediction', 'sample_argmax')
        self.sample_temperature = self.config.getfloat('prediction', 'sample_temperature')
        if self.sample_temperature < 0.001:
            self.sample_temperature = None
        self.predict_initial_input = self.config.get('prediction', 'initial_input')
        if len(self.predict_initial_input) == 0:
            self.predict_initial_input = None
        self.predict_use_softmax_as_input = self.config.getboolean('prediction', 'use_softmax_as_input')
        self.sample_seed = self.seed
        np.random.seed(self.seed)
        self.rnd = np.random.RandomState(self.seed)

        self.fragment_length = self.initial_fragment_length + self._compute_receptive_field2(self.sample_rate, self.dilation_depth, self.stacks)[0]
        # Additional Settings
        self.num_gpus = 1
        self.train_rank = 0
        if self.train_multi_gpu:
            self.train_rank = hvd.rank()
            self.num_gpus = hvd.size()
        print('rank = {}, num_gpu={}'.format(self.train_rank, self.num_gpus))
        self.dataset = DataSet(self.config, self.fragment_length, self.num_gpus, self.train_rank)
Example #24
    # Add L2 weight decay & adjust BN settings.
    model_config = model.get_config()
    for layer, layer_config in zip(model.layers, model_config['layers']):
        if hasattr(layer, 'kernel_regularizer'):
            regularizer = keras.regularizers.l2(args.wd)
            layer_config['config']['kernel_regularizer'] = \
                {'class_name': regularizer.__class__.__name__,
                 'config': regularizer.get_config()}
        if type(layer) == keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5

    model = keras.models.Model.from_config(model_config)

    # Horovod: adjust learning rate based on number of GPUs.
    opt = keras.optimizers.SGD(lr=args.base_lr * hvd.size(),
                               momentum=args.momentum)

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt, compression=compression)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy', 'top_k_categorical_accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
Example #25
import horovod.keras as hvd

# Horovod: initialize Horovod.
hvd.init()

# Horovod: pin GPU to be used to process local rank (one GPU per process)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

batch_size = 128
num_classes = 10

# Horovod: adjust number of epochs based on number of GPUs.
epochs = int(math.ceil(12.0 / hvd.size()))

# Input image dimensions
img_rows, img_cols = 28, 28

# The data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
Example #26
    # We'll save the worker logs and models separately but only
    # use the logs/saved model from worker 0.
    args.saved_model = "./worker{}/3d_unet_decathlon.hdf5".format(hvd.rank())

# Optimize CPU threads for TensorFlow
CONFIG = tf.ConfigProto(inter_op_parallelism_threads=args.interop_threads,
                        intra_op_parallelism_threads=args.intraop_threads)

SESS = tf.Session(config=CONFIG)

K.backend.set_session(SESS)

model, opt = unet_3d(
    use_upsampling=args.use_upsampling,
    n_cl_in=args.number_input_channels,
    learning_rate=args.lr * hvd.size(),
    n_cl_out=1,  # single channel (greyscale)
    dropout=0.2,
    print_summary=print_summary)

opt = hvd.DistributedOptimizer(opt)

model.compile(
    optimizer=opt,
    # loss=[combined_dice_ce_loss],
    loss=[dice_coef_loss],
    metrics=[dice_coef, "accuracy", sensitivity, specificity])

if hvd.rank() == 0:
    start_time = datetime.datetime.now()
    print("Started script on {}".format(start_time))
Example #27
#initial_model = create_vgg16()
#initial_model.load_weights(model_path) # we may begin from scratch

#x = Dense(batches.num_class, activation='softmax')(initial_model.layers[-2].output)
#model = Model(initial_model.input, x)
# for layer in initial_model.layers: layer.trainable=False # for scratch build
#opt = Adam(lr=0.001)
opt = SGD(lr=0.01)
opt = hvd.DistributedOptimizer(opt)
callbacks = [
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    hvd.callbacks.MetricAverageCallback(),
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=2, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=3, verbose=1),
]

model.compile(optimizer=opt,
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

model.fit_generator(batches,
                    steps_per_epoch=batches.samples // batch_size // hvd.size(),
                    epochs=10,
                    validation_data=valid_batches,
                    validation_steps=valid_batches.samples // batch_size // hvd.size())


if hvd.rank() == 0:
  model_json = model.to_json()
  with open("model.json",'w') as json_file:
    json_file.write(model_json)
  model.save_weights("model_first.h5")
  print("Saved model in the first step")
Example #28
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.
Example #29
    "validate_test_split": args.validate_test_split,
    "augment": False,
    "shuffle": False,
    "seed": args.random_seed
}
validation_generator = DataGenerator("validate", args.data_path,
                                     **validation_data_params)

if (hvd.rank() == 0):
    validation_generator.print_info()

# Fit the model
# Do at least 3 steps for training and validation
steps_per_epoch = max(
    3,
    training_generator.get_length() // (args.bz * hvd.size()))
validation_steps = max(
    3, 3 * training_generator.get_length() // (args.bz * hvd.size()))

unet_model.model.fit_generator(
    training_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=args.epochs,
    verbose=verbose,
    validation_data=validation_generator,
    #validation_steps=validation_steps,
    callbacks=callbacks,
    max_queue_size=1,  #args.num_prefetched_batches,
    workers=1,  #args.num_data_loaders,
    use_multiprocessing=True)
Example #30
from prednet import PredNet
from data_utils import SequenceGenerator
from kitti_settings import *
import datetime
import horovod.keras as hvd
import keras
import tensorflow as tf
# Horovod: initialize Horovod
hvd.init()
# Horovod: pin GPU to be used to process local rank (one GPU per process)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

print("horovod size", hvd.size())

save_model = True  # if weights will be saved
weights_file = os.path.join(
    WEIGHTS_DIR, 'prednet_kitti_weights.hdf5')  # where weights will be saved
json_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json')
if not os.path.exists(WEIGHTS_DIR): os.mkdir(WEIGHTS_DIR)
# Data files
train_file = os.path.join(DATA_DIR, 'X_train.hkl')
train_sources = os.path.join(DATA_DIR, 'sources_train.hkl')
val_file = os.path.join(DATA_DIR, 'X_val.hkl')
val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')

# Training parameters
nb_epoch = 10  #original: 150; for all tests so far set to 100; t2onlyMax: 150
batch_size = 15
Example #31
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(1.0 * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
]
Example #32
    # Add L2 weight decay & adjust BN settings.
    model_config = model.get_config()
    for layer, layer_config in zip(model.layers, model_config['layers']):
        if hasattr(layer, 'kernel_regularizer'):
            regularizer = keras.regularizers.l2(args.wd)
            layer_config['config']['kernel_regularizer'] = \
                {'class_name': regularizer.__class__.__name__,
                 'config': regularizer.get_config()}
        if type(layer) == keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5

    model = keras.models.Model.from_config(model_config)

    # Horovod: adjust learning rate based on number of GPUs.
    opt = keras.optimizers.SGD(lr=args.base_lr * hvd.size(),
                               momentum=args.momentum)

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
Example #33
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(1.0 * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

log_dir = "../logs/keras-tensorboard-profile/" + datetime.now().strftime(
    "%Y%m%d-%H%M%S")
callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
Example #34
    def train(self):
        train_data_generator = self.data_loader.get_train_data_generator()
        batch_size = self.config.trainer.batch_size

        steps_per_epoch = self.data_loader.get_train_data_size() // batch_size
        if self.config.trainer.use_horovod:
            import horovod.keras as hvd

            steps_per_epoch //= hvd.size()
        assert steps_per_epoch > 0

        valid_data_generator = self.data_loader.get_validation_data_generator()
        valid_data_size = self.data_loader.get_validation_data_size()

        fake_x_pool = FakeImagePool(self.config.trainer.fake_pool_size)
        fake_y_pool = FakeImagePool(self.config.trainer.fake_pool_size)

        batch_shape = (self.config.trainer.batch_size,
                       self.config.dataset.image_size // 8, self.config.dataset.image_size // 8, 1)

        fake = np.zeros(shape=batch_shape, dtype=np.float32)
        real = np.ones(shape=batch_shape, dtype=np.float32)

        epochs = self.config.trainer.num_epochs
        start_time = datetime.datetime.now()

        self.on_train_begin()
        for epoch in range(self.config.trainer.epoch_to_continue, epochs):
            self.on_epoch_begin(epoch, {})

            epoch_logs = defaultdict(float)
            for step in range(1, steps_per_epoch + 1):
                batch_logs = {"batch": step, "size": self.config.trainer.batch_size}
                self.on_batch_begin(step, batch_logs)

                imgs_x, imgs_y = next(train_data_generator)

                fakes_y = self.g_xy.predict(imgs_x)
                fakes_x = self.g_yx.predict(imgs_y)

                # train discriminator using history of fake images (Shrivastava et al)
                fakes_x = fake_x_pool.query(fakes_x)
                fakes_y = fake_y_pool.query(fakes_y)

                if self.config.trainer.label_smoothing:
                    fake = np.random.uniform(0, 0.2, size=batch_shape)
                    real = np.random.uniform(0.8, 1.0, size=batch_shape)

                # train discriminator
                dx_loss_real = self.d_x.train_on_batch(imgs_x, real)
                dx_loss_fake = self.d_x.train_on_batch(fakes_x, fake)
                dy_loss_real = self.d_y.train_on_batch(imgs_y, real)
                dy_loss_fake = self.d_y.train_on_batch(fakes_y, fake)

                # train generator
                g_loss = self.combined.train_on_batch([imgs_x, imgs_y], [real, real, imgs_x, imgs_y, imgs_x, imgs_y])

                dx_metric_names = self.d_metric_names("x")
                dy_metric_names = self.d_metric_names("y")
                g_metric_names = self.g_metric_names()

                assert len(dx_metric_names) == len(dx_loss_real) == len(dx_loss_fake)
                assert len(dy_metric_names) == len(dy_loss_real) == len(dy_loss_fake)
                assert len(g_metric_names) == len(g_loss)

                metric_logs = {}
                for metric_name, metric_value in zip(dx_metric_names + dy_metric_names,
                                                     dx_loss_real + dy_loss_real):
                    metric_logs[f"train/{metric_name}_real"] = \
                        metric_value * (100 if "accuracy" in metric_name.lower() else 1)

                for metric_name, metric_value in zip(dx_metric_names + dy_metric_names,
                                                     dx_loss_fake + dy_loss_fake):
                    metric_logs[f"train/{metric_name}_fake"] = \
                        metric_value * (100 if "accuracy" in metric_name.lower() else 1)

                for metric_name, metric_value in zip(g_metric_names, g_loss):
                    metric_logs[f"train/{metric_name}"] = metric_value

                batch_logs.update(metric_logs)
                for metric_name in metric_logs.keys():
                    if metric_name in epoch_logs:
                        epoch_logs[metric_name] += metric_logs[metric_name]
                    else:
                        epoch_logs[metric_name] = metric_logs[metric_name]

                print_str = f"[Epoch {epoch + 1}/{epochs}] [Batch {step}/{steps_per_epoch}]"
                deliminator = ' '
                for metric_name, metric_value in metric_logs.items():
                    if 'accuracy' in metric_name:
                        print_str += f"{deliminator}{metric_name}={metric_value:.1f}%"
                    elif 'loss' in metric_name:
                        print_str += f"{deliminator}{metric_name}={metric_value:.4f}"
                    else:
                        print_str += f"{deliminator}{metric_name}={metric_value}"
                    if deliminator == ' ':
                        deliminator = ',\t'

                print_str += f", time: {datetime.datetime.now() - start_time}"
                print(print_str, flush=True)

                self.on_batch_end(step, batch_logs)

            # sum to average
            for k in epoch_logs:
                epoch_logs[k] /= steps_per_epoch
            epoch_logs = dict(epoch_logs)

            # additional log
            epoch_logs['train/lr/G'] = K.get_value(self.combined.optimizer.lr)
            epoch_logs['train/lr/D_x'] = K.get_value(self.d_x.optimizer.lr)
            epoch_logs['train/lr/D_y'] = K.get_value(self.d_y.optimizer.lr)

            self.on_epoch_end(epoch, epoch_logs)
            if (epoch + 1) % self.config.trainer.predict_freq == 0:
                self.sample_valid_images(epoch, valid_data_generator, valid_data_size)

        self.predict_test_images(epochs)
        self.on_train_end()
Example #35
    # Add L2 weight decay & adjust BN settings.
    model_config = model.get_config()
    for layer, layer_config in zip(model.layers, model_config['layers']):
        if hasattr(layer, 'kernel_regularizer'):
            regularizer = keras.regularizers.l2(weight_decay)
            layer_config['config']['kernel_regularizer'] = \
                {'class_name': regularizer.__class__.__name__,
                 'config': regularizer.get_config()}
        if type(layer) == keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5

    model = keras.models.Model.from_config(model_config)

    # Horovod: adjust learning rate based on number of GPUs.
    opt = keras.optimizers.SGD(lr=learning_rate * hvd.size(), momentum=0.9)

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy', 'top_k_categorical_accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.