Code Example #1
    def build_dataset(self):
        """
        Dataset for train or evaluate
        :return: Return dataset for train or eval
        """
        ds_train = get_dataset(self.config,
                               is_training=True,
                               num_shards=hvd.size(),
                               shard_index=hvd.rank())
        self.train_steps_per_epoch = ds_train.steps_per_epoch // hvd.size()
        train_dataset = ds_train.build()
        ds_eval = get_dataset(self.config, is_training=False)
        self.eval_steps_per_epoch = ds_eval.steps_per_epoch
        eval_dataset = ds_eval.build()
        train_dataset_distill = None
        eval_dataset_distill = None
        if self.config.get_attribute(
                "scheduler") == "distill" or self.config.get_attribute(
                    'is_distill'):
            ds_train_distill = get_dataset(self.config,
                                           is_training=True,
                                           num_shards=hvd.size(),
                                           shard_index=hvd.rank())
            train_dataset_distill = ds_train_distill.build(True)
            ds_eval_distill = get_dataset(self.config, is_training=False)
            eval_dataset_distill = ds_eval_distill.build(True)

        return train_dataset, eval_dataset, train_dataset_distill, eval_dataset_distill
Code Example #2
def compute_expected_value(
    batch_id: int,
    aggregation_frequency: int,
    multiplier: float,
    average_aggregated_gradient: bool,
    tf2: bool,
) -> float:
    """
    Compute the expected value based on how we are aggregating gradients.
    """
    gradients_aggregated = (batch_id + 1) % aggregation_frequency == 0
    if gradients_aggregated:
        all_reduced_grads = 0.0
        for _ in range(aggregation_frequency):
            grads_for_batch = 0.0
            for rank in range(hvd.size()):
                grads_for_batch += rank * multiplier
            if average_aggregated_gradient:
                grads_for_batch /= float(aggregation_frequency)
            all_reduced_grads += grads_for_batch / float(hvd.size())
        return all_reduced_grads
    else:
        non_aggregated_grads = hvd.rank() * multiplier
        if tf2:
            # In TF2 we return the sum of the locally aggregated gradients.
            non_aggregated_grads *= (batch_id + 1) % aggregation_frequency
        return non_aggregated_grads
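
A worked check of this expectation, under assumed values (two workers, aggregation_frequency=4, multiplier=2.0, average_aggregated_gradient=True):

# batch_id=3: (3 + 1) % 4 == 0, so gradients were aggregated on this step.
#   Each pass sums rank * multiplier over the ranks: (0 + 1) * 2.0 = 2.0,
#   divides by aggregation_frequency (2.0 / 4 = 0.5), then by hvd.size() (0.5 / 2 = 0.25);
#   over 4 passes the expected all-reduced value is 4 * 0.25 = 1.0.
# batch_id=1: not an aggregation step. On rank 1 the local value is 1 * 2.0 = 2.0;
#   with tf2=True it is scaled by (1 + 1) % 4 = 2, giving 4.0.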
Code Example #3
 def on_train_end(self, logs=None):
     img_sec_mean = np.mean(self.img_secs)
     img_sec_conf = 1.96 * np.std(self.img_secs)
     print('Img/sec per %s: %.1f +-%.1f' %
           (device, img_sec_mean, img_sec_conf))
     print('Total img/sec on %d %s(s): %.1f +-%.1f' %
           (hvd.size(), device, hvd.size() * img_sec_mean,
            hvd.size() * img_sec_conf))
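
The `on_train_end` hook above depends on `self.img_secs` and a `device` string defined elsewhere in the benchmark script. A minimal sketch of that surrounding callback, with assumed names and constants (`TimingCallback`, `batch_size`, `batches_per_epoch`), might look like this:

from timeit import default_timer as timer

import numpy as np
import tensorflow as tf

device = 'GPU'           # assumption: label for the device type being benchmarked
batch_size = 32          # assumption
batches_per_epoch = 100  # assumption

class TimingCallback(tf.keras.callbacks.Callback):
    def __init__(self):
        super().__init__()
        self.img_secs = []

    def on_epoch_begin(self, epoch, logs=None):
        # Record when the epoch starts.
        self.epoch_start = timer()

    def on_epoch_end(self, epoch, logs=None):
        # Per-worker throughput (images/sec) for this epoch.
        elapsed = timer() - self.epoch_start
        self.img_secs.append(batch_size * batches_per_epoch / elapsed)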
Code Example #4
    def validate(self,
                 data_creator,
                 verbose=1,
                 sample_weight=None,
                 steps=None,
                 callbacks=None,
                 data_config=None):
        """Evaluates the model on the validation data set."""
        config = self.config.copy()
        if data_config is not None:
            config.update(data_config)
        if self.backend == "horovod":
            import horovod.tensorflow.keras as hvd

            assert "batch_size" in config, "batch_size must be set in config"
            config["batch_size"] = config["batch_size"] // hvd.size()
            dataset = data_creator(config)
            from tensorflow.python.distribute.input_ops import auto_shard_dataset
            dataset = auto_shard_dataset(dataset, hvd.size(), hvd.rank())

        elif self.backend == "tf-distributed":
            with self.strategy.scope():
                dataset = data_creator(config)
        else:
            dataset = data_creator(config)

        if self.backend == "horovod":
            import horovod.tensorflow.keras as hvd
            if hvd.rank() != 0:
                verbose = 0
        elif self.backend == "tf-distributed":
            if self.strategy.cluster_resolver.task_id != 0:
                verbose = 0

        params = dict(
            verbose=verbose,
            sample_weight=sample_weight,
            steps=steps,
            callbacks=callbacks,
        )
        results = self.model.evaluate(dataset, **params)
        if results is None:
            # Using local Model since model.evaluate() returns None
            # for MultiWorkerMirroredStrategy
            logger.warning("Running a local model to get validation score.")
            self.local_model = self.model_creator(self.config)
            self.local_model.set_weights(self.model.get_weights())
            results = self.local_model.evaluate(dataset, **params)

        if isinstance(results, list):
            stats = {
                "validation_" + k: v
                for k, v in zip(self.model.metrics_names, results)
            }
        else:
            stats = {"results": results}

        return stats
Code Example #5
def input_fn(is_training,
             data_dir,
             batch_size,
             dtype,
             num_epochs=1,
             datasets_num_private_threads=None,
             num_parallel_batches=5,
             ):
    """Input function which provides batches for train or eval.
   
    Args:
      is_training: A boolean denoting whether the input is for training.
      data_dir: The directory containing the input data.
      batch_size: The number of samples per batch.
      num_epochs: The number of epochs to repeat the dataset.
      dtype: Data type to use for images/features
      datasets_num_private_threads: Number of private threads for tf.data.
      num_parallel_batches: Number of parallel batches for tf.data.
      parse_record_fn: Function to use for parsing the records.
      
    Returns:
      A dataset that can be used for iteration.
    """
    filenames = get_filenames(is_training, data_dir)
    labels = get_labels(is_training)
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

    # Shard the dataset across workers when running with more than one Horovod process.
    if hvd.size() > 1:
        print('Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d' %
              (hvd.rank(), hvd.size()))
        dataset = dataset.shard(hvd.size(), hvd.rank())

    if is_training:
        # Shuffle the input files.
        dataset = dataset.shuffle(buffer_size=_SHUFFLE_BUFFER)

    # Convert to individual records.
    # cycle_length = 10 means 10 files will be read and deserialized in parallel.
    # This number is low enough to not cause too much contention on small systems
    # but high enough to provide the benefits of parallelization. You may want
    # to increase this number if you have a large number of CPU cores.
    #dataset = dataset.apply(tf.data.experimental.parallel_interleave(
    #   tf.data.TFRecordDataset, cycle_length=10))

    return process_record_dataset(
        dataset=dataset,
        is_training=is_training,
        batch_size=batch_size,
        shuffle_buffer=_SHUFFLE_BUFFER,
        parse_record_fn=record_parser,
        num_epochs=num_epochs,
        dtype=dtype,
        datasets_num_private_threads=datasets_num_private_threads,
        num_parallel_batches=num_parallel_batches
    )
Code Example #6
File: tf_runner.py Project: swarnend/analytics-zoo
    def step(self,
             data_creator,
             epochs=1,
             verbose=1,
             callbacks=None,
             validation_data_creator=None,
             class_weight=None,
             steps_per_epoch=None,
             validation_steps=None,
             validation_freq=1):
        """Runs a training epoch and updates the model parameters."""

        train_dataset = data_creator(self.config)
        if validation_data_creator is not None:
            test_dataset = validation_data_creator(self.config)
        else:
            test_dataset = None

        if self.backend == "horovod":
            import horovod.tensorflow.keras as hvd
            from tensorflow.python.distribute.input_ops import auto_shard_dataset
            train_dataset = auto_shard_dataset(train_dataset, hvd.size(),
                                               hvd.rank())
            if test_dataset is not None:
                test_dataset = auto_shard_dataset(test_dataset, hvd.size(),
                                                  hvd.rank())

        if self.backend == "horovod":
            import horovod.tensorflow.keras as hvd
            hvd_callbacks = [
                hvd.callbacks.BroadcastGlobalVariablesCallback(0),
                hvd.callbacks.MetricAverageCallback()
            ]
            if hvd.rank() != 0:
                verbose = 0

            if callbacks is not None:
                callbacks = hvd_callbacks + callbacks
            else:
                callbacks = hvd_callbacks

        history = self.model.fit(train_dataset,
                                 epochs=self.epoch + epochs,
                                 verbose=verbose,
                                 callbacks=callbacks,
                                 validation_data=test_dataset,
                                 class_weight=class_weight,
                                 initial_epoch=self.epoch,
                                 steps_per_epoch=steps_per_epoch,
                                 validation_steps=validation_steps,
                                 validation_freq=validation_freq)
        if history is None:
            stats = {}
        else:
            stats = {"train_" + k: v[-1] for k, v in history.history.items()}

        self.epoch += epochs
        return stats
Code Example #7
File: hvd_wrapper.py Project: johSchm/magpie
def is_initialized():
    """ Checks if horovod is initialized.
    :return: bool
    """
    try:
        hvd.size()
    except ValueError:
        return False
    return True
Code Example #8
def adapt_optimizer(opt):
    # Accept either an optimizer instance or the optimizer's name as a string.
    if isinstance(opt, str):
        opt = get_optimizer_by_name(opt)
    opt_config = opt.get_config()
    # Horovod: scale the learning rate by the number of workers.
    try:
        opt_config['learning_rate'] *= hvd.size()
    except KeyError:
        opt_config['lr'] *= hvd.size()
    return hvd.DistributedOptimizer(opt.from_config(opt_config))

def train_hvd(learning_rate=1.0):
    # TensorFlow objects don't pickle reliably, so we explicitly import its modules inside the worker function.
    from tensorflow.keras import backend as K
    from tensorflow.keras.models import Sequential
    import tensorflow as tf
    from tensorflow import keras
    import horovod.tensorflow.keras as hvd

    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    K.set_session(tf.Session(config=config))

    (x_train, y_train), (x_test, y_test) = get_dataset(num_classes, hvd.rank(),
                                                       hvd.size())
    model = get_model(num_classes)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = keras.optimizers.Adadelta(lr=learning_rate * hvd.size())

    # Horovod: add Horovod Distributed Optimizer.
    optimizer = hvd.DistributedOptimizer(optimizer)

    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    ]

    # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(
            keras.callbacks.ModelCheckpoint(checkpoint_dir +
                                            '/checkpoint-{epoch}.ckpt',
                                            save_weights_only=True))

    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              callbacks=callbacks,
              epochs=epochs,
              verbose=2,
              validation_data=(x_test, y_test))
Code Example #10
        def compute_expected_value(batch_id):
            sum_per_aggregation = 0.0
            for _ in range(backward_passes_per_step):
                grads_for_batch = 0.0
                for rank in range(hvd.size()):
                    grads_for_batch += rank

                # Apply `average_aggregated_gradients`.
                grads_for_batch /= float(backward_passes_per_step)

                # Averages across workers.
                sum_per_aggregation += grads_for_batch / float(hvd.size())

            aggregations_completed = math.floor((batch_id + 1) / backward_passes_per_step)
            return aggregations_completed * sum_per_aggregation
Code Example #11
 def _compile_graph(self, model, loss_func='mse', opt_func='adam'):
     loss_functions = {'mse': 'mean_squared_error',
                       'msle': 'mean_squared_logarithmic_error',
                       'cc': 'categorical_crossentropy',
                       'bce': 'binary_crossentropy'}
     #'bce':BinaryCrossentropy()}
     #'scc':'sparse_categorical_crossentropy'} - wants a single output
     opt_functions = {'adam': Adam, 'sgd': SGD, 'rms': RMSprop}
     logger.debug(
         "Using the %s optimizer with a learning rate of %s and the %s loss function"
         % (opt_func, str(self.learning_rate), loss_func))
     if hvd:
         opt = opt_functions[opt_func](lr=self.learning_rate * hvd.size())
         if hvd.rank() == 0: logger.debug("Compiling distributed optimizer")
         opt = hvd.DistributedOptimizer(opt)
         self.callbacks = [
             hvd.callbacks.BroadcastGlobalVariablesCallback(0)
         ]
     else:
         opt = opt_functions[opt_func](lr=self.learning_rate)
     # compile
     model.compile(loss=loss_functions[loss_func],
                   optimizer=opt,
                   metrics=['accuracy'])
     #model.summary()
     plot_model(model,
                to_file=os.path.join(self.save_dir,
                                     '%s.png' % (self.param_name)))
Code Example #12
def create_config(args):
    assert not (args.cpu and args.amp
                ), "Automatic mixed precision conversion works only with GPU"
    assert (not args.benchmark
            or args.benchmark_warmup_steps < args.benchmark_steps
            ), "Number of benchmark steps must be higher than warmup steps"

    logger = logging.getLogger("tensorflow")

    if args.cpu:
        init_cpu(args, logger)
    else:
        init_gpu(args, logger)

    num_gpus = 1 if args.cpu else hvd.size()
    train_batch_size = args.global_batch_size // num_gpus
    eval_batch_size = args.eval_batch_size // num_gpus

    train_paths = sorted(glob.glob(args.train_data_pattern))
    valid_paths = sorted(glob.glob(args.eval_data_pattern))

    train_spec_input_fn = train_input_fn(
        train_paths=train_paths,
        records_batch_size=train_batch_size,
    )

    eval_spec_input_fn = eval_input_fn(valid_paths=valid_paths,
                                       records_batch_size=eval_batch_size)

    config = {
        "train_dataset": train_spec_input_fn,
        "eval_dataset": eval_spec_input_fn,
    }

    return config
Code Example #13
def define_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation='softmax'))

    scaled_lr = 0.001 * hvd.size()
    opt = tf.optimizers.Adam(scaled_lr)

    opt = hvd.DistributedOptimizer(opt,
                                   backward_passes_per_step=1,
                                   average_aggregated_gradients=True)

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
        experimental_run_tf_function=False)
    return model

def get_model(input_shape, learning_rate, weight_decay, optimizer, momentum,
              hvd):
    input_tensor = Input(shape=input_shape)
    base_model = keras.applications.resnet50.ResNet50(
        include_top=False,
        weights=None,
        input_tensor=input_tensor,
        input_shape=input_shape,
        classes=None)
    x = Flatten()(base_model.output)
    predictions = Dense(NUM_CLASSES, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    size = hvd.size()
    if optimizer.lower() == 'sgd':
        opt = SGD(lr=learning_rate * size,
                  decay=weight_decay,
                  momentum=momentum)
    elif optimizer.lower() == 'rmsprop':
        opt = RMSprop(lr=learning_rate * size, decay=weight_decay)
    else:
        opt = Adam(lr=learning_rate * size, decay=weight_decay)

    opt = hvd.DistributedOptimizer(opt)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
Code Example #15
File: hvd_wrapper.py Project: johSchm/magpie
def get_gpu_num():
    """ Returns the number of supported GPUs.
    :return: num of gpus
    """
    if hvd is None or not is_initialized():
        return 1
    return hvd.size()
Code Example #16
def init_gpu(args, logger):
    hvd.init()

    init_logger(
        full=hvd.rank() == 0,
        args=args,
        logger=logger
    )
    if args.affinity != 'disabled':
        gpu_id = hvd.local_rank()
        affinity = set_affinity(
            gpu_id=gpu_id,
            nproc_per_node=hvd.size(),
            mode=args.affinity
        )
        logger.warning(f'{gpu_id}: thread affinity: {affinity}')
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

    if args.amp:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
        tf.keras.mixed_precision.experimental.set_policy(policy)

    if args.xla:
        tf.config.optimizer.set_jit(True)
Code Example #17
def init_hvd(args):
    if hvd:
        hvd.init()
        FORMAT = "[%%(levelname)s - P%i/%i - %%(filename)s:%%(lineno)s - %%(funcName)s] %%(message)s" % (
            hvd.rank(), hvd.size())
        # Remove all handlers associated with the root logger object.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
        logging.basicConfig(level=logging.INFO, format=FORMAT)
        if args.verbose:
            logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.INFO)
        logger.debug("Updated logger to print process")
    args.hvd_rank = hvd.rank() if hvd else 0
    args.hvd_size = hvd.size() if hvd else 1
Code Example #18
def train(model, dataset, epoch, initial_lr):
    # Horovod: Specify `experimental_run_tf_function=False` to ensure TensorFlow
    # uses hvd.DistributedOptimizer() to compute gradients.
    model.compile(
        loss=tf.losses.SparseCategoricalCrossentropy(),
        optimizer=opt,
        metrics=["accuracy"],
        experimental_run_tf_function=False,
    )

    callbacks = [
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        hvd.callbacks.MetricAverageCallback(),
        hvd.callbacks.LearningRateWarmupCallback(initial_lr,
                                                 warmup_epochs=3,
                                                 verbose=1),
    ]

    if hvd.rank() == 0:
        callbacks.append(
            tf.keras.callbacks.ModelCheckpoint("checkpoint-{epoch}.h5"))

    verbose = 1 if hvd.rank() == 0 else 0

    model.fit(
        dataset,
        steps_per_epoch=500 // hvd.size(),
        callbacks=callbacks,
        epochs=epoch,
        verbose=verbose,
    )
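
The `train` function above compiles with an `opt` that is defined elsewhere in the script. A minimal sketch of that missing setup, following the common Horovod Keras pattern (the base learning rate and the choice of Adam are assumptions, and `hvd.init()` is assumed to have been called already):

import tensorflow as tf
import horovod.tensorflow.keras as hvd

# Assumed setup, not part of the original snippet: scale the base learning rate by
# the number of workers and wrap the optimizer so gradients are averaged via allreduce.
scaled_lr = 0.001 * hvd.size()
opt = tf.optimizers.Adam(scaled_lr)
opt = hvd.DistributedOptimizer(opt)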
Code Example #19
def train(state):
    # Horovod: adjust number of steps based on number of GPUs.
    state.model.fit(dataset,
                    steps_per_epoch=500 // hvd.size(),
                    callbacks=callbacks,
                    epochs=epochs - state.epoch,
                    verbose=1 if hvd.rank() == 0 else 0)
Code Example #20
def create_resnet():
    # Build network
    import keras_resnet_single as networks
    resnet = networks.ResNet.build(
        len(channels), resblocks, [16, 32],
        (125 * granularity, 125 * granularity, len(channels)), granularity)
    # Load saved weights, if indicated
    if args.load_epoch != 0:
        directory = args.save_dir
        if args.save_dir == '':
            directory = expt_name
        model_name = glob.glob('../MODELS/%s/epoch%02d-*.hdf5' %
                               (directory, args.load_epoch))[0]
        #assert len(model_name) == 2
        #model_name = model_name[0].split('.hdf5')[0]+'.hdf5'
        print('Loading weights from file:', model_name)
        resnet.load_weights(model_name)
    #opt = keras.optimizers.Adam(lr=lr_init, epsilon=1.e-5) # changed eps to match pytorch value
    #opt = keras.optimizers.SGD(lr=lr_init * hvd.size())
    opt = NovoGrad(learning_rate=lr_init * hvd.size())
    # Wrap the optimizer in a Horovod DistributedOptimizer so gradients are averaged across workers.
    opt = hvd.DistributedOptimizer(opt)

    # For Horovod: specify `experimental_run_tf_function=False` to ensure TensorFlow
    # uses hvd.DistributedOptimizer() to compute gradients.
    #resnet.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'], experimental_run_tf_function = False)
    #resnet.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    resnet.summary()
    return resnet
Code Example #21
    def test_train_model_lr_schedule(self):
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            0.001 * hvd.size(),
            decay_steps=100000,
            decay_rate=0.96,
            staircase=True)
        opt = tf.keras.optimizers.Adam(lr_schedule)
        opt = hvd.DistributedOptimizer(opt)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(3,)))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.ThresholdedReLU(0.5))
        model.compile(loss=keras.losses.mean_squared_error,
                      optimizer=opt,
                      metrics=[keras.metrics.categorical_accuracy],
                      experimental_run_tf_function=False)

        x = np.random.random((1, 3))
        y = np.random.random((1, 3, 2))

        # No assertions, we just need to verify that it doesn't hang or error
        callbacks = [hvd.callbacks.BroadcastGlobalVariablesCallback(0)]
        model.fit(x,
                  y,
                  steps_per_epoch=10,
                  callbacks=callbacks,
                  epochs=1)
Code Example #22
def init_workers(distributed=False):
    """Initialize distributed worker"""
    rank, local_rank, n_ranks = 0, 0, 1
    if distributed:
        hvd.init()
        rank, local_rank, n_ranks = hvd.rank(), hvd.local_rank(), hvd.size()
    return rank, local_rank, n_ranks
Code Example #23
def train(model, dataset, epochs, steps_per_epoch, hvd_rank=0, hvd_size=1):
    scaled_lr = 0.001 * hvd.size()
    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),

        # Horovod: average metrics among workers at the end of every epoch.
        #
        # Note: This callback must be in the list before the ReduceLROnPlateau,
        # TensorBoard or other metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),

        # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
        # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
        # the warmup epochs. See https://arxiv.org/abs/1706.02677 for details.
        hvd.callbacks.LearningRateWarmupCallback(initial_lr=scaled_lr,
                                                 warmup_epochs=1,
                                                 verbose=1),
    ]

    if hvd.rank() == 0:
        callbacks.append(
            tf.keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))

    # Horovod: write logs on worker 0.
    verbose = 1 if hvd_rank == 0 else 0
    model.fit(dataset,
              epochs=epochs,
              steps_per_epoch=steps_per_epoch // hvd_size,
              callbacks=callbacks,
              verbose=verbose,
              validation_data=get_test_dataset())
    return model
Code Example #24
def create_model():
    model = models.Sequential()
    model.add(
        layers.Conv2D(32, (3, 3), activation='relu',
                      input_shape=(150, 150, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))

    # Horovod: adjust learning rate based on number of GPUs.
    opt = optimizers.SGD(0.01 * hvd.size())

    # Horovod: add Horovod DistributedOptimizer.
    opt = hvd.DistributedOptimizer(opt)

    # Horovod: Specify `experimental_run_tf_function=False` to ensure TensorFlow
    # uses hvd.DistributedOptimizer() to compute gradients.
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'],
                  experimental_run_tf_function=False)
    return model
Code Example #25
    def __init__(self,
                 data_dir,
                 index_file_dir,
                 split='train',
                 num_classes=None,
                 image_size=224,
                 num_channels=3,
                 batch_size=128,
                 dtype='float32',
                 one_hot=False,
                 use_dali=False,
                 augmenter=None,
                 shuffle_buffer_size=10000,
                 file_shuffle_buffer_size=1024,
                 cache=False,
                 mean_subtract=False,
                 standardize=False,
                 augmenter_params=None,
                 cutmix_alpha=0.0,
                 mixup_alpha=0.0,
                 defer_img_mixing=True,
                 hvd_size=None,
                 disable_map_parallelization=False):
        """Initialize the builder from the config."""
        if not os.path.exists(data_dir):
            raise FileNotFoundError(
                'Cannot find data dir: {}'.format(data_dir))
        if one_hot and num_classes is None:
            raise ValueError(
                'Number of classes is required for one_hot')
        self._data_dir = data_dir
        self._split = split
        self._image_size = image_size
        self._num_classes = num_classes
        self._num_channels = num_channels
        self._batch_size = batch_size
        self._dtype = dtype
        self._one_hot = one_hot
        self._augmenter_name = augmenter
        self._shuffle_buffer_size = shuffle_buffer_size
        self._file_shuffle_buffer_size = file_shuffle_buffer_size
        self._cache = cache
        self._mean_subtract = mean_subtract
        self._standardize = standardize
        self._index_file = index_file_dir
        self._use_dali = use_dali
        self.mixup_alpha = mixup_alpha
        self.cutmix_alpha = cutmix_alpha
        self.defer_img_mixing = defer_img_mixing
        self.disable_map_parallelization = disable_map_parallelization
        self._num_gpus = hvd.size() if not hvd_size else hvd_size

        if self._augmenter_name is not None:
            augmenter = AUGMENTERS.get(self._augmenter_name, None)
            params = augmenter_params or {}
            self._augmenter = augmenter(
                **params) if augmenter is not None else None
        else:
            self._augmenter = None
Code Example #26
    def test_elastic_state(self):
        v = 1.0 if hvd.rank() == 0 else 2.0
        model1 = tf.keras.Sequential(
            [tf.keras.layers.Dense(2, activation='softmax')])
        model1.build((2, 2))
        model1.set_weights([
            np.array([[v, v], [v, v]], dtype=np.float32),
            np.array([v, v], dtype=np.float32)
        ])

        model2 = tf.keras.Sequential(
            [tf.keras.layers.Dense(2, activation='softmax')])
        model2.build((2, 2))
        model2.set_weights([
            np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
            np.array([0.0, 0.0], dtype=np.float32)
        ])

        optimizer = tf.optimizers.Adam(0.001 * hvd.size())

        state = hvd.elastic.KerasState(model1,
                                       optimizer,
                                       batch=20 + hvd.rank(),
                                       epoch=10 + hvd.rank())
        state.sync()

        model1_weights = model1.get_weights()
        model2_weights = model2.get_weights()

        # After sync, all values should match the root rank
        for w in state.model.get_weights():
            self.assertAllClose(w, np.ones_like(w))
        assert state.batch == 20
        assert state.epoch == 10

        # Partially modify then restore
        model1.set_weights(model2_weights)
        state.batch = 21
        state.epoch = 11

        state.restore()

        for w1, w2 in zip(model1.get_weights(), model1_weights):
            self.assertAllClose(w1, w2)
        assert state.batch == 20
        assert state.epoch == 10

        # Partially modify then commit
        model1.set_weights(model2_weights)
        state.batch = 21
        state.epoch = 11

        state.commit()
        state.restore()

        for w1, w2 in zip(model1.get_weights(), model2_weights):
            self.assertAllClose(w1, w2)
        assert state.batch == 21
        assert state.epoch == 11
Code Example #27
    def build(self) -> tf.data.Dataset:
        """Construct a dataset end-to-end and return it.

    Args:
      input_context: An optional context provided by `tf.distribute` for
        cross-replica training.

    Returns:
      A TensorFlow dataset outputting batched images and labels.
    """
        if self._use_dali:
            print("Using dali for {train} dataloading".format(
                train="training" if self.is_training else "validation"))
            tfrec_filenames = sorted(
                tf.io.gfile.glob(
                    os.path.join(self._data_dir, '%s-*' % self._split)))
            tfrec_idx_filenames = sorted(
                tf.io.gfile.glob(
                    os.path.join(self._index_file, '%s-*' % self._split)))

            # Create pipeline
            dali_pipeline = Dali.DaliPipeline(
                tfrec_filenames=tfrec_filenames,
                tfrec_idx_filenames=tfrec_idx_filenames,
                height=self._image_size,
                width=self._image_size,
                batch_size=self.local_batch_size,
                num_threads=1,
                device_id=hvd.local_rank(),
                shard_id=hvd.rank(),
                num_gpus=hvd.size(),
                num_classes=self.num_classes,
                deterministic=False,
                dali_cpu=False,
                training=self.is_training)

            # Define shapes and types of the outputs
            shapes = ((self.local_batch_size, self._image_size,
                       self._image_size, 3), (self.local_batch_size,
                                              self._num_classes))
            dtypes = (tf.float32, tf.float32)

            # Create dataset
            dataset = dali_tf.DALIDataset(pipeline=dali_pipeline,
                                          batch_size=self.local_batch_size,
                                          output_shapes=shapes,
                                          output_dtypes=dtypes,
                                          device_id=hvd.local_rank())
            # if self.is_training and self._augmenter:
            #     print('Augmenting with {}'.format(self._augmenter))
            #     dataset.unbatch().map(self.augment_pipeline, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(self.local_batch_size)
            return dataset
        else:
            print("Using tf native pipeline for {train} dataloading".format(
                train="training" if self.is_training else "validation"))
            dataset = self.load_records()
            dataset = self.pipeline(dataset)

            return dataset
Code Example #28
    def __init__(self, config: Seq2SeqConfig):
        """
        Initialize model for training.

        :param config: seq2seq config from input data
        """
        self.body_count = config.body_count
        self.max_body_length = config.max_body_length
        self.subject_count = config.subject_count
        self.max_subject_length = config.max_subject_length
        self.body_word_to_index = config.body_word_to_index
        self.body_index_to_word = config.body_index_to_word
        self.subject_word_to_index = config.subject_word_to_index
        self.subject_index_to_word = config.subject_index_to_word
        self.config = config.__dict__

        encoder_inputs: Input = Input(shape=(None,), name="encoder_inputs")
        encoder_embedding: Embedding = Embedding(
            input_dim=self.body_count,
            output_dim=self.hidden_units,
            input_length=self.max_body_length,
            name="encoder_embedding",
        )
        encoder_lstm: LSTM = LSTM(units=self.hidden_units, return_state=True, name="encoder_lstm")
        _, encoder_hidden_state, encoder_cell_state = encoder_lstm(encoder_embedding(encoder_inputs))
        encoder_states: List[np.ndarray] = [encoder_hidden_state, encoder_cell_state]

        decoder_inputs: Input = Input(shape=(None, self.subject_count), name="decoder_inputs")
        decoder_lstm: LSTM = LSTM(
            units=self.hidden_units, return_state=True, return_sequences=True, name="decoder_lstm"
        )
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
        decoder_dense = Dense(units=self.subject_count, activation="softmax", name="decoder_dense")
        decoder_outputs = decoder_dense(decoder_outputs)

        # Horovod: add Horovod Distributed Optimizer.
        try:
            optimizer = RMSprop(1.0 * hvd.size())
            optimizer = hvd.DistributedOptimizer(optimizer)
        except ValueError:
            print("Running outside Horovod.")
            optimizer = RMSprop(1.0)

        model: Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
        model.compile(
            loss="categorical_crossentropy",
            optimizer=optimizer,
            metrics=["accuracy"],
            experimental_run_tf_function=False,
        )

        self.model = model
        self.encoder_model = Model(encoder_inputs, encoder_states)

        decoder_state_inputs: List[Input] = [Input(shape=(self.hidden_units,)), Input(shape=(self.hidden_units,))]
        decoder_outputs, hidden_state, cell_state = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs)
        decoder_states: List[Dense] = [hidden_state, cell_state]
        decoder_outputs = decoder_dense(decoder_outputs)
        self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states)
Code Example #29
def train(state):
    # Horovod: adjust number of steps based on number of GPUs and number of epochs
    # based on the number of previously completed epochs.
    state.model.fit(dataset,
                    steps_per_epoch=args.batches_per_epoch // hvd.size(),
                    callbacks=callbacks,
                    epochs=epochs - state.epoch,
                    verbose=1 if hvd.rank() == 0 else 0)
Code Example #30
def init_workers(distributed=False):
    if distributed:
        hvd.init()
        return SimpleNamespace(rank=hvd.rank(), size=hvd.size(),
                               local_rank=hvd.local_rank(),
                               local_size=hvd.local_size())
    else:
        return SimpleNamespace(rank=0, size=1, local_rank=0, local_size=1)
Code Example #31
    args, _ = parser.parse_known_args()

    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    K.set_session(tf.Session(config=config))

    batch_size = 128
    num_classes = 10

    # Horovod: adjust number of epochs based on number of GPUs.
    epochs = int(math.ceil(12.0 / hvd.size()))

    # Input image dimensions
    img_rows, img_cols = 28, 28

    # The data, shuffled and split between train and test sets

    x_train = np.load(os.path.join(args.train, 'train.npz'))['data']
    y_train = np.load(os.path.join(args.train, 'train.npz'))['labels']
    print("Train dataset loaded from: {}".format(os.path.join(args.train, 'train.npz')))

    x_test = np.load(os.path.join(args.test, 'test.npz'))['data']
    y_test = np.load(os.path.join(args.test, 'test.npz'))['labels']
    print("Test dataset loaded from: {}".format(os.path.join(args.test, 'test.npz')))