def getModel(net_settings, num_classes=1):
    '''Build and compile the model specified by net_settings['model_type'].'''
    if net_settings['model_type'] == 'resnet':
        base_model = resnet50.ResNet50(include_top=True, weights='imagenet')
        finetuning = Dense(1, activation='sigmoid', name='predictions')(base_model.layers[-2].output)
        model = Model(input=base_model.input, output=finetuning)
        # Adjust learning rate based on number of GPUs
        hv_lr = net_settings['lr'] * hvd.size()
        opt = optimizers.SGD(lr=hv_lr, momentum=0.9, decay=1e-6, nesterov=True)
        # Add Horovod DistributedOptimizer
        opt = hvd.DistributedOptimizer(opt)
        model.compile(loss=net_settings['loss'], optimizer=opt, metrics=['accuracy'])
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        if hvd.rank() == 0:
            callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
        return model
    elif net_settings['model_type'] == 'resnet101':
        model = resnet101_model(224, 224, 3, 1)
        # Adjust learning rate based on number of GPUs
        hv_lr = net_settings['lr'] * hvd.size()
        opt = optimizers.SGD(lr=hv_lr, momentum=0.9, decay=1e-6, nesterov=True)
        # Add Horovod DistributedOptimizer
        opt = hvd.DistributedOptimizer(opt)
        model.compile(loss=net_settings['loss'], optimizer=opt, metrics=['accuracy'])
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        if hvd.rank() == 0:
            callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
        return model
    else:
        print('[models] Ugggh. Not ready for this yet.')
        exit(0)
        return None
def GetOptimizer():
    if options.with_hvd:
        if options.trainingsolver == "adam":
            opt = keras.optimizers.Adam(lr=0.001 * hvd.size())
        elif options.trainingsolver == "adadelta":
            opt = keras.optimizers.Adadelta(1.0 * hvd.size())
        elif options.trainingsolver == "nadam":
            opt = keras.optimizers.Nadam(0.002 * hvd.size())
        elif options.trainingsolver == "sgd":
            opt = keras.optimizers.SGD(0.01 * hvd.size())
        else:
            raise Exception("horovod-enabled optimizer not selected")
        opt = hvd.DistributedOptimizer(opt)
    else:
        if options.trainingsolver == "adam":
            opt = keras.optimizers.Adam(lr=0.01)
        elif options.trainingsolver == "adadelta":
            opt = keras.optimizers.Adadelta(1.0)
        elif options.trainingsolver == "nadam":
            opt = keras.optimizers.Nadam(0.2)
        elif options.trainingsolver == "sgd":
            opt = keras.optimizers.SGD(0.01)
        else:
            opt = options.trainingsolver
    return opt
def get_optimizer(name, lr, lr_scaling='linear', n_ranks=1,
                  distributed=False, **opt_args):
    """
    Configure the optimizer and scale the learning rate by n_ranks.
    TODO: add support for wrapping TF optimizers like LARS.
    """
    # Scale the learning rate
    if lr_scaling == 'linear':
        lr = lr * n_ranks
    elif lr_scaling == 'sqrt':
        lr = lr * math.sqrt(n_ranks)

    # Construct the optimizer
    OptType = getattr(keras.optimizers, name)
    opt = OptType(lr=lr, **opt_args)

    # Distributed optimizer wrapper
    if distributed:
        import horovod.keras as hvd
        opt = hvd.DistributedOptimizer(opt)
    return opt
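# Usage sketch: one way get_optimizer above might be called in a Horovod run. The
# 'Adam' name and the lr value here are illustrative assumptions, not taken from
# this code; keras and math are assumed to be imported at module level.
def _example_get_optimizer_usage():
    import horovod.keras as hvd
    hvd.init()
    # Scale lr linearly with the number of ranks and wrap with DistributedOptimizer.
    return get_optimizer('Adam', lr=0.001, lr_scaling='linear',
                         n_ranks=hvd.size(), distributed=True)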
def test_load_model(self):
    with self.test_session(config=self.config) as sess:
        K.set_session(sess)

        opt = keras.optimizers.RMSprop(lr=0.0001)
        opt = hvd.DistributedOptimizer(opt)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(3,)))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
        model.compile(loss=keras.losses.MSE,
                      optimizer=opt,
                      metrics=[keras.metrics.categorical_accuracy],
                      sample_weight_mode='temporal')

        x = np.random.random((1, 3))
        y = np.random.random((1, 3, 3))
        model.train_on_batch(x, y)

        with temppath() as fname:
            model.save(fname)
            new_model = hvd.load_model(fname)
            new_opt = new_model.optimizer

        self.assertEqual(type(new_opt).__module__, 'horovod._keras')
        self.assertEqual(type(new_opt).__name__, 'RMSprop')
        self.assertEqual(K.get_value(opt.lr), K.get_value(new_opt.lr))
        self._check_optimizer_weights(opt, new_opt)
def test_load_model_custom_optimizers(self):
    class TestOptimizer(keras.optimizers.RMSprop):
        def __init__(self, **kwargs):
            super(TestOptimizer, self).__init__(**kwargs)

    with self.test_session(config=self.config) as sess:
        K.set_session(sess)

        opt = TestOptimizer(lr=0.0001)
        opt = hvd.DistributedOptimizer(opt)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(3,)))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
        model.compile(loss=keras.losses.MSE,
                      optimizer=opt,
                      metrics=[keras.metrics.categorical_accuracy],
                      sample_weight_mode='temporal')

        x = np.random.random((1, 3))
        y = np.random.random((1, 3, 3))
        model.train_on_batch(x, y)

        with temppath() as fname:
            model.save(fname)
            custom_optimizers = [TestOptimizer]
            new_model = hvd.load_model(fname, custom_optimizers=custom_optimizers)
            new_opt = new_model.optimizer

        self.assertEqual(type(new_opt).__module__, 'horovod._keras')
        self.assertEqual(type(new_opt).__name__, 'TestOptimizer')
        self._check_optimizer_weights(opt, new_opt)
def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0, freeze_backbone=False):
    modifier = freeze_model if freeze_backbone else None

    # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing
    # and to prevent OOM errors. Optionally wrap in a parallel model.
    if multi_gpu > 1:
        with tf.device('/cpu:0'):
            model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier),
                                       weights=weights, skip_mismatch=True)
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier),
                                   weights=weights, skip_mismatch=True)
        training_model = model

    # Make the prediction model
    prediction_model = retinanet_bbox(model=model)

    # Compile the training model
    training_model.compile(
        loss={
            'regression': losses.smooth_l1(),
            'classification': losses.focal()
        },
        optimizer=hvd.DistributedOptimizer(
            keras.optimizers.adam(lr=1e-5, clipnorm=0.001))
    )

    return model, training_model, prediction_model
def compile(self, *args, **kwargs):
    if 'optimizer' in kwargs:
        assert len(args) == 0
        optimizer = kwargs['optimizer']
    else:
        assert len(args) == 1
        optimizer = args[0]

    # TODO(levosos): support cases where 'optimizer' is not a class instance
    # but a string or a dictionary.
    if not isinstance(optimizer, keras.optimizers.Optimizer):
        raise ValueError("'optimizer' must be a valid keras.optimizers.Optimizer")

    runai.utils.log.debug('compile() called with optimizer %s', optimizer)

    if runai.elastic.gpus > 1:
        runai.utils.log.debug('Wrapping optimizer with Horovod')
        import horovod.keras as hvd
        optimizer = hvd.DistributedOptimizer(optimizer)

    if runai.elastic.steps > 1:
        optimizer = runai.ga.keras.optimizers.Optimizer(optimizer, runai.elastic.steps)

    kwargs['optimizer'] = optimizer

    # Ignore 'args' because 'optimizer' is the only possible positional argument
    # and it is now in 'kwargs'.
    return self.__runai__['compile'](**kwargs)
def test_load_model(self):
    with self.test_session(config=self.config) as sess:
        K.set_session(sess)

        opt = keras.optimizers.RMSprop(lr=0.0001)
        opt = hvd.DistributedOptimizer(opt)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(3,)))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
        model.compile(loss=keras.losses.MSE,
                      optimizer=opt,
                      metrics=[keras.metrics.categorical_accuracy],
                      sample_weight_mode='temporal')

        x = np.random.random((1, 3))
        y = np.random.random((1, 3, 3))
        model.train_on_batch(x, y)

        _, fname = tempfile.mkstemp('.h5')
        model.save(fname)

        new_model = hvd.load_model(fname)
        new_opt = new_model.optimizer
        os.remove(fname)

        self.assertEqual(type(new_opt).__module__, 'horovod._keras')
        self.assertEqual(type(new_opt).__name__, 'RMSprop')
        self.assertEqual(K.get_value(opt.lr), K.get_value(new_opt.lr))
        self.assertEqual(len(opt.get_weights()), len(new_opt.get_weights()))
        for weights, new_weights in zip(opt.get_weights(), new_opt.get_weights()):
            self.assertListEqual(weights.tolist(), new_weights.tolist())
def create_cnn_model(num_classes, input_shape, learning_rate):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Initiate the RMSprop optimizer
    opt = keras.optimizers.rmsprop(lr=learning_rate, decay=1e-6)

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    # Train the model using RMSprop
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
def get_model(input_shape, lr, lr_decay, num_classes=10):
    model = Sequential()
    model.add(Conv2D(64, (3, 3), padding='same', input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(256, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Initiate the RMSprop optimizer and wrap it with Horovod
    opt = keras.optimizers.rmsprop(lr=lr, decay=lr_decay)
    opt = hvd.DistributedOptimizer(opt)

    # Train the model using RMSprop
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
def test_load_model_custom_objects(self):
    hvd.init()

    class TestOptimizer(keras.optimizers.RMSprop):
        def __init__(self, **kwargs):
            super(TestOptimizer, self).__init__(**kwargs)

    with self.test_session() as sess:
        K.set_session(sess)

        opt = TestOptimizer(lr=0.0001)
        opt = hvd.DistributedOptimizer(opt)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(3,)))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
        model.compile(loss=keras.losses.MSE,
                      optimizer=opt,
                      metrics=[keras.metrics.categorical_accuracy],
                      sample_weight_mode='temporal')

        x = np.random.random((1, 3))
        y = np.random.random((1, 3, 3))
        model.train_on_batch(x, y)

        _, fname = tempfile.mkstemp('.h5')
        model.save(fname)

        custom_objects = {
            'TestOptimizer': lambda **kwargs: hvd.DistributedOptimizer(
                TestOptimizer(**kwargs))
        }
        new_model = hvd.load_model(fname, custom_objects=custom_objects)
        new_opt = new_model.optimizer
        os.remove(fname)

        self.assertEqual(type(new_opt).__module__, 'horovod.keras.impl')
        self.assertEqual(type(new_opt).__name__, 'TestOptimizer')
        self.assertEqual(K.get_value(opt.lr), K.get_value(new_opt.lr))
        self.assertEqual(len(opt.get_weights()), len(new_opt.get_weights()))
        for weights, new_weights in zip(opt.get_weights(), new_opt.get_weights()):
            self.assertListEqual(weights.tolist(), new_weights.tolist())
def build_torch(self, model):
    import torch
    opt = torch.optim.SGD(model.parameters(), 1.)
    if self.horovod_wrapper:
        import horovod.torch as hvd
        opt = hvd.DistributedOptimizer(opt,
                                       named_parameters=model.named_parameters())
    return opt
def _get_optimizer(params, is_distributed=_DISTRIBUTED):
    if is_distributed:
        # Horovod: adjust learning rate based on number of GPUs.
        opt = keras.optimizers.SGD(lr=params["learning_rate"] * hvd.size(),
                                   momentum=params["momentum"])
        # Horovod: add Horovod Distributed Optimizer.
        return hvd.DistributedOptimizer(opt)
    else:
        return keras.optimizers.SGD(lr=params["learning_rate"],
                                    momentum=params["momentum"])
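# Usage sketch: a possible call to _get_optimizer. The params keys match those read
# inside the function; the numeric values are illustrative assumptions.
def _example_distributed_sgd():
    params = {"learning_rate": 0.01, "momentum": 0.9}
    # With is_distributed=True the SGD learning rate is scaled by hvd.size() and
    # the optimizer is wrapped in hvd.DistributedOptimizer.
    return _get_optimizer(params, is_distributed=True)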
def build(self):
    from keras.optimizers import deserialize
    opt_config = {'class_name': self.name, 'config': self.config}
    opt = deserialize(opt_config)
    if self.horovod_wrapper:
        import horovod.keras as hvd
        if hasattr(opt, 'lr'):
            opt.lr *= hvd.size()
        opt = hvd.DistributedOptimizer(opt)
    return opt
def train_evaluate():

    # Generate training and validation data generators
    def get_image_list(data_dir):
        dataset = []
        for folder in os.listdir(data_dir):
            for image in os.listdir(os.path.join(data_dir, folder)):
                dataset.append((os.path.join(data_dir, folder, image), folder))
        return dataset

    training_data = ImageSequence(get_image_list(os.path.join(FLAGS.data_dir, 'train')),
                                  FLAGS.batch_size, True)
    validation_data = ImageSequence(get_image_list(os.path.join(FLAGS.data_dir, 'test')),
                                    FLAGS.batch_size, False)

    # Horovod: initialize Horovod
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    tf.keras.backend.set_session(tf.Session(config=config))

    # Create a model
    model = network_model(FLAGS.hidden_units)
    loss = 'categorical_crossentropy'

    # Horovod: adjust learning rate based on number of GPUs
    optimizer = Adadelta(lr=1.0 * hvd.size())

    # Horovod: add Horovod Distributed Optimizer
    optimizer = hvd.DistributedOptimizer(optimizer)

    metrics = ['acc']
    model.compile(optimizer, loss, metrics)

    # Set up callbacks
    callbacks = [
        # Broadcast initial variable states from rank 0 to all other processes
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    ]

    # Horovod: save logs only on worker 0
    if hvd.rank() == 0:
        callbacks.append(tf.keras.callbacks.TensorBoard(log_dir=FLAGS.log_dir))

    # Start training
    model.fit_generator(generator=training_data,
                        validation_data=validation_data,
                        epochs=FLAGS.epochs,
                        use_multiprocessing=True,
                        workers=4,
                        callbacks=callbacks,
                        verbose=1)

    # Save the model
    model.save(FLAGS.save_model_path)
def test_from_config(self):
    with self.test_session(config=self.config) as sess:
        K.set_session(sess)

        opt = keras.optimizers.Adam()
        hopt = hvd.DistributedOptimizer(opt)
        cfg = hopt.get_config()

        hopt_copy1 = hopt.from_config(cfg)
        self.assertEqual(cfg, hopt_copy1.get_config())

        hopt_copy2 = hopt.__class__.from_config(cfg)
        self.assertEqual(cfg, hopt_copy2.get_config())
def create_model():
    opt = keras.optimizers.SGD(lr=0.01 * hvd.size(), momentum=0.9)
    opt = hvd.DistributedOptimizer(opt)

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.RepeatVector(3))
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
    model.compile(loss=keras.losses.MSE,
                  optimizer=opt,
                  metrics=[keras.metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    return model
def test_sparse_as_dense(self):
    with self.test_session(config=self.config) as sess:
        K.set_session(sess)

        opt = keras.optimizers.RMSprop(lr=0.0001)
        opt = hvd.DistributedOptimizer(opt, sparse_as_dense=True)

        model = keras.models.Sequential()
        model.add(keras.layers.Embedding(1000, 64, input_length=10))
        model.compile(loss=keras.losses.MSE, optimizer=opt)

        x = np.random.randint(1000, size=(32, 10))
        y = np.random.random((32, 10, 64))
        # No assertions, we just need to verify that it doesn't hang
        model.train_on_batch(x, y)
def build_torch(self, model):
    import torch
    lookup = {
        'sgd': torch.optim.SGD,
        'adadelta': torch.optim.Adadelta,
        'rmsprop': torch.optim.RMSprop,
        'adam': torch.optim.Adam,
    }
    if self.name not in lookup:
        logging.warning("No optimizer '{}' found, using SGD instead".format(self.name))
        self.name = 'sgd'
    opt = lookup[self.name](model.parameters(), **self.config)
    if self.horovod_wrapper:
        import horovod.torch as hvd
        opt = hvd.DistributedOptimizer(opt,
                                       named_parameters=model.named_parameters())
    return opt
def build_model(input_shape, conv_sizes=[8, 16, 32], fc_sizes=[64],
                dropout=0.5, optimizer='Adam', learning_rate=0.001,
                use_horovod=False):
    """Construct the Keras model"""

    # Define the inputs
    inputs = layers.Input(shape=input_shape)
    h = inputs

    # Convolutional layers
    conv_args = dict(kernel_size=(3, 3), activation='relu', padding='same')
    for conv_size in conv_sizes:
        h = layers.Conv2D(conv_size, **conv_args)(h)
        h = layers.MaxPooling2D(pool_size=(2, 2))(h)
        h = layers.Dropout(dropout)(h)
    h = layers.Flatten()(h)

    # Fully connected layers
    for fc_size in fc_sizes:
        h = layers.Dense(fc_size, activation='relu')(h)
        h = layers.Dropout(dropout)(h)

    # Output layer
    outputs = layers.Dense(1, activation='sigmoid')(h)

    # Construct the optimizer
    opt_dict = dict(Adam=optimizers.Adam,
                    Nadam=optimizers.Nadam,
                    Adadelta=optimizers.Adadelta)
    opt = opt_dict[optimizer](lr=learning_rate)
    if use_horovod:
        import horovod.keras as hvd
        opt = hvd.DistributedOptimizer(opt)

    # Compile the model
    model = models.Model(inputs=inputs, outputs=outputs, name='RPVClassifier')
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model
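# Usage sketch: a possible call to build_model. The input shape and hyperparameter
# values are illustrative assumptions, not taken from the function above.
def _example_build_model():
    return build_model(input_shape=(64, 64, 1),
                       conv_sizes=[8, 16, 32], fc_sizes=[64],
                       dropout=0.5, optimizer='Adam',
                       learning_rate=0.001, use_horovod=True)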
def get_optimizer(name, lr, lr_scaling='linear', n_ranks=1, **opt_args):
    """Configure the optimizer and scale the learning rate by n_ranks."""
    # Scale the learning rate
    if lr_scaling == 'linear':
        lr = lr * n_ranks
    elif lr_scaling == 'sqrt':
        lr = lr * math.sqrt(n_ranks)

    # Construct the optimizer
    OptType = getattr(keras.optimizers, name)
    opt = OptType(lr=lr, **opt_args)

    # Distributed optimizer wrapper
    if n_ranks > 1:
        opt = hvd.DistributedOptimizer(opt)
    return opt
def create_inception_model(self, number_categories, dense_layer_sizes,
                           dropout_fraction, unfrozen_layers, focal_loss=False):
    hvd.init()
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    opt = hvd.DistributedOptimizer(
        tf.keras.optimizers.Adam(learning_rate=0.001 * hvd.size()))

    model = InceptionV3(include_top=False, pooling='avg')
    output = model.outputs[0]
    for layer_size in dense_layer_sizes:
        dense = Dense(layer_size, activation='relu')(output)
        dropout = Dropout(dropout_fraction)(dense)
        output = BatchNormalization()(dropout)
    if number_categories == 1:
        output = Dense(1, activation='sigmoid')(output)
    else:
        output = Dense(number_categories, activation='softmax')(output)
    model = Model(inputs=model.inputs, outputs=output)

    for index in range(len(model.layers) - unfrozen_layers):
        model.layers[index].trainable = False

    if number_categories == 1:
        the_metrics = [metrics.binary_accuracy]
        if focal_loss:
            loss = customlosses.focal_binary_crossentropy
        else:
            loss = 'binary_crossentropy'
    else:
        the_metrics = [metrics.categorical_accuracy]
        if focal_loss:
            loss = customlosses.focal_categorical_crossentropy
        else:
            loss = 'categorical_crossentropy'

    model.compile(optimizer=opt, loss=loss, metrics=the_metrics)
    model.save(self.model_filename)
    self.model = model
def create_model():
    # Set up standard WideResNet-16-10 model.
    model = WideResidualNetwork(depth=16, width=10, weights=None,
                                input_shape=input_shape, classes=num_classes,
                                dropout_rate=0.01)

    # The WideResNet model that is included with Keras is optimized for inference.
    # Add L2 weight decay & adjust BN settings.
    model_config = model.get_config()
    for layer, layer_config in zip(model.layers, model_config['layers']):
        if hasattr(layer, 'kernel_regularizer'):
            regularizer = keras.regularizers.l2(args.wd)
            layer_config['config']['kernel_regularizer'] = \
                {'class_name': regularizer.__class__.__name__,
                 'config': regularizer.get_config()}
        if type(layer) == keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5
    model = keras.models.Model.from_config(model_config)

    if args.novo_grad:
        opt = NovoGrad(lr=args.base_lr)
    else:
        opt = keras.optimizers.SGD(lr=args.base_lr, momentum=args.momentum)

    # Wrap the optimizer in a Horovod distributed optimizer
    opt = hvd.DistributedOptimizer(opt)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
def build_model(input_shape, h1=64, h2=128, h3=256, h4=256, h5=512,
                optimizer='Adam', lr=0.001, use_horovod=False):
    # Define the NN layers
    inputs = layers.Input(shape=input_shape)
    conv_args = dict(kernel_size=(3, 3), activation='relu', padding='same')
    h = layers.Conv2D(h1, strides=1, **conv_args)(inputs)
    h = layers.Conv2D(h2, strides=2, **conv_args)(h)
    h = layers.Conv2D(h3, strides=1, **conv_args)(h)
    h = layers.Conv2D(h4, strides=2, **conv_args)(h)
    h = layers.Flatten()(h)
    h = layers.Dense(h5, activation='relu')(h)
    outputs = layers.Dense(1, activation='sigmoid')(h)

    # Construct the optimizer
    if optimizer == 'Adam':
        opt = keras.optimizers.Adam(lr=lr)
    elif optimizer == 'Nadam':
        opt = keras.optimizers.Nadam(lr=lr)
    elif optimizer == 'Adadelta':
        opt = keras.optimizers.Adadelta(lr=lr)
    else:
        raise Exception('Unsupported optimizer type %s' % optimizer)
    if use_horovod:
        import horovod.keras as hvd
        opt = hvd.DistributedOptimizer(opt)

    # Compile the model
    model = models.Model(inputs, outputs, name='RPVClassifier')
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model
SESS = tf.Session(config=CONFIG)
K.backend.set_session(SESS)

CHANNELS_LAST = True

unet_model = unet(use_upsampling=args.use_upsampling,
                  learning_rate=args.lr,
                  n_cl_in=args.number_input_channels,
                  n_cl_out=1,  # single channel (greyscale)
                  feature_maps=args.featuremaps,
                  dropout=0.2,
                  print_summary=args.print_model,
                  channels_last=CHANNELS_LAST)  # channels first or last

opt = hvd.DistributedOptimizer(unet_model.optimizer)

unet_model.model.compile(optimizer=opt,
                         loss=unet_model.loss,
                         metrics=unet_model.metrics)

if hvd.rank() == 0:
    start_time = datetime.datetime.now()
    print("Started script on {}".format(start_time))

# Save best model to hdf5 file
saved_model_directory = os.path.dirname(args.saved_model)
try:
    os.stat(saved_model_directory)
except OSError:
    os.mkdir(saved_model_directory)
            regularizer = keras.regularizers.l2(args.wd)
            layer_config['config']['kernel_regularizer'] = \
                {'class_name': regularizer.__class__.__name__,
                 'config': regularizer.get_config()}
        if type(layer) == keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5
    model = keras.models.Model.from_config(model_config)

    # Horovod: adjust learning rate based on number of GPUs.
    opt = keras.optimizers.SGD(lr=args.base_lr * hvd.size(),
                               momentum=args.momentum)

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),

        # Horovod: average metrics among workers at the end of every epoch.
        #
        # Note: This callback must be in the list before the ReduceLROnPlateau,
        # TensorBoard, or other metrics-based callbacks.
    model_config = model.get_config()
    for layer, layer_config in zip(model.layers, model_config['layers']):
        if hasattr(layer, 'kernel_regularizer'):
            regularizer = keras.regularizers.l2(args.wd)
            layer_config['config']['kernel_regularizer'] = \
                {'class_name': regularizer.__class__.__name__,
                 'config': regularizer.get_config()}
        if type(layer) == keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5
    model = keras.models.Model.from_config(model_config)

    opt = keras.optimizers.SGD(lr=initial_lr, momentum=args.momentum)

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt, compression=compression)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy', 'top_k_categorical_accuracy'])

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),

        # Horovod: average metrics among workers at the end of every epoch.
        #
        # Note: This callback must be in the list before the ReduceLROnPlateau,
        # TensorBoard, or other metrics-based callbacks.
def main(argv=None):
    # Initialize Horovod.
    hvd.init()

    # Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    KB.set_session(tf.Session(config=config))

    # print('LOCAL RANK, OVERALL RANK: {}, {}'.format(hvd.local_rank(), hvd.rank()))

    ngpus = hvd.size()

    main.__doc__ = __doc__
    argv = sys.argv if argv is None else sys.argv.extend(argv)
    desc = main.__doc__  # .format(os.path.basename(__file__))
    # CLI parser
    args = _parser(desc)

    num_devices_tfrecord = 1
    height, width = 224, 224  # Image dimensions. Gets resized if not matching.
    distort_color = args.distort_color
    data_dir = args.datadir
    batch_size = args.batch_size  # * ngpus
    epochs = args.epochs
    imgs_per_epoch = args.imgs_per_epoch

    # Fit the model using data from the TFRecord data tensors.
    device_minibatches = RecordInputImagenetPreprocessor.device_minibatches
    images_tfrecord, labels_tfrecord, nrecords = device_minibatches(
        num_devices_tfrecord, data_dir, batch_size,
        height, width, distort_color, val=False)
    images_tfrecord = images_tfrecord[0]
    labels_tfrecord = labels_tfrecord[0]

    # Casting for Keras:
    # labels[device_num] = tf.cast(labels_tfrecord, dtype)
    nclasses = 1000
    labels_tfrecord = tf.one_hot(labels_tfrecord, nclasses)

    nimgs_to_use = imgs_per_epoch if imgs_per_epoch > 0 else nrecords
    steps_per_epoch = nimgs_to_use // batch_size // hvd.size()

    images = Input(tensor=images_tfrecord)
    model = ResNet50(input_tensor=images, weights=None)
    if hvd.rank() == 0:
        model.summary()
        print('Num images: {}'.format(nrecords))
        if nimgs_to_use < nrecords:
            print('Using {} images per epoch'.format(nimgs_to_use))

    # Add Horovod Distributed Optimizer (from nvcnn.py). A tf.train-based optimizer
    # (e.g. MomentumOptimizer with exponential lr decay) could alternatively be wrapped
    # with hvd.DistributedOptimizer and passed to Keras via KO.TFOptimizer.
    opt = KO.Adam()
    opt = hvd_keras.DistributedOptimizer(opt)

    model.compile(
        loss='categorical_crossentropy',
        optimizer=opt,
        # metrics=['accuracy'],
        target_tensors=[labels_tfrecord])

    # Broadcast variables from rank 0 to all other processes.
    KB.get_session().run(hvd.broadcast_global_variables(0))

    callbacks = []
    if hvd.rank() == 0:
        callbacks += [BatchTiming(), SamplesPerSec(ngpus * batch_size)]

    # RecordInput is a yield op which doesn't use queue runners or queues.
    # Start the queue runners.
    # sess = KB.get_session()
    # sess.run([tf.local_variables_initializer(),
    #           tf.global_variables_initializer()])
    # coord = tf.train.Coordinator()
    # threads = tf.train.start_queue_runners(sess, coord)

    start_time = time.time()
    model.fit(steps_per_epoch=steps_per_epoch,
              epochs=epochs,
              callbacks=callbacks,
              verbose=1)  # verbose=hvd.rank() == 0
    elapsed_time = time.time() - start_time

    if hvd.rank() == 0:
        print('[{}] finished in {} s'.format('TRAINING', round(elapsed_time, 3)))

    # Evaluate on the validation set.
    images_tfrecord_val, labels_tfrecord_val, nrecords_val = \
        device_minibatches(num_devices_tfrecord, data_dir, batch_size,
                           height, width, distort_color, val=True)
    images_tfrecord_val = images_tfrecord_val[0]
    labels_tfrecord_val = labels_tfrecord_val[0]
    labels_tfrecord_val = tf.one_hot(labels_tfrecord_val, nclasses)

    steps_per_epoch_val = nrecords_val // batch_size
    images_val = Input(tensor=images_tfrecord_val)
    model_val = model
    model_val.layers[0] = KL.InputLayer(input_tensor=images_val)
    model_val.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'],
                      target_tensors=[labels_tfrecord_val])
    # model.summary()
    loss = model_val.evaluate(x=None, y=None, steps=steps_per_epoch_val)

    print('\nNum images evaluated, steps: {}, {}'.format(
        nrecords_val, steps_per_epoch_val))
    print('\nTest loss, acc: {}'.format(loss))

    # Clean up the TF session.
    # coord.request_stop()
    # coord.join(threads)
    KB.clear_session()  # do this for Horovod
def _run():
    import keras
    import models

    logger = tk.log.get(__name__)

    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', help='Number of epochs.', default=300, type=int)
    parser.add_argument('--batch-size', help='Batch size.', default=16, type=int)
    parser.add_argument('--warm', help='Load models/model.fold{cv_index}.h5.',
                        action='store_true', default=False)
    parser.add_argument('--cv-index', help='Which CV fold to train.', type=int)
    parser.add_argument('--cv-size', help='Number of CV folds.', default=5, type=int)
    parser.add_argument('--split-seed', help='Random seed for the split.', default=123, type=int)
    args = parser.parse_args()
    assert args.cv_index in range(args.cv_size)

    model_path = _MODELS_DIR / 'model.fold{}.h5'.format(args.cv_index)

    (X_train, y_train), (X_val, y_val), _ = data.load_data(
        args.cv_index, args.cv_size, args.split_seed)
    num_classes = len(np.unique(y_train))
    y_train = tk.ml.to_categorical(num_classes)(y_train)
    y_val = tk.ml.to_categorical(num_classes)(y_val)
    logger.info('len(X_train) = {} len(X_val) = {}'.format(len(X_train), len(X_val)))

    model = models.create_network(num_classes)

    # Learning rate:
    # - lr 0.5 with batch size 256 is a common setting, so use that as the baseline.
    # - Scaling it in proportion to the batch size is rumored to work well.
    lr = 0.5 * args.batch_size / 256 * hvd.size()
    opt = keras.optimizers.SGD(lr=lr, momentum=0.9, nesterov=True)
    opt = hvd.DistributedOptimizer(opt)
    model.compile(opt, 'categorical_crossentropy', ['acc'])

    if hvd.rank() == 0 and args.cv_index == 0:
        model.summary(print_fn=logger.info)
        logger.info('network depth: %d', tk.dl.count_network_depth(model))

    if args.warm:
        model.load_weights(str(model_path))
        logger.info('{} loaded'.format(model_path))
    else:
        assert not model_path.exists()  # guard against accidental overwrites

    callbacks = []
    if args.warm and args.epochs < 300:  # shortened-training mode
        callbacks.append(tk.dl.learning_rate_callback((0, 0.5)))
    else:
        callbacks.append(tk.dl.learning_rate_callback())
    callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
    callbacks.append(hvd.callbacks.MetricAverageCallback())
    callbacks.append(hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1))
    if hvd.rank() == 0:
        callbacks.append(tk.dl.tsv_log_callback(_MODELS_DIR / 'history.tsv'))
    callbacks.append(tk.dl.freeze_bn_callback(0.95))

    gen = models.create_generator((299, 299), mixup=True)
    model.fit_generator(
        gen.flow(X_train, y_train, batch_size=args.batch_size,
                 data_augmentation=True, shuffle=True),
        steps_per_epoch=gen.steps_per_epoch(len(X_train), args.batch_size) // hvd.size(),
        epochs=args.epochs,
        verbose=1 if hvd.rank() == 0 else 0,
        validation_data=gen.flow(X_val, y_val, batch_size=args.batch_size, shuffle=True),
        validation_steps=gen.steps_per_epoch(len(X_val), args.batch_size) // hvd.size(),  # the '* 3' factor is omitted
        callbacks=callbacks)

    if hvd.rank() == 0:
        model.save(str(model_path))

        proba_val = model.predict_generator(
            gen.flow(X_val, y_val, batch_size=args.batch_size),
            gen.steps_per_epoch(len(X_val), args.batch_size),
            verbose=1)
        joblib.dump(proba_val, _MODELS_DIR / 'proba_val.fold{}.pkl'.format(args.cv_index))

        pred_val = proba_val.argmax(axis=-1)
        logger.info('val_acc: {:.1f}%'.format(
            sklearn.metrics.accuracy_score(y_val.argmax(axis=-1), pred_val) * 100))
def train(self, resume_training=True, resume_epoch=None):
    if self.model_dir is None or self.model_dir == '':
        self.model_dir = os.path.join(
            'models', datetime.datetime.now().strftime('run_%Y%m%d_%H%M%S'))
        self.load_model = False
        self.resume_training = False
    else:
        self.load_model = True

    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.checkpoint_dir = os.path.join(self.model_dir, 'checkpoints')

    self.data_generators, self.nb_examples = self._get_generators()

    self.model = self._build_model()
    # _log.info(model.summary())

    loss = objectives.categorical_crossentropy
    all_metrics = [
        metrics.categorical_accuracy,
        categorical_mean_squared_error
    ]
    if self.train_with_soft_targets:
        loss = self._make_targets_soft(loss)
    if self.train_only_in_receptive_field:
        loss = self._skip_out_of_receptive_field(loss)
        all_metrics = [self._skip_out_of_receptive_field(m) for m in all_metrics]

    optim = self._make_optimizer()
    if self.train_multi_gpu:
        optim = hvd.DistributedOptimizer(optim)
    self.model.compile(optimizer=optim, loss=loss, metrics=all_metrics)

    self.initial_epoch = 0
    if self.resume_training:
        _, self.initial_epoch = self._load_model_weights()

    # TODO: Consider gradient weighting making last outputs more important.

    if self.train_multi_gpu:
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
            hvd.callbacks.MetricAverageCallback(),
            hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1)
        ]
    else:
        callbacks = []

    callbacks.extend([
        ReduceLROnPlateau(patience=self.early_stopping_patience / 2,
                          cooldown=self.early_stopping_patience / 4,
                          verbose=1),
        EarlyStopping(patience=self.early_stopping_patience, verbose=1)
    ])

    if self.train_rank == 0:
        callbacks.extend([
            ModelCheckpoint(os.path.join(self.checkpoint_dir,
                                         'checkpoint.{epoch:05d}.hdf5'),
                            save_best_only=False),
            CSVLogger(os.path.join(self.model_dir, 'history.csv'), append=True)
        ])
        if not os.path.exists(self.checkpoint_dir):
            os.mkdir(self.checkpoint_dir)

    keras_verbose = self.keras_verbose
    if self.train_rank > 0:
        keras_verbose = 0
    else:
        print('Starting Training...')

    self.model.fit_generator(self.data_generators['train'],
                             self.nb_examples['train'] // self.num_gpus,
                             initial_epoch=self.initial_epoch,
                             epochs=self.max_epoch,
                             validation_data=self.data_generators['test'],
                             validation_steps=self.nb_examples['test'] // self.num_gpus,
                             callbacks=callbacks,
                             verbose=keras_verbose)