def getModel(net_settings, num_classes=1):
    ''' Should be modified to take the model type as input and return the desired model '''
    if net_settings['model_type'] == 'resnet':
        base_model = resnet50.ResNet50(include_top=True, weights='imagenet')
        finetuning = Dense(1, activation='sigmoid', name='predictions')(base_model.layers[-2].output)
        model = Model(input=base_model.input, output=finetuning)
        ## Adjust learning rate based on number of GPUs
        hv_lr = net_settings['lr'] * hvd.size()
        opt = optimizers.SGD(lr=hv_lr, momentum=0.9, decay=1e-6, nesterov=True)
        ## Adding Horovod DistributedOptimizer
        opt = hvd.DistributedOptimizer(opt)
        model.compile(loss=net_settings['loss'], optimizer=opt, metrics=['accuracy'])
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        if hvd.rank() == 0:
            callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
        return model
    elif net_settings['model_type'] == 'resnet101':
        model = resnet101_model(224, 224, 3, 1)
        ## Adjust learning rate based on number of GPUs
        hv_lr = net_settings['lr'] * hvd.size()
        opt = optimizers.SGD(lr=hv_lr, momentum=0.9, decay=1e-6, nesterov=True)
        ## Adding Horovod DistributedOptimizer
        opt = hvd.DistributedOptimizer(opt)
        model.compile(loss=net_settings['loss'], optimizer=opt, metrics=['accuracy'])
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        if hvd.rank() == 0:
            callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
        return model
    else:
        print('[models] Ugggh. Not ready for this yet.')
        exit(0)
        return None
def _get_hooks(is_distributed=_DISTRIBUTED, verbose=1):
    logger = _get_logger()
    if is_distributed:
        logger.info("Rank: {} Cluster Size {}".format(hvd.local_rank(), hvd.size()))
        return [
            # Horovod: broadcast initial variable states from rank 0 to all other processes.
            # This is necessary to ensure consistent initialization of all workers when
            # training is started with random weights or restored from a checkpoint.
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
            # Horovod: average metrics among workers at the end of every epoch.
            #
            # Note: This callback must be in the list before the ReduceLROnPlateau,
            # TensorBoard, or other metrics-based callbacks.
            hvd.callbacks.MetricAverageCallback(),
            # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
            # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
            # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
            hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=_WARMUP_EPOCHS, verbose=verbose),
            # Horovod: after the warmup reduce learning rate by 10 on the 30th, 60th and 80th epochs.
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=_WARMUP_EPOCHS, end_epoch=30, multiplier=1.0),
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=30, end_epoch=60, multiplier=1e-1),
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=60, end_epoch=80, multiplier=1e-2),
            hvd.callbacks.LearningRateScheduleCallback(start_epoch=80, multiplier=1e-3),
        ]
    else:
        return []
def setup_callbacks(params, callbacks, encoder, decoder, prop_pred):
    import horovod.keras as hvd

    # model checkpointing
    if params.checkpoint_period and hvd.rank() == 0:
        model_checkpoint_callback = model_checkpoint(
            encoder, decoder, prop_pred, params.checkpoint_path,
            nepochs=params.checkpoint_period,
            overwrite=params.overwrite_checkpoint)
        callbacks.append(model_checkpoint_callback)

    # LR scheduler
    if params.lr_schedule_patience:
        lr_callback = ReduceLROnPlateau(monitor=params.lr_schedule_prop,
                                        factor=0.5,
                                        patience=params.lr_schedule_patience,
                                        min_lr=params.lr_schedule_min * hvd.size(),
                                        cooldown=params.lr_schedule_cooldown,
                                        verbose=(hvd.rank() == 0))
        callbacks.append(lr_callback)

    if hvd.rank() == 0:
        callbacks.append(print_loss())

    if params.enable_tensorboard:
        callbacks.append(TensorBoard(params.checkpoint_path))
def load_data(self, data_fn, test_size=0.3, random=True):
    if not self.distributed_training:
        self.logger.info('Loading the full dataset since distributed training is disabled ...')
        # X, Y = self.data_io.load_all(data_fn, dset_name_pattern=dset_name_pattern, camera_pos=camera_pos)
        X, Y = self.data_io.load_all(data_fn)
    else:
        self.logger.info('Loading part of the dataset since distributed training is enabled ...')
        X, Y = self.data_io.load_partial(data_fn, hvd.size(), hvd.rank())

    self.logger.debug('Shape of X: %s' % str(X.shape))
    self.logger.debug('Shape of Y: %s' % str(Y.shape))

    # update the input_shape setting according to the loaded data
    self.input_shape = X.shape[1:]

    if test_size > 0:
        x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test
        print("shapes:", self.x_train.shape, self.x_test.shape,
              self.y_train.shape, self.y_test.shape)
    else:
        self.x_train = X
        self.y_train = Y

    self.num_classes = np.unique(Y).shape[0]
    self.logger.debug('Number of classes: %d' % self.num_classes)
def save_model(self):
    if self.distributed_training is True:
        if hvd.rank() == 0:
            if self.use_noise is True:
                self.model.save('model_hvd_bw_%d_B0_with_noise_n_p_%d.h5' %
                                (self.input_shape[0], hvd.size()))
            else:
                self.model.save('model_hvd_bw_%d_B0_no_noise_%d_nodes.h5' %
                                (self.input_shape[0], hvd.size()))
    else:
        if self.use_noise is True:
            self.model.save('model_bw_%d_B0_with_noise.h5' % (self.input_shape[0]))
        else:
            self.model.save('model_bw_%d_B0_no_noise.h5' % (self.input_shape[0]))
def test_elastic_state(self):
    with self.test_session(config=self.config) as sess:
        K.set_session(sess)

        v = 1.0 if hvd.rank() == 0 else 2.0
        model1 = keras.models.Sequential([
            keras.layers.Dense(2, activation='softmax')
        ])
        model1.build((2, 2))
        model1.set_weights(
            [np.array([[v, v], [v, v]], dtype=np.float32),
             np.array([v, v], dtype=np.float32)])

        model2 = keras.models.Sequential([
            keras.layers.Dense(2, activation='softmax')
        ])
        model2.build((2, 2))
        model2.set_weights(
            [np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
             np.array([0.0, 0.0], dtype=np.float32)])

        optimizer = keras.optimizers.Adam(0.001 * hvd.size())

        state = hvd.elastic.KerasState(model1, optimizer,
                                       batch=20 + hvd.rank(),
                                       epoch=10 + hvd.rank())
        state.sync()

        model1_weights = model1.get_weights()
        model2_weights = model2.get_weights()

        # After sync, all values should match the root rank
        for w in state.model.get_weights():
            self.assertAllClose(w, np.ones_like(w))
        assert state.batch == 20
        assert state.epoch == 10

        # Partially modify then restore
        model1.set_weights(model2_weights)
        state.batch = 21
        state.epoch = 11

        state.restore()

        for w1, w2 in zip(model1.get_weights(), model1_weights):
            self.assertAllClose(w1, w2)
        assert state.batch == 20
        assert state.epoch == 10

        # Partially modify then commit
        model1.set_weights(model2_weights)
        state.batch = 21
        state.epoch = 11

        state.commit()
        state.restore()

        for w1, w2 in zip(model1.get_weights(), model2_weights):
            self.assertAllClose(w1, w2)
        assert state.batch == 21
        assert state.epoch == 11
def get_batch_sharing_solution(self, batch_patch_info):
    n_workers = hvd.size()
    n_imgs_per_worker = batch_patch_info.shape[1] // hvd.size()
    worker_batch_delta = np.zeros((hvd.size(), 2), np.int32)
    for wi in range(n_workers):
        worker_batch_delta[wi, 0] = np.sum(batch_patch_info[0, :] == wi) - n_imgs_per_worker
        worker_batch_delta[wi, 1] = wi
    # sorted by decreasing number of missing patches
    sw = worker_batch_delta[worker_batch_delta[:, 0].argsort(), :]
    # print("***SHARING SOLUTION***")
    # print("INITIAL OFFERINGS")
    # print(sw)
    transfers = np.zeros((n_workers, n_workers), np.int32)
    i = 0
    j = n_workers - 1
    while i < j and sw[i, 0] < 0:  # while some worker is still missing patches
        if sw[i, 0] < 0 and sw[j, 0] > 0:  # worker i is missing patches and worker j has too many
            init_i = sw[i, 0]
            init_j = sw[j, 0]
            if -sw[i, 0] < sw[j, 0]:
                # worker j can fulfil worker i's request and still has some images left
                transfers[sw[i, 1], sw[j, 1]] = sw[i, 0]
                transfers[sw[j, 1], sw[i, 1]] = -sw[i, 0]
                sw[j, 0] += sw[i, 0]
                sw[i, 0] = 0
                i += 1
            else:
                # worker i takes all of worker j's surplus and may still need more
                transfers[sw[i, 1], sw[j, 1]] = -sw[j, 0]
                transfers[sw[j, 1], sw[i, 1]] = sw[j, 0]
                sw[i, 0] += sw[j, 0]
                sw[j, 0] = 0
                if -init_i == init_j:  # if both are fulfilled
                    continue
                i += 1
                j -= 1
    if not np.sum(sw[:, 0]) == 0:
        raise Exception("Error in sharing solution, check source code !!!!")
    return transfers
def save_model(self):
    if self.distributed_training is True:
        if hvd.rank() == 0:
            if self.noise_stddev > 0:
                self.model.save('model_%d_%s_noise_np_%d.h5' %
                                (self.input_shape[0], self.base_model_name, hvd.size()))
            else:
                self.model.save('model_%d_%s_np_%d.h5' %
                                (self.input_shape[0], self.base_model_name, hvd.size()))
    else:
        if self.noise_stddev > 0:
            self.model.save('model_%d_%s_noise.h5' % (self.input_shape[0], self.base_model_name))
        else:
            self.model.save('model_%d_%s.h5' % (self.input_shape[0], self.base_model_name))
def build(self):
    from keras.optimizers import deserialize
    opt_config = {'class_name': self.name, 'config': self.config}
    opt = deserialize(opt_config)
    if self.horovod_wrapper:
        import horovod.keras as hvd
        if hasattr(opt, 'lr'):
            opt.lr *= hvd.size()
        opt = hvd.DistributedOptimizer(opt)
    return opt
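# A minimal sketch (not from the source) of the deserialize-then-wrap pattern that build()
# relies on: the opt_config dict mirrors what keras.optimizers.serialize() produces (here
# assuming the pre-2.3 Keras 'lr' config key), and the learning rate is scaled by the
# Horovod world size before wrapping, following the same rule as build() above.
import horovod.keras as hvd
from keras.optimizers import deserialize

hvd.init()
opt = deserialize({'class_name': 'SGD',
                   'config': {'lr': 0.01, 'momentum': 0.9, 'nesterov': True}})
if hasattr(opt, 'lr'):
    opt.lr *= hvd.size()  # same scaling rule as build()
opt = hvd.DistributedOptimizer(opt)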
def _get_optimizer(params, is_distributed=_DISTRIBUTED):
    if is_distributed:
        # Horovod: adjust learning rate based on number of GPUs.
        opt = keras.optimizers.SGD(lr=params["learning_rate"] * hvd.size(),
                                   momentum=params["momentum"])
        # Horovod: add Horovod Distributed Optimizer.
        return hvd.DistributedOptimizer(opt)
    else:
        return keras.optimizers.SGD(lr=params["learning_rate"],
                                    momentum=params["momentum"])
def train_evaluate():

    # Generate training and validation data generators
    def get_image_list(data_dir):
        dataset = []
        for folder in os.listdir(data_dir):
            for image in os.listdir(os.path.join(data_dir, folder)):
                dataset.append((os.path.join(data_dir, folder, image), folder))
        return dataset

    training_data = ImageSequence(get_image_list(os.path.join(FLAGS.data_dir, 'train')),
                                  FLAGS.batch_size, True)
    validation_data = ImageSequence(get_image_list(os.path.join(FLAGS.data_dir, 'test')),
                                    FLAGS.batch_size, False)

    # Horovod: initialize Horovod
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    tf.keras.backend.set_session(tf.Session(config=config))

    # Create a model
    model = network_model(FLAGS.hidden_units)

    loss = 'categorical_crossentropy'
    # Horovod: adjust learning rate based on number of GPUs
    optimizer = Adadelta(lr=1.0 * hvd.size())
    # Horovod: add Horovod Distributed Optimizer
    optimizer = hvd.DistributedOptimizer(optimizer)
    metrics = ['acc']
    model.compile(optimizer, loss, metrics)

    # Set up callbacks
    callbacks = [
        # Broadcast initial variable states from rank 0 to all other processes
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    ]

    # Horovod: save logs only on worker 0
    if hvd.rank() == 0:
        callbacks.append(tf.keras.callbacks.TensorBoard(log_dir=FLAGS.log_dir))

    # Start training
    model.fit_generator(generator=training_data,
                        validation_data=validation_data,
                        epochs=FLAGS.epochs,
                        use_multiprocessing=True,
                        workers=4,
                        callbacks=callbacks,
                        verbose=1)

    # Save the model
    model.save(FLAGS.save_model_path)
def __init__(self, filename, batch_size):
    self.f_array = h5py.File(filename, "r")
    x = self.f_array["images"]
    y = self.f_array["masks"]
    self.batch_size = batch_size
    node_array_size = int(np.ceil(len(x) / hvd.size()))
    self.init_array = hvd.rank() * node_array_size
    self.end_array = self.init_array + node_array_size
    self.x = x
    self.y = y
    print("calculating size")
    print("size", len(self))
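# Hypothetical completion (not from the source) showing how a sharded HDF5 Sequence like
# the one above is usually finished: __len__ and __getitem__ only walk this rank's
# [init_array, end_array) slice, so each Horovod worker reads a disjoint part of the file.
import numpy as np
from keras.utils import Sequence

class ShardedH5Sequence(Sequence):
    def __init__(self, x, y, batch_size, rank, size):
        shard = int(np.ceil(len(x) / size))
        self.init_array = rank * shard
        self.end_array = min(self.init_array + shard, len(x))
        self.x, self.y, self.batch_size = x, y, batch_size

    def __len__(self):
        # number of batches in this rank's shard
        return int(np.ceil((self.end_array - self.init_array) / self.batch_size))

    def __getitem__(self, idx):
        start = self.init_array + idx * self.batch_size
        end = min(start + self.batch_size, self.end_array)
        return self.x[start:end], self.y[start:end]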
def lr_schedule(epoch):
    """Learning Rate Schedule

    The learning rate is reduced after 20, 40, 60 and 70 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The current epoch index

    # Returns
        lr (float32): learning rate
    """
    if epoch <= 20:  # was 5
        # bypass to the warmup callback
        return K.get_value(model.optimizer.lr)
    if epoch <= 40:
        return 0.08 * hvd.size()  # was 80, 0.01
    if epoch <= 60:
        return 0.01 * hvd.size()  # was 120, 0.002
    if epoch <= 70:
        return 0.002 * hvd.size()  # was 160, 0.0004
    return 0.0004 * hvd.size()
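# A hedged wiring sketch (assumed, not from the source): lr_schedule is typically registered
# through keras.callbacks.LearningRateScheduler, with the Horovod warmup callback owning the
# learning rate for the first epochs that the schedule bypasses. warmup_epochs=20 is an
# assumption matching the `epoch <= 20` branch above; `keras`, `hvd` are assumed imports.
callbacks = [
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    hvd.callbacks.MetricAverageCallback(),
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=20, verbose=1),
    keras.callbacks.LearningRateScheduler(lr_schedule),
]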
def create_model():
    opt = keras.optimizers.SGD(lr=0.01 * hvd.size(), momentum=0.9)
    opt = hvd.DistributedOptimizer(opt)

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.RepeatVector(3))
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
    model.compile(loss=keras.losses.MSE,
                  optimizer=opt,
                  metrics=[keras.metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    return model
def data_generator(file_path, batch_size, seq_len=512, predict=False):
    # Trick the code into thinking we're only running 1 process for prediction when running `Metrics`.
    if predict:
        size = 1
    else:
        size = hvd.size()
    total_batch_size = batch_size * size
    print(total_batch_size)
    rank = hvd.rank()
    print(rank)
    range_start = batch_size * rank
    range_end = range_start + batch_size
    print(range_start, range_end)
    while True:
        with xopen(file_path, "rt") as f:
            _, label_dim = json.loads(f.readline())
            text = []
            labels = []
            for line in f:
                if len(text) == total_batch_size:
                    text = text[range_start:range_end]
                    labels = labels[range_start:range_end]
                    print(text[0])
                    # Fun fact: the 2 inputs must be in a list, *not* a tuple. Why.
                    yield ([np.asarray(text), np.zeros_like(text)], np.asarray(labels))
                    text = []
                    labels = []
                line = json.loads(line)
                # First sublist is token ids.
                text.append(np.asarray(line[0])[0:seq_len])
                # Second sublist is positive label indices.
                label_line = np.zeros(label_dim, dtype='b')
                label_line[line[1]] = 1
                labels.append(label_line)
            # Yield what is left as the last batch when file has been read to its end.
            # Split the remaining examples, duplicating with `ceil()` if they don't split evenly.
            leftover_batch_start = ceil(len(text) / size) * rank
            leftover_batch_end = leftover_batch_start + ceil(len(text) / size)
            text = text[leftover_batch_start:leftover_batch_end]
            labels = labels[leftover_batch_start:leftover_batch_end]
            yield ([np.asarray(text), np.zeros_like(text)], np.asarray(labels))
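# A hedged usage sketch (not from the source): every worker pulls its own batch_size-wide
# slice out of each global batch, so steps_per_epoch is divided by hvd.size().
# `model`, `callbacks`, `n_train_examples` and the file name are placeholders.
train_gen = data_generator("train.jsonl.xz", batch_size=16, seq_len=512)
model.fit_generator(train_gen,
                    steps_per_epoch=n_train_examples // (16 * hvd.size()),
                    epochs=5,
                    callbacks=callbacks,
                    verbose=1 if hvd.rank() == 0 else 0)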
def create_inception_model(self, number_categories, dense_layer_sizes, dropout_fraction,
                           unfrozen_layers, focal_loss=False):
    hvd.init()
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    # Pin this process to its local GPU
    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))
    opt = hvd.DistributedOptimizer(tf.keras.optimizers.Adam(learning_rate=0.001 * hvd.size()))
    model = InceptionV3(include_top=False, pooling='avg')
    output = model.outputs[0]
    for layer_size in dense_layer_sizes:
        dense = Dense(layer_size, activation='relu')(output)
        dropout = Dropout(dropout_fraction)(dense)
        output = BatchNormalization()(dropout)
    if number_categories == 1:
        output = Dense(1, activation='sigmoid')(output)
    else:
        output = Dense(number_categories, activation='softmax')(output)
    model = Model(inputs=model.inputs, outputs=output)
    for index in range(len(model.layers) - unfrozen_layers):
        model.layers[index].trainable = False
    if number_categories == 1:
        the_metrics = [metrics.binary_accuracy]
        if focal_loss:
            loss = customlosses.focal_binary_crossentropy
        else:
            loss = 'binary_crossentropy'
    else:
        the_metrics = [metrics.categorical_accuracy]
        if focal_loss:
            loss = customlosses.focal_categorical_crossentropy
        else:
            loss = 'categorical_crossentropy'
    model.compile(optimizer=opt, loss=loss, metrics=the_metrics)
    model.save(self.model_filename)
    self.model = model
def batch_generator(full_sequences, fragment_length, batch_size, fragment_stride,
                    nb_output_bins, randomize_batch_order, _rnd):
    indices = list(fragment_indices(full_sequences, fragment_length, batch_size,
                                    fragment_stride, nb_output_bins))

    global g_multi_gpu
    if g_multi_gpu:
        import horovod.keras as hvd
        gpu_count = hvd.size()
        current_gpu = hvd.rank()
    else:
        gpu_count = 1
        current_gpu = 0

    if randomize_batch_order:
        _rnd.shuffle(indices)

    batches_parted = [batch for batch in partition_all(batch_size, indices)]
    start_index = len(batches_parted) // gpu_count * current_gpu
    batches_gpu = batches_parted[start_index:]
    batches = cycle(batches_gpu)

    for batch in batches:
        if len(batch) < batch_size:
            continue
        yield np.array([
            one_hot(full_sequences[e[0]][e[1]:e[1] + fragment_length])
            for e in batch
        ], dtype='uint8'), np.array([
            one_hot(full_sequences[e[0]][e[1] + 1:e[1] + fragment_length + 1])
            for e in batch
        ], dtype='uint8')
# Load the data files
train_file = os.path.join(input_dir, 'train.h5')
valid_file = os.path.join(input_dir, 'val.h5')
test_file = os.path.join(input_dir, 'test.h5')
train_input, train_labels, train_weights = load_file(train_file, n_train)
valid_input, valid_labels, valid_weights = load_file(valid_file, n_valid)
test_input, test_labels, test_weights = load_file(test_file, n_test)
print('train shape:', train_input.shape, 'Mean label:', train_labels.mean())
print('valid shape:', valid_input.shape, 'Mean label:', valid_labels.mean())
print('test shape: ', test_input.shape, 'Mean label:', test_labels.mean())

# Model config
conv_sizes = [8, 16, 32]
fc_sizes = [64]
optimizer = 'Adam'
lr = 0.01 * hvd.size()
dropout = 0.5

# Training config
batch_size = 32  # 128
n_epochs = 8

# Build the model
model = build_model(train_input.shape[1:],
                    conv_sizes=conv_sizes, fc_sizes=fc_sizes,
                    dropout=dropout, optimizer=optimizer, lr=lr)
if hvd.rank() == 0:
    model.summary()
def main(args):
    if 'sourcedir.tar.gz' in args.tensorboard_dir:
        tensorboard_dir = re.sub('source/sourcedir.tar.gz', 'model', args.tensorboard_dir)
    else:
        tensorboard_dir = args.tensorboard_dir
    logging.info("Writing TensorBoard logs to {}".format(tensorboard_dir))

    if os.path.isdir(args.checkpoint_path):
        logging.info("Checkpointing directory {} exists".format(args.checkpoint_path))
    else:
        logging.info("Creating Checkpointing directory {}".format(args.checkpoint_path))
        os.mkdir(args.checkpoint_path)

    mpi = False
    if 'sagemaker_mpi_enabled' in args.fw_params:
        if args.fw_params['sagemaker_mpi_enabled']:
            import horovod.keras as hvd
            mpi = True
            # Horovod: initialize Horovod.
            hvd.init()

            # Horovod: pin GPU to be used to process local rank (one GPU per process)
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.gpu_options.visible_device_list = str(hvd.local_rank())
            K.set_session(tf.Session(config=config))
    else:
        hvd = None
    logging.info("Running with MPI={}".format(mpi))

    logging.info("getting data")
    train_dataset = train_input_fn()
    eval_dataset = eval_input_fn()
    validation_dataset = validation_input_fn()

    logging.info("configuring model")

    # Load model
    if not os.listdir(args.checkpoint_path):
        model = keras_model_fn(args.learning_rate, args.weight_decay, args.optimizer,
                               args.momentum, mpi, hvd)
        epoch_number = 0
    else:
        model, epoch_number = load_checkpoint_model(args.checkpoint_path)

    logging.info("Checkpointing to: {}".format(args.checkpoint_path))

    callbacks = []
    if mpi:
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
        callbacks.append(hvd.callbacks.MetricAverageCallback())
        callbacks.append(hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1))
        callbacks.append(keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1))
        if hvd.rank() == 0:
            callbacks.append(ModelCheckpoint(args.checkpoint_path + '/checkpoint-{epoch}.h5'))
            callbacks.append(TensorBoard(log_dir=tensorboard_dir, update_freq='epoch'))
    else:
        callbacks.append(keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1))
        callbacks.append(ModelCheckpoint(args.checkpoint_path + '/checkpoint-{epoch}.h5'))
        callbacks.append(TensorBoard(log_dir=tensorboard_dir, update_freq='epoch'))

    logging.info("Starting training")

    size = 1
    if mpi:
        size = hvd.size()

    model.fit(x=train_dataset[0],
              y=train_dataset[1],
              steps_per_epoch=(num_examples_per_epoch('train') // args.batch_size) // size,
              epochs=args.epochs,
              initial_epoch=epoch_number,
              validation_data=validation_dataset,
              validation_steps=(num_examples_per_epoch('validation') // args.batch_size) // size,
              callbacks=callbacks)

    score = model.evaluate(eval_dataset[0],
                           eval_dataset[1],
                           steps=num_examples_per_epoch('eval') // args.batch_size,
                           verbose=0)

    logging.info('Test loss:{}'.format(score[0]))
    logging.info('Test accuracy:{}'.format(score[1]))

    # Horovod: Save model only on worker 0 (i.e. master)
    if mpi:
        if hvd.rank() == 0:
            save_model(model, args.model_output_dir)
    else:
        save_model(model, args.model_output_dir)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(lr=args.lr * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.
def _run():
    import keras
    import models
    logger = tk.log.get(__name__)
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', help='Number of epochs.', default=300, type=int)
    parser.add_argument('--batch-size', help='Batch size.', default=16, type=int)
    parser.add_argument('--warm', help='Load models/model.fold{cv_index}.h5.', action='store_true', default=False)
    parser.add_argument('--cv-index', help='Which CV fold to run.', type=int)
    parser.add_argument('--cv-size', help='Number of CV folds.', default=5, type=int)
    parser.add_argument('--split-seed', help='Random seed for the split.', default=123, type=int)
    args = parser.parse_args()
    assert args.cv_index in range(args.cv_size)
    model_path = _MODELS_DIR / 'model.fold{}.h5'.format(args.cv_index)

    (X_train, y_train), (X_val, y_val), _ = data.load_data(args.cv_index, args.cv_size, args.split_seed)
    num_classes = len(np.unique(y_train))
    y_train = tk.ml.to_categorical(num_classes)(y_train)
    y_val = tk.ml.to_categorical(num_classes)(y_val)
    logger.info('len(X_train) = {} len(X_val) = {}'.format(len(X_train), len(X_val)))

    model = models.create_network(num_classes)

    # Learning rate:
    # - lr 0.5 with batch size 256 is a common baseline, so start from there
    # - rumor has it that scaling in proportion to the batch size works well
    lr = 0.5 * args.batch_size / 256 * hvd.size()
    opt = keras.optimizers.SGD(lr=lr, momentum=0.9, nesterov=True)
    opt = hvd.DistributedOptimizer(opt)
    model.compile(opt, 'categorical_crossentropy', ['acc'])

    if hvd.rank() == 0 and args.cv_index == 0:
        model.summary(print_fn=logger.info)
        logger.info('network depth: %d', tk.dl.count_network_depth(model))

    if args.warm:
        model.load_weights(str(model_path))
        logger.info('{} loaded'.format(model_path))
    else:
        assert not model_path.exists()  # guard against accidental overwrites

    callbacks = []
    if args.warm and args.epochs < 300:  # shortened schedule
        callbacks.append(tk.dl.learning_rate_callback((0, 0.5)))
    else:
        callbacks.append(tk.dl.learning_rate_callback())
    callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
    callbacks.append(hvd.callbacks.MetricAverageCallback())
    callbacks.append(hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1))
    if hvd.rank() == 0:
        callbacks.append(tk.dl.tsv_log_callback(_MODELS_DIR / 'history.tsv'))
    callbacks.append(tk.dl.freeze_bn_callback(0.95))

    gen = models.create_generator((299, 299), mixup=True)
    model.fit_generator(
        gen.flow(X_train, y_train, batch_size=args.batch_size, data_augmentation=True, shuffle=True),
        steps_per_epoch=gen.steps_per_epoch(len(X_train), args.batch_size) // hvd.size(),
        epochs=args.epochs,
        verbose=1 if hvd.rank() == 0 else 0,
        validation_data=gen.flow(X_val, y_val, batch_size=args.batch_size, shuffle=True),
        validation_steps=gen.steps_per_epoch(len(X_val), args.batch_size) // hvd.size(),  # the "* 3" factor is omitted
        callbacks=callbacks)

    if hvd.rank() == 0:
        model.save(str(model_path))

        proba_val = model.predict_generator(
            gen.flow(X_val, y_val, batch_size=args.batch_size),
            gen.steps_per_epoch(len(X_val), args.batch_size),
            verbose=1)
        joblib.dump(proba_val, _MODELS_DIR / 'proba_val.fold{}.pkl'.format(args.cv_index))

        pred_val = proba_val.argmax(axis=-1)
        logger.info('val_acc: {:.1f}%'.format(
            sklearn.metrics.accuracy_score(y_val.argmax(axis=-1), pred_val) * 100))
def __init__(self, config_file, resume_training=True, resume_epoch=None,
             predict_length=None, multi_gpu=False):
    self.config = ConfigParser.ConfigParser(allow_no_value=True)
    try:
        self.config.readfp(open(config_file))
    except:
        print('Could not read configuration file {} - exiting.'.format(config_file))
        sys.exit(1)

    # Get General Configuration
    self.train_multi_gpu = multi_gpu
    self.resume_training = resume_training
    self.resume_epoch = resume_epoch
    self.keras_verbose = self.config.getint('general', 'keras_verbose')
    self.seed = self.config.getint('general', 'seed')
    if self.seed is None:
        self.seed = 42

    # Get Model Configuration
    self.data_dir = self.config.get('model', 'data_dir')
    self.data_dir_structure = self.config.get('model', 'data_dir_structure')
    self.model_dir = self.config.get('model', 'model_dir')
    if len(self.model_dir) == 0:
        self.model_dir = None
    self.sample_rate = self.config.getint('model', 'sample_rate')
    self.debug = self.config.getint('model', 'debug')

    # Training Configuration
    self.max_epoch = self.config.getint('training', 'max_epoch')
    self.test_factor = self.config.getfloat('training', 'test_factor')
    self.batch_size = self.config.getint('training', 'batch_size')
    self.output_bins = self.config.getint('training', 'output_bins')
    self.filters = self.config.getint('training', 'filters')
    self.dilation_depth = self.config.getint('training', 'dilation_depth')
    self.stacks = self.config.getint('training', 'stacks')
    self.use_bias = self.config.getboolean('training', 'use_bias')
    self.use_ulaw = self.config.getboolean('training', 'use_ulaw')
    self.res_l2 = self.config.getfloat('training', 'res_l2')
    self.final_l2 = self.config.getfloat('training', 'final_l2')
    self.initial_fragment_length = self.config.getint('training', 'initial_fragment_length')
    self.fragment_stride = self.config.getint('training', 'fragment_stride')
    self.use_skip_connections = self.config.getboolean('training', 'use_skip_connections')
    self.learn_all_outputs = self.config.getboolean('training', 'learn_all_outputs')
    self.random_train_batches = self.config.getboolean('training', 'random_train_batches')
    self.randomize_batch_order = self.config.getboolean('training', 'randomize_batch_order')
    self.train_only_in_receptive_field = self.config.getboolean('training', 'train_only_in_receptive_field')
    self.train_with_soft_targets = self.config.getboolean('training', 'train_with_soft_targets')
    self.soft_target_stdev = self.config.getfloat('training', 'soft_target_stdev')
    self.optimizer = self.config.get('training', 'optimizer')
    self.early_stopping_patience = self.config.getint('training', 'early_stopping_patience')

    # Prediction Configuration
    self.predict_length = self.config.getfloat('prediction', 'predict_length')
    # Let's allow the user to overwrite the length via cmd-line, it is more practical :-)
    if predict_length is not None:
        self.predict_length = predict_length
    self.sample_argmax = self.config.getboolean('prediction', 'sample_argmax')
    self.sample_temperature = self.config.getfloat('prediction', 'sample_temperature')
    if self.sample_temperature < 0.001:
        self.sample_temperature = None
    self.predict_initial_input = self.config.get('prediction', 'initial_input')
    if len(self.predict_initial_input) == 0:
        self.predict_initial_input = None
    self.predict_use_softmax_as_input = self.config.getboolean('prediction', 'use_softmax_as_input')
    self.sample_seed = self.seed

    np.random.seed(self.seed)
    self.rnd = np.random.RandomState(self.seed)
    self.fragment_length = self.initial_fragment_length + \
        self._compute_receptive_field2(self.sample_rate, self.dilation_depth, self.stacks)[0]

    # Additional Settings
    self.num_gpus = 1
    self.train_rank = 0
    if self.train_multi_gpu:
        self.train_rank = hvd.rank()
        self.num_gpus = hvd.size()
        print('rank = {}, num_gpu={}'.format(self.train_rank, self.num_gpus))

    self.dataset = DataSet(self.config, self.fragment_length, self.num_gpus, self.train_rank)
# Add L2 weight decay & adjust BN settings.
model_config = model.get_config()
for layer, layer_config in zip(model.layers, model_config['layers']):
    if hasattr(layer, 'kernel_regularizer'):
        regularizer = keras.regularizers.l2(args.wd)
        layer_config['config']['kernel_regularizer'] = \
            {'class_name': regularizer.__class__.__name__,
             'config': regularizer.get_config()}
    if type(layer) == keras.layers.BatchNormalization:
        layer_config['config']['momentum'] = 0.9
        layer_config['config']['epsilon'] = 1e-5

model = keras.models.Model.from_config(model_config)

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.SGD(lr=args.base_lr * hvd.size(), momentum=args.momentum)

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt, compression=compression)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy', 'top_k_categorical_accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
import horovod.keras as hvd

# Horovod: initialize Horovod.
hvd.init()

# Horovod: pin GPU to be used to process local rank (one GPU per process)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

batch_size = 128
num_classes = 10

# Horovod: adjust number of epochs based on number of GPUs.
epochs = int(math.ceil(12.0 / hvd.size()))

# Input image dimensions
img_rows, img_cols = 28, 28

# The data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
# We'll save the worker logs and models separately but only
# use the logs/saved model from worker 0.
args.saved_model = "./worker{}/3d_unet_decathlon.hdf5".format(hvd.rank())

# Optimize CPU threads for TensorFlow
CONFIG = tf.ConfigProto(inter_op_parallelism_threads=args.interop_threads,
                        intra_op_parallelism_threads=args.intraop_threads)

SESS = tf.Session(config=CONFIG)

K.backend.set_session(SESS)

model, opt = unet_3d(use_upsampling=args.use_upsampling,
                     n_cl_in=args.number_input_channels,
                     learning_rate=args.lr * hvd.size(),
                     n_cl_out=1,  # single channel (greyscale)
                     dropout=0.2,
                     print_summary=print_summary)

opt = hvd.DistributedOptimizer(opt)

model.compile(optimizer=opt,
              # loss=[combined_dice_ce_loss],
              loss=[dice_coef_loss],
              metrics=[dice_coef, "accuracy", sensitivity, specificity])

if hvd.rank() == 0:
    start_time = datetime.datetime.now()
    print("Started script on {}".format(start_time))
#initial_model = create_vgg16()
#initial_model.load_weights(model_path)  # we may begin from scratch
#x = Dense(batches.num_class, activation='softmax')(initial_model.layers[-2].output)
#model = Model(initial_model.input, x)
#for layer in initial_model.layers: layer.trainable=False  # for scratch build

#opt = Adam(lr=0.001)
opt = SGD(lr=0.01)
opt = hvd.DistributedOptimizer(opt)

callbacks = [
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    hvd.callbacks.MetricAverageCallback(),
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=2, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=3, verbose=1),
]

model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(batches,
                    steps_per_epoch=batches.samples // batch_size // hvd.size(),
                    nb_epoch=10,
                    validation_data=valid_batches,
                    validation_steps=valid_batches.samples // batch_size // hvd.size())

if hvd.rank() == 0:
    model_json = model.to_json()
    with open("model.json", 'w') as json_file:
        json_file.write(model_json)
    model.save_weights("model_first.h5")
    print("Saved model in the first step")
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.
"validate_test_split": args.validate_test_split, "augment": False, "shuffle": False, "seed": args.random_seed } validation_generator = DataGenerator("validate", args.data_path, **validation_data_params) if (hvd.rank() == 0): validation_generator.print_info() # Fit the model # Do at least 3 steps for training and validation steps_per_epoch = max( 3, training_generator.get_length() // (args.bz * hvd.size())) validation_steps = max( 3, 3 * training_generator.get_length() // (args.bz * hvd.size())) unet_model.model.fit_generator( training_generator, steps_per_epoch=steps_per_epoch, epochs=args.epochs, verbose=verbose, validation_data=validation_generator, #validation_steps=validation_steps, callbacks=callbacks, max_queue_size=1, #args.num_prefetched_batches, workers=1, #args.num_data_loaders, use_multiprocessing=True)
from prednet import PredNet
from data_utils import SequenceGenerator
from kitti_settings import *
import datetime
import horovod.keras as hvd
import keras
import tensorflow as tf

# Horovod: initialize Horovod
hvd.init()

# Horovod: pin GPU to be used for process local rank (one GPU per process)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

print("horovod size", hvd.size())

save_model = True  # if weights will be saved
weights_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_weights.hdf5')  # where weights will be saved
json_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json')
if not os.path.exists(WEIGHTS_DIR):
    os.mkdir(WEIGHTS_DIR)

# Data files
train_file = os.path.join(DATA_DIR, 'X_train.hkl')
train_sources = os.path.join(DATA_DIR, 'sources_train.hkl')
val_file = os.path.join(DATA_DIR, 'X_val.hkl')
val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')

# Training parameters
nb_epoch = 10  # original: 150; for all tests so far set to 100; t2onlyMax: 150
batch_size = 15
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(1.0 * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
]
# Add L2 weight decay & adjust BN settings.
model_config = model.get_config()
for layer, layer_config in zip(model.layers, model_config['layers']):
    if hasattr(layer, 'kernel_regularizer'):
        regularizer = keras.regularizers.l2(args.wd)
        layer_config['config']['kernel_regularizer'] = \
            {'class_name': regularizer.__class__.__name__,
             'config': regularizer.get_config()}
    if type(layer) == keras.layers.BatchNormalization:
        layer_config['config']['momentum'] = 0.9
        layer_config['config']['epsilon'] = 1e-5

model = keras.models.Model.from_config(model_config)

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.SGD(lr=args.base_lr * hvd.size(), momentum=args.momentum)

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.Adadelta(1.0 * hvd.size())

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

log_dir = "../logs/keras-tensorboard-profile/" + datetime.now().strftime("%Y%m%d-%H%M%S")

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
def train(self):
    train_data_generator = self.data_loader.get_train_data_generator()
    batch_size = self.config.trainer.batch_size
    steps_per_epoch = self.data_loader.get_train_data_size() // batch_size
    if self.config.trainer.use_horovod:
        import horovod.keras as hvd
        steps_per_epoch //= hvd.size()
    assert steps_per_epoch > 0

    valid_data_generator = self.data_loader.get_validation_data_generator()
    valid_data_size = self.data_loader.get_validation_data_size()

    fake_x_pool = FakeImagePool(self.config.trainer.fake_pool_size)
    fake_y_pool = FakeImagePool(self.config.trainer.fake_pool_size)

    batch_shape = (self.config.trainer.batch_size,
                   self.config.dataset.image_size // 8,
                   self.config.dataset.image_size // 8, 1)
    fake = np.zeros(shape=batch_shape, dtype=np.float32)
    real = np.ones(shape=batch_shape, dtype=np.float32)

    epochs = self.config.trainer.num_epochs
    start_time = datetime.datetime.now()

    self.on_train_begin()
    for epoch in range(self.config.trainer.epoch_to_continue, epochs):
        self.on_epoch_begin(epoch, {})
        epoch_logs = defaultdict(float)
        for step in range(1, steps_per_epoch + 1):
            batch_logs = {"batch": step, "size": self.config.trainer.batch_size}
            self.on_batch_begin(step, batch_logs)

            imgs_x, imgs_y = next(train_data_generator)
            fakes_y = self.g_xy.predict(imgs_x)
            fakes_x = self.g_yx.predict(imgs_y)

            # train discriminator using history of fake images (Shrivastava et al)
            fakes_x = fake_x_pool.query(fakes_x)
            fakes_y = fake_y_pool.query(fakes_y)

            if self.config.trainer.label_smoothing:
                fake = np.random.uniform(0, 0.2, size=batch_shape)
                real = np.random.uniform(0.8, 1.0, size=batch_shape)

            # train discriminator
            dx_loss_real = self.d_x.train_on_batch(imgs_x, real)
            dx_loss_fake = self.d_x.train_on_batch(fakes_x, fake)
            dy_loss_real = self.d_y.train_on_batch(imgs_y, real)
            dy_loss_fake = self.d_y.train_on_batch(fakes_y, fake)

            # train generator
            g_loss = self.combined.train_on_batch([imgs_x, imgs_y],
                                                  [real, real, imgs_x, imgs_y, imgs_x, imgs_y])

            dx_metric_names = self.d_metric_names("x")
            dy_metric_names = self.d_metric_names("y")
            g_metric_names = self.g_metric_names()
            assert len(dx_metric_names) == len(dx_loss_real) == len(dx_loss_fake)
            assert len(dy_metric_names) == len(dy_loss_real) == len(dy_loss_fake)
            assert len(g_metric_names) == len(g_loss)

            metric_logs = {}
            for metric_name, metric_value in zip(dx_metric_names + dy_metric_names,
                                                 dx_loss_real + dy_loss_real):
                metric_logs[f"train/{metric_name}_real"] = \
                    metric_value * (100 if "accuracy" in metric_name.lower() else 1)
            for metric_name, metric_value in zip(dx_metric_names + dy_metric_names,
                                                 dx_loss_fake + dy_loss_fake):
                metric_logs[f"train/{metric_name}_fake"] = \
                    metric_value * (100 if "accuracy" in metric_name.lower() else 1)
            for metric_name, metric_value in zip(g_metric_names, g_loss):
                metric_logs[f"train/{metric_name}"] = metric_value

            batch_logs.update(metric_logs)
            for metric_name in metric_logs.keys():
                if metric_name in epoch_logs:
                    epoch_logs[metric_name] += metric_logs[metric_name]
                else:
                    epoch_logs[metric_name] = metric_logs[metric_name]

            print_str = f"[Epoch {epoch + 1}/{epochs}] [Batch {step}/{steps_per_epoch}]"
            deliminator = ' '
            for metric_name, metric_value in metric_logs.items():
                if 'accuracy' in metric_name:
                    print_str += f"{deliminator}{metric_name}={metric_value:.1f}%"
                elif 'loss' in metric_name:
                    print_str += f"{deliminator}{metric_name}={metric_value:.4f}"
                else:
                    print_str += f"{deliminator}{metric_name}={metric_value}"
                if deliminator == ' ':
                    deliminator = ',\t'
            print_str += f", time: {datetime.datetime.now() - start_time}"
            print(print_str, flush=True)

            self.on_batch_end(step, batch_logs)

        # sum to average
        for k in epoch_logs:
            epoch_logs[k] /= steps_per_epoch
        epoch_logs = dict(epoch_logs)

        # additional log
        epoch_logs['train/lr/G'] = K.get_value(self.combined.optimizer.lr)
        epoch_logs['train/lr/D_x'] = K.get_value(self.d_x.optimizer.lr)
        epoch_logs['train/lr/D_y'] = K.get_value(self.d_y.optimizer.lr)

        self.on_epoch_end(epoch, epoch_logs)
        if (epoch + 1) % self.config.trainer.predict_freq == 0:
            self.sample_valid_images(epoch, valid_data_generator, valid_data_size)

    self.predict_test_images(epochs)
    self.on_train_end()
# Add L2 weight decay & adjust BN settings.
model_config = model.get_config()
for layer, layer_config in zip(model.layers, model_config['layers']):
    if hasattr(layer, 'kernel_regularizer'):
        regularizer = keras.regularizers.l2(weight_decay)
        layer_config['config']['kernel_regularizer'] = \
            {'class_name': regularizer.__class__.__name__,
             'config': regularizer.get_config()}
    if type(layer) == keras.layers.BatchNormalization:
        layer_config['config']['momentum'] = 0.9
        layer_config['config']['epsilon'] = 1e-5

model = keras.models.Model.from_config(model_config)

# Horovod: adjust learning rate based on number of GPUs.
opt = keras.optimizers.SGD(lr=learning_rate * hvd.size(), momentum=0.9)

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy', 'top_k_categorical_accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.