def multi_gpu_test_invalid_devices():
    """Check that unusable `gpus` specifications are rejected with ValueError.

    For every specification, wrapping the model with `multi_gpu_model` and
    then fitting it must raise `ValueError` somewhere along the way.
    """
    input_shape = (1000, 10)
    n_samples, n_features = input_shape

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(10,
                                 activation='relu',
                                 input_shape=(n_features,)))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='mse', optimizer='rmsprop')

    x = np.random.random(input_shape)
    y = np.random.random((n_samples, 1))

    # Same order as the individual cases: a GPU count, a list of device
    # ids, a single GPU as a count, and a single GPU as a list.
    invalid_specs = (10, [0, 2, 4, 6, 8], 1, [0])
    for spec in invalid_specs:
        with pytest.raises(ValueError):
            parallel_model = multi_gpu_model(model, gpus=spec)
            parallel_model.fit(x, y, epochs=2)
def multi_gpu_test_multi_io_model():
    """Train a two-input / two-output model through `multi_gpu_model`.

    The same template model is wrapped twice: once with a GPU count and
    once with an explicit list of device ids.
    """
    print('####### test multi-io model')

    num_samples = 1000
    input_dim_a, input_dim_b = 10, 5
    output_dim_a, output_dim_b = 1, 2
    hidden_dim = 10
    epochs = 2
    gpu_specs = (8, [0, 2, 4])

    # Two branches merged by concatenation, then split into two heads.
    input_a = keras.Input((input_dim_a,))
    input_b = keras.Input((input_dim_b,))
    a = keras.layers.Dense(hidden_dim)(input_a)
    b = keras.layers.Dense(hidden_dim)(input_b)
    merged = keras.layers.concatenate([a, b])
    output_a = keras.layers.Dense(output_dim_a)(merged)
    output_b = keras.layers.Dense(output_dim_b)(merged)
    model = keras.models.Model([input_a, input_b], [output_a, output_b])

    a_x = np.random.random((num_samples, input_dim_a))
    b_x = np.random.random((num_samples, input_dim_b))
    a_y = np.random.random((num_samples, output_dim_a))
    b_y = np.random.random((num_samples, output_dim_b))

    for spec in gpu_specs:
        parallel_model = multi_gpu_model(model, gpus=spec)
        parallel_model.compile(loss='mse', optimizer='rmsprop')
        parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
def initModel(self):
    """Build both autoencoders on the shared encoder and compile them.

    Each autoencoder chains `self.encoder` with its own decoder. When more
    than one GPU is configured the models are wrapped with
    `multi_gpu_model` before compilation. Both models are compiled with
    the same optimizer instance.
    """
    optimizer = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
    image_in = Input(shape=IMAGE_SHAPE)

    self.autoencoder_A = KerasModel(image_in,
                                    self.decoder_A(self.encoder(image_in)))
    self.autoencoder_B = KerasModel(image_in,
                                    self.decoder_B(self.encoder(image_in)))

    if self.gpus > 1:
        self.autoencoder_A = multi_gpu_model(self.autoencoder_A, self.gpus)
        self.autoencoder_B = multi_gpu_model(self.autoencoder_B, self.gpus)

    for autoencoder in (self.autoencoder_A, self.autoencoder_B):
        autoencoder.compile(optimizer=optimizer, loss='mean_absolute_error')
def run_benchmark(self, gpus=0):
    """Train a small CNN on random 3x32x32 image data and time it.

    Args:
        gpus: Number of GPUs to use. Values greater than 1 enable
            multi-GPU training (`multi_gpu_model` on the TensorFlow
            backend, `cntk_gpu_mode_config` on the CNTK backend).

    Side effects:
        Sets `self.total_time` to the summed duration of every epoch
        except the first one.
    """
    num_classes = 10
    # Backend names must be compared by value: `is` tests object identity
    # and is not guaranteed to hold for equal strings.
    backend_name = keras.backend.backend()

    # Generate random input data
    input_shape = (self.num_samples, 3, 32, 32)
    x_train, y_train = generate_img_input_data(input_shape)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_train = keras.utils.to_categorical(y_train, num_classes)

    if keras.backend.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)

    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=x_train.shape[1:], activation='relu'))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    if backend_name == "tensorflow" and gpus > 1:
        model = multi_gpu_model(model, gpus=gpus)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    x_train = x_train.astype('float32')
    x_train /= 255

    # create a distributed trainer for cntk
    if backend_name == "cntk" and gpus > 1:
        start, end = cntk_gpu_mode_config(model, x_train.shape[0])
        x_train = x_train[start: end]
        y_train = y_train[start: end]

    time_callback = timehistory.TimeHistory()

    model.fit(x_train,
              y_train,
              batch_size=self.batch_size,
              epochs=self.epochs,
              shuffle=True,
              callbacks=[time_callback])

    # The first epoch (index 0) is left out of the reported total.
    self.total_time = 0
    for i in range(1, self.epochs):
        self.total_time += time_callback.times[i]
def create_models(backbone_retinanet, backbone, num_classes, weights, multi_gpu=0, freeze_backbone=False):
    """Create the base, training and prediction models.

    With `multi_gpu > 1` the base model is built without NMS under the CPU
    device scope, wrapped with `multi_gpu_model` for training, and a
    separate prediction model with an NMS layer appended is created.
    Otherwise a single model built with NMS serves all three roles.

    Returns:
        Tuple `(model, training_model, prediction_model)`.
    """
    modifier = freeze_model if freeze_backbone else None

    def build(nms):
        # Instantiate the backbone network and load `weights` into it.
        network = backbone_retinanet(num_classes, backbone=backbone, nms=nms, modifier=modifier)
        return model_with_weights(network, weights=weights, skip_mismatch=True)

    if multi_gpu > 1:
        # Keras recommends initialising a multi-gpu model on the CPU to ease
        # weight sharing, and to prevent OOM errors.
        with tf.device('/cpu:0'):
            model = build(nms=False)
        training_model = multi_gpu_model(model, gpus=multi_gpu)

        # NMS is appended for prediction only.
        classification = model.outputs[1]
        detections = model.outputs[2]
        boxes = keras.layers.Lambda(lambda x: x[:, :, :4])(detections)
        detections = layers.NonMaximumSuppression(name='nms')([boxes, classification, detections])
        prediction_model = keras.models.Model(inputs=model.inputs,
                                              outputs=model.outputs[:2] + [detections])
    else:
        model = build(nms=True)
        training_model = model
        prediction_model = model

    # compile model
    loss_by_output = {
        'regression': losses.smooth_l1(),
        'classification': losses.focal(),
    }
    training_model.compile(
        loss=loss_by_output,
        optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001)
    )

    return model, training_model, prediction_model
def run_benchmark(self, gpus=0):
    """Train a single-layer LSTM on generated text data and time it.

    Args:
        gpus: Number of GPUs to use. Values greater than 1 enable
            multi-GPU training (`multi_gpu_model` on the TensorFlow
            backend, `cntk_gpu_mode_config` on the CNTK backend).

    Side effects:
        Sets `self.total_time` to the summed duration of every epoch
        except the first one.
    """
    input_dim_1 = 40
    input_dim_2 = 60
    # Backend names must be compared by value: `is` tests object identity
    # and is not guaranteed to hold for equal strings.
    backend_name = keras.backend.backend()

    input_shape = (self.num_samples, input_dim_1, input_dim_2)
    x, y = generate_text_input_data(input_shape)

    # build the model: a single LSTM
    model = Sequential()
    model.add(LSTM(128, input_shape=(input_dim_1, input_dim_2)))
    # `activation` belongs to the Dense layer; `Sequential.add` does not
    # accept it.
    model.add(Dense(input_dim_2, activation='softmax'))

    optimizer = RMSprop(lr=0.01)

    if backend_name == "tensorflow" and gpus > 1:
        model = multi_gpu_model(model, gpus=gpus)

    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    # create a distributed trainer for cntk
    if backend_name == "cntk" and gpus > 1:
        start, end = cntk_gpu_mode_config(model, x.shape[0])
        x = x[start: end]
        y = y[start: end]

    time_callback = timehistory.TimeHistory()

    model.fit(x, y,
              batch_size=self.batch_size,
              epochs=self.epochs,
              callbacks=[time_callback])

    # The first epoch (index 0) is left out of the reported total.
    self.total_time = 0
    for i in range(1, self.epochs):
        self.total_time += time_callback.times[i]
def create_models(num_classes, weights='imagenet', multi_gpu=0):
    """Create the base, training and prediction ResNet50 RetinaNet models.

    The base model is always built without NMS. The training model is
    either the base model itself or its `multi_gpu_model` wrapper, and the
    prediction model appends an NMS layer to the base model's outputs.

    Returns:
        Tuple `(model, training_model, prediction_model)`.
    """
    # create "base" model (no NMS)
    image = keras.layers.Input((None, None, 4))

    def build_base():
        return ResNet50RetinaNet(image, num_classes=num_classes, weights=weights, nms=False)

    if multi_gpu > 1:
        # Keras recommends initialising a multi-gpu model on the CPU to ease
        # weight sharing, and to prevent OOM errors.
        with tf.device('/cpu:0'):
            model = build_base()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model = build_base()
        training_model = model

    # append NMS for prediction only
    classification = model.outputs[1]
    detections = model.outputs[2]
    boxes = keras.layers.Lambda(lambda x: x[:, :, :4])(detections)
    detections = layers.NonMaximumSuppression(name='nms')([boxes, classification, detections])
    prediction_model = keras.models.Model(inputs=model.inputs,
                                          outputs=model.outputs[:2] + [detections])

    # compile model
    loss_by_output = {
        'regression': losses.smooth_l1(),
        'classification': losses.focal(),
    }
    training_model.compile(
        loss=loss_by_output,
        optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001)
    )

    return model, training_model, prediction_model
def add_predictor(self, side, model):
    """ Register the predictor for a side, wrapping it for multi-GPU use if configured.

    The stored predictor is also used to record the input shapes and the
    output shape, each only when not already set.
    """
    logger.debug("Adding predictor: (side: '%s', model: %s)", side, model)

    predictor = model
    if self.gpus > 1:
        logger.debug("Converting to multi-gpu: side %s", side)
        predictor = multi_gpu_model(model, self.gpus)
    self.predictors[side] = predictor

    if not self.state.inputs:
        self.store_input_shapes(predictor)
    if not self.output_shape:
        self.set_output_shape(predictor)
def test_multi_gpu_with_multi_input_layers():
    """Train one batch on a 2-GPU model whose RNN layer takes two inputs.

    The SimpleRNN layer receives both the sequence input and an explicit
    initial state, so a single layer consumes several input tensors.
    """
    timesteps, units, batch = 4, 3, 2

    inputs = keras.Input((timesteps, units))
    init_state = keras.Input((units,))
    rnn = keras.layers.SimpleRNN(units, return_sequences=True)
    outputs = rnn(inputs, initial_state=init_state)

    x = [np.random.randn(batch, timesteps, units),
         np.random.randn(batch, units)]
    y = np.random.randn(batch, timesteps, units)

    model = keras.models.Model([inputs, init_state], outputs)
    parallel_model = multi_gpu_model(model, 2)
    parallel_model.compile(loss='mean_squared_error', optimizer='adam')
    parallel_model.train_on_batch(x, y)
def multi_gpu_application_np_array_benchmark():
    """Time Xception training and inference on random numpy arrays.

    A single-device baseline is measured first, followed by an 8-GPU
    `multi_gpu_model` wrapper around a template built on the CPU.
    """
    print('####### Xception benchmark - np i/o')
    model_cls = keras.applications.Xception

    num_samples = 1000
    height, width = 224, 224
    num_classes = 1000
    epochs = 4
    batch_size = 40
    image_shape = (height, width, 3)

    x = np.random.random((num_samples,) + image_shape)
    y = np.random.random((num_samples, num_classes))

    def elapsed(action):
        # Wall-clock seconds taken by a zero-argument callable.
        started = time.time()
        action()
        return time.time() - started

    # Baseline
    model = model_cls(weights=None,
                      input_shape=image_shape,
                      classes=num_classes)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Training
    print('baseline training:', elapsed(lambda: model.fit(x, y, epochs=epochs)))
    # Inference
    print('baseline inference:', elapsed(lambda: model.predict(x)))

    for n_gpus in range(8, 9):
        K.clear_session()
        with tf.device('/cpu:0'):
            model = model_cls(weights=None,
                              input_shape=image_shape,
                              classes=num_classes)
        parallel_model = multi_gpu_model(model, gpus=n_gpus)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')

        # Training
        train_seconds = elapsed(
            lambda: parallel_model.fit(x, y, epochs=epochs, batch_size=batch_size))
        print('%d gpus training:' % n_gpus, train_seconds)

        # Inference
        predict_seconds = elapsed(
            lambda: parallel_model.predict(x, batch_size=batch_size))
        print('%d gpus inference:' % n_gpus, predict_seconds)
def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0,
                  freeze_backbone=False, lr=1e-5, config=None):
    """ Creates three models (model, training_model, prediction_model).

    Args
        backbone_retinanet : A function to call to create a retinanet model with a given backbone.
        num_classes        : The number of classes to train.
        weights            : The weights to load into the model.
        multi_gpu          : The number of GPUs to use for training.
        freeze_backbone    : If True, disables learning for the backbone.
        lr                 : Learning rate passed to the optimizer.
        config             : Config parameters, None indicates the default configuration.

    Returns
        model            : The base model. This is also the model that is saved in snapshots.
        training_model   : The training model. If multi_gpu=0, this is identical to model.
        prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
    """
    modifier = freeze_model if freeze_backbone else None

    # Anchor parameters come from the config when it provides them;
    # otherwise None is passed on so that the defaults are used.
    anchor_params = None
    num_anchors = None
    if config and 'anchor_parameters' in config:
        anchor_params = parse_anchor_parameters(config)
        num_anchors = anchor_params.num_anchors()

    def build_base():
        network = backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier)
        return model_with_weights(network, weights=weights, skip_mismatch=True)

    if multi_gpu > 1:
        from keras.utils import multi_gpu_model
        # Keras recommends initialising a multi-gpu model on the CPU to ease
        # weight sharing, and to prevent OOM errors.
        with tf.device('/cpu:0'):
            model = build_base()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model = build_base()
        training_model = model

    # make prediction model
    prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

    # compile model
    loss_by_output = {
        'regression': losses.smooth_l1(),
        'classification': losses.focal(),
    }
    training_model.compile(
        loss=loss_by_output,
        optimizer=keras.optimizers.adam(lr=lr, clipnorm=0.001)
    )

    return model, training_model, prediction_model
def multi_gpu_test_simple_model():
    """Train a two-layer dense model through `multi_gpu_model`.

    The same template model is wrapped twice: once with a GPU count and
    once with an explicit list of device ids.
    """
    print('####### test simple model')

    num_samples = 1000
    input_dim, hidden_dim, output_dim = 10, 10, 1
    epochs = 2
    gpu_specs = (8, [0, 2, 4])

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(hidden_dim, input_shape=(input_dim,)))
    model.add(keras.layers.Dense(output_dim))

    x = np.random.random((num_samples, input_dim))
    y = np.random.random((num_samples, output_dim))

    for spec in gpu_specs:
        parallel_model = multi_gpu_model(model, gpus=spec)
        parallel_model.compile(loss='mse', optimizer='rmsprop')
        parallel_model.fit(x, y, epochs=epochs)
def test_multi_gpu_with_siamese():
    """Check the output names of a 2-GPU wrapper around a siamese model.

    A nested Sequential model is applied to two inputs; the model outputs
    the sum of both scores as well as each individual score.
    """
    input_shape = (3,)
    nested_model = keras.models.Sequential(
        [
            keras.layers.Dense(32, input_shape=input_shape),
            keras.layers.Dense(1),
        ],
        name='nested')

    input1 = keras.Input(input_shape)
    input2 = keras.Input(input_shape)

    # The same nested model scores both inputs.
    score1 = nested_model(input1)
    score2 = nested_model(input2)
    score_sum = keras.layers.Add(name='add')([score1, score2])

    siamese = keras.models.Model(inputs=[input1, input2],
                                 outputs=[score_sum, score1, score2],
                                 name='siamese')
    parallel_siamese = multi_gpu_model(siamese, 2)

    expected_names = ['add', 'nested_1', 'nested_2']
    assert parallel_siamese.output_names == expected_names
def create_models(backbone_retinanet, num_classes, l_num_classes, weights, multi_gpu=0, freeze_backbone=False):
    """ Creates three models (model, training_model, prediction_model).

    Args
        backbone_retinanet : A function to call to create a retinanet model with a given backbone.
        num_classes        : The number of classes to train.
        l_num_classes      : The number of label classes to train.
        weights            : The weights to load into the model.
        multi_gpu          : The number of GPUs to use for training.
        freeze_backbone    : If True, disables learning for the backbone.

    Returns
        model            : The base model. This is also the model that is saved in snapshots.
        training_model   : The training model. If multi_gpu=0, this is identical to model.
        prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
    """
    modifier = freeze_model if freeze_backbone else None

    def build_base():
        # Both the single- and multi-GPU paths must build the same network,
        # so `l_num_classes` is passed on in either case.
        network = backbone_retinanet(num_classes, l_num_classes, modifier=modifier)
        return model_with_weights(network, weights=weights, skip_mismatch=True)

    if multi_gpu > 1:
        # Keras recommends initialising a multi-gpu model on the CPU to ease
        # weight sharing, and to prevent OOM errors.
        with tf.device('/cpu:0'):
            model = build_base()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model = build_base()
        training_model = model

    # make prediction model
    prediction_model = retinanet_bbox(model=model)

    # compile model
    training_model.compile(
        loss={
            'p_regression'     : losses.smooth_l1(),
            'p_classification' : losses.focal(),
            # 'l_regression'   : losses.smooth_l1(),
            'l_classification' : losses.focal()
        },
        optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001)
    )

    return model, training_model, prediction_model
def run_benchmark(self, gpus=0):
    """Train a dense network on random 28x28 image data and time it.

    Args:
        gpus: Number of GPUs to use. Values greater than 1 enable
            multi-GPU training (`multi_gpu_model` on the TensorFlow
            backend, `cntk_gpu_mode_config` on the CNTK backend).

    Side effects:
        Sets `self.total_time` to the summed duration of every epoch
        except the first one.
    """
    num_classes = 10
    # Backend names must be compared by value: `is` tests object identity
    # and is not guaranteed to hold for equal strings.
    backend_name = keras.backend.backend()

    # Generate random input data
    input_shape = (self.num_samples, 28, 28)
    x_train, y_train = generate_img_input_data(input_shape)

    x_train = x_train.reshape(self.num_samples, 784)
    x_train = x_train.astype('float32')
    x_train /= 255

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)

    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    if backend_name == "tensorflow" and gpus > 1:
        model = multi_gpu_model(model, gpus=gpus)

    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    # create a distributed trainer for cntk
    if backend_name == "cntk" and gpus > 1:
        start, end = cntk_gpu_mode_config(model, x_train.shape[0])
        x_train = x_train[start: end]
        y_train = y_train[start: end]

    time_callback = timehistory.TimeHistory()
    model.fit(x_train, y_train,
              batch_size=self.batch_size,
              epochs=self.epochs,
              verbose=1,
              callbacks=[time_callback])

    # The first epoch (index 0) is left out of the reported total.
    self.total_time = 0
    for i in range(1, self.epochs):
        self.total_time += time_callback.times[i]
def generate(self):
    """Load the YOLO model and build the box/score/class output tensors.

    The file at `self.model_path` is first loaded as a complete model. If
    that fails, the network is constructed from the anchor and class
    counts and the file is loaded as weights only.

    Side effects:
        Sets `self.yolo_model`, `self.colors` and `self.input_image_shape`.

    Returns:
        Tuple `(boxes, scores, classes)` as returned by `yolo_eval`.
    """
    model_path = os.path.expanduser(self.model_path)
    assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

    # Load model, or construct model and load weights.
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    is_tiny_version = num_anchors == 6  # default setting
    try:
        self.yolo_model = load_model(model_path, compile=False)
    except Exception:
        # Only ordinary errors trigger the weights-only fallback;
        # KeyboardInterrupt / SystemExit propagate.
        if is_tiny_version:
            self.yolo_model = tiny_yolo_body(
                Input(shape=(None, None, 3)), num_anchors // 2, num_classes)
        else:
            self.yolo_model = yolo_body(
                Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
        # Use the expanded path so that a leading `~` works here as well.
        # make sure model, anchors and classes match
        self.yolo_model.load_weights(model_path)
    else:
        assert self.yolo_model.layers[-1].output_shape[-1] == \
            num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'

    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Generate colors for drawing bounding boxes: evenly spaced hues at
    # full saturation and value, converted to 0-255 RGB integer tuples.
    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
    rgb_floats = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    self.colors = [(int(r * 255), int(g * 255), int(b * 255))
                   for r, g, b in rgb_floats]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    self.input_image_shape = K.placeholder(shape=(2, ))
    if gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model, gpus=gpu_num)
    boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                       num_classes, self.input_image_shape,
                                       score_threshold=self.score,
                                       iou_threshold=self.iou)
    return boxes, scores, classes
def test_serialization():
    """Check that a saved and reloaded multi-GPU model predicts the same.

    The parallel model is trained for one epoch, saved to a temporary HDF5
    file and reloaded after clearing the session; predictions before and
    after must agree within `atol=1e-5`.
    """
    import os

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(3, input_shape=(4,)))
    model.add(keras.layers.Dense(4))

    x = np.random.random((100, 4))
    y = np.random.random((100, 4))

    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.compile(loss='mse', optimizer='rmsprop')
    parallel_model.fit(x, y, epochs=1)

    ref_output = parallel_model.predict(x)

    # mkstemp returns an open descriptor and leaves the file on disk; the
    # caller has to close the former and delete the latter.
    fd, fname = tempfile.mkstemp('.h5')
    os.close(fd)
    try:
        parallel_model.save(fname)

        K.clear_session()
        parallel_model = keras.models.load_model(fname)
    finally:
        os.remove(fname)

    output = parallel_model.predict(x)
    np.testing.assert_allclose(ref_output, output, atol=1e-5)
return model # Set some model compile parameters optimizer = 'adam' loss = utils.bce_dice_loss metrics = [utils.mean_iou] # Compile our model model = keras_model(img_width=img_width, img_height=img_height) model.summary() # For more GPUs if num_gpus > 1: model = multi_gpu_model(model, gpus=num_gpus) model.compile(optimizer=optimizer, loss=loss, metrics=metrics) seed = 42 # Runtime data augmentation def get_train_test_augmented(X_data=X_train, Y_data=Y_train, validation_split=0.1, batch_size=32, seed=seed): X_train, X_test, Y_train, Y_test = train_test_split( X_data, Y_data,
def base_model(model_name='base_model',
               dim_sub_capsule=16,
               dim_primary_capsule=5,
               n_channels=8,
               primary_cap_kernel_size=9,
               first_layer_kernel_size=9,
               conv_layer_filters=48,
               gpus=1):
    """Build, train and evaluate the capsule network with a decoder.

    The function builds three models sharing the same layers (training,
    evaluation and "manipulate"), trains the training model in two `fit`
    calls and hands the other two models to `process_results`. It reads
    several names that are not defined in this function (`NAME`,
    `input_shape`, `n_class`, `NUM_EPOCHS`, the train/val/test arrays and
    `target_names`). Nothing is returned.

    Args:
        model_name: Suffix appended to `NAME` to form the run name used in
            the log path and passed to `process_results`.
        dim_sub_capsule: `dim_capsule` of the `sub_caps` capsule layer.
        dim_primary_capsule: `dim_capsule` of the primary capsule layer.
        n_channels: `n_channels` of the primary capsule layer.
        primary_cap_kernel_size: Kernel size of the primary capsule layer.
        first_layer_kernel_size: Kernel size of the first Conv3D layer.
        conv_layer_filters: Number of filters of the first Conv3D layer.
        gpus: Number of GPUs; values above 1 build the models under the
            CPU device scope and wrap the training model with
            `multi_gpu_model`.
    """
    model_name = NAME + '_' + model_name

    def make_model():
        """Create the (train, eval, manipulate) models on shared layers."""
        x = Input(shape=input_shape)
        conv1 = Conv3D(filters=conv_layer_filters,
                       kernel_size=first_layer_kernel_size,
                       strides=1,
                       padding='valid',
                       activation='relu',
                       name='conv1')(x)
        primarycaps = PrimaryCap(conv1,
                                 dim_capsule=dim_primary_capsule,
                                 n_channels=n_channels,
                                 kernel_size=primary_cap_kernel_size,
                                 strides=2,
                                 padding='valid',
                                 name='primarycap_conv3d')
        sub_caps = CapsuleLayer(num_capsule=n_class,
                                dim_capsule=dim_sub_capsule,
                                routings=3,
                                name='sub_caps')(primarycaps)
        out_caps = Length(name='capsnet')(sub_caps)

        # Decoder network
        y = Input(shape=(n_class, ))
        # Training masks with the supplied label input `y`; evaluation
        # calls Mask on the capsule output alone.
        masked_by_y = Mask()([sub_caps, y])
        masked = Mask()(sub_caps)

        # shared decoder model in training and prediction
        decoder = Sequential(name='decoder')
        decoder.add(
            Dense(512, activation='relu', input_dim=dim_sub_capsule * n_class))
        decoder.add(Dense(1024, activation='relu'))
        decoder.add(Dense(np.prod(input_shape), activation='sigmoid'))
        decoder.add(Reshape(target_shape=input_shape, name='out_recon'))

        ### Models for training and evaluation (prediction and actually using)
        train_model = Model([x, y], [out_caps, decoder(masked_by_y)])
        eval_model = Model(x, [out_caps, decoder(masked)])

        ### manipulate model can be used to visualize activation maps for specific classes
        noise = Input(shape=(n_class, dim_sub_capsule))
        noised_sub_caps = Add()([sub_caps, noise])
        masked_noised_y = Mask()([noised_sub_caps, y])
        manipulate_model = Model([x, y, noise], decoder(masked_noised_y))
        return train_model, eval_model, manipulate_model

    ##### If using multiple GPUS ##########
    # With several GPUs the template models are created under the CPU
    # device scope.
    if gpus > 1:
        with tf.device("/cpu:0"):
            train_model, eval_model, manipulate_model = make_model()
    else:
        train_model, eval_model, manipulate_model = make_model()

    ################################ Compile and Train ###############################
    ##### IF USING MULTIPLE GPUS APPLY JUST BEFORE COMPILING ######
    if gpus > 1:
        train_model = multi_gpu_model(
            train_model, gpus=gpus)  #### Adjust for number of gpus
    # train_model = multi_gpu_model(train_model, gpus=2) #### Adjust for number of gpus
    ##### IF USING MULTIPLE GPUS ######

    INIT_LR = 0.008
    lam_recon = .04  # weight of the 'mse' reconstruction loss

    optimizer = Adam(lr=INIT_LR)
    train_model.compile(optimizer,
                        loss=[margin_loss, 'mse'],
                        loss_weights=[1., lam_recon],
                        metrics={'capsnet': 'accuracy'})

    # The TensorBoard log directory also holds the CSV training log.
    call_back_path = 'logs/{}.log'.format(model_name)
    tb = TensorBoard(log_dir=call_back_path)
    csv = CSVLogger(os.path.join(call_back_path, 'training.log'))
    early_stop = EarlyStopping(monitor='val_capsnet_acc',
                               min_delta=0,
                               patience=12,
                               verbose=1,
                               mode='auto')
    reduce_lr = ReduceLROnPlateau(monitor='val_capsnet_acc',
                                  factor=0.5,
                                  patience=3,
                                  min_lr=0.0001)

    #warm up with random restart by hand (I know it's jenky but it worked :-P)
    # Warm-up phase: 3 epochs at batch size 32, without callbacks.
    train_model.fit([x_train, y_train], [y_train, x_train],
                    batch_size=32,
                    epochs=3,
                    validation_data=[[x_val, y_val], [y_val, x_val]])
    # callbacks=[tb, checkpointer])
    # callbacks=[tb, csv, reduce_lr, early_stop])

    # Main phase: NUM_EPOCHS epochs at batch size 128 with all callbacks.
    train_model.fit(
        [x_train, y_train],
        [y_train, x_train],
        batch_size=128,
        epochs=NUM_EPOCHS,
        validation_data=[[x_val, y_val], [y_val, x_val]],
        # callbacks=[tb, checkpointer])
        callbacks=[tb, csv, reduce_lr, early_stop])

    ################################ Process the results ###############################
    process_results(model_name,
                    eval_model,
                    manipulate_model,
                    x_test,
                    y_test,
                    target_names,
                    INIT_LR=INIT_LR,
                    lam_recon=lam_recon,
                    NUM_EPOCHS=NUM_EPOCHS,
                    dim_sub_capsule=dim_sub_capsule,
                    dim_primary_capsule=dim_primary_capsule,
                    n_channels=n_channels,
                    primary_cap_kernel_size=primary_cap_kernel_size,
                    first_layer_kernel_size=first_layer_kernel_size,
                    conv_layer_filters=conv_layer_filters)
elif epoch > 80: lr *= 1e-1 return lr # get the model base on configs model = get_resnet_model(version=version, input_shape=input_shape, depth=depth, num_classes=num_classes) # use multi gpu model for multi gpus if num_gpus > 1: if K.backend() == 'mxnet': # MXNet merge weights on GPU by default model = multi_gpu_model(model, gpus=num_gpus) else: # merge weights on GPU model = multi_gpu_model(model, gpus=num_gpus, cpu_merge=False) # compile the model model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lr_schedule(0)), metrics=['accuracy']) model.summary() rootLogger.info('Training using: ' + model_type) # Prepare model saving directory. save_dir = os.path.join(os.getcwd(), 'saved_models') model_name = 'imagenet_%s_model.{epoch:03d}.h5' % model_type if not os.path.isdir(save_dir):
self.model_to_save = model def on_epoch_end(self, epoch, logs=None): fmt = checkpoint_models_path + 'model.%02d-%.4f.hdf5' self.model_to_save.save(fmt % (epoch, logs['val_loss'])) num_gpu = len(get_available_gpus()) if num_gpu >= 2: with tf.device("/cpu:0"): # Load our model, added support for Multi-GPUs model = build_model() if pretrained_path is not None: model.load_weights(pretrained_path) new_model = multi_gpu_model(model, gpus=num_gpu) # rewrite the callback: saving through the original model and not the multi-gpu model. model_checkpoint = MyCbk(model) else: new_model = build_model() if pretrained_path is not None: new_model.load_weights(pretrained_path) # finetune the whole network together. for layer in new_model.layers: layer.trainable = True sgd = keras.optimizers.SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) new_model.compile(optimizer=sgd, loss=alpha_prediction_loss) print(new_model.summary())
def build_generator(self):
    """Build the two generator networks on one shared encoder.

    Each generator chains the shared encoder with its own decoder. The
    single-device models are stored on `self.netGA_sm` / `self.netGB_sm`,
    and saved weights are loaded into them when that succeeds. With more
    than one GPU configured the returned models are `multi_gpu_model`
    wrappers around the stored models.

    Returns:
        Tuple `(netGA, netGB)`.
    """
    def conv_block(input_tensor, f):
        # Strided 3x3 convolution followed by ReLU.
        x = input_tensor
        x = Conv2D(f, kernel_size=3, strides=2, kernel_initializer=conv_init,
                   use_bias=False, padding="same")(x)
        x = Activation("relu")(x)
        return x

    def res_block(input_tensor, f):
        # Two 3x3 convolutions with a skip connection around them.
        x = input_tensor
        x = Conv2D(f, kernel_size=3, kernel_initializer=conv_init,
                   use_bias=False, padding="same")(x)
        x = LeakyReLU(alpha=0.2)(x)
        x = Conv2D(f, kernel_size=3, kernel_initializer=conv_init,
                   use_bias=False, padding="same")(x)
        x = add([x, input_tensor])
        x = LeakyReLU(alpha=0.2)(x)
        return x

    def upscale_ps(filters, use_instance_norm=True):
        # Convolution to 4x the filters, LeakyReLU, then PixelShuffler.
        # `use_instance_norm` is accepted but not used.
        def block(x):
            x = Conv2D(filters * 4, kernel_size=3, use_bias=False,
                       kernel_initializer=RandomNormal(0, 0.02),
                       padding='same')(x)
            x = LeakyReLU(0.1)(x)
            x = PixelShuffler()(x)
            return x
        return block

    def Encoder(nc_in=3, input_size=64):
        inp = Input(shape=(input_size, input_size, nc_in))
        x = Conv2D(64, kernel_size=5, kernel_initializer=conv_init,
                   use_bias=False, padding="same")(inp)
        x = conv_block(x, 128)
        x = conv_block(x, 256)
        x = conv_block(x, 512)
        x = conv_block(x, 1024)
        x = Dense(1024)(Flatten()(x))
        x = Dense(4 * 4 * 1024)(x)
        x = Reshape((4, 4, 1024))(x)
        out = upscale_ps(512)(x)
        return Model(inputs=inp, outputs=out)

    def Decoder_ps(nc_in=512, input_size=8):
        input_ = Input(shape=(input_size, input_size, nc_in))
        x = input_
        x = upscale_ps(256)(x)
        x = upscale_ps(128)(x)
        x = upscale_ps(64)(x)
        x = res_block(x, 64)
        x = res_block(x, 64)
        # One sigmoid channel and three tanh channels, concatenated in
        # that order.
        alpha = Conv2D(1, kernel_size=5, padding='same', activation="sigmoid")(x)
        rgb = Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x)
        out = concatenate([alpha, rgb])
        return Model(input_, out)

    encoder = Encoder()
    decoder_A = Decoder_ps()
    decoder_B = Decoder_ps()
    x = Input(shape=self.img_shape)
    netGA = Model(x, decoder_A(encoder(x)))
    netGB = Model(x, decoder_B(encoder(x)))

    self.netGA_sm = netGA
    self.netGB_sm = netGB

    try:
        netGA.load_weights(str(self.model_dir / hdf['netGAH5']))
        netGB.load_weights(str(self.model_dir / hdf['netGBH5']))
        print ("Generator models loaded.")
    except Exception:
        # Loading stays best-effort, but KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        print ("Generator weights files not found.")

    if self.gpus > 1:
        netGA = multi_gpu_model(self.netGA_sm, self.gpus)
        netGB = multi_gpu_model(self.netGB_sm, self.gpus)

    return netGA, netGB
def fit_model(X_train, y_train, X_val, y_val, G):
    """Train a DenseNet classifier on `G` GPUs and score it on validation data.

    The model is compiled with binary cross-entropy and a default
    `Adadelta` optimizer, trained with early stopping, learning-rate
    reduction and best-only checkpointing, then evaluated on the
    validation set.

    Args:
        X_train: Training inputs.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.
        G: Number of GPUs; also scales the batch size (`64 * G`).

    Returns:
        Tuple `(auc_score, parallel_model)`: the ROC AUC of the validation
        predictions and the trained multi-GPU model.
    """
    epochs = 5
    es_patience = 5
    lr_patience = 3
    dropout1 = None
    depth = 25  # 40
    nb_dense_block = 3
    nb_filter = 18
    growth_rate = 18
    weight_file = 'keras_densenet_simple_wt_30Sept.h5'
    bn = True
    reduction_ = 0.5
    nb_classes = 1
    img_dim = (2, 96, 96)

    model = DenseNet(depth=depth,
                     nb_dense_block=nb_dense_block,
                     growth_rate=growth_rate,
                     nb_filter=nb_filter,
                     dropout_rate=dropout1,
                     activation='sigmoid',
                     input_shape=img_dim,
                     include_top=True,
                     bottleneck=bn,
                     reduction=reduction_,
                     classes=nb_classes,
                     pooling='avg',
                     weights=None)
    model.summary()

    # NOTE: an Adam optimizer used to be constructed here but was never
    # passed to compile(); training has always run with Adadelta defaults.
    parallel_model = multi_gpu_model(model, gpus=G)
    parallel_model.compile(loss=binary_crossentropy,
                           optimizer=Adadelta(),
                           metrics=['accuracy'])

    es = EarlyStopping(monitor='val_loss', patience=es_patience, verbose=1)
    checkpointer = ModelCheckpoint(filepath=weight_file,
                                   verbose=1,
                                   save_best_only=True)
    lr_reducer = ReduceLROnPlateau(monitor='val_loss',
                                   factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=lr_patience,
                                   min_lr=0.5e-6,
                                   verbose=1)

    parallel_model.fit(X_train, y_train,
                       batch_size=64 * G,
                       epochs=epochs,
                       callbacks=[es, lr_reducer, checkpointer],
                       validation_data=(X_val, y_val),
                       verbose=2)

    # evaluate() returns [loss, accuracy] for the compiled metrics.
    _, acc = parallel_model.evaluate(X_val, y_val)
    print('current Test accuracy:', acc)

    pred = parallel_model.predict(X_val)
    auc_score = roc_auc_score(y_val, pred)
    print("current auc_score ------------------> ", auc_score)

    return auc_score, parallel_model
input_shape=input_image_size + (3, ), activation='relu')) _model.add(MaxPooling2D(pool_size=(2, 2))) _model.add(Conv2D(32, (3, 3), activation='relu')) _model.add(MaxPooling2D(pool_size=(2, 2))) _model.add(Conv2D(64, (3, 3), activation='relu')) _model.add(MaxPooling2D(pool_size=(2, 2))) _model.add(Flatten()) _model.add(Dense(64, activation='relu')) _model.add(Dropout(0.5)) _model.add(Dense(1, activation='sigmoid')) model = multi_gpu_model(_model, gpus=NGPUS) model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) print(model.summary()) # ### Learning epochs = 20 history = model.fit_generator(train_generator, steps_per_epoch=nimages_train // batch_size, epochs=epochs, validation_data=validation_generator,
tune['l2_ratio']))(x) else: output = Dense( 1, activation=def_params['out_activation'], kernel_regularizer=keras.regularizers.l2( tune['l2_ratio']))(x) optimizer = keras.optimizers.Adam(lr=tune['learning_rate']) model = Model(inputs=[img_input, clin_input], outputs=[output]) model.compile(loss=def_params['loss_func'], optimizer=optimizer) parallel_model = multi_gpu_model(model, 2) parallel_model.compile(loss=def_params['loss_func'], optimizer=optimizer) e_stop = EarlyStopping( monitor='val_loss', min_delta=def_params['min_delta'], patience=def_params['iter_patience'], mode='auto') callbacks = [e_stop] print('Training for comb %i with parameters %s' % (comb, str(tune))) start = time.time() history = model.fit(
def create_model_bottleneck(input_shape, anchors, num_classes, freeze_body=2,
                            weights_path=None, nb_gpu=1):
    """create the training model

    Builds the full YOLOv3 training model together with a bottleneck model
    (outputs of layers 246-248) and a last-layer model (layers 249-251
    plus the loss) that consumes the bottleneck outputs.

    Args:
        input_shape: Pair `(cnn_h, cnn_w)`.
        anchors: Sequence of anchors; its length sets the anchor count.
        num_classes: Number of classes.
        freeze_body: 1 freezes the first 185 layers, 2 freezes all but
            the last 3 layers; any other value freezes nothing.
        weights_path: Optional weights file, loaded by name with
            mismatching layers skipped. A missing file is only logged.
        nb_gpu: Number of GPUs. A falsy value hides all GPUs via
            `CUDA_VISIBLE_DEVICES`; 2 or more wraps the training and
            bottleneck models with `multi_gpu_model`.

    Returns:
        Tuple `(model, model_bottleneck, last_layer_model)`.
    """
    # K.clear_session()  # get a new session
    cnn_h, cnn_w = input_shape
    image_input = Input(shape=(cnn_w, cnn_h, 3))
    num_anchors = len(anchors)

    # One ground-truth input per output scale (strides 32, 16 and 8).
    y_true = [Input(shape=(cnn_h // stride,
                           cnn_w // stride,
                           num_anchors // 3,
                           num_classes + 5))
              for stride in (32, 16, 8)]

    _LOSS_ARGUMENTS = {
        'anchors': anchors,
        'num_classes': num_classes,
        'ignore_thresh': 0.5
    }

    if not nb_gpu:  # disable all GPUs
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    model_body = yolo_body_full(image_input, num_anchors // 3, num_classes)
    logging.info('Create YOLOv3 model with %i anchors and %i classes.',
                 num_anchors, num_classes)

    if weights_path is not None:
        weights_path = update_path(weights_path)
        # Load only when the file exists; warn when it is missing.
        if not os.path.isfile(weights_path):
            logging.warning('missing weights: %s', weights_path)
        else:
            logging.info('Load weights %s.', weights_path)
            model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
            if freeze_body in [1, 2]:
                # Freeze darknet53 body or freeze all but 3 output layers.
                num = (185, len(model_body.layers) - 3)[freeze_body - 1]
                for i in range(num):
                    model_body.layers[i].trainable = False
                logging.info('Freeze the first %i layers of total %i layers.',
                             num, len(model_body.layers))

    # get output of second last layers and create bottleneck model of it
    out1 = model_body.layers[246].output
    out2 = model_body.layers[247].output
    out3 = model_body.layers[248].output
    model_bottleneck = Model([model_body.input, *y_true], [out1, out2, out3])

    # create last layer model of last layers from yolo model
    in0 = Input(shape=model_bottleneck.output[0].shape[1:].as_list())
    in1 = Input(shape=model_bottleneck.output[1].shape[1:].as_list())
    in2 = Input(shape=model_bottleneck.output[2].shape[1:].as_list())
    last_out0 = model_body.layers[249](in0)
    last_out1 = model_body.layers[250](in1)
    last_out2 = model_body.layers[251](in2)
    model_last = Model(inputs=[in0, in1, in2],
                       outputs=[last_out0, last_out1, last_out2])

    fn_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                     arguments=_LOSS_ARGUMENTS)
    model_loss_last = fn_loss([*model_last.output, *y_true])
    last_layer_model = Model([in0, in1, in2, *y_true], model_loss_last)

    fn_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                     arguments=_LOSS_ARGUMENTS)
    model_loss = fn_loss([*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    if nb_gpu >= 2:
        model = multi_gpu_model(model, gpus=nb_gpu)
        model_bottleneck = multi_gpu_model(model_bottleneck, gpus=nb_gpu)

    return model, model_bottleneck, last_layer_model
def build_model(self, img_shape=(32, 168, 168), learning_rate=5e-5, gpu_id=None, nb_gpus=None, trained_model=None, temp=1):
    """Build and compile the segmentation network.

    The graph is an encoder/decoder made of ``self.downLayer`` /
    ``self.upLayer`` stages around an inline bottleneck, ending in a
    two-channel softmax (``conv_final``) that is split into a ``bg`` and a
    ``skin`` output.

    Args:
        img_shape: shape passed to the image ``Input``; its first two entries
            are also used for the label and flag inputs.
        learning_rate: learning rate handed to ``AdamWithWeightnorm``.
        gpu_id: device string used for ``tf.device`` in the multi-GPU branch.
        nb_gpus: ``None`` builds a plain model; anything else is passed as
            ``gpus`` to ``multi_gpu_model``.
        trained_model: optional weights file loaded before compiling.
        temp: not referenced anywhere in this method.

    Returns:
        The compiled model (``p_model``).
    """
    # Three inputs: the image, the unsupervised (ensemble) label with two
    # channels, and a per-pixel supervised flag.
    input_img = Input(img_shape, name='img_inp')
    unsupervised_label = Input((img_shape[0], img_shape[1], 2), name='unsup_label_inp')
    supervised_flag = Input((img_shape[0], img_shape[1]), name='flag_inp')

    kernel_init = 'he_normal'
    sfs = 16  # start filter size
    bn = True  # batch normalisation switch
    do = True  # dropout switch

    # Encoder: two helper-built stages; each also returns a tensor that is fed
    # to the matching upLayer call below (presumably the skip connection).
    conv1, conv1_b_m = self.downLayer(input_img, sfs, 1, bn)
    conv2, conv2_b_m = self.downLayer(conv1, sfs * 2, 2, bn)

    # Third encoder stage written inline.
    conv3 = Conv2D(sfs * 4, (3, 3), activation='relu', padding='same', kernel_initializer=kernel_init,
                   name='conv' + str(3) + '_1')(conv2)
    if bn:
        conv3 = BatchNormalization()(conv3)
    conv3 = Conv2D(sfs * 8, (3, 3), activation='relu', padding='same', kernel_initializer=kernel_init,
                   name='conv' + str(3) + '_2')(conv3)
    if bn:
        conv3 = BatchNormalization()(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    # Bottleneck. Dropout is called with training=True, so it stays active
    # whenever the layer is executed.
    conv4 = Conv2D(sfs * 16, (3, 3), activation='relu', padding='same', kernel_initializer=kernel_init,
                   name='conv4_1')(pool3)
    if bn:
        conv4 = BatchNormalization()(conv4)
    if do:
        conv4 = Dropout(0.5, seed=4, name='Dropout_' + str(4))(conv4, training=True)
    conv4 = Conv2D(sfs * 16, (3, 3), activation='relu', padding='same', kernel_initializer=kernel_init,
                   name='conv4_2')(conv4)
    if bn:
        conv4 = BatchNormalization()(conv4)

    # First decoder stage written inline; it concatenates with conv3.
    # conv5 = upLayer(conv4, conv3_b_m, sfs*16, 5, bn, do)
    up1 = Conv2DTranspose(sfs * 16, (2, 2), strides=(2, 2), activation='relu', padding='same',
                          name='up' + str(5))(conv4)
    up1 = concatenate([up1, conv3])
    conv5 = Conv2D(int(sfs * 8), (3, 3), activation='relu', padding='same', kernel_initializer=kernel_init,
                   name='conv' + str(5) + '_1')(up1)
    if bn:
        conv5 = BatchNormalization()(conv5)
    if do:
        conv5 = Dropout(0.5, seed=5, name='Dropout_' + str(5))(conv5, training=True)
    conv5 = Conv2D(int(sfs * 8), (3, 3), activation='relu', padding='same', kernel_initializer=kernel_init,
                   name='conv' + str(5) + '_2')(conv5)
    if bn:
        conv5 = BatchNormalization()(conv5)

    # Remaining decoder stages use the helper.
    conv6 = self.upLayer(conv5, conv2_b_m, sfs * 8, 6, bn, do)
    conv7 = self.upLayer(conv6, conv1_b_m, sfs * 4, 7, bn, do)

    # Two-channel softmax, split per channel into the named outputs.
    conv_out = Conv2D(2, (1, 1), activation='softmax', name='conv_final')(conv7)
    bg_out = Lambda(lambda x: x[:, :, :, 0], name='bg')(conv_out)
    skin_out = Lambda(lambda x: x[:, :, :, 1], name='skin')(conv_out)

    # Per-channel unsupervised targets, each stacked with the supervised flag
    # so the loss/metric factories receive both in one tensor.
    bg_ensemble_pred = Lambda(lambda x: x[:, :, :, 0], name='bgu')(
        unsupervised_label)
    skin_ensemble_pred = Lambda(lambda x: x[:, :, :, 1], name='skinu')(
        unsupervised_label)

    bg = K.stack([bg_ensemble_pred, supervised_flag])
    skin = K.stack([skin_ensemble_pred, supervised_flag])

    optimizer = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999)
    #optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999)

    if (nb_gpus is None):
        # Single-device model with the two named outputs 'bg' and 'skin'.
        p_model = Model([input_img, unsupervised_label, supervised_flag], [bg_out, skin_out])
        if trained_model is not None:
            p_model.load_weights(trained_model)

        p_model.compile(optimizer=optimizer,
                        loss={'bg': self.semi_supervised_loss(bg),
                              'skin': self.semi_supervised_loss(skin)},
                        metrics={
                            'bg': [self.dice_coef, self.unsup_dice_tb(bg), self.dice_tb(bg)],
                            'skin': [self.dice_coef, self.unsup_dice_tb(skin), self.dice_tb(skin)],
                        }
                        )
    else:
        # NOTE(review): the template model here has the single output
        # `conv_out` (layer name 'conv_final'), yet the loss/metrics dicts
        # below are keyed by 'bg' and 'skin' - confirm these keys resolve for
        # the multi-GPU model.
        with tf.device(gpu_id):
            model = Model([input_img, unsupervised_label, supervised_flag], [conv_out])
            if trained_model is not None:
                model.load_weights(trained_model)

        p_model = multi_gpu_model(model, gpus=nb_gpus)

        p_model.compile(optimizer=optimizer,
                        loss={'bg': self.semi_supervised_loss(bg, unsup_loss_class_wt=1),
                              'skin': self.semi_supervised_loss(skin, 1)},
                        metrics={
                            'bg': [self.dice_coef, self.unsup_dice_tb(bg, 1), self.dice_tb(bg, 1)],
                            'skin': [self.dice_coef, self.unsup_dice_tb(skin, 1), self.dice_tb(skin, 1)],
                        }
                        )

    return p_model
def train_on_texts(self, texts, context_labels=None, batch_size=128,
                   num_epochs=50, verbose=1, new_model=False, gen_epochs=1,
                   train_size=1.0, max_gen_length=300, validation=True,
                   dropout=0.0, via_new_model=False, save_epochs=0,
                   multi_gpu=False, **kwargs):
    """Train the model on a list of texts.

    Args:
        texts: list of strings to train on.
        context_labels: optional labels, one-hot encoded with
            ``LabelBinarizer`` before use.
        batch_size: batch size; multiplied by the number of available GPUs
            when ``multi_gpu`` is set.
        num_epochs: number of training epochs.
        verbose: verbosity passed to ``fit_generator``.
        new_model: when true (and ``via_new_model`` is false) the call is
            delegated to ``self.train_new_model`` and returns immediately.
        gen_epochs: forwarded to the ``generate_after_epoch`` callback.
        train_size: each index row is kept for training with this
            probability; the rest is used for validation.
        max_gen_length: forwarded to the ``generate_after_epoch`` callback.
        validation: whether to build a validation generator when
            ``train_size < 1.0``.
        dropout: dropout passed on when the model is rebuilt.
        via_new_model: marks a call coming from ``train_new_model``.
        save_epochs: forwarded to the ``save_model_weights`` callback.
        multi_gpu: train through ``multi_gpu_model`` on all available GPUs.
        **kwargs: ``prop_keep`` is accepted as an alias that overrides
            ``train_size``; everything is forwarded to ``train_new_model``.

    Raises:
        AssertionError: if fewer training rows than ``batch_size`` remain.
    """
    if new_model and not via_new_model:
        self.train_new_model(texts,
                             context_labels=context_labels,
                             num_epochs=num_epochs,
                             gen_epochs=gen_epochs,
                             train_size=train_size,
                             batch_size=batch_size,
                             dropout=dropout,
                             validation=validation,
                             save_epochs=save_epochs,
                             multi_gpu=multi_gpu,
                             **kwargs)
        return

    if context_labels:
        context_labels = LabelBinarizer().fit_transform(context_labels)

    # `prop_keep` is only available through kwargs; the bare name that was
    # used here before raised NameError.
    if 'prop_keep' in kwargs:
        train_size = kwargs['prop_keep']

    if self.config['word_level']:
        texts = [text_to_word_sequence(text, filters='') for text in texts]

    # calculate all combinations of text indices + token indices
    indices_list = [
        np.meshgrid(np.array(i), np.arange(len(text) + 1))
        for i, text in enumerate(texts)
    ]
    # A single np.block over the whole list hangs when the list is large, so
    # block each text separately and concatenate all pieces in one call
    # (pairwise concatenation inside a loop re-copies the array every step).
    indices_list = np.concatenate([np.block(grid) for grid in indices_list])

    # If a single text, there will be 2 extra indices, so remove them
    # Also remove first sequences which use padding
    if self.config['single_text']:
        indices_list = indices_list[self.config['max_length']:-2, :]

    indices_mask = np.random.rand(indices_list.shape[0]) < train_size

    if multi_gpu:
        num_gpus = len(K.tensorflow_backend._get_available_gpus())
        batch_size = batch_size * num_gpus

    gen_val = None
    val_steps = None
    if train_size < 1.0 and validation:
        indices_list_val = indices_list[~indices_mask, :]
        gen_val = generate_sequences_from_texts(texts, indices_list_val, self,
                                                context_labels, batch_size)
        val_steps = max(
            int(np.floor(indices_list_val.shape[0] / batch_size)), 1)

    indices_list = indices_list[indices_mask, :]

    num_tokens = indices_list.shape[0]
    assert num_tokens >= batch_size, "Fewer tokens than batch_size."

    level = 'word' if self.config['word_level'] else 'character'
    print("Training on {:,} {} sequences.".format(num_tokens, level))

    steps_per_epoch = max(int(np.floor(num_tokens / batch_size)), 1)

    gen = generate_sequences_from_texts(texts, indices_list, self,
                                        context_labels, batch_size)

    base_lr = 4e-3

    # scheduler function must be defined inline.
    def lr_linear_decay(epoch):
        return (base_lr * (1 - (epoch / num_epochs)))

    if context_labels is not None:
        if new_model:
            weights_path = None
        else:
            # Persist the current weights so the context-aware model that is
            # built next can start from them.
            weights_path = "{}_weights.hdf5".format(self.config['name'])
            self.save(weights_path)

        self.model = textgenrnn_model(self.num_classes,
                                      dropout=dropout,
                                      cfg=self.config,
                                      context_size=context_labels.shape[1],
                                      weights_path=weights_path)

    model_t = self.model

    if multi_gpu:
        # Do not locate model/merge on CPU since sample sizes are small.
        parallel_model = multi_gpu_model(self.model, gpus=num_gpus,
                                         cpu_merge=False)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer=RMSprop(lr=4e-3, rho=0.99))

        model_t = parallel_model
        print("Training on {} GPUs.".format(num_gpus))

    model_t.fit_generator(gen,
                          steps_per_epoch=steps_per_epoch,
                          epochs=num_epochs,
                          callbacks=[
                              LearningRateScheduler(lr_linear_decay),
                              generate_after_epoch(self, gen_epochs,
                                                   max_gen_length),
                              save_model_weights(self, num_epochs,
                                                 save_epochs)
                          ],
                          verbose=verbose,
                          max_queue_size=10,
                          validation_data=gen_val,
                          validation_steps=val_steps)

    # Keep the text-only version of the model if using context labels
    if context_labels is not None:
        self.model = Model(inputs=self.model.input[0],
                           outputs=self.model.output[1])
def train_single_model(train_files, valid_files):
    """Train (or resume training) the multi-label image classifier on 2 GPUs.

    Args:
        train_files: not referenced in this function; training batches come
            from ``gen.flow_from_directory(DATASET_PATH, ...)``.
        valid_files: collection of validation file names; 4000 entries are
            drawn from it with ``np.random.choice`` after seeding with 10.

    Returns:
        The minimum ``val_loss`` recorded in the training history, or ``0``
        when resuming was requested but no saved model file was found.
    """
    # NOTE(review): K, ModelCheckpoint, mean_squared_error, load_model and tf
    # are imported here but not used by any live statement below.
    import keras.backend as K
    from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau
    from keras.optimizers import Adam, SGD
    from keras.losses import mean_squared_error
    from keras.models import load_model
    import tensorflow as tf
    from keras.utils import multi_gpu_model

    image_classes = get_classes_for_images_dict()

    # Hold-out data handed to the F2-score checkpoint callback below.
    tuning_test_images, tuning_test_labels = get_tuning_labels_data_for_validation(
        299)

    # Augmenting generator for the training images.
    gen = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.15,
        height_shift_range=0.15,
        channel_shift_range=0.15,
        shear_range=0.15,
        zoom_range=0.15,
        rotation_range=15,
        preprocessing_function=inception_resnet_preprocess_input)

    #restore = 1
    patience = 50
    epochs = 1000
    optim_type = 'Adam'
    learning_rate = 0.001
    cnn_type = 'inception_resnet_v2'
    #num_gpus = 2
    print('Creating and compiling {}...'.format(cnn_type))
    print("Using {} gpus. ".format(2))

    val_data_path = '/home/ec2-user/.inclusive/train/val_images/'
    print("Num of val images: ", len(os.listdir(val_data_path)), '\n')

    ##### Prepare the model #####
    #model = get_model_resnet50_336()
    #model = get_model_densenet()
    model = get_model_inception_resnet_v2()

    if optim_type == 'SGD':
        optim = SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True)
    else:
        optim = Adam(lr=learning_rate)

    # The GPU count is hard-coded to 2 (matching the message printed above).
    model = multi_gpu_model(model, gpus=2)
    model.compile(optimizer=optim, loss='binary_crossentropy', metrics=[f2beta_loss, fbeta])
    ##### Prepare the model #####

    cache_model_path = MODELS_PATH + '{}_temp.h5'.format(cnn_type)
    final_model_path = MODELS_PATH + '{}.h5'.format(cnn_type)

    if os.path.isfile(final_model_path):
        print('Model already exists {}.'.format(final_model_path))

    # A fresh run wipes the whole models directory; it is recreated just below.
    if FLAGS.train_new_model:
        if os.path.isdir(MODELS_PATH):
            print("Removing previous models if any. ")
            shutil.rmtree(MODELS_PATH)

    if not os.path.isdir(MODELS_PATH):
        os.mkdir(MODELS_PATH)
    if not os.path.isdir(HISTORY_FOLDER_PATH):
        os.mkdir(HISTORY_FOLDER_PATH)

    if not FLAGS.train_new_model:
        if os.path.isfile(final_model_path):
            print('Load model from last point: ', final_model_path)
            #model = load_model(final_model_path, custom_objects={'f2beta_loss': f2beta_loss, 'fbeta': fbeta})
            model.load_weights(final_model_path)
        else:
            # Resuming was requested but there is nothing to resume from.
            print("\nCouldn't find previously trained models. Exit. ")
            return 0
    else:
        print("\nStarted to train new model. \n")

    # Fixed seed so the same validation subset is drawn on every run.
    np.random.seed(10)
    valid_files = np.random.choice(valid_files, 4000)
    print(valid_files[:4])

    print('Fitting model...')
    batch_size = 48
    print('Batch size: {}'.format(batch_size))
    # 96000 is a hard-coded sample count per epoch.
    steps_per_epoch = 96000 // batch_size
    validation_steps = len(valid_files) // (batch_size)
    print('Steps train: {}, Steps valid: {}'.format(steps_per_epoch, validation_steps))

    # NOTE(review): class_mode='multilabel', multilabel_classes and n_class
    # are passed to flow_from_directory here - presumably ImageDataGenerator
    # is a project-specific variant; confirm.
    gen_flow = gen.flow_from_directory(DATASET_PATH,
                                       target_size=(299, 299),
                                       batch_size=batch_size,
                                       class_mode='multilabel',
                                       multilabel_classes=image_classes,
                                       n_class=7178)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=patience, verbose=1),
        # [:-7] strips "temp.h5", so this path is "<MODELS_PATH><cnn_type>_latest.h5".
        MyModelCheckpoint(cache_model_path[:-7] + 'latest.h5',
                          monitor='val_fbeta',
                          mode='max',
                          save_weights_only=True,
                          save_best_only=True,
                          verbose=0),
        #MyModelCheckpoint(final_model_path, monitor='val_fbeta', mode='max', save_weights_only=True, save_best_only=True, verbose=1),
        # ModelCheckpoint(cache_model_path[:-3] + '_{epoch:02d}.h5', monitor='val_fbeta', mode='max', verbose=0),
        CSVLogger(HISTORY_FOLDER_PATH + 'history_{}_lr_{}_optim_{}.csv'.format(
            cnn_type, learning_rate, optim_type),
                  append=True),
        ReduceLROnPlateau(monitor='val_loss',
                          factor=0.9,
                          patience=5,
                          min_lr=1e-9,
                          min_delta=0.00001,
                          verbose=0,
                          mode='min'),
        # CyclicLR(base_lr=0.0001, max_lr=0.001, step_size=1000)
        # [:-3] strips ".h5" before the per-epoch suffix is appended.
        ModelCheckpoint_F2Score(
            cache_model_path[:-3] + '_byTestScore_{epoch:02d}.h5',
            save_best_only=True,
            save_weights_only=True,
            mode='max',
            patience=patience,
            verbose=0,
            validation_data=(tuning_test_images, tuning_test_labels)),
    ]

    history = model.fit_generator(generator=gen_flow,
                                  epochs=epochs,
                                  steps_per_epoch=steps_per_epoch,
                                  validation_data=batch_generator_val(
                                      np.array(list(valid_files)), image_classes, batch_size, val_data_path),
                                  validation_steps=validation_steps,
                                  verbose=2,
                                  max_queue_size=10,
                                  initial_epoch=0,
                                  callbacks=callbacks)

    min_loss = min(history.history['val_loss'])
    # NOTE(review): "Ep" below is the number of epochs that were run, not the
    # epoch at which the minimum loss occurred.
    print('Minimum loss: {} [Ep: {}]'.format(min_loss, len(history.history['val_loss'])))
    #model.load_weights(cache_model_path)
    #model.save(final_model_path)

    # Dump the full history as CSV plus a figure, tagged with loss and timestamp.
    now = datetime.datetime.now()
    filename = HISTORY_FOLDER_PATH + 'history_{}_{:.4f}_lr_{}_{}.csv'.format(
        cnn_type, min_loss, learning_rate, now.strftime("%Y-%m-%d-%H-%M"))
    pd.DataFrame(history.history).to_csv(filename, index=False)
    save_history_figure(history, filename[:-4] + '.png')
    # del model
    # K.clear_session()
    return min_loss
num_epochs = args.num_epochs print(">>> num_epochs received by trial") print(num_epochs) num_gpus = args.num_gpus print(">>> num_gpus received by trial:") print(num_gpus) print("\n>>> Constructing Model...") constructor = ModelConstructor(arch, nn_config) test_model = constructor.build_model() print(">>> Model Constructed Successfully\n") if num_gpus > 1: test_model = multi_gpu_model(test_model, gpus=num_gpus) test_model.summary() test_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4), metrics=['accuracy']) (x_train, y_train), (x_test, y_test) = cifar10.load_data() x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 y_train = to_categorical(y_train) y_test = to_categorical(y_test) augmentation = ImageDataGenerator(width_shift_range=0.1,
x = Dropout(dropout)(x) x = Dense(units=256)(x) x = BatchNormalization()(x) x = LeakyReLU(leaky)(x) x = Dropout(dropout)(x) x = Dense(units=1)(x) predictions = Activation('sigmoid')(x) inputs = [seq_input, z1_input] model = Model(inputs=inputs, outputs=predictions) pmodel = multi_gpu_model(model, gpus=ngpu) # pmodel = model pmodel.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy', ef.fbeta_score]) pmodel.summary() if existing_model: pmodel.load_weights(existing_model) ############## Training early_stopping = EarlyStopping(monitor='val_loss', patience=4) checkpoint = ModelCheckpoint(output_best_model, monitor='val_loss',
def create_stacked_auto_encoder_model(time_steps, dropout_rate, l2_reg_rate, level1_units, level2_units):
    """Build a stacked dense auto-encoder and its encoder-only companion.

    The layer sizes run level1 -> level2 -> ``encoded_features`` -> level2 ->
    level1 -> ``features``. ``features``, ``encoded_features`` and
    ``number_of_gpus`` are read from the enclosing module.

    Arguments:
        time_steps: length of the time axis of the input.
        dropout_rate: rate of the Dropout that follows every hidden Dense.
        l2_reg_rate: L2 activity-regularisation factor of every hidden Dense.
        level1_units: width of the outer hidden layers.
        level2_units: width of the inner hidden layers.

    Returns:
        ``(model, encoding_model)``: the compiled multi-GPU auto-encoder and
        the (uncompiled) model mapping the input to the encoded features.
    """

    def hidden_stage(tensor, units):
        # One regularised ReLU Dense layer followed by Dropout.
        dense_out = Dense(
            units,
            activation='relu',
            activity_regularizer=regularizers.l2(l2_reg_rate),
            kernel_initializer='glorot_normal')(tensor)
        return Dropout(dropout_rate)(dense_out)

    with tf.device('/cpu:0'):
        net_input = Input(shape=(time_steps, features),
                          batch_shape=(None, time_steps, features))

        # encoders 1-3
        encoded = net_input
        for width in (level1_units, level2_units, encoded_features):
            encoded = hidden_stage(encoded, width)

        # decoders 1-2
        hidden = encoded
        for width in (level2_units, level1_units):
            hidden = hidden_stage(hidden, width)

        # reconstruction layer
        reconstruction = Dense(features,
                               activation='sigmoid',
                               kernel_initializer='normal')(hidden)

        # template auto-encoder and the encoder that shares its layers
        autoencoder = Model(inputs=net_input, outputs=reconstruction)
        encoding_model = Model(inputs=net_input, outputs=encoded)

    # GPU parallelization does not work for stateful and batch size 1.
    # Replicate the model on the configured number of GPUs; this assumes the
    # machine really has that many GPUs available.
    autoencoder = multi_gpu_model(autoencoder, gpus=number_of_gpus)

    autoencoder.compile(optimizer=Adam(), loss='mse')
    autoencoder.summary()

    return autoencoder, encoding_model
def __init__(self, ser_model, **kwargs):
    """Adopt the state of a multi-GPU replica of ``ser_model``.

    ``kwargs`` are forwarded to ``multi_gpu_model``. The serial model is kept
    on ``self._smodel``.
    """
    replica = multi_gpu_model(ser_model, **kwargs)
    # Copy every instance attribute of the replica onto this object.
    vars(self).update(vars(replica))
    self._smodel = ser_model
def multi_gpu_application_folder_generator_benchmark():
    """Time Xception training fed by a directory generator on 1 and 2-8 GPUs.

    Before running this test:

    wget https://s3.amazonaws.com/img-datasets/cats_and_dogs_small.zip
    unzip cats_and_dogs_small.zip
    """
    print('####### Xception benchmark - folder generator i/o')
    image_shape = (150, 150, 3)
    class_count = 2
    n_epochs = 3
    n_steps = 100
    samples_per_batch = 64
    train_dir = '/home/ubuntu/cats_and_dogs_small/train'  # Change this

    def new_template():
        # Untrained Xception sized for the benchmark images.
        return keras.applications.Xception(weights=None,
                                           input_shape=image_shape,
                                           classes=class_count)

    def new_flow():
        # Fresh directory iterator over the training images.
        return augmenter.flow_from_directory(train_dir,
                                             target_size=image_shape[:2],
                                             batch_size=samples_per_batch,
                                             class_mode='categorical')

    def timed_fit(net, flow, n_workers):
        # Wall-clock seconds spent inside fit_generator only.
        began = time.time()
        net.fit_generator(flow,
                          steps_per_epoch=n_steps,
                          epochs=n_epochs,
                          workers=n_workers)
        return time.time() - began

    # Baseline: the plain single-device model.
    baseline = new_template()
    baseline.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    augmenter = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

    print('baseline training:', timed_fit(baseline, new_flow(), 4))

    # Same workload replicated on 2..8 GPUs, workers scaled with the GPU count.
    for gpu_count in range(2, 9):
        K.clear_session()
        with tf.device('/cpu:0'):
            template = new_template()
        replica = multi_gpu_model(template, gpus=gpu_count)
        replica.compile(loss='categorical_crossentropy', optimizer='rmsprop')

        elapsed = timed_fit(replica, new_flow(), 4 * gpu_count)
        print('%d gpus training:' % gpu_count, elapsed)
# Training script fragment: builds a small dense classifier, replicates it on
# 8 GPUs, trains the replica and evaluates/saves the template model.
nb_epoch = 30
regul= 1e-6  # used as the `decay` argument of both optimizers below

# One-hot encode the integer labels.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
do= 0.2  # dropout rate

# Build the template model under a CPU device scope.
with tf.device('/cpu:0'):
    model = Sequential()
    model.add(Dense(500,input_shape=(X_train.shape[1],)))
    #model.add(Dense(784, 128))
    model.add(Activation('tanh'))
    model.add(Dropout(do))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(do))
    model.add(Dense(10))
    model.add(Activation('softmax'))

# NOTE(review): `rms` is created but only `adm` is passed to compile() below.
rms= keras.optimizers.rmsprop(lr=llrate, decay=regul)
adm= keras.optimizers.Adam(lr=llrate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=regul)
# The template is compiled as well because it is evaluated directly further down.
model.compile(loss='mean_squared_error', optimizer=adm)

# The GPU count is hard-coded to 8.
parallel_model = multi_gpu_model(model, gpus=8)
parallel_model.compile(loss='mean_squared_error', optimizer=adm)
parallel_model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
                   verbose=2,
                   validation_data=(X_test, Y_test), callbacks=[tensorboard])

# Evaluation and saving go through the template model, not the replica.
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test score:', score)
model.save('cifarus10.h5')
def generate(self):
    """Load the YOLO model and build the box/score/class output tensors.

    Reads ``self.model_path``, ``self.anchors``, ``self.class_names``,
    ``self.gpu_num``, ``self.score`` and ``self.iou``; sets
    ``self.yolo_model``, ``self.colors`` and ``self.input_image_shape``.

    Returns:
        Tuple ``(boxes, scores, classes)`` as returned by ``yolo_eval``.

    Raises:
        AssertionError: if the path does not end in ``.h5``, or if a fully
            loaded model does not match the anchor/class counts.
    """
    model_path = os.path.expanduser(self.model_path)
    assert model_path.endswith(".h5"), "Keras model or weights must be a .h5 file."

    # Load model, or construct model and load weights.
    start = timer()
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    is_tiny_version = num_anchors == 6  # default setting
    try:
        self.yolo_model = load_model(model_path, compile=False)
    except Exception:
        # The file is not a full model (presumably weights only): rebuild the
        # architecture and load the weights into it. `Exception` rather than a
        # bare except so KeyboardInterrupt/SystemExit still propagate.
        self.yolo_model = (
            tiny_yolo_body(
                Input(shape=(None, None, 3)), num_anchors // 2, num_classes
            )
            if is_tiny_version
            else yolo_body(
                Input(shape=(None, None, 3)), num_anchors // 3, num_classes
            )
        )
        # Use the user-expanded path (the raw attribute may still contain
        # "~"); make sure model, anchors and classes match.
        self.yolo_model.load_weights(model_path)
    else:
        assert self.yolo_model.layers[-1].output_shape[-1] == num_anchors / len(
            self.yolo_model.output
        ) * (
            num_classes + 5
        ), "Mismatch between model and given anchor and class sizes"

    end = timer()
    print(
        "{} model, anchors, and classes loaded in {:.2f}sec.".format(
            model_path, end - start
        )
    )

    # Generate colors for drawing bounding boxes.
    if len(self.class_names) == 1:
        self.colors = ["GreenYellow"]
    else:
        # Evenly spaced hues, converted to 0-255 RGB tuples.
        hsv_tuples = [
            (x / len(self.class_names), 1.0, 1.0)
            for x in range(len(self.class_names))
        ]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(
                lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors,
            )
        )
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(
            self.colors
        )  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    self.input_image_shape = K.placeholder(shape=(2,))
    if self.gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
    boxes, scores, classes = yolo_eval(
        self.yolo_model.output,
        self.anchors,
        len(self.class_names),
        self.input_image_shape,
        score_threshold=self.score,
        iou_threshold=self.iou,
    )
    return boxes, scores, classes
def _build_regresser(model_name, weights, num_gpu, fine_tune): input_shape = (192, 192, 3) if model_name == "inception_v3": base_model = InceptionV3(input_shape=input_shape, weights=weights, include_top=False) elif model_name == "inception_resnet_v2": base_model = InceptionResNetV2(input_shape=input_shape, weights=weights, include_top=False) elif model_name == "xception": base_model = Xception(input_shape=input_shape, weights=weights, include_top=False) else: raise ValueError("NOT A SUPPORT MODEL") if model_name == "inception_v3": if fine_tune == INCEPTION_V3_INCEPTION_3: start = INCEPTION_V3_INCEPTION_3_START elif fine_tune == INCEPTION_V3_INCEPTION_4: start = INCEPTION_V3_INCEPTION_4_START elif fine_tune == INCEPTION_V3_INCEPTION_5: start = INCEPTION_V3_INCEPTION_5_START elif fine_tune == FINE_TUNE_ALL: start = -1 elif model_name == "inception_resnet_v2": if fine_tune == INCEPTION_RESNET_V2_INCEPTION_A: start = INCEPTION_RESNET_V2_INCEPTION_A_START elif fine_tune == INCEPTION_RESNET_V2_INCEPTION_B: start = INCEPTION_RESNET_V2_INCEPTION_B_START elif fine_tune == INCEPTION_RESNET_V2_INCEPTION_C: start = INCEPTION_RESNET_V2_INCEPTION_C_START elif fine_tune == FINE_TUNE_ALL: start = -1 elif model_name == "xception": if fine_tune == XCEPTION_ENTRY: start = XCEPTION_ENTRY_START elif fine_tune == XCEPTION_MID: start = XCEPTION_MID_START elif fine_tune == XCEPTION_EXIT: start = XCEPTION_EXIT_START elif fine_tune == FINE_TUNE_ALL: start = -1 else: raise ValueError("NOT A SUPPORT MODEL") for i, layer in enumerate(base_model.layers): if i < start: layer.trainable = False else: layer.trainable = True x = base_model.output x = GlobalAveragePooling2D()(x) predictions = Dense(1, activation=keras.activations.relu)(x) print predictions.get_shape() model = Model(inputs=base_model.input, outputs=predictions) optimizer = optimizers.RMSprop(lr=0.005, decay=0.95) if num_gpu > 1: model = multi_gpu_model(model, num_gpu) print "[x] compile model on %d GPU(s)" % num_gpu 
model.compile(optimizer=optimizer, loss='mean_absolute_error') return model, input_shape, base_model
def get_model(self):
    """Assemble, print and compile the 3D CNN used by Emphnet.

    Six convolutional groups reduce a (128, 128, 128, 1) volume to
    (1, 1, 1, 512); two 1x1x1 convolutions act as fully connected layers in
    front of a 2-way softmax. The model is replicated on 4 GPUs and compiled
    with categorical cross-entropy and Adam.
    """
    net = Sequential()

    # Convolutional group 1: the only strided convolution.
    net.add(Conv3D(64, (3, 3, 3), name='conv1', strides=2, padding='same',
                   input_shape=(128, 128, 128, 1)))     # (64, 64, 64, 64)
    net.add(BatchNormalization(name='batch_norm1'))
    net.add(Activation('relu', name='relu1'))
    net.add(MaxPooling3D(name='pool1', pool_size=(2, 2, 2)))  # (32, 32, 32, 64)

    # Convolutional groups 2-5 share one layout:
    # conv (same padding) -> batch norm -> relu -> 2x2x2 max pooling.
    repeated_groups = (
        (128, 'conv2', 'batch_norm2', 'relu2', 'pool2'),  # -> (16, 16, 16, 128)
        (256, 'conv3', 'batch_norm3', 'relu3', 'pool3'),  # -> (8, 8, 8, 256)
        (512, 'conv4', 'batch_norm4', 'relu4', 'pool4'),  # -> (4, 4, 4, 512)
        (512, 'conv5', 'batch_norm5', 'relu5', 'pool5'),  # -> (2, 2, 2, 512)
    )
    for filters, conv_name, norm_name, act_name, pool_name in repeated_groups:
        net.add(Conv3D(filters, (3, 3, 3), name=conv_name, padding='same'))
        net.add(BatchNormalization(name=norm_name))
        net.add(Activation('relu', name=act_name))
        net.add(MaxPooling3D(name=pool_name, pool_size=(2, 2, 2)))

    # Convolutional group 6: valid 2x2x2 convolution down to (1, 1, 1, 512).
    net.add(Conv3D(512, (2, 2, 2), name='conv6', padding='valid'))
    net.add(BatchNormalization(name='batch_norm6'))
    net.add(Activation('relu', name='relu6'))
    net.add(Dropout(0.5, name='dropout'))

    # Fully connected layers expressed as 1x1x1 convolutions, then the output.
    for fc_name, drop_name in (('fc1', 'dropout_fc1'), ('fc2', 'dropout_fc2')):
        net.add(Conv3D(512, (1, 1, 1), name=fc_name, activation='relu'))  # (1, 1, 1, 512)
        net.add(Dropout(0.5, name=drop_name))
    net.add(Flatten(name='flatten'))
    net.add(Dense(2, name='class', activation='softmax'))  # (2)

    print(net.summary())

    # Compile the 4-GPU replica.
    net = multi_gpu_model(net, gpus=4)
    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net
elif epoch > 80: lr *= 1e-1 print('Learning rate: ', lr) return lr with tf.device('/cpu:0'): base_model = ResNet50(weights=None, include_top=False, input_shape=(32, 32, 3), pooling=None) model = models.Sequential() model.add(base_model) model.add(layers.Flatten()) model.add(layers.Dense(10, activation='softmax')) parallel_model = multi_gpu_model(model, gpus=num_gpu, cpu_merge=True) parallel_model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lr_schedule(0)), metrics=['accuracy']) #model.fit(x_train, y_train, epochs=1, batch_size=20, validation_data=(x_test, y_test)) #model.summary() print(model_type) #pdb.set_trace() # Prepare model saving directory. save_dir = os.path.join(os.getcwd(), 'saved_models') model_name = 'my_test_resnet50_cifar_8gpu_model.{epoch:03d}.h5'
def main():
    """Fit a small LSTM on a hard-coded series and plot its predictions.

    The ``--gpus`` command line option selects how many GPUs are used; with a
    value other than 1 the model is trained through ``multi_gpu_model``.
    """
    # fix random seed for reproducibility
    numpy.random.seed(7)

    # Get CLI arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--gpus',
        help='Number of GPUs to use.',
        type=int, default=1)

    args = parser.parse_args()
    gpus = args.gpus

    # load the dataset (a single column of values oscillating within +/-60)
    dataframe = DataFrame(
        [0.00000, 5.99000, 11.92016, 17.73121, 23.36510, 28.76553, 33.87855,
         38.65306, 43.04137, 46.99961, 50.48826, 53.47244, 55.92235, 57.81349,
         59.12698, 59.84970, 59.97442, 59.49989, 58.43086, 56.77801, 54.55785,
         51.79256, 48.50978, 44.74231, 40.52779, 35.90833, 30.93008, 25.64279,
         20.09929, 14.35496, 8.46720, 2.49484, -3.50245, -9.46474, -15.33247,
         -21.04699, -26.55123, -31.79017, -36.71147, -41.26597, -45.40815,
         -49.09663, -52.29455, -54.96996, -57.09612, -58.65181, -59.62146,
         -59.99540, -59.76988, -58.94716, -57.53546, -55.54888, -53.00728,
         -49.93605, -46.36587, -42.33242, -37.87600, -33.04113, -27.87613,
         -22.43260, -16.76493, -10.92975, -4.98536, 1.00883, 6.99295, 12.90720,
         18.69248, 24.29100, 29.64680, 34.70639, 39.41920, 43.73814, 47.62007,
         51.02620, 53.92249, 56.28000, 58.07518, 59.29009, 59.91260, 59.93648,
         59.36149, 58.19339, 56.44383, 54.13031, 51.27593, 47.90923, 44.06383,
         39.77815, 35.09503, 30.06125, 24.72711, 19.14590, 13.37339, 7.46727,
         1.48653, -4.50907, -10.45961, -16.30564, -21.98875, -27.45215,
         -32.64127, -37.50424, -41.99248, -46.06115, -49.66959, -52.78175,
         -55.36653, -57.39810, -58.85617, -59.72618, -59.99941, -59.67316,
         -58.75066, -57.24115, -55.15971, -52.52713, -49.36972, -45.71902,
         -41.61151, -37.08823, -32.19438, -26.97885, -21.49376, -15.79391,
         -9.93625, -3.97931, 2.01738, 7.99392, 13.89059, 19.64847, 25.21002,
         30.51969, 35.52441, 40.17419, 44.42255, 48.22707, 51.54971, 54.35728,
         56.62174, 58.32045, 59.43644, 59.95856, 59.88160, 59.20632, 57.93947,
         56.09370, 53.68747, 50.74481, 47.29512, 43.37288, 39.01727, 34.27181,
         29.18392, 23.80443, 18.18710, 12.38805, 6.46522, 0.47779, -5.51441,
         -11.45151])
    dataset = dataframe.values
    dataset = dataset.astype('float32')

    # normalize the dataset
    # (disabled: the scaling code is kept only as a no-op string literal)
    '''scaler = MinMaxScaler(feature_range=(0, 1)) dataset = scaler.fit_transform(dataset)'''

    # split into train and test sets
    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size  # computed but not used below
    train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]

    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # create and fit the LSTM network
    with tf.device('/cpu:0'):
        serial_model = Sequential()
        serial_model.add(LSTM(4, input_shape=(1, look_back)))
        serial_model.add(Dense(1))

    if gpus == 1:
        parallel_model = serial_model
    else:
        parallel_model = multi_gpu_model(
            serial_model,
            cpu_relocation=True,
            gpus=gpus)

    parallel_model.compile(
        loss='mean_squared_error',
        optimizer='adam')

    # The batch size grows linearly with the GPU count.
    parallel_model.fit(
        trainX, trainY,
        epochs=100,
        batch_size=int(dataset.size * gpus / 20),
        verbose=2)

    # make predictions
    # With one GPU `parallel_model` is `serial_model`, so both branches predict
    # with the serial model (presumably it shares weights with the replica -
    # confirm against the multi_gpu_model documentation).
    if gpus == 1:
        trainPredict = parallel_model.predict(trainX)
        testPredict = parallel_model.predict(testX)
    else:
        trainPredict = serial_model.predict(trainX)
        testPredict = serial_model.predict(testX)

    '''print(trainPredict.shape, testPredict.shape) print(trainY.shape, testY.shape) print(trainY) print('\n')'''

    # Targets become row vectors so that [0] below yields the 1-D series.
    trainY = trainY.reshape((1, trainY.shape[0]))
    testY = testY.reshape((1, testY.shape[0]))

    # invert predictions
    # (disabled together with the scaling above)
    '''trainPredict = scaler.inverse_transform(trainPredict) trainY = scaler.inverse_transform([trainY]) testPredict = scaler.inverse_transform(testPredict) testY = scaler.inverse_transform([testY])'''

    '''print(trainPredict.shape, testPredict.shape) print(trainY.shape, testY.shape) # print(testY) sys.exit(0)'''

    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))

    # shift train predictions for plotting
    # (NaN everywhere except the slots covered by the train predictions)
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[
        len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict

    # plot baseline and predictions
    # plt.plot(scaler.inverse_transform(dataset), label='Complete Data')
    plt.plot(dataset, label='Complete Data')
    plt.plot(trainPredictPlot, label='Training Data')
    plt.plot(testPredictPlot, label='Prediction Data')
    plt.legend(loc='upper left')
    plt.title('Using {} GPUs'.format(gpus))
    plt.show()
def cnn_model(X_train, y_train, kernel_size, nb_filters, channels, nb_epoch,
              batch_size, nb_classes, nb_gpus):
    """
    Define, compile and fit the Convolutional Neural Network.

    The network is three Conv2D layers, one max-pooling layer and two dense
    layers ending in a softmax over ``nb_classes``. The image height and
    width are read from the module-level names ``img_rows`` and ``img_cols``.
    20% of the training data is held out for validation while fitting.

    INPUT
        X_train: Training images, passed straight to ``model.fit``
        y_train: Training labels, passed straight to ``model.fit``
        kernel_size: Two-element sequence (rows, cols) used by every Conv2D
        nb_filters: Number of filters in every Conv2D layer
        channels: Specify if the image is grayscale (1) or RGB (3)
        nb_epoch: Number of epochs
        batch_size: Batch size for the model
        nb_classes: Number of classes for classification
        nb_gpus: GPU count (or list of GPU ids) given to ``multi_gpu_model``.
            An integer of 1 or less trains the plain model instead.

    OUTPUT
        Fitted CNN model (the multi-GPU replica when one was created)
    """
    kernel = (kernel_size[0], kernel_size[1])

    model = Sequential()

    model.add(Conv2D(nb_filters, kernel,
                     padding='valid',
                     strides=1,
                     input_shape=(img_rows, img_cols, channels),
                     activation="relu"))
    model.add(Conv2D(nb_filters, kernel, activation="relu"))
    model.add(Conv2D(nb_filters, kernel, activation="relu"))

    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    print("Model flattened out to: ", model.output_shape)

    model.add(Dense(128))
    model.add(Activation('sigmoid'))
    model.add(Dropout(0.25))

    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    # multi_gpu_model raises ValueError when asked for a single GPU, so only
    # replicate when more than one device is requested. Non-integer values
    # (e.g. a list of GPU ids) are forwarded unchanged.
    if not isinstance(nb_gpus, int) or nb_gpus > 1:
        model = multi_gpu_model(model, gpus=nb_gpus)

    # NOTE(review): binary_crossentropy is paired with a softmax output here;
    # confirm this is intended when nb_classes > 2.
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    stop = EarlyStopping(monitor='val_acc',
                         min_delta=0.001,
                         patience=2,
                         verbose=0,
                         mode='auto')

    tensor_board = TensorBoard(log_dir='./Graph',
                               histogram_freq=0,
                               write_graph=True,
                               write_images=True)

    # NOTE(review): class_weight='auto' is a legacy value; verify that the
    # installed Keras version honours it rather than ignoring it.
    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=nb_epoch,
              verbose=1,
              validation_split=0.2,
              class_weight='auto',
              callbacks=[stop, tensor_board])

    return model
# Build the base ("template") model on the default device. The CPU-scoped
# construction (tf.device('/cpu:0')), which would host the weights in CPU
# memory, is not used in this script.
model = ResNet50(
    weights=None,
    input_shape=(height, width, 3),
    classes=num_classes)

keras.utils.print_summary(
    model, line_length=None, positions=None, print_fn=None)

# Replicate the template on 2 GPUs; this assumes the machine has at least
# 2 available GPUs. Replica outputs are not merged on the CPU
# (cpu_merge=False).
parallel_model = multi_gpu_model(model, gpus=2, cpu_merge=False)
parallel_model.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy')

# Write a diagram of the template model to model.png.
keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=False,
    show_layer_names=True,
    rankdir='TB')

# Generate dummy data.
x = np.random.random((num_samples, height, width, 3))
y = np.random.random((num_samples, num_classes))

# This `fit` call is distributed over the 2 GPUs: with a batch size of 128,
# each GPU processes 64 samples per step.
parallel_model.fit(x, y, batch_size=128, epochs=1)

# Saving would go through the template model (which shares the same
# weights), e.g. model.save('my_model.h5'); it is disabled here.
# Build the base ("template") model under a CPU device scope so that its
# weights are hosted in CPU memory; otherwise they may end up on a GPU,
# which would complicate weight sharing between replicas.
print("Build model")
with tf.device('/cpu:0'):
    model = keras.applications.Xception(
        weights=None,
        input_shape=(height, width, 3),
        classes=num_classes)

# Replicate the model on n_gpus GPUs when more than one is requested;
# otherwise train the template model directly.
if n_gpus <= 1:
    print("Using only 1 gpu")
    parallel_model = model
else:
    print("Replicate model on %d gpus" % (n_gpus))
    parallel_model = multi_gpu_model(model, gpus=n_gpus)

parallel_model.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy')

# Generate dummy data.
print("Generate random data")
x = np.random.random((num_samples, height, width, 3))
y = np.random.random((num_samples, num_classes))

# This `fit` call is distributed over the replicas: each batch of
# `batch_size` samples is split across the GPUs in use.
print("Begin fitting")
parallel_model.fit(x, y, batch_size=batch_size, epochs=2)

# Saving should go through the template model (which shares the same
# weights).
def __init__(self, ser_model, gpus):
    """Wrap a serial model in a multi-GPU replica and keep the original.

    Every instance attribute of the model returned by ``multi_gpu_model``
    is copied onto this object, and the serial model is stored as
    ``_smodel``.

    Args:
        ser_model: Template (serial) model to replicate.
        gpus: Forwarded as the second positional argument of
            ``multi_gpu_model``.
    """
    self.__dict__.update(multi_gpu_model(ser_model, gpus).__dict__)
    self._smodel = ser_model
def multi_gpu_application_folder_generator_benchmark(
        train_dir='/home/ubuntu/cats_and_dogs_small/train', max_gpus=8):
    """Benchmark Xception training fed from an image folder generator.

    Before running this test:

    wget https://s3.amazonaws.com/img-datasets/cats_and_dogs_small.zip
    unzip cats_and_dogs_small.zip

    A single-device baseline is timed first, then the same training run is
    repeated with ``multi_gpu_model`` for every GPU count from 2 up to
    ``max_gpus``. Elapsed times are printed; nothing is returned.

    # Arguments
        train_dir: Directory handed to ``flow_from_directory``. Defaults to
            the path that was previously hard-coded; point it at the
            unzipped ``cats_and_dogs_small/train`` folder.
        max_gpus: Largest GPU count to benchmark (inclusive). Defaults to
            the previously hard-coded value of 8.
    """
    print('####### Xception benchmark - folder generator i/o')
    model_cls = keras.applications.Xception

    height = 150
    width = 150
    num_classes = 2
    epochs = 3
    steps_per_epoch = 100
    batch_size = 64

    datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

    def make_train_generator():
        # A fresh generator is created for every run so that each timing
        # starts from the same generator state.
        return datagen.flow_from_directory(
            train_dir,
            target_size=(height, width),
            batch_size=batch_size,
            class_mode='categorical')

    # Baseline
    model = model_cls(weights=None,
                      input_shape=(height, width, 3),
                      classes=num_classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop')
    train_gen = make_train_generator()

    # Training
    start_time = time.time()
    model.fit_generator(train_gen,
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs,
                        workers=4)
    total_time = time.time() - start_time
    print('baseline training:', total_time)

    for i in range(2, max_gpus + 1):
        # Drop the previous graph so that runs do not accumulate state.
        K.clear_session()
        with tf.device('/cpu:0'):
            model = model_cls(weights=None,
                              input_shape=(height, width, 3),
                              classes=num_classes)
        parallel_model = multi_gpu_model(model, gpus=i)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')

        train_gen = make_train_generator()

        start_time = time.time()
        # The number of loader workers is scaled with the GPU count.
        parallel_model.fit_generator(
            train_gen,
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            workers=4 * i)
        total_time = time.time() - start_time
        print('%d gpus training:' % i, total_time)
# delete dmap model del dmap_model # name of file to save contour model to saved_model_filename = os.path.join( saved_models_dir, experiment_id + '_model_fold_' + str(i_fold) + '.h5') if gpu_number > 1: # compile and train model: Multiple GPUs # checkpoint to save model after each epoch checkpointer = cytometer.model_checkpoint_parallel.ModelCheckpoint( filepath=saved_model_filename, verbose=1, save_best_only=True) # compile model parallel_model = multi_gpu_model(contour_model, gpus=gpu_number) parallel_model.compile(loss={ 'classification_output': cytometer.utils.binary_focal_loss(alpha=.9, gamma=2) }, optimizer='Adadelta', metrics={'classification_output': 'accuracy'}, sample_weight_mode='element') # train model tic = datetime.datetime.now() hist = parallel_model.fit( train_dataset['im'], {'classification_output': train_dataset['contour']}, sample_weight={ 'classification_output': train_dataset['mask'][..., 0]
# Standardize each of the 3 colour channels in place for all three data
# splits: (value - mean) / std.
for i in range(3):
    for split in (x_train, x_val, x_test):
        split[:, :, :, i] = (split[:, :, :, i] - mean[i]) / std[i]

# Build the network.
img_input = Input(shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS))
output = resnext(img_input, CLASS_NUM)
resnet = Model(img_input, output)
print(resnet.summary())

# Set the optimizer and compile a 2-GPU replica of the network.
sgd = optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
parallel_model = multi_gpu_model(resnet, gpus=2)
parallel_model.compile(optimizer=sgd,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

# Set the callbacks: TensorBoard logging, learning-rate schedule and a
# best-only checkpoint on validation accuracy.
tb_cb = TensorBoard(log_dir='./resnext/', histogram_freq=0)
change_lr = LearningRateScheduler(scheduler)
ckpt = ModelCheckpoint(
    './ckpt.{epoch:02d}-{val_acc:.4f}.h5',
    monitor='val_acc',
    save_best_only=True,
    mode='max',
    period=25)
cbks = [change_lr, tb_cb, ckpt]
def go(options):
    """Train a variational sequence autoencoder and print periodic samples.

    The corpus is chosen by ``options.task``: 'file' loads sentences from
    ``options.data_dir``, 'europarl' loads the English side of the Europarl
    fi-en files, and any other value falls back to the IMDB dataset. A
    bidirectional RNN encoder and an RNN decoder are joined by a KL layer
    and a sampling layer. Training runs batch by batch with an annealed KL
    weight and logs each batch loss, divided by the batch's sequence length,
    to TensorBoard. Every ``options.out_every`` epochs it prints generated
    sentences, an argmax reconstruction and latent-space interpolations.

    Args:
        options: Parsed options object. Attributes read here:
            lstm_capacity, tb_dir, task, data_dir, top_words, clip_length,
            batch, embedding_size, rnn_type, hidden, dropout, num_gpu, lr,
            epochs, out_every.
    """
    lstm_hidden = options.lstm_capacity

    print('devices', device_lib.list_local_devices())

    tbw = SummaryWriter(log_dir=options.tb_dir)

    if options.task == 'file':

        x, _, _, x_ix_to_word = \
            util.load_sentences(options.data_dir, vocab_size=options.top_words)

        if options.clip_length is not None:
            x = [sentence[:options.clip_length] for sentence in x]

        # Finding the length of the longest sequence
        x_max_len = max(len(sentence) for sentence in x)

        print('max sequence length ', x_max_len)
        print(len(x_ix_to_word), 'distinct words')

        x = util.batch_pad(x, options.batch, add_eos=True)
        num_words = len(x_ix_to_word)

        def decode(seq):
            return ' '.join(x_ix_to_word[ix] for ix in seq)

    elif options.task == 'europarl':

        data_dir = options.data_dir
        x, _, _, x_ix_to_word, _, _, _, _ = \
            util.load_data(data_dir + os.sep + 'europarl-v8.fi-en.en',
                           data_dir + os.sep + 'europarl-v8.fi-en.fi',
                           vocab_size=options.top_words)

        # Finding the length of the longest sequence
        x_max_len = max(len(sentence) for sentence in x)

        print('max sequence length ', x_max_len)
        print(len(x_ix_to_word), 'distinct words')

        x = util.batch_pad(x, options.batch)
        num_words = len(x_ix_to_word)

        def decode(seq):
            print(seq)
            return ' '.join(x_ix_to_word[ix] for ix in seq)

    else:
        # Load only training sequences
        (x, _), _ = imdb.load_data(num_words=options.top_words)

        # rm start symbol
        x = [sentence[1:] for sentence in x]

        x = util.batch_pad(x, options.batch)

        decode = decode_imdb

        # This branch has no index-to-word table, so the vocabulary size
        # comes from the word cap passed to load_data. When no cap was
        # given, it falls back to the largest id present in the data.
        num_words = options.top_words
        if num_words is None:
            num_words = max(int(np.max(b)) for b in x) + 1

    print('Data Loaded.')

    print(sum([b.shape[0] for b in x]), ' sentences loaded')

    ## Define encoder
    seq_in = Input(shape=(None, ), name='inp')

    # The same embedding layer is shared by the encoder and the decoder.
    embedding = Embedding(num_words, options.embedding_size, input_length=None)
    embedded = embedding(seq_in)

    if options.rnn_type == 'lstm':
        encoder_rnn = LSTM(lstm_hidden)
    else:
        encoder_rnn = GRU(lstm_hidden)
    h = Bidirectional(encoder_rnn)(embedded)

    tozmean = Dense(options.hidden)
    zmean = tozmean(h)

    tozlsigma = Dense(options.hidden)
    zlsigma = tozlsigma(h)

    ## Define KL Loss and sampling

    # computes the KL loss and stores it for later
    kl = util.KLLayer(weight=K.variable(1.0))
    zmean, zlsigma = kl([zmean, zlsigma])

    eps_in = Input(shape=(options.hidden,), name='inp-epsilon')

    sample = util.Sample()
    zsample = sample([zmean, zlsigma, eps_in])

    ## Define decoder
    input_shifted = Input(shape=(None, ), name='inp-shifted')

    if options.rnn_type == 'lstm':
        # An LSTM needs two initial state tensors, a GRU only one.
        expandz_h = Dense(lstm_hidden, input_shape=(options.hidden,))
        expandz_c = Dense(lstm_hidden, input_shape=(options.hidden,))
        z_exp_h = expandz_h(zsample)
        z_exp_c = expandz_c(zsample)
        state = [z_exp_h, z_exp_c]
    else:
        expandz = Dense(lstm_hidden, input_shape=(options.hidden,))
        state = expandz(zsample)

    seq = embedding(input_shifted)
    seq = SpatialDropout1D(rate=options.dropout)(seq)

    if options.rnn_type == 'lstm':
        decoder_rnn = LSTM(lstm_hidden, return_sequences=True)
    else:
        decoder_rnn = GRU(lstm_hidden, return_sequences=True)
    h = decoder_rnn(seq, initial_state=state)

    towords = TimeDistributed(Dense(num_words))
    out = towords(h)

    auto = Model([seq_in, input_shifted, eps_in], out)

    ## Extract the encoder and decoder models from the autoencoder

    # - NB: This isn't exactly DRY. It seems much nicer to build a separate
    #   encoder and decoder model and then build an autoencoder model that
    #   chains the two. For the life of me, I couldn't get it to work. For
    #   some reason the gradients don't seem to propagate down to the
    #   encoder. Let me know if you have better luck.

    encoder = Model(seq_in, [zmean, zlsigma])

    z_in = Input(shape=(options.hidden,))
    s_in = Input(shape=(None,))
    seq = embedding(s_in)
    if options.rnn_type == 'lstm':
        z_exp_h = expandz_h(z_in)
        z_exp_c = expandz_c(z_in)
        state = [z_exp_h, z_exp_c]
    else:
        state = expandz(z_in)
    h = decoder_rnn(seq, initial_state=state)
    out = towords(h)
    decoder = Model([s_in, z_in], out)

    ## Compile the autoencoder model
    # multi_gpu_model raises ValueError for a single GPU, so an integer
    # count of 1 or less trains the plain model. Other values (e.g. a list
    # of GPU ids) are forwarded unchanged.
    gpus = options.num_gpu
    if gpus is not None and not (isinstance(gpus, int) and gpus <= 1):
        auto = multi_gpu_model(auto, gpus=gpus)

    opt = keras.optimizers.Adam(lr=options.lr)

    auto.compile(opt, sparse_loss)
    auto.summary()

    epoch = 0
    instances_seen = 0

    # Length of generated sequences. It is defined once here so the
    # interpolation check does not depend on the generation check having run.
    gen_len = 60 if options.clip_length is None else options.clip_length

    while epoch < options.epochs:

        klw = anneal(epoch, options.epochs)
        print('EPOCH {:03}: Set KL weight to {}'.format(epoch, klw))
        K.set_value(kl.weight, klw)

        for batch in tqdm(x):

            n, seq_len = batch.shape

            # prepend start symbol
            batch_shifted = np.concatenate([np.ones((n, 1)), batch], axis=1)
            # append pad symbol
            batch_out = np.concatenate(
                [batch, np.zeros((n, 1))], axis=1)[:, :, None]
            # random noise for the sampling layer
            eps = np.random.randn(n, options.hidden)

            loss = auto.train_on_batch([batch, batch_shifted, eps], batch_out)

            instances_seen += n
            tbw.add_scalar(
                'seq2seq/batch-loss', float(loss)/seq_len, instances_seen)

        epoch += 1

        if epoch % options.out_every == 0:

            # show samples for some sentences from random batches
            for i in range(CHECK):

                # CHECK 1. Generate some sentences from a z vector for a
                # random sentence from the corpus
                b = random.choice(x)

                z, _ = encoder.predict(b)
                z = z[None, 0, :]

                print('in             ', decode(b[0, :]))

                gen = generate_seq(decoder, z=z, size=gen_len)
                print('out 1          ', decode(gen))

                gen = generate_seq(
                    decoder, z=z, size=gen_len, temperature=0.05)
                print('out 2 (t0.05)  ', decode(gen))

                gen = generate_seq(
                    decoder, z=z, size=gen_len, temperature=0.0)
                print('out 3 (greedy) ', decode(gen))

                # CHECK 2. Show the argmax reconstruction of the first
                # sentence in the batch
                n, _ = b.shape
                # prepend start symbol
                b_shifted = np.concatenate([np.ones((n, 1)), b], axis=1)
                # random noise for the sampling layer
                eps = np.random.randn(n, options.hidden)

                out = auto.predict([b, b_shifted, eps])[None, 0, :]
                out = np.argmax(out[0, ...], axis=1)
                print(out)
                print('recon ', decode([int(o) for o in out]))

                print()

            for i in range(CHECK):

                # CHECK 3: Sample two z's from N(0,1) and interpolate
                # between them. Here we use greedy decoding: i.e. we pick
                # the word that gets the highest probability
                zfrom = np.random.randn(1, options.hidden)
                zto = np.random.randn(1, options.hidden)

                for d in np.linspace(0, 1, num=NINTER):
                    z = zfrom * (1-d) + zto * d
                    gen = generate_seq(
                        decoder, z=z, size=gen_len, temperature=0.0)
                    print('out (d={:.1}) \t'.format(d), decode(gen))
                print()
############################################################################################# with tf.device('/cpu:0'): model = ResNet101( include_top=True, l2_norm=True, scale_param=100, weights=None, classes=nb_classes) #weights=None for random initialization # load weights if saved_weights is not None: assert os.path.isfile(saved_weights) model.load_weights(saved_weights) multi_model = multi_gpu_model(model, gpus=nb_gpus) sgd = SGD(lr=0.001, decay=0.0, momentum=0.9, nesterov=False) multi_model.compile(optimizer=sgd, loss='sparse_categorical_crossentropy', metrics=['accuracy' ]) ############# PARAMS NOT FINALIZED##### # generators train_datagen = FaceAugDataGen(mode='training', batch_size=b_size, im_shape=(224, 224), n_classes=nb_classes, source='/lfs2/tmp/anh-train/', mean_file=mean_img_file) val_datagen = FaceAugDataGen(mode='validation', batch_size=b_size,
mask_false = K.cast(K.equal(y_true, mask_value), K.floatx()) mask_false_sum = K.sum(mask_false) mask_true_squared_error = K.sum(K.square( mask_true * (y_true - y_pred))) * (1 / mask_true_sum) mask_false_squared_error = K.sum(K.square( mask_false * (y_true - y_pred))) * (1 / mask_false_sum) #masked_mse = K.sum(masked_squared_error, axis = -1) / K.sum(mask_true, axis = -1) masked_squared_error = (mask_true_squared_error + mask_false_squared_error ) * (1 / K.cast(K.shape(y_true)[0], K.floatx())) masked_mse = masked_squared_error return masked_mse # This assumes that your machine has some available GPUs. parallel_model = multi_gpu_model(AutoEncoder, gpus=5, cpu_relocation=True) #parallel_model.compile(optimizer = 'Adam', loss = 'binary_crossentropy') parallel_model.compile(optimizer='Adam', loss=cosmic_squareLoss) #parallel_model.compile(optimizer = 'Adam', loss = squareLoss) #parallel_model.compile(optimizer = 'Adam', loss = 'mse') #parallel_model.compile(optimizer='Adam', loss='mean_squared_error') #num=cfg.BATCH_SIZE def input_generator(num): while True: img = proc.next() proc.read_next(num) image_dim = proc.image_dim() img = img.reshape(num, image_dim[2], image_dim[3], 1)
def model(self, params=None):
    """Create, train and return the Recurrent Neural Network.

    Args:
        params: Optional dict of hyperparameters. When None,
            ``self.hyperparameters`` is used as is. When given, its
            'batch_size' entry is multiplied by the GPU count in place
            before use. Keys read here: batch_size, units, dropout, layers,
            sequence_length, patience, epochs.

    Returns:
        serial_model: The single-device template model. The multi-GPU
            replica used for training shares its weights.

    """
    # Initialize key variables
    if params is None:
        _hyperparameters = self.hyperparameters
    else:
        # NOTE(review): this rescales the caller's dict in place, so reusing
        # the same dict for another call scales the batch size again.
        # Confirm callers rely on seeing the scaled value before changing it.
        params['batch_size'] = params['batch_size'] * self._gpus
        _hyperparameters = params

    # Calculate the steps per epoch
    epoch_steps = int(
        self.training_rows / _hyperparameters['batch_size']) + 1

    # Instantiate the base ("template") model under a CPU device scope so
    # that its weights are hosted in CPU memory. Otherwise they may end up
    # hosted on a GPU, which would complicate weight sharing.
    with tf.device('/cpu:0'):
        serial_model = Sequential()

        # First GRU layer. Being the first layer, it declares the input
        # shape: sequences of arbitrary length (None) whose observations
        # have self._training_vector_count input signals.
        serial_model.add(GRU(
            _hyperparameters['units'],
            stateful=True,
            batch_size=_hyperparameters['batch_size'],
            return_sequences=True,
            recurrent_dropout=_hyperparameters['dropout'],
            input_shape=(None, self._training_vector_count,)))

        # Remaining GRU layers, for a total of `layers` recurrent layers.
        for _ in range(1, _hyperparameters['layers']):
            serial_model.add(GRU(
                _hyperparameters['units'],
                stateful=True,
                batch_size=_hyperparameters['batch_size'],
                recurrent_dropout=_hyperparameters['dropout'],
                return_sequences=True))

        # Map the GRU output to one value per target signal. A linear
        # activation is used rather than a sigmoid so that the output is
        # not restricted to the range seen in the scaled training data.
        # The small uniform init range is there to help avoid NaN values
        # during training with deeper networks.
        init = RandomUniform(minval=-0.05, maxval=0.05)
        serial_model.add(Dense(
            self._training_class_count,
            activation='linear',
            kernel_initializer=init))

    # Apply multi-GPU logic.
    # cpu_relocation is deliberately not passed: per the Keras
    # implementation that option clones the model under a CPU scope, so the
    # replica would train a copy with its own weights and the serial_model
    # returned below would stay untrained. The template is already built
    # under a CPU scope above, so the replica can share its weights.
    try:
        parallel_model = multi_gpu_model(serial_model, gpus=self._gpus)
    except ValueError:
        # Raised by multi_gpu_model when the requested GPUs are not usable
        # (e.g. a single GPU or CPU only).
        parallel_model = serial_model
        print('> Training using single GPU or CPU...')
    else:
        print('> Training using multiple GPUs...')

    # Compile Model
    # This is the optimizer and the beginning learning-rate that we will
    # use. We then compile the Keras model so it is ready for training.
    optimizer = RMSprop(lr=1e-3)
    if self._binary is True:
        parallel_model.compile(
            loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])
    else:
        parallel_model.compile(
            loss=self._loss_mse_warmup,
            optimizer=optimizer,
            metrics=['accuracy'])

    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    print('\n> Model Summary (Serial):\n')
    serial_model.summary()
    print('\n> Model Summary (Parallel):\n')
    parallel_model.summary()

    # Create the batch-generator.
    generator = self._batch_generator(
        _hyperparameters['batch_size'],
        _hyperparameters['sequence_length'])

    # Validation Set
    # The batch-generator feeds training. For validation, the entire
    # validation sequence is passed as a single batch of one sequence
    # instead, so performance is measured on the whole sequence after each
    # epoch.
    validation_data = (np.expand_dims(self._x_validation_scaled, axis=0),
                       np.expand_dims(self._y_validation_scaled, axis=0))

    # Callback Functions

    # Write a weights-only checkpoint whenever the validation loss improves.
    callback_checkpoint = ModelCheckpoint(
        filepath=self._path_checkpoint,
        monitor='val_loss',
        verbose=1,
        save_weights_only=True,
        save_best_only=True)

    # Stop the optimization when performance worsens on the validation-set.
    callback_early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=_hyperparameters['patience'],
        verbose=1)

    # Write the TensorBoard log during training.
    callback_tensorboard = TensorBoard(
        log_dir='/tmp/23_logs/',
        histogram_freq=0,
        write_graph=False)

    # Reduce the learning-rate by a factor of 0.1 as soon as the
    # validation-loss has not improved since the last epoch (patience=0).
    # With the start rate of 1e-3 set above this gives 1e-4, which is also
    # the floor (min_lr).
    callback_reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        min_lr=1e-4,
        patience=0,
        verbose=1)

    callbacks = [
        callback_early_stopping,
        callback_checkpoint,
        callback_tensorboard,
        callback_reduce_lr]

    # Train the Recurrent Neural Network
    # A single "epoch" here is `epoch_steps` generator batches, not one
    # pass over the training-set, because training data is drawn from the
    # batch-generator.
    print('\n> Parameters for training\n')
    pprint(_hyperparameters)
    print('\n> Starting data training\n')

    parallel_model.fit_generator(
        generator=generator,
        epochs=_hyperparameters['epochs'],
        steps_per_epoch=epoch_steps,
        use_multiprocessing=True,
        validation_data=validation_data,
        callbacks=callbacks)

    # Determine whether the weights of the parallel and serial models match
    if general.weights_match(serial_model, parallel_model) is True:
        print('> Weights match (Parallel / Serial)')
    else:
        print('> Weights different (Parallel / Serial)')

    # Return
    return serial_model
# Last stage: three Conv_Block units with identical filters and kernel;
# only the first one sets strides and a convolutional shortcut.
for block_options in ({'strides': (1, 1), 'with_conv_shortcut': True},
                      {},
                      {}):
    x = Conv_Block(x, nb_filter=[128, 128, 224], kernel_size=(3, 3),
                   **block_options)

x = Flatten()(x)

# Two regularized dense layers, each followed by 50% dropout.
for units in (512, 128):
    x = Dense(units,
              kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
              kernel_regularizer=regularizers.l2(0.01),
              activation='relu')(x)
    x = Dropout(0.5)(x)

# 17-way softmax classifier head.
x = Dense(17, activation='softmax')(x)

model = Model(inputs=inpt, outputs=x)
model = multi_gpu_model(model, gpus=2)  # add

# Training (compile / fit with checkpoint, learning-rate reduction and
# early-stopping callbacks) is disabled in this script; previously saved
# weights are loaded instead and used for prediction only.
model.summary()
model.load_weights('epochResnet50_adam128_l20001.h5')

# output
pred1 = model.predict(x_test)
args = parser.parse_args()
print(args)
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)

# load data
(x_train, y_train), (x_test, y_test) = load_mnist()

# define model: the template is built under a CPU device scope, and the
# class count is taken from the distinct argmax labels in y_train
with tf.device('/cpu:0'):
    n_class = len(np.unique(np.argmax(y_train, 1)))
    model, eval_model = CapsNet(input_shape=x_train.shape[1:],
                                n_class=n_class,
                                num_routing=args.num_routing)
model.summary()
plot_model(model, to_file=args.save_dir+'/model.png', show_shapes=True)

# define multi-gpu model
multi_model = multi_gpu_model(model, gpus=args.gpus)

# init the model weights with the provided ones, if any
if args.weights is not None:
    model.load_weights(args.weights)

# train (on the multi-gpu replica) or go straight to testing; weights are
# always saved through the template model
if not args.is_training:
    # as long as weights are given, will run testing
    if args.weights is None:
        print('No weights are provided. Will test using random initialized weights.')
else:
    train(model=multi_model,
          data=((x_train, y_train), (x_test, y_test)),
          args=args)
    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

# testing runs in both modes
test(model=eval_model, data=(x_test, y_test))
if model == None: print("Model ..[ %s ] definition not found." % model_name) exit(0) model_json = model.to_json() with open(checkpoint_dir + model_name + ".json", 'w') as json_file: json_file.write(model_json) ##---------load pred trained weights if args.continue_training: print('Loading pre-trained model ~~~') model.load_weights(checkpoint_dir + "lanenet_weights_400.h5") model_parallel = model avaliabe_gpus = len(K.tensorflow_backend._get_available_gpus()) if avaliabe_gpus > 1: model_parallel = multi_gpu_model(model, gpus=avaliabe_gpus) print("\nTraining using %s GPUs.." % avaliabe_gpus) # CheckPoint and Callbacks tensorboard = TensorBoard(log_dir=checkpoint_dir, histogram_freq=0, write_graph=True, write_images=True) weights_path = checkpoint_dir + model_name + '_weights_{epoch:02d}.h5' class onEachEpochCheckPoint(Callback): def __init__(self, model_parallel, path, model,
def train(model, train_images, train_annotations, input_height=None, input_width=None, n_classes=None, verify_dataset=True, checkpoints_path=None, epochs=5, batch_size=2, validate=False, validation_split=0.0, val_images=None, val_annotations=None, val_batch_size=2, auto_resume_checkpoint=False, load_weights=None, steps_per_epoch=512, validation_steps=200, optimizer_name='adadelta', do_augment=False, classifier=None):
    """Train a segmentation model, optionally replicated over all local GPUs.

    Args:
        model: Model object exposing ``n_classes``, ``input_height``,
            ``input_width``, ``output_height``, ``output_width`` and
            ``model_name``. Creating a model from a name string is not
            supported here.
        train_images, train_annotations: Passed to the dataset verifier and
            to ``ImageSegmentationGen``. When ``validation_split`` is
            non-zero, ``annotations.csv`` is looked up (and created if
            missing) in the directory containing ``train_annotations``.
        input_height, input_width, n_classes: Accepted for backward
            compatibility; the values are always re-read from ``model``.
        verify_dataset: Run ``verify_segmentation_dataset`` before training.
        checkpoints_path: Prefix for ``<prefix>_config.json`` and the
            per-epoch weight files ``<prefix>.<epoch>``. ``None`` disables both.
        epochs: Number of epochs; each one is a separate ``fit_generator`` call.
        batch_size, val_batch_size: Per-device batch sizes. They are
            multiplied by the number of GPUs (at least 1).
        validate: Enable validation during training.
        validation_split: When non-zero, a stratified split of the training
            annotations is used and the validation set reuses the training
            image/annotation locations.
        val_images, val_annotations: Required when ``validate`` is true and
            ``validation_split`` is 0.0.
        auto_resume_checkpoint: Load the latest checkpoint found under
            ``checkpoints_path`` before training.
        load_weights: Optional weights file loaded before training.
        steps_per_epoch, validation_steps: Forwarded to ``fit_generator``.
        optimizer_name: Optimizer passed to ``compile``; ``None`` skips
            compilation.
        do_augment: Forwarded to the training generator.
        classifier: Unused.

    Raises:
        AssertionError: If validation data is required but missing, or a
            dataset fails verification.
    """
    num_gpus = len(
        [dev for dev in device_lib.list_local_devices() if dev.device_type == 'GPU'])
    # On a CPU-only machine num_gpus is 0; never scale the batch size to 0.
    batch_scale = max(1, num_gpus)

    n_classes = model.n_classes
    input_height = model.input_height
    input_width = model.input_width
    output_height = model.output_height
    output_width = model.output_width

    if validate and validation_split == 0.0:
        assert val_images is not None, "val_images is required when validate=True"
        assert val_annotations is not None, "val_annotations is required when validate=True"

    def compile_for_training(target):
        # Same loss/metrics for the template model and the multi-GPU wrapper.
        if optimizer_name is not None:
            target.compile(loss="categorical_crossentropy",
                           optimizer=optimizer_name,
                           metrics=['accuracy'])

    compile_for_training(model)

    if checkpoints_path is not None:
        with open(checkpoints_path + "_config.json", "w") as f:
            json.dump(
                {
                    "model_class": model.model_name,
                    "n_classes": n_classes,
                    "input_height": input_height,
                    "input_width": input_width,
                    "output_height": output_height,
                    "output_width": output_width
                }, f)

    if load_weights is not None and len(load_weights) > 0:
        print("Loading weights from ", load_weights)
        model.load_weights(load_weights)

    if auto_resume_checkpoint and (checkpoints_path is not None):
        latest_checkpoint = find_latest_checkpoint(checkpoints_path)
        if latest_checkpoint is not None:
            print("Loading the weights from latest checkpoint ",
                  latest_checkpoint)
            model.load_weights(latest_checkpoint)

    # additions by Alabi Bojesomo
    # Optional stratified train/validation split driven by annotations.csv.
    if validation_split != 0.0:
        annotation_path = os.path.dirname(train_annotations)
        annotations = os.path.join(annotation_path, 'annotations.csv')
        if not os.path.exists(annotations):
            create_annotation_df(annotation_path)
        anno_df = pd.read_csv(annotations, dtype=str)
        train_sampler, val_sampler = train_test_split(
            anno_df,
            test_size=validation_split,
            stratify=anno_df['class_id'],
            random_state=12345)
        # Both subsets are drawn from the training locations.
        val_images = train_images
        val_annotations = train_annotations
    else:
        train_sampler = None
        val_sampler = None

    if verify_dataset:
        print("Verifying training dataset")
        verified = verify_segmentation_dataset(train_images,
                                               train_annotations, n_classes)
        assert verified, "training dataset failed verification"
        if validate and validation_split == 0.0:
            print("Verifying validation dataset")
            verified = verify_segmentation_dataset(val_images,
                                                   val_annotations, n_classes)
            assert verified, "validation dataset failed verification"

    train_gen = ImageSegmentationGen(train_images,
                                     train_annotations,
                                     batch_size * batch_scale,
                                     n_classes,
                                     input_height,
                                     input_width,
                                     output_height,
                                     output_width,
                                     do_augment=do_augment,
                                     sampler=train_sampler)

    fit_kwargs = {"epochs": 1, "workers": 1}
    if validate:
        val_gen = ImageSegmentationGen(val_images,
                                       val_annotations,
                                       val_batch_size * batch_scale,
                                       n_classes,
                                       input_height,
                                       input_width,
                                       output_height,
                                       output_width,
                                       sampler=val_sampler)
        fit_kwargs["validation_data"] = val_gen
        fit_kwargs["validation_steps"] = validation_steps

    # Keep `model` as the template: it shares weights with the replica and is
    # the one whose weights are saved, so checkpoints can be loaded back into
    # a single-device model (including by the auto-resume path above).
    training_model = model
    if num_gpus > 1:
        training_model = multi_gpu_model(model, gpus=num_gpus)
        compile_for_training(training_model)

    for ep in range(epochs):
        print("Starting Epoch ", ep)
        training_model.fit_generator(train_gen, steps_per_epoch, **fit_kwargs)
        if checkpoints_path is not None:
            checkpoint_file = checkpoints_path + "." + str(ep)
            model.save_weights(checkpoint_file)
            # Report the path that was actually written.
            print("saved ", checkpoint_file)
        print("Finished Epoch", ep)
# Build the ResNet-18 classifier and its optimizer.
model = resnet.ResnetBuilder.build_resnet_18((12, 240, 320), 3, is_classification=True)
adam = keras.optimizers.Adam(lr=1e-04, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

# TensorBoard logging plus a model checkpoint with period=100.
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logs_path, write_graph=False)
checkpoint_callback = keras.callbacks.ModelCheckpoint(current_model_path, period=100)
callbacks_list = [tensorboard_callback, checkpoint_callback]

# Try to replicate the model over the GPUs; a ValueError from
# multi_gpu_model is treated as "train on a single GPU".
try:
    model = multi_gpu_model(model)
except ValueError:
    print("Training model on single GPU")
else:
    print("Training model on multiple GPUs")

# Compile and train.
model.compile(loss='binary_crossentropy',
              optimizer=adam,
              metrics=['binary_accuracy'])
model.fit_generator(generator,
                    steps_per_epoch=100,
                    epochs=2500,
                    callbacks=callbacks_list)
def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0, freeze_backbone=False, lr=1e-5, config=None):
    """ Build the base, training and prediction models.

    Args
        backbone_retinanet : Callable that creates a retinanet model for a given backbone.
        num_classes        : Number of classes to train.
        weights            : Weights to load into the base model.
        multi_gpu          : Number of GPUs to train on; values above 1 enable the parallel wrapper.
        freeze_backbone    : If True, freeze_model is passed as the backbone modifier.
        lr                 : Learning rate for the Adam optimizer.
        config             : Config parameters; anchor parameters are parsed from it when present.

    Returns
        model            : The base model (the one to save in snapshots).
        training_model   : The compiled model to train. Same object as model unless multi_gpu > 1.
        prediction_model : The base model wrapped by retinanet_bbox.
    """
    if freeze_backbone:
        modifier = freeze_model
    else:
        modifier = None

    # Use the anchor parameters from the config when given; otherwise pass
    # None so that the defaults are used.
    has_anchor_config = bool(config) and 'anchor_parameters' in config
    anchor_params = parse_anchor_parameters(config) if has_anchor_config else None
    num_anchors = anchor_params.num_anchors() if has_anchor_config else None

    def build_base_model():
        backbone = backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier)
        return model_with_weights(backbone, weights=weights, skip_mismatch=True)

    if multi_gpu > 1:
        from keras.utils import multi_gpu_model
        # Keras recommends initialising a multi-gpu model on the CPU to ease
        # weight sharing, and to prevent OOM errors.
        with tf.device('/cpu:0'):
            model = build_base_model()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model = build_base_model()
        training_model = model

    # The prediction model always wraps the base model, never the parallel one.
    prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

    training_losses = {
        'regression': losses.smooth_l1(),
        'classification': losses.focal()
    }
    training_model.compile(
        loss=training_losses,
        optimizer=keras.optimizers.adam(lr=lr, clipnorm=0.001)
    )

    return model, training_model, prediction_model
model.add(Flatten(name='flatten')) model.add(Dense(4096, name='fullconnected1')) model.add(BatchNormalization()) model.add(Activation('relu')) model.add(Dense(4096, name='fullconnected2')) model.add(BatchNormalization()) model.add(Activation('relu')) model.add(Dense(1000, name='fullconnected3')) model.add(BatchNormalization()) model.add(Activation('relu')) model.add(Dense(classes, activation='softmax', name='predictions')) model = multi_gpu_model(model, gpus=2) tensorboard = TensorBoard(log_dir='alexnetlogs/{}'.format(time.time()), histogram_freq=0, write_graph=True, write_images=True) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) start_time = time.time() history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=100, batch_size=32,
def train_keras(model_name, window_size, stride_size, model_config, mapping, train_datasets, validation_datasets, pre_callbacks=(), enable_multi_gpu=False, gpus=None, cpu_merge=True, cpu_relocation=False, batch_size=None, random_seed=None, ):
    """Build or load a Keras model, train it with generators and save it.

    Args:
        model_name: Name substituted into the path templates from the config.
        window_size: ``(height, width)`` of the tiles; also the default model
            input size.
        stride_size: Default stride passed to the data generators.
        model_config: Mapping with the training configuration. ``model_path``,
            ``swap_axes``, ``train_epochs`` and ``model_builder`` are
            required; the other keys read below are optional.
        mapping: Mapping with ``inputs`` and ``target`` entries.
        train_datasets, validation_datasets: Datasets for the generators.
        pre_callbacks: Post-processing callbacks for the training generator.
        enable_multi_gpu: Create the model on the CPU and train a
            ``multi_gpu_model`` replica of it.
        gpus, cpu_merge, cpu_relocation: Forwarded to ``multi_gpu_model``.
        batch_size: Batch size; falls back to ``model_config["batch_size"]``.
        random_seed: Optional TensorFlow random seed.

    The template model (not the multi-GPU replica) is what gets saved to the
    rendered ``model_path``.
    """
    log.info("Starting keras training")
    import tensorflow as tf

    # Seed initialization should happen as early as possible
    if random_seed is not None:
        log.info("Setting Tensorflow random seed to: %d", random_seed)
        tf.set_random_seed(random_seed)

    from keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau
    from ..tools.callbacks import ModelCheckpoint, CSVLogger
    from keras.optimizers import Adam
    from ..tools.utils import import_model_builder
    from keras.models import load_model
    from keras.utils import multi_gpu_model

    def render_path(template):
        # Fill the {model_name}/{time}/{hostname}/{user} placeholders.
        return template.format(model_name=model_name,
                               time=str(time.time()),
                               hostname=socket.gethostname(),
                               user=getpass.getuser())

    def ensure_parent_dir(path):
        # Create the directory part of `path` if it is missing. A bare file
        # name has an empty directory part, which must not be created.
        dir_head, dir_tail = os.path.split(path)
        if dir_head and dir_tail and not IOUtils.file_exists(dir_head):
            log.info("Creating directory: %s", dir_head)
            IOUtils.recursive_create_dir(dir_head)

    def resolve_steps(key, sample_count):
        # The config is a mapping, so the value has to be read with .get();
        # getattr() alone never finds a key. The attribute lookup is kept
        # first for config objects that expose the setting as an attribute.
        steps = getattr(model_config, key, None)
        if steps is None:
            steps = model_config.get(key, None)
        if steps is None:
            steps = sample_count // batch_size
        return steps

    if batch_size is None:
        batch_size = model_config.get("batch_size", None)
    model_path = model_config["model_path"]
    model_loss = model_config.get("loss", "categorical_crossentropy")
    log.info("Using loss: %s", model_loss)
    model_metrics = model_config.get("metrics", "accuracy")
    if isinstance(model_metrics, str):
        # compile() expects a list (or dict) of metrics, not a bare string.
        model_metrics = [model_metrics]

    # Make code compatible with previous version
    format_converter = model_config.get("format_converter", CategoricalConverter(2))
    swap_axes = model_config["swap_axes"]
    train_epochs = model_config["train_epochs"]
    prefetch_queue_size = model_config.get("prefetch_queue_size", 10)
    input_channels = len(mapping["inputs"])

    train_data = DataGenerator(train_datasets,
                               batch_size,
                               mapping["inputs"],
                               mapping["target"],
                               format_converter=format_converter,
                               swap_axes=swap_axes,
                               postprocessing_callbacks=pre_callbacks,
                               default_window_size=window_size,
                               default_stride_size=stride_size)
    train_data = ThreadedDataGenerator(train_data, queue_size=prefetch_queue_size)

    validation_data = DataGenerator(validation_datasets,
                                    batch_size,
                                    mapping["inputs"],
                                    mapping["target"],
                                    format_converter=format_converter,
                                    swap_axes=swap_axes,
                                    default_window_size=window_size,
                                    default_stride_size=stride_size)
    validation_data = ThreadedDataGenerator(validation_data, queue_size=prefetch_queue_size)

    model_builder, model_builder_custom_options = import_model_builder(model_config["model_builder"])
    model_builder_option = model_config.get("options", {})

    steps_per_epoch = resolve_steps("steps_per_epoch", len(train_data))
    validation_steps_per_epoch = resolve_steps("validation_steps_per_epoch", len(validation_data))

    log.info("Traing data has %d tiles", len(train_data))
    log.info("Validation data has %d tiles", len(validation_data))
    log.info("validation_steps_per_epoch: %d", validation_steps_per_epoch)
    log.info("steps_per_epoch: %d", steps_per_epoch)

    load_only_weights = model_config.get("load_only_weights", False)
    checkpoint = model_config.get("checkpoint", None)
    callbacks = []
    early_stopping = model_config.get("early_stopping", None)
    adaptive_lr = model_config.get("adaptive_lr", None)
    tensor_board = model_config.get("tensor_board", False)
    # TensorBoard log directory
    tb_log_dir = render_path(model_config.get("tb_log_dir", os.path.join("/tmp/", model_name)))

    keras_logging = model_config.get("log", None)
    if not keras_logging:
        log.info("Keras logging is disabled")
    else:
        csv_log_file = render_path(keras_logging)
        ensure_parent_dir(csv_log_file)
        log.info("Logging training data to csv file: %s", csv_log_file)
        csv_logger = CSVLogger(csv_log_file, separator=',', append=False)
        callbacks.append(csv_logger)

    if tensor_board:
        log.info("Registering TensorBoard callback")
        log.info("Event log dir set to: {}".format(tb_log_dir))
        tb_callback = TensorBoard(log_dir=tb_log_dir, histogram_freq=0, write_graph=True, write_images=True)
        callbacks.append(tb_callback)
        log.info("To access TensorBoard run: tensorboard --logdir {} --port <port_number> --host <host_ip> ".format(
            tb_log_dir))

    if checkpoint:
        checkpoint_file = checkpoint["path"]
        log.info("Registering checkpoint callback")
        # The checkpoint template uses %(name)s placeholders, unlike the
        # str.format templates used for the other paths.
        destination_file = checkpoint_file % {
            'model_name': model_name,
            'time': str(time.time()),
            'hostname': socket.gethostname(),
            'user': getpass.getuser()}
        ensure_parent_dir(destination_file)
        log.info("Checkpoint data directed to: %s", destination_file)
        checkpoint_options = checkpoint.get("options", {})
        checkpoint_callback = ModelCheckpoint(destination_file, **checkpoint_options)
        callbacks.append(checkpoint_callback)

    log.info("Starting training")

    options = {
        'epochs': train_epochs,
        'callbacks': callbacks
    }

    if len(validation_data) > 0 and validation_steps_per_epoch:
        log.info("We have validation data")
        options['validation_data'] = validation_data
        options["validation_steps"] = validation_steps_per_epoch
        # Early stopping and LR reduction are only registered when there is
        # validation data.
        if early_stopping:
            log.info("Enabling early stopping %s", str(early_stopping))
            callback_early_stopping = EarlyStopping(**early_stopping)
            options["callbacks"].append(callback_early_stopping)
        if adaptive_lr:
            log.info("Enabling reduce lr on plateu: %s", str(adaptive_lr))
            callback_lr_loss = ReduceLROnPlateau(**adaptive_lr)
            options["callbacks"].append(callback_lr_loss)
    else:
        log.warn("No validation data available. Ignoring")

    final_model_location = render_path(model_path)
    log.info("Model path is %s", final_model_location)

    existing_model_location = None
    if IOUtils.file_exists(final_model_location):
        existing_model_location = final_model_location

    if existing_model_location is not None and not load_only_weights:
        log.info("Loading existing model from: %s", existing_model_location)
        custom_objects = {}
        if model_builder_custom_options is not None:
            custom_objects.update(model_builder_custom_options)
        if enable_multi_gpu:
            # The template model of a multi-GPU replica is created on the CPU.
            with tf.device('/cpu:0'):
                model = load_model(existing_model_location, custom_objects=custom_objects)
        else:
            model = load_model(existing_model_location, custom_objects=custom_objects)
        log.info("Model loaded!")
    else:
        log.info("Building model")
        # Work on a copy so the caller's configuration is not modified.
        model_options = dict(model_builder_option)
        model_options['n_channels'] = input_channels
        input_height, input_width = window_size
        model_options['input_width'] = model_builder_option.get('input_width', input_width)
        model_options['input_height'] = model_builder_option.get('input_height', input_height)
        activation = model_config.get('activation', None)
        if activation:
            model_options["activation"] = activation
        if enable_multi_gpu:
            with tf.device('/cpu:0'):
                model = model_builder(**model_options)
        else:
            model = model_builder(**model_options)
        log.info("Model built")
        if load_only_weights and existing_model_location is not None:
            log.info("Loading weights from %s", existing_model_location)
            model.load_weights(existing_model_location)
            log.info("Finished loading weights")

    optimiser = model_config.get("optimiser", None)
    if optimiser is None:
        log.info("No optimiser specified. Using default Adam")
        optimiser = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)

    if enable_multi_gpu:
        log.info("Using Keras Multi-GPU Training")
        fit_model = multi_gpu_model(model, gpus=gpus, cpu_merge=cpu_merge, cpu_relocation=cpu_relocation)
    else:
        log.info("Using Keras default GPU Training")
        fit_model = model

    log.info("Compiling model")
    fit_model.compile(loss=model_loss, optimizer=optimiser, metrics=model_metrics)
    log.info("Model compiled")
    model.summary()

    fit_model.fit_generator(train_data, steps_per_epoch, **options)

    log.info("Saving model to %s", os.path.abspath(final_model_location))
    ensure_parent_dir(final_model_location)
    # Save the template model rather than the multi-GPU replica.
    model.save(final_model_location)
    log.info("Done saving")
    log.info("Training completed")
def _resolve_steps(raw_steps, sample_count, batch_size, option_name):
    """Convert a config value ('auto' or an integer string) into a step count."""
    if raw_steps == "auto":
        return int(sample_count / batch_size)
    try:
        return int(raw_steps)
    except ValueError as err:
        raise ValueError(f""" {option_name}: {raw_steps} is invalid, please use 'auto' or integer. """) from err


def main():
    """Train the model described by ``./config.ini``.

    Reads the DEFAULT and TRAIN sections of the config, prepares the
    train/dev split and image generators, trains (on several GPUs when
    CUDA_VISIBLE_DEVICES lists more than one) and dumps the training history
    to ``history.pkl`` in the output directory.

    Raises:
        RuntimeError: If ``.training.lock`` already exists in the output
            directory.
        ValueError: If ``train_steps`` or ``validation_steps`` is neither
            'auto' nor an integer.
    """
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    train_patient_count = cp["DEFAULT"].getint("train_patient_count")
    dev_patient_count = cp["DEFAULT"].getint("dev_patient_count")
    data_entry_file = cp["DEFAULT"].get("data_entry_file")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
    use_class_balancing = cp["TRAIN"].getboolean("use_class_balancing")
    use_default_split = cp["TRAIN"].getboolean("use_default_split")

    # if previously trained weights is used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights, turn on use_skip_split automatically **")
        use_skip_split = True
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            with open(training_stats_file) as stats_fp:
                training_stats = json.load(stats_fp)
        else:
            training_stats = {}
    else:
        # start over
        # The option name must not carry stray whitespace: a key with a
        # trailing space never matches and silently evaluates to None.
        use_skip_split = cp["TRAIN"].getboolean("use_skip_split")
        training_stats = {}

    split_dataset_random_state = cp["TRAIN"].getint("split_dataset_random_state")
    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # The lock file marks the output directory as in use; it is removed in
    # the `finally` below.
    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file, os.path.join(output_dir, os.path.split(config_file)[1]))

        # split train/dev/test
        if use_default_split:
            datasets = ["train", "dev", "test"]
            for dataset in datasets:
                shutil.copy(f"./data/default_split/{dataset}.csv", output_dir)
        elif not use_skip_split:
            print("** split dataset **")
            split_data(
                data_entry_file,
                class_names,
                train_patient_count,
                dev_patient_count,
                output_dir,
                split_dataset_random_state,
            )

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)

        # compute steps
        train_steps = _resolve_steps(train_steps, train_counts, batch_size, "train_steps")
        print(f"** train_steps: {train_steps} **")

        validation_steps = _resolve_steps(validation_steps, dev_counts, batch_size, "validation_steps")
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
            use_class_balancing=use_class_balancing
        )
        print("** class_weights **")
        for c, w in class_weights.items():
            print(f" {c}: {w}")

        print("** load model **")
        if use_base_model_weights:
            base_model_weights_file = cp["TRAIN"].get("base_model_weights_file")
        else:
            base_model_weights_file = None
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(output_dir, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir, output_weights_name)
        else:
            model_weights_file = None
        model = get_model(class_names, base_model_weights_file, model_weights_file)
        if show_model_summary:
            # summary() prints by itself; wrapping it in print() emits "None".
            model.summary()

        # recreate symlink folder for ImageDataGenerator
        symlink_dir_name = "image_links"
        create_symlink(image_source_dir, output_dir, symlink_dir_name)

        print("** create image generators **")
        train_data_path = f"{output_dir}/{symlink_dir_name}/train/"
        train_generator = custom_image_generator(
            ImageDataGenerator(horizontal_flip=True, rescale=1./255),
            train_data_path,
            batch_size=batch_size,
            class_names=class_names,
        )
        dev_data_path = f"{output_dir}/{symlink_dir_name}/dev/"
        dev_generator = custom_image_generator(
            ImageDataGenerator(horizontal_flip=True, rescale=1./255),
            dev_data_path,
            batch_size=batch_size,
            class_names=class_names,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        # With CUDA_VISIBLE_DEVICES unset the default "1" yields one entry,
        # i.e. single-device training.
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(output_weights_path)

        print("** compile model with class weights **")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            generator=dev_generator,
            steps=validation_steps,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"), batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=patience_reduce_lr, verbose=1),
            auroc,
        ]

        print("** training start **")
        history = model_train.fit_generator(
            generator=train_generator,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=dev_generator,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)