def test_multi_gpu_test_simple_model(self):
    """Fit a small dense model through `multi_gpu_model` in two ways.

    The same base model is wrapped once with an integer GPU count and once
    with an explicit list of GPU ids; each wrapper is compiled and fitted on
    the same random data. The test is skipped when
    `check_if_compatible_devices` rejects the requested GPU count.
    """
    gpus = 2
    target_gpu_id = [0, 1]
    num_samples = 1000
    input_dim, hidden_dim, output_dim = 10, 10, 1
    epochs = 2

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(hidden_dim, input_shape=(input_dim, )))
        model.add(keras.layers.Dense(output_dim))

        x = np.random.random((num_samples, input_dim))
        y = np.random.random((num_samples, output_dim))

        # First pass uses a GPU count, second pass an explicit id list.
        for device_spec in (gpus, target_gpu_id):
            parallel_model = multi_gpu_utils.multi_gpu_model(
                model, gpus=device_spec)
            parallel_model.compile(loss='mse', optimizer='rmsprop')
            parallel_model.fit(x, y, epochs=epochs)
def test_multi_gpu_test_invalid_devices(self):
    """Check that unusable `gpus` arguments are rejected with ValueError.

    Four specifications are tried in turn: one more GPU than is available,
    a list of ids `[0, 2, 4, 6, 8]`, a count of 1, and the single-id list
    `[0]`. The test is skipped when `check_if_compatible_devices(gpus=2)`
    is false.
    """
    if not check_if_compatible_devices(gpus=2):
        self.skipTest('multi gpu only')

    with self.cached_session():
        input_shape = (1000, 10)
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(10, activation='relu',
                               input_shape=input_shape[1:]))
        model.add(keras.layers.Dense(1, activation='sigmoid'))
        model.compile(loss='mse', optimizer='rmsprop')

        x = np.random.random(input_shape)
        y = np.random.random((input_shape[0], 1))

        # Each spec is a thunk so it is evaluated inside the assertRaises
        # context, exactly where the expected ValueError may be raised.
        invalid_specs = (
            lambda: len(keras.backend._get_available_gpus()) + 1,
            lambda: [0, 2, 4, 6, 8],
            lambda: 1,
            lambda: [0],
        )
        for make_spec in invalid_specs:
            with self.assertRaises(ValueError):
                parallel_model = multi_gpu_utils.multi_gpu_model(
                    model, gpus=make_spec())
                parallel_model.fit(x, y, epochs=2)
def poly_yolo(input_shape=(224, 640, 1), n_classes=1, backbone='darknet',
              lr=3e-4, decay=5e-6, multi_gpu=False, GPU_COUNT=2,
              weight_pt=''):
    """Build and compile the training graph of the poly-YOLO style detector.

    The network is: backbone -> `feats_fusion(x, 384)` -> 1x1 convolution
    with `2 + 1 + n_classes` output channels. A second input carries the
    ground truth and a `Lambda(mix_loss)` layer turns (y_true, prediction)
    into the model output, so the compiled Keras loss only averages column 0
    of that output.

    Args:
        input_shape: Shape passed to `Input` and to the backbone builder.
        n_classes: Number of class channels in the head.
        backbone: One of 'darknet', 'darknet_se' or 'efficientNet'.
        lr: Learning rate given to `Adam`.
        decay: Decay given to `Adam`.
        multi_gpu: When true, wrap the model with `multi_gpu_model`.
        GPU_COUNT: `gpus` argument used when `multi_gpu` is true.
        weight_pt: Path of a weights file; loaded by name (mismatches
            skipped) only if the path exists.

    Returns:
        Tuple `(model, single_model)`: the compiled (possibly multi-GPU)
        model and the underlying single-device model.

    Raises:
        ValueError: If `backbone` is not one of the supported names.
    """
    # Fail early with a clear message; previously an unknown name left
    # `backmodel` unbound and surfaced later as UnboundLocalError.
    supported_backbones = ('darknet', 'darknet_se', 'efficientNet')
    if backbone not in supported_backbones:
        raise ValueError(
            'Unknown backbone {!r}; expected one of {}'.format(
                backbone, ', '.join(supported_backbones)))

    inpt = Input(input_shape)

    # backbone
    if backbone == 'darknet':
        backmodel = darknet(input_shape=input_shape, n_classes=n_classes,
                            include_top=False, multi_out=True)
    elif backbone == 'darknet_se':
        backmodel = darknet(input_shape=input_shape, n_classes=n_classes,
                            n_filters=24, se=True, include_top=False,
                            multi_out=True)
    else:  # 'efficientNet'
        backmodel = EfficientNet(input_shape=input_shape, n_classes=n_classes,
                                 include_top=False, multi_out=True)
    x = backmodel(inpt)

    # feature fusion
    x = feats_fusion(x, 384)

    # head: 1x1 conv producing x, y, conf, cls channels
    x = Conv2D(2 + 1 + n_classes, 1, strides=1, padding='same')(x)

    # ground-truth input: [b, h, w, 2+1+cls] at 1/4 of the input resolution
    y_true = Input(
        (input_shape[0] // 4, input_shape[1] // 4, 2 + 1 + n_classes))

    # loss is computed inside the graph
    loss = Lambda(mix_loss)([y_true, x])

    # model
    model = Model([inpt, y_true], loss)

    if os.path.exists(weight_pt):
        print("load weight: ", weight_pt)
        model.load_weights(weight_pt, by_name=True, skip_mismatch=True)

    # Keep a handle on the un-replicated model before optional wrapping.
    single_model = model
    if multi_gpu:
        model = multi_gpu_model(model, gpus=GPU_COUNT)

    model.compile(Adam(lr=lr, decay=decay),
                  loss=lambda y_true, y_pred: K.mean(y_pred[:, 0]),
                  metrics=metric_lst)

    return model, single_model
def get_Inception_classifier(gpus=4):
    """Build and compile the 3D Inception classifier.

    The input shape comes from the module-level `CLASSIFY_INPUT_*`
    constants. `INCEPTION_BLOCKS` inception blocks are stacked, with a
    reduction block after every `INCEPTION_REDUCTION_STEPS` blocks except
    after the last one, followed by global max pooling, dropout and a
    2-way softmax.

    Args:
        gpus: Value forwarded to `multi_gpu_model`. Defaults to 4, the value
            that used to be hard-coded. With `gpus <= 1` (int) the
            single-device model is compiled and returned as both elements.

    Returns:
        Tuple `(model, model_s)`: the compiled model used for training and
        the underlying single-device model.
    """
    inputs = Input((CLASSIFY_INPUT_WIDTH, CLASSIFY_INPUT_HEIGHT,
                    CLASSIFY_INPUT_DEPTH, CLASSIFY_INPUT_CHANNEL))

    print('inputs')
    print(inputs.get_shape())

    # Make inception base
    x = inception_base(inputs)

    for i in range(INCEPTION_BLOCKS):
        x = inception_block(x, filters=INCEPTION_KEEP_FILTERS)

        # Reduce periodically, but never after the final block.
        if (i + 1) % INCEPTION_REDUCTION_STEPS == 0 and i != INCEPTION_BLOCKS - 1:
            x = reduction_block(x, filters=INCEPTION_KEEP_FILTERS // 2)

    print('top')
    x = GlobalMaxPooling3D()(x)
    print(x.get_shape())
    x = Dropout(INCEPTION_DROPOUT)(x)
    x = Dense(2, activation='softmax')(x)
    print(x.get_shape())

    model_s = Model(inputs=inputs, outputs=x)

    # multi_gpu_model rejects gpus <= 1, so fall back to the plain model.
    if isinstance(gpus, int) and gpus <= 1:
        model = model_s
    else:
        model = multi_gpu_model(model_s, gpus=gpus)

    model.compile(optimizer=Adam(lr=TRAIN_CLASSIFY_LEARNING_RATE),
                  loss='binary_crossentropy', metrics=['accuracy'])

    return model, model_s
def get_DenseNet_classifier(gpus=4):
    """Build and compile the 3D DenseNet classifier.

    The network takes a `(32, 32, 32, 1)` input, applies an initial 3x3x3
    convolution, then `DENSE_NET_BLOCKS` dense blocks separated by
    transition blocks (none after the last dense block), global average
    pooling, optional dropout and a 2-way softmax.

    Args:
        gpus: Value forwarded to `multi_gpu_model`. Defaults to 4, the value
            that used to be hard-coded. With `gpus <= 1` (int) the
            single-device model is compiled and returned as both elements.

    Returns:
        Tuple `(model, model_s)`: the compiled model used for training and
        the underlying single-device model.
    """
    inputs = Input((32, 32, 32, 1))
    x = Conv3D(DENSE_NET_INITIAL_CONV_DIM, (3, 3, 3), padding='same')(inputs)
    print('input')
    print(x.get_shape())

    for i in range(DENSE_NET_BLOCKS):
        x = dense_block(x)
        # No transition after the final dense block.
        if i != DENSE_NET_BLOCKS - 1:
            x = transition_block(x)

    print('top')
    x = GlobalAveragePooling3D()(x)
    print(x.get_shape())

    if DENSE_NET_ENABLE_DROPOUT:
        x = Dropout(DENSE_NET_DROPOUT)(x)

    x = Dense(2, activation='softmax')(x)
    print(x.get_shape())

    model_s = Model(inputs=inputs, outputs=x)

    # multi_gpu_model rejects gpus <= 1, so fall back to the plain model.
    if isinstance(gpus, int) and gpus <= 1:
        model = model_s
    else:
        model = multi_gpu_model(model_s, gpus=gpus)

    model.compile(optimizer=Adam(lr=TRAIN_CLASSIFY_LEARNING_RATE),
                  loss='binary_crossentropy', metrics=['accuracy'])

    return model, model_s
def __init__(self, data_rows=128, data_cols=128, weight_filepath=None,
             inference_only=False, net_name='default', gpus=1):
    """Store the settings and build/compile the network.

    If variable data size, set data_rows and data_cols to None.

    :param data_rows (int): data height, or None for variable height.
    :param data_cols (int): data width, or None for variable width.
    :param weight_filepath: stored on the instance; not used further here.
    :param inference_only (bool): initialize BN layers for inference.
    :param net_name (str): Name of this network (used in logging).
    :param gpus (int): How many GPUs to use for training.
    """
    self.weight_filepath = weight_filepath
    self.data_rows = data_rows
    self.data_cols = data_cols
    self.img_overlap = 30
    self.inference_only = inference_only
    self.net_name = net_name
    self.gpus = gpus

    # None means "variable size" (see docstring). Comparing None with an int
    # raises TypeError on Python 3, so only concrete sizes are range-checked.
    assert self.data_rows is None or self.data_rows >= 64, 'Height must be >64 '
    assert self.data_cols is None or self.data_cols >= 64, 'Width must be >64 '

    self.current_epoch = 0

    if self.gpus <= 1:
        self.model = self.build_resnet()
        self.compile_resnet(self.model)
    else:
        # Build the template model under the CPU device scope, then
        # replicate it across the requested GPUs.
        with tf.device("/cpu:0"):
            self.model = self.build_resnet()
        self.model = multi_gpu_model(self.model, gpus=self.gpus)
        self.compile_resnet(self.model)
def test_multi_gpu_with_siamese_network(self):
    """Check output names of a multi-GPU model that reuses a nested model.

    A shared `Sequential` named 'nested' is applied to two inputs; the
    siamese model outputs the sum and both individual scores. After wrapping
    with `multi_gpu_model` the output names must be
    `['add', 'nested', 'nested_1']`. Skipped when
    `check_if_compatible_devices` rejects two GPUs.
    """
    gpus = 2
    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        input_shape = (3, )
        shared_branch = keras.models.Sequential(
            [
                keras.layers.Dense(32, input_shape=input_shape),
                keras.layers.Dense(1),
            ],
            name='nested')

        # Both arms go through the very same nested model instance.
        branch_inputs = [keras.Input(input_shape) for _ in range(2)]
        branch_scores = [shared_branch(tensor) for tensor in branch_inputs]
        summed = keras.layers.Add(name='add')(branch_scores)

        siamese = keras.models.Model(inputs=branch_inputs,
                                     outputs=[summed] + branch_scores,
                                     name='siamese')
        parallel_siamese = multi_gpu_utils.multi_gpu_model(siamese, gpus)

        expected_names = ['add', 'nested', 'nested_1']
        self.assertEqual(parallel_siamese.output_names, expected_names)
def build(self) -> None:
    """
    Build the Siamese model and compile it with Adam and the contrastive
    loss.

    The Siamese model is first built under the CPU device scope and wrapped
    with `multi_gpu_model` over the number of GPUs reported by
    `_get_num_gpus()`. If any of that raises `ValueError`, the model is
    rebuilt without the device scope and used directly as the "parallel"
    model.
    """
    try:
        # The shared weights should be stored on the CPU for easy sharing
        # between multiple GPUs.
        # FIXME: https://github.com/keras-team/keras/issues/11313
        with tf.device('/cpu:0'):
            cpu_hosted = self._build_siamese_model()
        self.siamese_model = cpu_hosted

        gpu_count = _get_num_gpus()
        replicated = multi_gpu_utils.multi_gpu_model(cpu_hosted,
                                                     gpus=gpu_count)
        self.siamese_model_parallel = replicated
        logger.info('Parallelizing the embedder model over %d GPUs',
                    gpu_count)
    except ValueError:
        # Fallback path: a plain model doubles as the "parallel" one.
        plain = self._build_siamese_model()
        self.siamese_model = plain
        self.siamese_model_parallel = plain
        logger.info('Running the embedder model on a single GPU')

    # Train using Adam to optimize the contrastive loss.
    optimizer = Adam(self.lr)
    self.siamese_model_parallel.compile(optimizer, contrastive_loss)
def __load_model_mlp_classifier_action_vlad(n_classes, input_shape, n_gpus,
                                            is_load_weights, weight_path):
    """
    Build and compile an MLP classifier on top of a NetVLAD layer.

    Pipeline: 1x1x1 Conv3D to 512 channels -> BatchNorm -> ReLU ->
    ReshapeLayer -> NetVLAD (32 as its third argument) -> Dropout ->
    Dense(256) -> BatchNorm -> LeakyReLU -> Dropout -> Dense(n_classes) ->
    output activation.

    :param n_classes: number of units of the final dense layer.
    :param input_shape: 5-element shape; element 0 is dropped before the
        input tensor is created, elements 1 and 2 are used as the number of
        timesteps and the spatial side length.
    :param n_gpus: number of GPUs; values <= 1 skip `multi_gpu_model`.
    :param is_load_weights: when true, load weights from `weight_path`.
    :param weight_path: path handed to `model.load_weights`.
    :return: tuple `(model, parallel_model)`; both are the same object when
        no multi-GPU wrapper is created.
    """

    # optimizer and loss
    loss = keras_utils.LOSSES[0]
    metrics = [keras_utils.METRICS[0]]
    output_activation = keras_utils.ACTIVATIONS[3]
    # Only this optimizer was ever used; the earlier SGD/Adam assignments
    # were overwritten before use and have been dropped.
    optimizer = Adam(lr=0.01, epsilon=1e-4)

    # Unpacking also checks that the shape has exactly five entries.
    _, n_timesteps, side_dim, _, _ = input_shape

    input_shape = (input_shape[1:])
    t_input = Input(shape=input_shape)  # (None, 7, 7, 1024)
    tensor = t_input

    # spatial convolution
    n_channels = 512
    tensor = Conv3D(n_channels, kernel_size=(1, 1, 1), padding='same')(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = Activation('relu')(tensor)

    # reshape for vlad
    tensor = ReshapeLayer((n_channels, ))(tensor)

    # vlad layer
    max_samples = n_timesteps * side_dim * side_dim
    tensor = NetVLAD(n_channels, max_samples, 32)(tensor)

    # dense layers
    tensor = Dropout(0.5)(tensor)
    tensor = Dense(256)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(n_classes)(tensor)

    t_output = Activation(output_activation)(tensor)
    model = Model(input=t_input, output=t_output)

    if is_load_weights:
        model.load_weights(weight_path)

    # multi_gpu_model requires at least two GPUs, so 0 or 1 GPU both use
    # the plain model (previously n_gpus == 0 reached multi_gpu_model).
    if n_gpus <= 1:
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
        parallel_model = model
    else:
        parallel_model = multi_gpu_utils.multi_gpu_model(model, n_gpus)
        parallel_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    return model, parallel_model
def __init__(self, img_rows=128, img_cols=128, vgg_weights="imagenet",
             inference_only=False, net_name='default', gpus=1,
             vgg_device=None):
    """Create the PConvUnet. If variable image size, set img_rows and img_cols to None

    Args:
        img_rows (int): image height, or None for variable height.
        img_cols (int): image width, or None for variable width.
        vgg_weights (str): which weights to pass to the vgg network.
        inference_only (bool): initialize BN layers for inference.
        net_name (str): Name of this network (used in logging).
        gpus (int): How many GPUs to use for training.
        vgg_device (str): In case of training with multiple GPUs, specify which device to run VGG inference on.
            e.g. if training on 8 GPUs, vgg inference could be off-loaded exclusively to one GPU, instead of
            running on one of the GPUs which is also training the UNet.
    """

    # Settings
    self.img_rows = img_rows
    self.img_cols = img_cols
    self.img_overlap = 30
    self.inference_only = inference_only
    self.net_name = net_name
    self.gpus = gpus
    self.vgg_device = vgg_device

    # Scaling for VGG input
    self.mean = [0.485, 0.456, 0.406]
    self.std = [0.229, 0.224, 0.225]

    # Assertions. None means "variable size" (see docstring); comparing None
    # with an int raises TypeError on Python 3, so only concrete sizes are
    # range-checked.
    assert self.img_rows is None or self.img_rows >= 64, 'Height must be >64 pixels'
    assert self.img_cols is None or self.img_cols >= 64, 'Width must be >64 pixels'

    # Set current epoch
    self.current_epoch = 0

    # VGG layers to extract features from (first maxpooling layers, see pp. 7 of paper)
    self.vgg_layers = [3, 6, 10]

    # Instantiate the vgg network, optionally pinned to a specific device
    if self.vgg_device:
        with tf.device(self.vgg_device):
            self.vgg = self.build_vgg(vgg_weights)
    else:
        self.vgg = self.build_vgg(vgg_weights)

    # Create UNet-like model
    if self.gpus <= 1:
        self.model, inputs_mask = self.build_pconv_unet()
        self.compile_pconv_unet(self.model, inputs_mask)
    else:
        # Build the template under the CPU device scope, then replicate.
        with tf.device("/cpu:0"):
            self.model, inputs_mask = self.build_pconv_unet()
        self.model = multi_gpu_model(self.model, gpus=self.gpus)
        self.compile_pconv_unet(self.model, inputs_mask)
def fetch_model_from_arguments(config: Config) -> Tuple[Model, Model]:
    """Load or create the model described by `config` and compile it.

    A pretrained model file takes precedence over `config.model_type`.
    Freshly generated 'baseline' models use 2 input channels and 'transfer'
    models 3; `config.use_conv_activation` selects the `*2` builder variant
    and `config.use_output_postprocessing` appends the heatmap layers.
    The result is wrapped with `multi_gpu_model` when possible; on any
    failure the traceback is printed and the plain model is trained instead.

    Args:
        config: Run configuration (see attributes read below).

    Returns:
        Tuple `(model, trained_model)`: the base model and the compiled
        model to train (the same object if multi-GPU wrapping failed).

    Raises:
        SystemExit: With code -1 when `config.model_type` is unknown and no
            pretrained model file is given.
    """
    with tf.device('/cpu:0'):
        if config.pretrained_model_file is not None:
            print('Loading model from %s' % config.pretrained_model_file,
                  flush=True)
            model = load_model(config.pretrained_model_file)
        else:
            if config.model_type == 'baseline':
                builder = (models.baseline2 if config.use_conv_activation
                           else models.baseline)
                n_channels = 2
            elif config.model_type == 'transfer':
                builder = (models.transfer2 if config.use_conv_activation
                           else models.transfer)
                n_channels = 3
            else:
                print('Unknown model type %s' % config.model_type, flush=True)
                # Same effect as the site-provided exit(-1), but does not
                # depend on the `site` module being loaded.
                raise SystemExit(-1)

            print('Generating %s model' % config.model_type, flush=True)
            model = builder(input_shape=(*config.input_size, n_channels))
            if config.use_output_postprocessing:
                models.add_heatmap_layers(
                    model, fixation_sigma=config.postprocessing_fixation_sigma)

    model.summary()

    try:
        trained_model = multi_gpu_utils.multi_gpu_model(model)
        print('Generated multi-gpu model:', flush=True)
        trained_model.summary()
    except Exception:
        # Deliberate best-effort: fall back to the single-device model but
        # keep the reason visible.
        trained_model = model
        print('Failed to generate multi-gpu model:', flush=True)
        traceback.print_exc()
        print('', flush=True)

    print('Compiling model', flush=True)
    trained_model.compile(
        optimizer=Adam(learning_rate=config.learn_rate),
        loss='mean_squared_error',
    )

    return model, trained_model
def __load_model_mlp_classifier_timeception(n_classes, input_shape, n_gpus,
                                            is_load_weights, weight_path):
    """
    Build and compile an MLP classifier on top of Timeception layers.

    Pipeline: 3 Timeception temporal-convolution layers (dilated) ->
    MaxLayer over axes (1, 2, 3) -> Dropout -> Dense(512) -> BatchNorm ->
    LeakyReLU -> Dropout -> Dense(n_classes) -> output activation.

    :param n_classes: number of units of the final dense layer.
    :param input_shape: 5-element shape; element 0 is dropped before the
        input tensor is created and the last element (channels) determines
        the number of groups as `int(channels / 128.0)`.
    :param n_gpus: number of GPUs; values <= 1 skip `multi_gpu_model`.
    :param is_load_weights: when true, load weights from `weight_path`.
    :param weight_path: path handed to `model.load_weights`.
    :return: tuple `(model, parallel_model)`; both are the same object when
        no multi-GPU wrapper is created.
    """

    # optimizer and loss
    loss = keras_utils.LOSSES[0]
    metrics = [keras_utils.METRICS[0]]
    output_activation = keras_utils.ACTIVATIONS[3]
    # Only this optimizer was ever used; the earlier SGD/Adam assignments
    # were overwritten before use and have been dropped.
    optimizer = Adam(lr=0.01, epsilon=1e-4)

    n_tc_layer = 3
    expansion_factor = 5.0 / 4.0
    # Unpacking also checks that the shape has exactly five entries.
    _, _, _, _, n_channels_in = input_shape
    n_groups = int(n_channels_in / 128.0)
    print('... n_groups, expansion factor: %d, %.02f' % (n_groups, expansion_factor))

    input_shape = (input_shape[1:])
    t_input = Input(shape=input_shape)  # (None, 20, 7, 7, 1024)
    tensor = t_input

    # timeception layers
    tensor = timeception.timeception_temporal_convolutions(
        tensor, n_tc_layer, n_groups, expansion_factor, is_dilated=True)

    # spatio-temporal pooling
    tensor = MaxLayer(axis=(1, 2, 3))(tensor)

    # dense layers
    tensor = Dropout(0.5)(tensor)
    tensor = Dense(512)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(n_classes)(tensor)

    t_output = Activation(output_activation)(tensor)
    model = Model(input=t_input, output=t_output)

    if is_load_weights:
        model.load_weights(weight_path)

    # multi_gpu_model requires at least two GPUs, so 0 or 1 GPU both use
    # the plain model (previously n_gpus == 0 reached multi_gpu_model).
    if n_gpus <= 1:
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
        parallel_model = model
    else:
        parallel_model = multi_gpu_utils.multi_gpu_model(model, n_gpus)
        parallel_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    return model, parallel_model
def ConstructModel():
    """Build, replicate over two GPUs and compile the model chosen by `_g.TYPE`.

    Supported `_g.TYPE` values are '30', '64', '64deeper' and '128'; every
    builder receives the same keyword arguments taken from `_g`. The model is
    wrapped with `multi_gpu_model(gpus=2)`, compiled with SGD and binary
    cross-entropy, and weights are loaded from `_g.LOAD_WEIGHT_PATH` when
    `_g.LOAD_WEIGHT` is set.

    Returns:
        The compiled multi-GPU model.

    Raises:
        ValueError: If `_g.TYPE` is not one of the supported values
            (previously this surfaced as UnboundLocalError on `model`).
    """
    K.clear_session()

    # All builders share one signature, so the arguments are assembled once.
    builder_kwargs = dict(
        input_voxel_size=_g.INPUT_VOXEL_SIZE,
        augmenting_dropout_rate=_g.AUGMENTED_DROPOUT_RATE,
        conv_num_filters=_g.CONV_NUM_FILTERS,
        conv_filter_sizes=_g.CONV_FILTER_SIZES,
        conv_strides=_g.CONV_STRIDES,
        desc_dims=_g.DESC_DIMS)

    if _g.TYPE == '30':  # ModelNet30
        builder = build_model_30
    elif _g.TYPE == '64':  # KNU_Simplification
        builder = build_model_64
    elif _g.TYPE == '64deeper':  # KNU_Simplification
        builder = build_model_64_deeper
    elif _g.TYPE == '128':  # KNU_Simplification
        builder = build_model_128
    else:
        raise ValueError(
            "Unsupported model type {!r}; expected '30', '64', '64deeper' "
            "or '128'".format(_g.TYPE))

    model = builder(**builder_kwargs)

    sgd = optimizers.SGD(lr=_g.LEARNING_RATE, decay=_g.DECAY,
                         momentum=_g.MOMENTUM)

    model = multi_gpu_model(model, gpus=2)
    model.compile(loss='binary_crossentropy', optimizer=sgd)

    # Weights are loaded into the multi-GPU wrapper, as before.
    if _g.LOAD_WEIGHT:
        model.load_weights(_g.LOAD_WEIGHT_PATH)

    return model
def test_multi_gpu_test_multi_io_model(self):
    """Fit a two-input / two-output model through `multi_gpu_model`.

    The base model is wrapped once with an integer GPU count and once with
    an explicit list of GPU ids; each wrapper is compiled and fitted on the
    same random data. Skipped when `check_if_compatible_devices` rejects
    the requested GPU count.
    """
    gpus = 2
    target_gpu_id = [0, 1]
    num_samples = 1000
    input_dim_a, input_dim_b = 10, 5
    output_dim_a, output_dim_b = 1, 2
    hidden_dim = 10
    epochs = 2

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        input_a = keras.Input((input_dim_a, ))
        input_b = keras.Input((input_dim_b, ))
        hidden_a = keras.layers.Dense(hidden_dim)(input_a)
        hidden_b = keras.layers.Dense(hidden_dim)(input_b)
        merged = keras.layers.concatenate([hidden_a, hidden_b])
        output_a = keras.layers.Dense(output_dim_a)(merged)
        output_b = keras.layers.Dense(output_dim_b)(merged)
        model = keras.models.Model([input_a, input_b], [output_a, output_b])

        a_x = np.random.random((num_samples, input_dim_a))
        b_x = np.random.random((num_samples, input_dim_b))
        a_y = np.random.random((num_samples, output_dim_a))
        b_y = np.random.random((num_samples, output_dim_b))

        # First pass uses a GPU count, second pass an explicit id list.
        for device_spec in (gpus, target_gpu_id):
            parallel_model = multi_gpu_utils.multi_gpu_model(
                model, gpus=device_spec)
            parallel_model.compile(loss='mse', optimizer='rmsprop')
            parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
def __compile_model_for_finetuning(model, n_gpus):
    """
    Compile `model` for fine-tuning with SGD (lr=0.02, momentum=0.8).

    :param model: the model to compile.
    :param n_gpus: number of GPUs; values <= 1 compile `model` itself,
        larger values compile a `multi_gpu_model` wrapper instead.
    :return: tuple `(model, parallel_model)`; both are the same object when
        no multi-GPU wrapper is created.
    """
    # optimizer and loss
    loss = keras_utils.LOSSES[3]
    # Only this optimizer was ever used; three earlier assignments were
    # overwritten before use and have been dropped.
    optimizer = SGD(lr=0.02, momentum=0.8)

    # multi_gpu_model requires at least two GPUs, so 0 or 1 GPU both use
    # the plain model (previously n_gpus == 0 reached multi_gpu_model).
    if n_gpus <= 1:
        model.compile(loss=loss, optimizer=optimizer)
        parallel_model = model
    else:
        parallel_model = multi_gpu_utils.multi_gpu_model(model, n_gpus)
        parallel_model.compile(loss=loss, optimizer=optimizer)

    return model, parallel_model
def generate(self):
    """Load the YOLO model and build the box/score/class output tensors.

    The model file is first loaded as a full Keras model; if that fails the
    architecture is constructed (tiny variant when there are six anchors)
    and the file is loaded as weights only. Per-class drawing colors are
    generated and stored in `self.colors`, and `self.input_image_shape` is
    created as a placeholder of shape `(2,)`.

    Returns:
        Tuple `(boxes, scores, classes)` as returned by `yolo_eval`.

    Raises:
        AssertionError: If the model path does not end in '.h5', or a fully
            loaded model does not match the anchors/classes.
    """
    model_path = os.path.expanduser(self.model_path)
    assert model_path.endswith(
        '.h5'), 'Keras model or weights must be a .h5 file.'

    # Load model, or construct model and load weights.
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    is_tiny_version = num_anchors == 6  # default setting
    try:
        self.yolo_model = load_model(model_path, compile=False)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed by the weights-only fallback.
        self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors // 2, num_classes) \
            if is_tiny_version else yolo_body(Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
        # Use the expanded path (the original passed the raw
        # self.model_path, which breaks for '~'-style paths).
        # make sure model, anchors and classes match
        self.yolo_model.load_weights(model_path)
    else:
        assert self.yolo_model.layers[-1].output_shape[-1] == \
            num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'

    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Generate colors for drawing bounding boxes: evenly spaced hues,
    # converted to 0-255 RGB integer triples.
    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
    rgb_unit = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    self.colors = [(int(r * 255), int(g * 255), int(b * 255))
                   for r, g, b in rgb_unit]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(
        self.colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    self.input_image_shape = K.placeholder(shape=(2, ))
    if self.gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
    boxes, scores, classes = yolo_eval(self.yolo_model.output,
                                       self.anchors,
                                       num_classes,
                                       self.input_image_shape,
                                       score_threshold=self.score,
                                       iou_threshold=self.iou)
    return boxes, scores, classes
def test_multi_gpu_with_multi_input_layers(self):
    """Train one batch on a multi-GPU model whose layer takes two inputs.

    A `SimpleRNN` receives both the sequence input and an initial-state
    input. Skipped when `check_if_compatible_devices` rejects two GPUs.
    """
    gpus = 2
    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        sequence_in = keras.Input((4, 3))
        state_in = keras.Input((3, ))
        rnn = keras.layers.SimpleRNN(3, return_sequences=True)
        sequence_out = rnn(sequence_in, initial_state=state_in)

        batch_inputs = [np.random.randn(2, 4, 3), np.random.randn(2, 3)]
        batch_targets = np.random.randn(2, 4, 3)

        model = keras.Model([sequence_in, state_in], sequence_out)
        parallel_model = multi_gpu_utils.multi_gpu_model(model, gpus=gpus)
        parallel_model.compile(loss='mean_squared_error', optimizer='adam')
        parallel_model.train_on_batch(batch_inputs, batch_targets)
def test_nested_model_with_tensor_input(self):
    """Fit a multi-GPU model that is fed from dataset tensors.

    Inputs and targets come from a one-shot iterator over a repeated,
    batched dataset; the input tensor feeds an outer model wrapping a nested
    `Sequential`, and the targets are passed as `target_tensors`. Skipped
    when `check_if_compatible_devices` rejects two GPUs.
    """
    gpus = 2
    input_dim = 10
    num_samples = 16
    num_classes = 10
    shape = (input_dim, )

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with tf.Graph().as_default(), self.cached_session():
        input_shape = (num_samples, ) + shape
        features = np.random.randint(0, 255, input_shape)
        labels = np.random.randint(0, num_classes, (input_shape[0], ))
        labels = np_utils.to_categorical(labels, num_classes)
        features = features.astype('float32')
        labels = labels.astype('float32')

        # Endless stream of batches of four samples.
        dataset = tf.compat.v1.data.Dataset.from_tensor_slices(
            (features, labels)).repeat().batch(4)
        iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
        inputs, targets = iterator.get_next()

        input_tensor = keras.layers.Input(tensor=inputs)

        inner_model = keras.models.Sequential()
        inner_model.add(keras.layers.Dense(3, input_shape=(input_dim, )))
        inner_model.add(keras.layers.Dense(num_classes))

        outer_model = keras.Model(input_tensor, inner_model(input_tensor))
        parallel_model = multi_gpu_utils.multi_gpu_model(outer_model,
                                                         gpus=gpus)

        parallel_model.compile(
            loss='categorical_crossentropy',
            optimizer=optimizer_v1.RMSprop(lr=0.0001, decay=1e-6),
            metrics=['accuracy'],
            target_tensors=[targets])
        parallel_model.fit(epochs=1, steps_per_epoch=3)
def __build_model(self):
    """Build the DenseNet-style recognizer and its CTC training model.

    Returns:
        Tuple `(base_model, model, parallel_model)`: the prediction model
        (image -> per-step softmax), the training model whose single output
        is the 'ctc' Lambda layer, and the compiled model (a
        `multi_gpu_model` wrapper when `self.num_gpu > 1`, otherwise the
        training model itself).
    """
    image_input = Input(shape=self.image_shape, name="the_input")
    n_filters = self.filters

    features = Conv2D(n_filters, (5, 5), strides=(2, 2),
                      kernel_initializer='he_normal', padding='same',
                      use_bias=False,
                      kernel_regularizer=l2(self.weight_decay))(image_input)

    # Two (dense block -> transition block) stages, then a final dense block.
    for _ in range(2):
        features, n_filters = _dense_block(features, 8, n_filters, 8, None,
                                           self.weight_decay)
        features, n_filters = _transition_block(features, 128,
                                                self.dropout_rate, 2,
                                                self.weight_decay)
    features, n_filters = _dense_block(features, 8, n_filters, 8, None,
                                       self.weight_decay)

    features = BatchNormalization(axis=-1, epsilon=1.1e-5)(features)
    features = Activation('relu')(features)

    features = Permute((2, 1, 3), name='permute')(features)
    features = TimeDistributed(Flatten(), name='flatten')(features)
    y_pred = Dense(self.num_classes, name='out',
                   activation='softmax')(features)

    base_model = Model(inputs=image_input, outputs=y_pred)

    # Extra inputs consumed only by the CTC loss layer.
    labels = Input(shape=(self.maxlen,), dtype='float32', name="the_labels")
    input_length = Input(shape=(1,), name="input_length", dtype='int64')
    label_length = Input(shape=(1,), name="label_length", dtype='int64')

    ctc_inputs = [labels, y_pred, input_length, label_length]
    loss_out = Lambda(_ctc_loss, output_shape=(1,), name='ctc')(ctc_inputs)

    model = Model(inputs=[image_input, labels, input_length, label_length],
                  outputs=loss_out)

    if self.num_gpu > 1:
        parallel_model = multi_gpu_model(model, gpus=self.num_gpu)
    else:
        parallel_model = model

    # The 'ctc' layer already outputs the loss, so the Keras loss just
    # passes y_pred through.
    parallel_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                           optimizer=Adam(self.lr), metrics=['accuracy'])

    return base_model, model, parallel_model
def get_gpu_model(input_size=None, activation=None, initial_weights=None, is_corruption=False):
    """Build a two-GPU ResNet50v2 regressor compiled with MSE and Adam.

    An ImageNet-initialised ResNet50v2 without its top (average pooled) is
    followed by Dense(128, relu), Dense(32, relu) and Dense(1, `activation`).

    Args:
        input_size: Passed as `input_shape` to the backbone.
        activation: Activation of the final single-unit dense layer.
        initial_weights: Not used by this function.
        is_corruption: Not used by this function.

    Returns:
        The compiled `multi_gpu_model` wrapper (gpus=2).
    """
    backbone_builder, _ = Classifiers.get('resnet50v2')
    backbone = backbone_builder(input_shape=input_size, weights='imagenet',
                                classes=1, include_top=False, pooling='avg')

    # Regression head stacked on the pooled backbone features.
    head = backbone.output
    for units, layer_activation in ((128, 'relu'), (32, 'relu'),
                                    (1, activation)):
        head = Dense(units, activation=layer_activation)(head)

    regressor = Model(backbone.inputs, head)
    replicated = multi_gpu_model(regressor, gpus=2)
    replicated.compile(loss=keras.losses.mean_squared_error,
                       optimizer=keras.optimizers.Adam())
    return replicated
def model_0(self):
    """Build and compile the board-only binary classifier (model type 0).

    The configuration is read from `self.tc.board_model`: hidden layers,
    optional multi-GPU wrapping, and the optimizer looked up in
    `OPTIMIZER_MAP`. Sets `self.model_type` to 0.

    Returns:
        The compiled model (a `multi_gpu_model` wrapper when
        `use_multi_gpu` is set in the configuration).
    """
    self.model_type = 0
    board_cfg = self.tc.board_model

    board_inputs = Input(shape=(self.x.shape[1],), name='board_inputs')
    hidden = self.build_hidden_layers(
        x=board_inputs,
        layers=board_cfg['layers'],
        params=board_cfg['layer_params'],
        dropouts=board_cfg['dropouts']
    )
    prediction = Dense(1, activation='sigmoid')(hidden)

    network = Model(inputs=board_inputs, outputs=prediction)
    if board_cfg['use_multi_gpu']:
        network = multi_gpu_model(network, gpus=board_cfg['gpu_counts'])

    optimizer_factory = OPTIMIZER_MAP[board_cfg['optimizer']]
    network.compile(optimizer=optimizer_factory(**board_cfg['optimizer_params']),
                    loss='binary_crossentropy',
                    metrics=['accuracy', auc_roc])
    return network
def build_gan(self):
    """Build and compile the discriminator, generator and adversarial models.

    For each of the three models a single-device version is kept in
    `self.<name>_single` (used for `summary()` and `plot_model`), while
    `self.<name>` is either that same object (`self.gpus <= 1`) or a
    `multi_gpu_model` wrapper around it. The discriminator and the
    adversarial model are compiled here; the generator is not compiled on
    its own. Diagrams are written to `self.model_dir`.

    NOTE(review): `model` is used as a module-like namespace providing
    `discriminator` and `generator` builders — confirm against the imports.
    """
    # One batch is fetched only to read the image size; pc, elev and azim
    # are not used afterwards.
    image, pc, elev, azim = self.train_source.next_batch()
    elev_code = Input(shape=(1, ), name='elev_code')
    azim_code = Input(shape=(1, ), name='azim_code')
    pc_code = Input(shape=(self.pc_code_dim, ), name='pc_code')
    noise_code = Input(shape=(self.noise_dim, ), name='noise_code')
    model_name = "pc2pix"
    image_size = image.shape[1]
    # 3 channels for color images, 1 otherwise
    if self.color:
        input_shape = (image_size, image_size, 3)
    else:
        input_shape = (image_size, image_size, 1)

    # NOTE(review): `inputs` is not referenced again in this method.
    inputs = Input(shape=input_shape, name='image_input')
    # Discriminator optimizer depends on whether the generator uses
    # spectral normalization.
    if self.gen_spectral_normalization:
        optimizer = Adam(lr=4e-4, beta_1=0.0, beta_2=0.9)
    else:
        optimizer = Adam(lr=2e-4, beta_1=0.5, beta_2=0.999)

    # build discriminator
    # by default, discriminator uses SN
    if self.gpus <= 1:
        self.discriminator = model.discriminator(
            input_shape, pc_code_dim=self.pc_code_dim)
        if self.dw is not None:
            print("loading discriminator weights: ", self.dw)
            self.discriminator.load_weights(self.dw)
        self.discriminator_single = self.discriminator
    else:
        # Template model is created under the CPU device scope and then
        # replicated over the GPUs.
        with tf.device("/cpu:0"):
            self.discriminator_single = model.discriminator(
                input_shape, pc_code_dim=self.pc_code_dim)
            if self.dw is not None:
                print("loading discriminator weights: ", self.dw)
                self.discriminator_single.load_weights(self.dw)

        self.discriminator = multi_gpu_model(self.discriminator_single,
                                             gpus=self.gpus)

    # Four losses with matching weights; the same lists are reused for the
    # adversarial model below.
    loss = ['binary_crossentropy', 'mae', self.elev_loss, self.azim_loss]
    loss_weights = [1., 10., 10., 10.]
    self.discriminator.compile(loss=loss,
                               loss_weights=loss_weights,
                               optimizer=optimizer)
    self.discriminator_single.summary()
    path = os.path.join(self.model_dir, "discriminator.png")
    plot_model(self.discriminator_single, to_file=path, show_shapes=True)

    # build generator
    # try SN to see if mode collapse is avoided
    if self.gpus <= 1:
        self.generator = model.generator(
            input_shape,
            noise_code=noise_code,
            pc_code=pc_code,
            elev_code=elev_code,
            azim_code=azim_code,
            spectral_normalization=self.gen_spectral_normalization,
            color=self.color)
        if self.gw is not None:
            print("loading generator weights: ", self.gw)
            self.generator.load_weights(self.gw)
        self.generator_single = self.generator
    else:
        with tf.device("/cpu:0"):
            self.generator_single = model.generator(
                input_shape,
                noise_code=noise_code,
                pc_code=pc_code,
                elev_code=elev_code,
                azim_code=azim_code,
                spectral_normalization=self.gen_spectral_normalization,
                color=self.color)
            if self.gw is not None:
                print("loading generator weights: ", self.gw)
                self.generator_single.load_weights(self.gw)

        self.generator = multi_gpu_model(self.generator_single,
                                         gpus=self.gpus)

    self.generator_single.summary()
    path = os.path.join(self.model_dir, "generator.png")
    plot_model(self.generator_single, to_file=path, show_shapes=True)

    # The discriminator is marked non-trainable before the adversarial
    # model is assembled and compiled (it was already compiled above).
    self.discriminator.trainable = False
    # Adversarial optimizer: same learning rate either way, different betas.
    if self.gen_spectral_normalization:
        optimizer = Adam(lr=1e-4, beta_1=0.0, beta_2=0.9)
    else:
        optimizer = Adam(lr=1e-4, beta_1=0.5, beta_2=0.999)

    # Adversarial model: codes -> generator -> discriminator.
    if self.gpus <= 1:
        self.adversarial = Model(
            [noise_code, pc_code, elev_code, azim_code],
            self.discriminator(
                self.generator([noise_code, pc_code, elev_code, azim_code])),
            name=model_name)
        self.adversarial_single = self.adversarial
    else:
        with tf.device("/cpu:0"):
            self.adversarial_single = Model(
                [noise_code, pc_code, elev_code, azim_code],
                self.discriminator(
                    self.generator(
                        [noise_code, pc_code, elev_code, azim_code])),
                name=model_name)
        self.adversarial = multi_gpu_model(self.adversarial_single,
                                           gpus=self.gpus)

    self.adversarial.compile(loss=loss,
                             loss_weights=loss_weights,
                             optimizer=optimizer)
    self.adversarial_single.summary()
    path = os.path.join(self.model_dir, "adversarial.png")
    plot_model(self.adversarial_single, to_file=path, show_shapes=True)

    # Report the data configuration in use.
    print("Using split file: ", self.split_file)
    print("1 epoch datalen: ", self.epoch_datalen)
    print("1 epoch train steps: ", self.train_steps)
    print("Using pc codes: ", self.pc_codes_filename)
def main():
    """Train a multi-label CNN on chunked image data and save test predictions.

    Steps: load evaluation/test arrays and sparse label matrices from fixed
    paths, build a small convolutional network with a 228-way sigmoid
    output, replicate it on two GPUs, train it chunk by chunk (512 images
    are read from disk and resized per chunk) with real-time augmentation,
    then threshold the predictions on the test array at 0.5 and save them.

    NOTE(review): the nesting of the training loop below was reconstructed
    from a flattened source; in particular the position of
    `model.evaluate(...)` relative to the two loops should be confirmed.
    """
    load_size = 512  # number of images read from disk per chunk
    batch_size = 32
    num_classes = 228
    num_channels = 3
    epochs = 10
    data_augmentation = True
    # NOTE(review): save_dir and model_name are not used further below.
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    model_name = 'keras_fashion_trained_model.h5'
    image_dim = 64
    # Reusable buffers holding one chunk of images and labels.
    X_load_batch = np.zeros((load_size, image_dim, image_dim, num_channels),
                            dtype='uint8')
    Y_load_batch = np.zeros((load_size, num_classes), dtype='uint8')
    # The data, split between train and test sets:
    #x_train = np.load('x_train_size_256.npy')
    #y_train = np.load('y_train_size_256.npy')
    #print('reading train data done')
    x_eval = np.load(
        'D:/PrivacyPreservingDistributedDL/Filters/x_eval_all_64.npy')
    #y_eval = np.load('y_eval_size_256.npy')
    #print('reading eval data done')
    #x_test = np.load('D:/PrivacyPreservingDistributedDL/Filters/x_test.npy')
    #y_test = np.load('D:/PrivacyPreservingDistributedDL/Filters/y_test.npy')
    x_Test = np.load(
        'D:/PrivacyPreservingDistributedDL/Filters/x_TestSub_64.npy')
    print('reading test done')

    ### creating the filename and label vectors
    train_labels = sps.load_npz(
        "D:/PrivacyPreservingDistributedDL/Filters/train_image_mat.npz"
    ).todense()
    ############### loading validation image mat
    eval_labels = sps.load_npz(
        "D:/PrivacyPreservingDistributedDL/Filters/eval_image_mat.npz"
    ).todense()

    # Only the first 150000 label rows are used for training.
    train_labels = train_labels[0:150000, ]

    # Sizes are "rows/columns minus one".
    # NOTE(review): presumably row 0 / column 0 are not real samples/labels
    # (image files are read 1-based below) — confirm.
    train_set_size = train_labels.shape[0] - 1
    eval_set_size = eval_labels.shape[0] - 1
    label_size = train_labels.shape[1] - 1
    print('train_set_size ', train_set_size)
    print('eval_set_size ', eval_set_size)
    print('label_size ', label_size)
    #x_train = np.zeros(shape=(train_set_size,image_dim,image_dim,3), dtype='uint8')
    # Label columns 1..228 are kept; column 0 is dropped.
    y_train = train_labels[0:train_set_size + 1, 1:229]
    y_eval = eval_labels[0:eval_set_size + 1, 1:229]
    #x_eval = np.zeros(shape=(eval_set_size,image_dim,image_dim,3), dtype='uint8')
    #y_eval = np.zeros(shape=(eval_set_size,label_size),dtype='uint8')

    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=
        False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=
        20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=
        0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=
        0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    # read images in batchs (start with 100000)

    #print('x_train shape:', x_train.shape)
    #print(x_Test.shape[0], 'Test samples')
    # The bare string below is disabled debugging code kept as-is.
    """ for i in range(0,x_train.shape[0]-1): image = x_train[i,:,:,:] plt.imshow(image) plt.show() time.sleep(1) """
    # Convert class vectors to binary class matrices.
    #y_train = keras.utils.to_categorical(y_train, num_classes)
    #y_test = keras.utils.to_categorical(y_test, num_classes)

    # Network: two conv/conv/pool/dropout stages, then a dense classifier
    # with one sigmoid output per label.
    model = Sequential()
    model.add(
        Conv2D(32, (3, 3),
               padding='same',
               input_shape=(image_dim, image_dim, 3)))  #conv1
    model.add(Activation('relu'))
    #print('x_train shape for Conv1:', x_train.shape[1:])
    model.add(Conv2D(32, (3, 3)))  #conv2
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same'))  #conv3
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))  #conv4
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('sigmoid'))

    # Replicate the model on two GPUs.
    model = multi_gpu_model(model, gpus=2)
    # initiate RMSprop optimizer
    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    # Candidate metrics/losses; only _ap and _f1 are passed to compile().
    def _sd(y_true, y_pred):
        return 1 - K.mean(K.square(K.abs(y_true - K.round(K.abs(y_pred))))) * 5

    def _ap(y_true, y_pred):
        # Mean of precision and recall computed over the whole batch.
        y_pred_pos = K.round(K.clip(y_pred, 0, 1))
        y_pred_neg = 1 - y_pred_pos
        y_pos = K.round(K.clip(y_true, 0, 1))
        y_neg = 1 - y_pos
        tp = K.sum(y_pos * y_pred_pos)
        #tn = K.sum(y_neg * y_pred_neg,axis=1)
        fp = K.sum(y_neg * y_pred_pos)
        fn = K.sum(y_pos * y_pred_neg)
        # No epsilon is added to the denominators.
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        return (precision + recall) / 2

    def _f1(y_true, y_pred):
        # Harmonic mean of precision and recall over the whole batch.
        y_pred_pos = K.round(K.clip(y_pred, 0, 1))
        y_pred_neg = 1 - y_pred_pos
        y_pos = K.round(K.clip(y_true, 0, 1))
        y_neg = 1 - y_pos
        tp = K.sum(y_pos * y_pred_pos)
        #tn = K.sum(y_neg * y_pred_neg,axis=1)
        fp = K.sum(y_neg * y_pred_pos)
        fn = K.sum(y_pos * y_pred_neg)
        # No epsilon is added to the denominators.
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        return 2 * (precision * recall) / (precision + recall)

    def _sumLabels(y_true, y_pred):
        return K.sum(y_true, axis=1)

    def abs_KL_div(y_true, y_pred):
        y_true = K.clip(y_true, K.epsilon(), None)
        y_pred = K.clip(y_pred, K.epsilon(), None)
        return K.sum(K.abs((y_true - y_pred) * (K.log(y_true / y_pred))),
                     axis=-1)

    # Compile with the RMSprop optimizer created above.
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=[_ap, _f1])

    #x_eval = x_eval.astype('float32')
    #x_eval /= 255
    #x_Test /= 255

    if not data_augmentation:
        print('Not using data augmentation.')
        # Disabled non-augmented training call, kept as a bare string.
        """ model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_eval, y_eval), shuffle=True) """
    else:
        print('Using real-time data augmentation.')
        for e in tqdm(range(epochs)):
            print('Epoch', e)
            # Number of disk chunks needed to cover the training set.
            n_load_batch = int(np.ceil(train_set_size / load_size))
            for load_batch in tqdm(range(1, n_load_batch + 1)):
                # The last chunk may be shorter than load_size.
                if load_batch == n_load_batch:
                    load_sizel = train_set_size - load_size * (load_batch - 1)
                else:
                    load_sizel = load_size
                #######load the images, resize them
                r_start = (load_batch - 1) * load_size
                #r_end = r_start+load_sizel
                #with tf.device("/cpu:0"):
                for i in range(0, load_sizel):
                    # Image files are numbered from 1; the label row uses the
                    # same 1-based index.
                    img = mpimg.imread(
                        'D:/dataset/inputs/{}.jpg'.format(r_start + i + 1))
                    X_load_batch[i, ] = img_as_ubyte(
                        cv2.resize(img, (image_dim, image_dim),
                                   interpolation=cv2.INTER_AREA))
                    Y_load_batch[i, ] = y_train[r_start + i + 1, ]
                print('\n Epoch: ', e)
                # One pass of augmented training over the current chunk.
                model.fit_generator(datagen.flow(X_load_batch[0:load_sizel, ],
                                                 Y_load_batch[0:load_sizel],
                                                 batch_size=batch_size),
                                    verbose=1)
                # NOTE(review): placement inside the chunk loop is assumed.
                model.evaluate(x_eval, y_eval, verbose=1)
                # Disabled manual batch loop, kept as a bare string.
                """ for x_batch, y_batch in datagen.flow(X_load_batch[0:load_sizel,], y_train[0:load_sizel], batch_size=batch_size): model.fit(x_batch, y_batch,shuffle=True) batches += 1 if batches >= load_sizel / batch_size: # we need to break the loop by hand because # the generator loops indefinitely break """

    # Binarize the test predictions at 0.5.
    preds = model.predict(x_Test)
    preds[preds >= 0.5] = 1
    preds[preds < 0.5] = 0
    #print('Shape of y_test: ',y_eval.shape)
    #print('Shape of preds: ',preds.shape)
    #print ('msd accuracy =',1-np.mean(np.square(y_eval-preds)))
    ############## save Test matrix
    np.save(
        'D:/PrivacyPreservingDistributedDL/Filters/y_TestSub_size_64_all.npy',
        preds)
    print("Test matrix saved.")
Save model and weights if not os.path.isdir(save_dir): os.makedirs(save_dir) model_path = os.path.join(save_dir, model_name) model.save(model_path) print('Saved trained model at %s ' % model_path) # Score trained model. #scores = model.evaluate(x_eval, y_eval, verbose=1) #print('Test loss:', scores[0]) #print('Test accuracy:', scores[1]) print(model.summary())
# ------------------------------------------------------ #
#   Build the YOLO model.
# ------------------------------------------------------ #
model_body = yolo_body((input_shape[0], input_shape[1], 3),
                       anchors_mask,
                       num_classes,
                       phi=phi)
if model_path != '':
    # Load the pretrained weights; layers are matched by name and layers
    # whose shapes do not match are skipped.
    print('Load weights {}.'.format(model_path))
    model_body.load_weights(model_path, by_name=True, skip_mismatch=True)

# With more than one GPU per node the body is replicated first; the training
# model is then built on whichever model was selected.
model = model_body
if ngpus_per_node > 1:
    model = multi_gpu_model(model, gpus=ngpus_per_node)
model = get_train_model(model, input_shape, num_classes, anchors,
                        anchors_mask, label_smoothing)

# --------------------------- #
#   Read the dataset's annotation txt files.
# --------------------------- #
with open(train_annotation_path, encoding='utf-8') as f:
    train_lines = list(f)
with open(val_annotation_path, encoding='utf-8') as f:
    val_lines = list(f)
num_train = len(train_lines)
num_val = len(val_lines)
def __init__(self, model, num_gpus):
    """Adopt the state of a multi-GPU replica of ``model``.

    The instance takes over every attribute of the model returned by
    ``multi_gpu_model(model, num_gpus)`` and keeps the model that was passed
    in as ``self._model``.
    """
    replicated = multi_gpu_model(model, num_gpus)
    # Copy the replica's instance attributes onto this object.
    vars(self).update(vars(replicated))
    # Set after the copy, so a copied ``_model`` attribute (if any) is
    # replaced by the original model.
    self._model = model
if K._BACKEND == 'tensorflow':
    # TensorFlow is imported only on this branch.
    import tensorflow as tf
    from tensorflow.python.client import device_lib

    def get_available_gpus():
        """Return the names of the local devices whose type is 'GPU'."""
        return [
            device.name for device in device_lib.list_local_devices()
            if device.device_type == 'GPU'
        ]

    ngpus = len(get_available_gpus())
    print("[INFO] training with {} GPUs...".format(ngpus))

    if ngpus >= 2:
        # Build the template model on the CPU, then replicate it on the GPUs.
        with tf.device("/cpu:0"):
            original_built_model = model.build(
                input_shape=dataset.input_shape,
                num_classes=dataset.num_classes)
        built_model = multi_gpu_model(original_built_model, gpus=ngpus)
    else:
        # multi_gpu_model raises ValueError for fewer than two GPUs, so with
        # zero or one GPU the plain model is used directly.
        original_built_model = model.build(input_shape=dataset.input_shape,
                                           num_classes=dataset.num_classes)
        built_model = original_built_model
elif K._BACKEND == 'cntk':
    built_model = model.build(input_shape=dataset.input_shape,
                              num_classes=dataset.num_classes)
else:
    # NOTE(review): built_model is not assigned on this branch; any later use
    # of it would fail with NameError -- confirm whether a single-device
    # build is wanted here.
    print("Multi GPU not available on this backend.")

# import numpy as np
# class_weights = np.ones(dataset.num_classes)


# model compilation with loss and accuracy
def custom_loss(y_true, y_pred):
    final_loss = 0.
    if dataset.enable_boundingbox:
        obj_true = y_true[..., dataset.num_classes]
if G <= 1: print("[INFO] training with 1 GPU...") model = MiniGoogLeNet.build(width=32, height=32, depth=3, classes=10) # otherwise, we are compiling using multiple GPUs else: print("[INFO] training with {} GPUs...".format(G)) # we'll store a copy of the model on *every* GPU and then combine # the results from the gradient updates on the CPU with tf.device("/cpu:0"): # initialize the model model = MiniGoogLeNet.build(width=32, height=32, depth=3, classes=10) # make the model parallel model = multi_gpu_model(model, gpus=G) # initialize the optimizer and model print("[INFO] compiling model...") opt = SGD(lr=INIT_LR, momentum=0.9) model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) # train the network print("[INFO] training network...") H = model.fit_generator(aug.flow(trainX, trainY, batch_size=64 * G), validation_data=(testX, testY), steps_per_epoch=len(trainX) // (64 * G), epochs=NUM_EPOCHS, callbacks=callbacks,
return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \ -K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0)) return binary_focal_loss_fixed # def false_rates(y_true, y_pred): # false_neg = ... # false_pos = ... # return { # 'false_neg': false_neg, # 'false_pos': false_pos, # } parallel_model = multi_gpu_model(model, gpus=G) if FL: parallel_model.compile(optimizer='adam', loss=binary_focal_loss(gamma=0., alpha=.5), metrics=['accuracy']) else: parallel_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) ########################################################### #Data generator# ########################################################### # Data Generator for training (always use json files) train_datagen = MB.ImageDataGenerator( rescale=1. / 255, # shear_range=0.2,
def __load_model_mlp_classifier_video_graph(centroids, n_classes,
                                            input_shape_x, n_gpus,
                                            is_load_weights, weight_path):
    """
    Build and compile the VideoGraph classifier.

    The network applies a spatial convolution, pools over space, attends over
    the given centroids, runs the graph-embedding layers, pools over centroids
    and time, and ends in a small dense classification head.

    :param centroids: 2-D array of centroids; wrapped in a constant float32
        tensor and used as the second model input.
    :param n_classes: number of units in the output layer.
    :param input_shape_x: 5-tuple ``(batch, timesteps, side, side, channels)``;
        the batch entry is ignored.
    :param n_gpus: number of GPUs. An integer <= 1 compiles the plain model;
        anything else is handed to ``multi_gpu_model``.
    :param is_load_weights: when true, weights are loaded from ``weight_path``
        before compiling.
    :param weight_path: path of the weight file to load.
    :return: ``(model, parallel_model)``. With a single device both names
        refer to the same compiled model; otherwise only ``parallel_model``
        is compiled.
    """

    # optimizer and loss
    loss = keras_utils.LOSSES[0]
    metrics = [keras_utils.METRICS[0]]
    output_activation = keras_utils.ACTIVATIONS[3]
    optimizer = Adam(lr=0.01, epsilon=1e-4)

    # per-layer kernel size and max pooling for centroids and timesteps
    n_graph_layers = 2

    # time kernel
    t_kernel_size = 7
    t_max_size = 3

    # node kernel
    c_kernel_size = 7
    c_max_size = 3
    c_avg_size = 4

    # space kernel
    s_kernel_size = 1

    # The unpacking doubles as a check that centroids is 2-D.
    _n_centroids, _ = centroids.shape
    # Both spatial entries are bound to the same name, so the last one wins;
    # presumably the input is square -- TODO confirm.
    _, n_timesteps, side_dim, side_dim, n_channels_in = input_shape_x

    t_input_x = Input(shape=(n_timesteps, side_dim, side_dim, n_channels_in),
                      name='input_x')
    t_input_c = Input(tensor=tf.constant(centroids, dtype=tf.float32),
                      name='input_n')
    tensor = t_input_x

    # spatial convolution
    n_channels_conv = 1024
    tensor = Conv3D(n_channels_conv, (1, s_kernel_size, s_kernel_size),
                    padding='VALID',
                    name='conv_s')(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)

    # pool over space
    tensor = MaxLayer(axis=(2, 3), is_keep_dim=True,
                      name='global_pool_s')(tensor)

    # centroid-attention
    tensor = videograph.node_attention(tensor,
                                       t_input_c,
                                       n_channels_conv,
                                       activation_type='relu')

    # graph embeddings
    tensor = videograph.graph_embedding(tensor, n_graph_layers, c_avg_size,
                                        c_kernel_size, t_kernel_size,
                                        c_max_size, t_max_size)

    # centroid pooling
    tensor = MeanLayer(axis=(1, ), name='global_pool_n')(tensor)

    # temporal pooling
    tensor = MaxLayer(axis=(1, 2, 3), name='global_pool_t')(tensor)

    # activity classification
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(512)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(n_classes)(tensor)
    t_output = Activation(output_activation)(tensor)

    # `inputs` / `outputs` are the Keras 2 keyword names.
    model = Model(inputs=[t_input_x, t_input_c], outputs=t_output)

    if is_load_weights:
        model.load_weights(weight_path)

    # multi_gpu_model rejects GPU counts below 2, so any integer count <= 1
    # trains on the plain model.
    single_device = not isinstance(n_gpus, (list, tuple)) and n_gpus <= 1
    if single_device:
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
        parallel_model = model
    else:
        parallel_model = multi_gpu_utils.multi_gpu_model(model, n_gpus)
        parallel_model.compile(loss=loss,
                               optimizer=optimizer,
                               metrics=metrics)

    return model, parallel_model
def main(
        net_name,
        A_name_list=['A_downstream', 'A_upstream', 'A_neighbors'],
        run_name=None,
        flatten_A=False,
        val_split_proportion=.1,
        test_split_proportion=.1,
        loss_function='mse',
        batch_size=4,
        time_window=150,
        average_interval=None,
        max_time=3599,
        epochs=50,
        no_liu=False,
        attn_dim=64,
        attn_heads=4,
        attn_depth=2,
        attn_residual_connection=False,
        gat_highway_connection=False,
        layer_norm=False,
        rnn_dim=64,
        stateful_rnn=False,
        dense_dim=64,
        dropout_rate=.3,
        attn_dropout=0.,
        seed=123,
        per_step_metrics=False,
        old_model=False,
        num_gpus=1,
        no_plots=False,
        use_gcn=False,
        gcn_filter_type='localpool',
        gcn_chebyshev_degree=2,
):
    """Train and evaluate a graph encoder + RNN model on one SUMO network.

    Data is read from ``data/networks/<net_name>/preprocessed_data`` through a
    ``TFBatcher``. The model is built on the batcher's tensors, compiled with
    the selected loss, trained with ``fit_loop_tf`` and evaluated with
    ``predict_eval_tf``. The hyperparameters are written to ``params.json`` in
    the run's logging directory.

    Args:
        net_name: Name of the network directory under ``data/networks``.
        A_name_list: Names of the adjacency matrices to load. Must contain
            ``'A_eye'`` when ``use_gcn`` is set. The default list is not
            mutated in this function.
        run_name: Passed to ``make_callbacks``.
        flatten_A: When true the model sees a single edge type.
        val_split_proportion: Validation share; validation is enabled when it
            is greater than 0.
        test_split_proportion: Only recorded in the hyperparameters here.
        loss_function: ``'mse'``, ``'mae'`` or ``'huber'`` (case-insensitive).
        batch_size, time_window, average_interval, max_time: Forwarded to
            ``TFBatcher`` (``batch_size`` also fixes the reshape batch size
            when ``stateful_rnn`` is set).
        epochs: Number of training epochs.
        no_liu: Drop the ``'liu_estimated_veh'`` input feature.
        attn_dim, attn_heads, attn_depth: Per-layer sizes; the first two are
            repeated ``attn_depth`` times.
        attn_residual_connection, gat_highway_connection, layer_norm,
        dense_dim, dropout_rate, attn_dropout: Encoder options.
        rnn_dim, stateful_rnn: GRU encoder/decoder options.
        seed: Seed for TensorFlow and NumPy.
        per_step_metrics: Forwarded to ``fit_loop_tf``; also changes the step
            estimate used for the progress bar.
        old_model: Not used in this function.
        num_gpus: With more than one GPU the model is built on the CPU and
            replicated with ``multi_gpu_model``.
        no_plots: Disable plotting in ``predict_eval_tf``.
        use_gcn, gcn_filter_type, gcn_chebyshev_degree: Use the GCN encoder
            instead of the GAT encoder, and its options.

    Returns:
        ``model.history`` when the model has that attribute, otherwise None.

    Raises:
        ValueError: If ``loss_function`` is not one of the supported names.
    """
    # Fail fast: an unsupported loss would otherwise only surface after the
    # whole graph is built, as an unbound `losses` name.
    loss_name = loss_function.lower()
    if loss_name not in ('mse', 'mae', 'huber'):
        raise ValueError(
            "Unsupported loss_function {!r}; expected 'mse', 'mae' or "
            "'huber'".format(loss_function))

    if use_gcn:
        assert 'A_eye' in A_name_list

    tf.set_random_seed(seed)
    np.random.seed(seed)

    net_dir = os.path.join('data', 'networks', net_name)
    sn = SumoNetwork.from_preexisting_directory(net_dir)
    lanes = sn.lanes_with_detectors()
    num_lanes = len(lanes)
    data_dir = os.path.join(net_dir, 'preprocessed_data')

    x_feature_subset = [
        'e1_0/occupancy', 'e1_0/speed', 'e1_1/occupancy', 'e1_1/speed',
        'liu_estimated_veh', 'green'
    ]
    y_feature_subset = ['e2_0/nVehSeen', 'e2_0/maxJamLengthInVehicles']
    if no_liu:
        x_feature_subset.remove('liu_estimated_veh')

    write_dir = os.path.join(net_dir, 'models')
    if not os.path.isdir(write_dir):
        os.makedirs(write_dir)

    # NOTE(review): only the batcher is created under the CPU device scope;
    # the source formatting did not show where the scope ended -- confirm.
    with tf.device('/cpu:0'):
        batch_gen = TFBatcher(data_dir,
                              batch_size,
                              time_window,
                              average_interval=average_interval,
                              val_proportion=val_split_proportion,
                              shuffle=True,
                              A_name_list=A_name_list,
                              x_feature_subset=x_feature_subset,
                              y_feature_subset=y_feature_subset,
                              flatten_A=flatten_A,
                              max_time=max_time,
                              gpu_prefetch=True)

    Xtens = batch_gen.X
    Atens = tf.cast(batch_gen.A, tf.float32)

    # X dimensions: timesteps x lanes x feature dim
    X_in = Input(batch_shape=(None, None, num_lanes, len(x_feature_subset)),
                 name='X',
                 tensor=Xtens)

    # A dimensions: timesteps x num edge types x lanes x lanes
    num_edge_types = 1 if flatten_A else len(A_name_list)
    A_in = Input(batch_shape=(None, None, num_edge_types, num_lanes,
                              num_lanes),
                 name='A',
                 tensor=Atens)

    # One entry per attention layer.
    attn_dim = iterfy(attn_dim) * attn_depth
    attn_heads = iterfy(attn_heads) * attn_depth

    def make_model(X_in, A_in):
        """Build the encoder -> GRU encode/decode -> per-lane dense model."""
        if use_gcn:
            X = gcn_encoder(X_in,
                            A_in,
                            gcn_filter_type,
                            attn_dim,
                            dropout_rate,
                            dense_dim,
                            cheb_polynomial_degree=gcn_chebyshev_degree,
                            layer_norm=layer_norm)
        else:
            X = gat_encoder(X_in,
                            A_in,
                            attn_dim,
                            attn_heads,
                            dropout_rate,
                            attn_dropout,
                            gat_activation='relu',
                            dense_dim=dense_dim,
                            gat_highway_connection=gat_highway_connection,
                            layer_norm=layer_norm,
                            residual_connection=attn_residual_connection)

        # A stateful RNN gets a fixed batch size; otherwise it is left open.
        reshape_batch_size = batch_size if stateful_rnn else None
        reshaped_1 = ReshapeFoldInLanes(batch_size=reshape_batch_size)(X)
        encoded = rnn_encode(reshaped_1, [rnn_dim],
                             'GRU',
                             stateful=stateful_rnn)
        decoded = rnn_attn_decode('GRU',
                                  rnn_dim,
                                  encoded,
                                  stateful=stateful_rnn)
        reshaped_decoded = ReshapeUnfoldLanes(num_lanes)(decoded)
        output = TimeDistributed(
            Dense(len(y_feature_subset),
                  activation='relu'))(reshaped_decoded)
        outputs = output_tensor_slices(output, y_feature_subset)
        return Model([X_in, A_in], outputs)

    if num_gpus > 1:
        # Template model on the CPU, replicas on the GPUs.
        with tf.device('/cpu:0'):
            base_model = make_model(X_in, A_in)
        model = multi_gpu_model(base_model, num_gpus)
    else:
        base_model = make_model(X_in, A_in)
        model = base_model

    Ytens = batch_gen.Y_slices

    # One loss per output; the masked variants that are not used as a loss
    # are tracked as metrics.
    if loss_name == 'mse':
        losses = ['mse', negative_masked_mse]
        metrics = [
            negative_masked_mae, negative_masked_huber, negative_masked_mape
        ]
    elif loss_name == 'mae':
        losses = ['mae', negative_masked_mae]
        metrics = [
            negative_masked_mse, negative_masked_huber, negative_masked_mape
        ]
    else:  # 'huber'
        losses = [huber, negative_masked_huber]
        metrics = [
            negative_masked_mse, negative_masked_mae, negative_masked_mape
        ]

    model.compile(
        optimizer='Adam',
        loss=losses,
        metrics=metrics,
        target_tensors=Ytens,
    )
    model.summary(print_fn=_logger.info)

    verbose = 1
    do_validation = val_split_proportion > 0

    callback_list = make_callbacks(model, write_dir, do_validation, run_name,
                                   base_model)

    # record hyperparameters
    hyperparams = dict(
        net_name=net_name,
        A_name_list=A_name_list,
        no_liu=no_liu,
        x_feature_subset=x_feature_subset,
        y_feature_subset=y_feature_subset,
        flatten_A=flatten_A,
        param_count=model.count_params(),
        val_split_proportion=val_split_proportion,
        test_split_proportion=test_split_proportion,
        loss_function=loss_function,
        batch_size=batch_size,
        time_window=time_window,
        average_interval=average_interval,
        max_time=max_time,
        epochs=epochs,
        attn_dim=attn_dim,
        attn_depth=attn_depth,
        attn_residual_connection=attn_residual_connection,
        layer_norm=layer_norm,
        gat_highway_connection=gat_highway_connection,
        attn_heads=attn_heads,
        rnn_dim=rnn_dim,
        stateful_rnn=stateful_rnn,
        dense_dim=dense_dim,
        dropout_rate=dropout_rate,
        attn_dropout=attn_dropout,
        seed=seed,
        num_gpus=num_gpus,
        use_gcn=use_gcn,
        gcn_filter_type=gcn_filter_type,
        gcn_chebyshev_degree=gcn_chebyshev_degree,
    )
    logdir = get_logging_dir(callback_list)
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    with open(os.path.join(logdir, 'params.json'), 'w') as f:
        json.dump(hyperparams, f)

    _logger.info('Run dir: %s', logdir)

    # Guess at the number of steps per simulation. This only affects Keras's
    # progress bar per training epoch so it can be wrong.
    if per_step_metrics:
        timesteps_per_simulation = 3600
        steps = batch_gen.num_train_batches * math.ceil(
            timesteps_per_simulation / time_window)
    else:
        steps = batch_gen.num_train_batches

    set_callback_params(callback_list, epochs, batch_size, verbose,
                        do_validation, model, steps)
    fit_loop_init(model, callback_list, batch_gen)

    # NOTE(review): evaluation runs inside the default-session scope as well;
    # the source formatting did not show where the scope ended -- confirm.
    with K.get_session().as_default():
        fit_loop_tf(model,
                    callback_list,
                    batch_gen,
                    epochs,
                    per_step_metrics=per_step_metrics)

        predict_eval_tf(model,
                        get_logging_dir(callback_list),
                        batch_gen,
                        plot_results=not no_plots)

    if hasattr(model, 'history'):
        return model.history  #pylint: disable=no-member