def testCompositeTypeSpecArgWithoutDtype(self):
  """Keras Input built from a composite TypeSpec with no single dtype.

  Verifies a functional model can be constructed from such an input, called,
  and round-tripped through get_config/from_config and to_json/from_json.
  """
  for assign_variant_dtype in [False, True]:
    # Create a Keras Input from a spec whose two components have different
    # dtypes (float32 and int64), so no single dtype can be assigned.
    spec = TwoTensorsSpecNoOneDtype(
        (1, 2, 3), tf.float32, (1, 2, 3), tf.int64,
        assign_variant_dtype=assign_variant_dtype)
    x = input_layer_lib.Input(type_spec=spec)

    def lambda_fn(tensors):
      # Cast both components to a common dtype so they can be added.
      return tf.cast(tensors.x, tf.float64) + tf.cast(tensors.y, tf.float64)

    # Verify you can construct and use a model w/ this input
    model = functional.Functional(x, core.Lambda(lambda_fn)(x))

    # And that the model works.
    # BUG FIX: the second argument was `tf.ones(1, 2, 3)`, which passes 2 as
    # `dtype` and 3 as `name` instead of a shape; the shape must be the tuple
    # (1, 2, 3) to match the spec above.
    two_tensors = TwoTensors(tf.ones((1, 2, 3)) * 2.0, tf.ones((1, 2, 3)))
    self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))

    # Test serialization / deserialization
    model = functional.Functional.from_config(model.get_config())
    self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))
    model = model_config.model_from_json(model.to_json())
    self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))
def convnet_simple_lion_keras(image_dims):
    """Build and compile a small classification convnet.

    Args:
      image_dims: input image shape, e.g. (height, width, channels).

    Returns:
      A compiled ``keras.models.Sequential`` model ending in a 6-way softmax.
    """
    model = keras.models.Sequential()
    # Rescale raw pixel values from [0, 255] into [-0.5, 0.5].
    model.add(core.Lambda(lambda x: (x / 255.0) - 0.5, input_shape=image_dims))
    # Three conv/pool stages with doubling filter counts: 32 -> 64 -> 128.
    for n_filters in (32, 64, 128):
        model.add(
            convolutional.Conv2D(
                n_filters, (3, 3), activation='relu', padding='same'))
        model.add(convolutional.MaxPooling2D(pool_size=(2, 2)))
    model.add(core.Flatten())
    # Two dense stages, each regularized with 50% dropout.
    for n_units in (512, 1024):
        model.add(core.Dense(n_units, activation='relu'))
        model.add(core.Dropout(0.5))
    model.add(core.Dense(6, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    return model
def test_Bidirectional_ragged_input(self, merge_mode):
    """A Bidirectional RNN over ragged input must equal a manual merge of
    its forward pass and its (re-reversed) backward pass."""
    np.random.seed(100)
    rnn = keras.layers.LSTM
    units = 3
    # Four ragged rows with 2, 1, 4 and 3 timesteps respectively.
    raw_rows = [
        [[1, 1, 1], [1, 1, 1]],
        [[1, 1, 1]],
        [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
    ]
    x = tf.cast(tf.ragged.constant(raw_rows, ragged_rank=1), "float32")
    # pylint: disable=g-long-lambda
    with self.cached_session():
        if merge_mode == "ave":
            merge_func = lambda y, y_rev: (y + y_rev) / 2
        elif merge_mode == "concat":
            merge_func = lambda y, y_rev: tf.concat((y, y_rev), axis=-1)
        elif merge_mode == "mul":
            merge_func = lambda y, y_rev: (y * y_rev)
        # pylint: enable=g-long-lambda

        inputs = keras.Input(
            shape=(None, 3), batch_size=4, dtype="float32", ragged=True)
        layer = keras.layers.Bidirectional(
            rnn(units, return_sequences=True), merge_mode=merge_mode)
        f_merged = keras.backend.function([inputs], layer(inputs))
        f_forward = keras.backend.function([inputs],
                                           layer.forward_layer(inputs))

        # TODO(kaftan): after KerasTensor refactor TF op layers should work
        # with many composite tensors, and this shouldn't need to be a lambda
        # layer.
        reverse_layer = core.Lambda(tf.reverse, arguments=dict(axis=[1]))
        f_backward = keras.backend.function(
            [inputs], reverse_layer(layer.backward_layer(inputs)))

        y_merged = convert_ragged_tensor_value(f_merged(x))
        y_expected = merge_func(
            convert_ragged_tensor_value(f_forward(x)),
            convert_ragged_tensor_value(f_backward(x)))
        self.assertAllClose(y_merged.flat_values, y_expected.flat_values)
def test_fixed_loss_scaling(self, strategy_fn):
    """With a fixed loss scale, gradients are scaled but the final variable
    update is unchanged (the optimizer unscales before applying)."""
    # Only loss scaling is under test here, not mixed precision.
    loss_scale = 8.0
    batch_size = 4
    with strategy_fn().scope():
        inp = layers.Input(shape=(1,), batch_size=batch_size)
        layer = mp_test_util.MultiplyLayer()
        out = layer(inp)

        # The gradient of the output at this point is 1. With loss scaling it
        # becomes 'loss_scale'; dividing by the batch size accounts for the
        # loss being averaged across batch elements.
        expected_gradient = loss_scale / batch_size
        grad_check_fn = mp_test_util.create_identity_with_grad_check_fn(
            [expected_gradient]
        )
        out = core.Lambda(grad_check_fn)(out)
        model = models.Model(inputs=inp, outputs=out)

        def loss_fn(y_true, y_pred):
            del y_true
            return tf.reduce_mean(y_pred)

        sgd = gradient_descent.SGD(1.0)
        opt = loss_scale_optimizer.LossScaleOptimizer(
            sgd, dynamic=False, initial_scale=loss_scale
        )
        model.compile(
            opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly()
        )

    self.assertEqual(backend.eval(layer.v), 1)
    features = np.ones((batch_size, 1))
    labels = np.ones((batch_size, 1))
    dataset = tf.data.Dataset.from_tensor_slices(
        (features, labels)
    ).batch(batch_size)
    model.fit(dataset)
    # The variable starts at 1 and receives a (unscaled) gradient of 1, so one
    # SGD step with lr=1 brings it to 0.
    self.assertEqual(backend.eval(layer.v), 0)
def test_adapt_preprocessing_stage_with_dict_input(self):
  """A functional preprocessing stage with dict inputs adapts correctly from
  dicts, lists, flattened datasets and dict-shaped datasets."""
  x0 = Input(shape=(3,), name='x0')
  x1 = Input(shape=(4,), name='x1')
  x2 = Input(shape=(3, 5), name='x2')

  # dimension will mismatch if x1 incorrectly placed.
  x1_sum = core.Lambda(
      lambda x: tf.reduce_sum(x, axis=-1, keepdims=True))(x1)
  x2_sum = core.Lambda(lambda x: tf.reduce_sum(x, axis=-1))(x2)

  l0 = PLMerge()
  l1 = PLMerge()
  l2 = PLSplit()
  merged = l0([x0, x1_sum])
  merged = l1([merged, x2_sum])
  z, y = l2(merged)

  stage = preprocessing_stage.FunctionalPreprocessingStage(
      {'x2': x2, 'x0': x0, 'x1': x1}, [y, z])
  stage.compile()

  one_array0 = np.ones((4, 3), dtype='float32')
  one_array1 = np.ones((4, 4), dtype='float32')
  one_array2 = np.ones((4, 3, 5), dtype='float32')

  def _assert_adapt_state(expected_count):
    # Each layer adapts exactly once per adapt() call, and upstream layers
    # adapt before downstream ones.
    for pl in (l0, l1, l2):
      self.assertEqual(pl.adapt_count, expected_count)
    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
    self.assertLessEqual(l1.adapt_time, l2.adapt_time)

  # Adapt from a dict of NumPy arrays (keys intentionally out of order).
  stage.adapt({'x1': one_array1, 'x0': one_array0, 'x2': one_array2})
  _assert_adapt_state(1)

  # Check call
  y, z = stage({
      'x1': tf.constant(one_array1),
      'x2': tf.constant(one_array2),
      'x0': tf.constant(one_array0)
  })
  self.assertAllClose(y, np.zeros((4, 3), dtype='float32') + 9.)
  self.assertAllClose(z, np.zeros((4, 3), dtype='float32') + 11.)

  # Adapt from a list of NumPy arrays.
  stage.adapt([one_array0, one_array1, one_array2])
  _assert_adapt_state(2)

  # Adapt from a flattened (tuple-element) batched dataset.
  adapt_data = tf.data.Dataset.from_tensor_slices(
      (one_array0, one_array1, one_array2))
  adapt_data = adapt_data.batch(2)
  stage.adapt(adapt_data)
  _assert_adapt_state(3)

  # Adapt from a dict-shaped batched dataset.
  adapt_data = tf.data.Dataset.from_tensor_slices({
      'x0': one_array0,
      'x2': one_array2,
      'x1': one_array1
  })
  adapt_data = adapt_data.batch(2)
  stage.adapt(adapt_data)
  _assert_adapt_state(4)

  # Adapting from unusable data must raise.
  with self.assertRaisesRegex(ValueError, 'requires a '):
    stage.adapt(None)
def test_dynamic_loss_scaling(self, strategy_fn, get_config=False):
    """Runs fit() with a dynamic LossScaleOptimizer and checks loss-scale
    behavior end to end: the scale doubles after `dynamic_growth_steps`
    finite-gradient steps and halves after a NaN step. The per-step gradient
    actually seen by the model is asserted via a grad-check Lambda layer.

    Args:
        strategy_fn: callable returning the tf.distribute strategy to run
            under.
        get_config: if True, round-trips the model through
            get_config()/from_config() before training.
    """
    strategy = strategy_fn()
    initial_loss_scale = 2.0
    batch_size = 4
    # Expected per-element gradient is loss_scale / batch_size because the
    # loss is a mean over the batch. Kept in a backend variable so the
    # expectation can be updated below as the loss scale changes.
    expected_gradient = backend.variable(
        [initial_loss_scale / batch_size], dtype=tf.float16
    )
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=tf.bool)
    with strategy.scope():
        opt = gradient_descent.SGD(1.0)
        opt = loss_scale_optimizer.LossScaleOptimizer(
            opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2
        )
        with policy.policy_scope("mixed_float16"):
            x = layers.Input(
                shape=(1,), batch_size=batch_size, dtype=tf.float16
            )
            layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
            y = layer(x)
            # Identity in the forward pass; injects NaN gradients in the
            # backward pass whenever have_nan_gradients is True.
            identity_with_nan_grads = (
                mp_test_util.create_identity_with_nan_gradients_fn(
                    have_nan_gradients
                )
            )
            y = core.Lambda(identity_with_nan_grads)(y)
            # Identity in the forward pass; asserts the incoming gradient
            # equals expected_gradient in the backward pass.
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    expected_dtype=tf.float16,
                    expected_gradient=expected_gradient,
                )
            )
            y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)
            if get_config:
                # Round-trip through the config machinery, then re-find the
                # MultiplyLayer instance inside the rebuilt model so later
                # assertions read the live variable.
                config = model.get_config()
                model = model.__class__.from_config(
                    config,
                    custom_objects={
                        "MultiplyLayer": mp_test_util.MultiplyLayer
                    },
                )
                (layer,) = (
                    layer
                    for layer in model.layers
                    if isinstance(layer, mp_test_util.MultiplyLayer)
                )

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            model.compile(
                opt,
                loss=loss_fn,
                run_eagerly=test_utils.should_run_eagerly(),
            )

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    model.fit(dataset)
    # The variables starts with 1 and has a gradient of 1, so will go down by 1
    # each step.
    self.assertEqual(backend.eval(layer.v), 0)
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -1)

    # There have been two steps without NaNs, so the loss scale will double
    # (dynamic_growth_steps=2); the grad-check expectation must track it.
    backend.set_value(
        expected_gradient, backend.get_value(expected_gradient * 2)
    )
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    model.fit(dataset)
    # Variable should not be updated
    self.assertEqual(backend.eval(layer.v), -2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(
        expected_gradient, backend.get_value(expected_gradient / 2)
    )
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -3)
def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
    """Tests mixed-precision features as they would occur in a resnet50-style
    model:

    * multiple layers, some using auto-cast variables and some not,
    * regularization on some variables and not others,
    * a fixed loss scale (when use_loss_scaling is True).
    """
    strategy = strategy_fn()
    if use_loss_scaling:
        loss_scale = 8.0
    learning_rate = 2**-14
    with strategy.scope():
        with policy.policy_scope(policy.Policy("mixed_float16")):
            x = layers.Input(shape=(1,), batch_size=2)
            layer1 = mp_test_util.MultiplyLayer(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=True,
            )
            layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16, use_operator=True
            )
            layer3 = mp_test_util.MultiplyLayer(
                assert_type=tf.float16, use_operator=False
            )
            layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=False,
            )
            # Chain the four multiply layers.
            y = x
            for mul_layer in (layer1, layer2, layer3, layer4):
                y = mul_layer(y)
            if use_loss_scaling:
                # The gradient of 'y' at this point is 1. With loss scaling,
                # the gradient is 'loss_scale'. We divide by the batch size of
                # 2 since the loss is averaged across batch elements.
                grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=[loss_scale / 2],
                    )
                )
                y = core.Lambda(grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            opt = gradient_descent.SGD(learning_rate)
            if use_loss_scaling:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=loss_scale
                )
            model.compile(
                opt,
                loss=loss_fn,
                run_eagerly=test_utils.should_run_eagerly(),
            )

    x = np.ones((2, 1))
    y = np.ones((2, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
    model.fit(dataset)
    for mul_layer in (layer1, layer2, layer3, layer4):
        if mul_layer.losses:
            # Layer has weight regularizer: the regularizer contributes an
            # extra gradient of 1, so the variable moves by 2 * learning_rate.
            self.assertEqual(
                backend.eval(mul_layer.v), 1 - 2 * learning_rate
            )
        else:
            # Layer does not have weight regularizer.
            self.assertEqual(backend.eval(mul_layer.v), 1 - learning_rate)
def build_model_200(self, hyperparameters):
    """Build and compile a two-branch ('ho'/'ao') residual-style dense network
    with five outputs (home score, away score, run differential, total runs,
    winner).

    The two branches share the same stack of Dense layers (weights are reused
    for both inputs). Every other hidden layer feeds a residual add, which is
    why an odd number of hidden layers is required (asserted below).

    Args:
        hyperparameters: dict with keys 'num_hiddenunits', 'num_hiddenlayers',
            'dropout', 'non_linearity', 'weight_decay_type', 'weight_decay',
            'num_inputfeatures', 'loss', 'learning_algo', 'learning_rate',
            'momentum', 'loss_weights'.

    Returns:
        The compiled Keras Model.

    NOTE(review): several names referenced here (class_weights,
    dummyscore_distro, scores_bias, class_weights_money, mergeadd, correlate,
    convolve, K, ...) are defined outside this method — verify against the
    enclosing module. `K.tf.py_func` is TF1-era API, deprecated in favor of
    `tf.py_function`.
    """
    seed = None
    np.random.seed(None)
    model = None
    num_hiddenunits = hyperparameters['num_hiddenunits']
    hidden_layers = hyperparameters['num_hiddenlayers']
    drop = hyperparameters['dropout']
    act = hyperparameters['non_linearity']
    #bias_initializer = 'zeros'
    kernel_initializer = VarianceScaling(scale=1.0,
                                         mode='fan_in',
                                         distribution='normal',
                                         seed=None)
    kernel_regularizer = hyperparameters['weight_decay_type'](
        hyperparameters['weight_decay'])
    activity_regularizer = l2(0.00)
    # Batch-norm kwargs (currently unused — the norm layers below are
    # commented out).
    bnorm_kwargs = {
        'axis': -1,
        'momentum': 0.99,
        'epsilon': 0.001,
        'center': True,
        'scale': True,
        'beta_initializer': 'zeros',
        'gamma_initializer': 'ones',
        'moving_mean_initializer': 'zeros',
        'moving_variance_initializer': 'ones',
        'beta_regularizer': None,
        'gamma_regularizer': None,
        'beta_constraint': None,
        'gamma_constraint': None
    }
    dense_kwargs = {
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer
    }

    # Bias-initializer callables; each returns a distribution captured from
    # the enclosing scope (not visible here — TODO confirm shapes match the
    # layers they would initialize). Most are currently unused.
    def output_bias(shape, dtype=None):
        return class_weights

    def dummyscore_bias(shape, dtype=None):
        return dummyscore_distro

    def dummytots_bias(shape, dtype=None):
        return dummytots_distro

    def dummyhomescore_bias(shape, dtype=None):
        return dummyhomescore_distro

    def dummyawayscore_bias(shape, dtype=None):
        return dummyawayscore_distro

    def scores_bias_f(shape, dtype=None):
        return scores_bias

    def winner_bias(shape, dtype=None):
        return class_weights_money

    # Row-wise cross-correlation of two batches (used to form the score
    # differential distribution).
    def batchcorrelate(ia, ib):
        assert ia.shape[0] == ib.shape[0]
        out = []
        for n in range(ia.shape[0]):
            a = ia[n, :]
            b = ib[n, :]
            out.append(correlate(a, b))
        return np.array(out)

    # Row-wise convolution of two batches (used to form the total-runs
    # distribution).
    def batchconvolve(ia, ib):
        assert ia.shape[0] == ib.shape[0]
        out = []
        for n in range(ia.shape[0]):
            a = ia[n, :]
            b = ib[n, :]
            out.append(convolve(a, b))
        return np.array(out)

    # Keras-graph wrappers around the NumPy helpers via py_func; correlation/
    # convolution of length-m and length-n vectors yields length m + n - 1.
    def k_batchcorrelate(inp_list):
        out = K.tf.py_func(batchcorrelate,
                           inp_list,
                           K.tf.float32,
                           stateful=False)
        out.set_shape(
            (None, inp_list[0].shape[-1] + inp_list[1].shape[-1] - 1))
        return out

    def k_batchcorrelate_shape(input_shape):
        return (None, input_shape[0][-1] + input_shape[1][-1] - 1)

    def k_batchconvolve(inp_list):
        out = K.tf.py_func(batchconvolve,
                           inp_list,
                           K.tf.float32,
                           stateful=False)
        out.set_shape(
            (None, inp_list[0].shape[-1] + inp_list[1].shape[-1] - 1))
        return out

    def k_batchconvolve_shape(input_shape):
        return (None, input_shape[0][-1] + input_shape[1][-1] - 1)

    sys.setrecursionlimit(10000)
    num_hoao_inputfeatures = hyperparameters['num_inputfeatures']
    # NOTE(review): bare `except` plus validation-by-assert; `assert` is
    # stripped under `python -O`, so this guard would silently vanish there.
    try:
        assert hidden_layers % 2 != 0
    except:
        print('ERROR: number of hidden layers must be odd')
        raise

    ###########################################################
    ###                 NEURAL NETWORK                      ###
    # One Input per branch: 'ho' (home) and 'ao' (away) — presumably; verify
    # naming against the caller.
    VisibleLayer = {}
    for x in ['ho', 'ao']:
        VisibleLayer[x] = Input(shape=(num_hoao_inputfeatures, ),
                                name=x + '_input')

    # Shared hidden stack. 'layers' holds the (shared) layer objects, keyed by
    # 'repN...'; 'tensors' holds each branch's activations, keyed by
    # '<branch>_repN...'. Every even layer index (n = 2, 4, ...) gets a
    # residual add that merges the previous drop/add output.
    HiddenCell = {}
    HiddenCell['layers'], HiddenCell['tensors'] = {}, {}
    for n in range(hidden_layers):
        nn = str(n + 1)
        HiddenCell['layers']['rep' + nn] = Dense(num_hiddenunits,
                                                 name='rep' + nn,
                                                 **dense_kwargs)
        #HiddenCell['layers']['rep'+nn+'_norm'] = normalization.BatchNormalization(**bnorm_kwargs)
        HiddenCell['layers']['rep' + nn + '_act'] = Activation(act)
        HiddenCell['layers']['rep' + nn + '_drop'] = Dropout(drop)
        if n > 0 and n % 2 == 0:
            HiddenCell['layers']['rep' + nn + '_add'] = mergeadd()
        for x in ['ho', 'ao']:
            # Pick this layer's input: the branch input for n == 0, the
            # previous dropout output early on / after an add, otherwise the
            # previous residual-add output.
            if n == 0:
                inp = VisibleLayer[x]
            elif n < 3 or n % 2 == 0:
                inp = HiddenCell['tensors'][x + '_rep' + str(n) + '_drop']
            else:
                inp = HiddenCell['tensors'][x + '_rep' + str(n) + '_add']
            HiddenCell['tensors'][
                x + '_rep' + nn] = HiddenCell['layers']['rep' + nn](inp)
            #HiddenCell['tensors'][x+'_rep'+nn+'_norm'] = HiddenCell['layers']['rep'+nn+'_norm'](HiddenCell['tensors'][x+'_rep'+nn])
            #HiddenCell['tensors'][x+'_rep'+nn+'_act'] = HiddenCell['layers']['rep'+nn+'_act'](HiddenCell['tensors'][x+'_rep'+nn+'_norm'])
            HiddenCell['tensors'][
                x + '_rep' + nn +
                '_act'] = HiddenCell['layers']['rep' + nn + '_act'](
                    HiddenCell['tensors'][
                        x + '_rep' + nn])  ### USE THIS FOR NO BATCH NORM
            HiddenCell['tensors'][
                x + '_rep' + nn +
                '_drop'] = HiddenCell['layers']['rep' + nn + '_drop'](
                    HiddenCell['tensors']
                    [x + '_rep' + nn + '_act'])  ### USE THIS FOR DROPOUT
            # Residual merge: first add (n == 2) sums two dropout outputs;
            # later adds sum the previous add with the current dropout.
            if n == 2:
                HiddenCell['tensors'][
                    x + '_rep' + nn +
                    '_add'] = HiddenCell['layers']['rep' + nn + '_add']([
                        HiddenCell['tensors'][x + '_rep' + str(n - 1) +
                                              '_drop'],
                        HiddenCell['tensors'][x + '_rep' + nn + '_drop']
                    ])
            elif n > 2 and n % 2 == 0:
                HiddenCell['tensors'][
                    x + '_rep' + nn +
                    '_add'] = HiddenCell['layers']['rep' + nn + '_add']([
                        HiddenCell['tensors'][x + '_rep' + str(n - 1) +
                                              '_add'],
                        HiddenCell['tensors'][x + '_rep' + nn + '_drop']
                    ])

    # Final representation per branch: the last dropout output for shallow
    # nets, the last residual add otherwise.
    if hidden_layers < 4:
        ho_repfin_drop = HiddenCell['tensors']['ho_rep' + str(hidden_layers) +
                                               '_drop']
        ao_repfin_drop = HiddenCell['tensors']['ao_rep' + str(hidden_layers) +
                                               '_drop']
    else:
        ho_repfin_drop = HiddenCell['tensors']['ho_rep' + str(hidden_layers) +
                                               '_add']
        ao_repfin_drop = HiddenCell['tensors']['ao_rep' + str(hidden_layers) +
                                               '_add']

    # Shared 26-way softmax over per-team scores (0..25 presumably — verify).
    output_score_layer = Dense(26,
                               activation='softmax',
                               bias_initializer='zeros',
                               name='dummyscore')
    output_dummyhomescore = output_score_layer(ho_repfin_drop)
    output_dummyawayscore = output_score_layer(ao_repfin_drop)

    # Cross-correlate the two score distributions to get the run-differential
    # distribution (length 26 + 26 - 1 = 51).
    batchcorrelate_layer = core.Lambda(k_batchcorrelate,
                                       output_shape=k_batchcorrelate_shape,
                                       name='dummydif_pre')
    output_dummyruns_pre = batchcorrelate_layer(
        [output_dummyhomescore, output_dummyawayscore])
    output_dummyruns_mid = Dense(256, activation=act,
                                 name='dummydif_mid')(output_dummyruns_pre)
    output_dummyruns = Dense(51, activation='softmax',
                             name='dummydif')(output_dummyruns_mid)

    # Convolve the two score distributions to get the total-runs distribution.
    batchconvolve_layer = core.Lambda(k_batchconvolve,
                                      output_shape=k_batchconvolve_shape,
                                      name='dummytots')
    output_dummytots = batchconvolve_layer(
        [output_dummyhomescore, output_dummyawayscore])

    # Winner head: a frozen linear layer (weights set explicitly below) over
    # the raw differential, then trainable layers to the 2-way softmax.
    output_winner_layer = Dense(2,
                                activation='linear',
                                name='winner_pre',
                                trainable=False)
    output_winner_pre = output_winner_layer(output_dummyruns_pre)
    output_winner_mid = Dense(48, activation=act,
                              name='winner_mid')(output_winner_pre)
    output_winner = Dense(2, activation='softmax',
                          name='winner')(output_winner_mid)

    ###########################################################
    model = Model(inputs=[VisibleLayer['ho'], VisibleLayer['ao']],
                  outputs=[
                      output_dummyhomescore, output_dummyawayscore,
                      output_dummyruns, output_dummytots, output_winner
                  ])
    model.compile(loss=hyperparameters['loss'],
                  optimizer=hyperparameters['learning_algo'](
                      lr=hyperparameters['learning_rate'],
                      decay=0.001,
                      momentum=hyperparameters['momentum']),
                  metrics=['accuracy'],
                  loss_weights=hyperparameters['loss_weights'])

    # Set weights for untrainable layers: the frozen 'winner_pre' kernel sums
    # the mass on each side of the 51-bin differential (above / below zero)
    # into the two winner logits; the middle (zero-differential) bin gets no
    # weight.
    ones = np.ones(25).reshape(-1, 1)
    zeros = np.zeros(25).reshape(-1, 1)
    bottom = np.concatenate([ones, zeros], axis=1)
    top = np.concatenate([zeros, ones], axis=1)
    middle = np.array([0, 0]).reshape(1, 2)
    winner_weights = np.concatenate([top, middle, bottom], axis=0)
    winner_biases = middle.reshape(2, )
    output_winner_layer.set_weights([winner_weights, winner_biases])
    return model
# NOTE(review): mid-script fragment — before_merge_rgb, before_merge_nir,
# soft_dense and inshape are defined earlier (not visible here), and the
# Python-2 `print` statements mark this as legacy Keras (pre-2.0 _keras_shape
# attribute). Verify against the full script before editing.

# Fuse the flattened RGB and NIR branch outputs into one vector.
merge_flat = keras.layers.concatenate([before_merge_rgb, before_merge_nir])
print merge_flat._keras_shape
soft_flat = core.Flatten()(soft_dense)
print soft_flat._keras_shape
# Tile the soft (attention-like) vector so its flattened length matches
# merge_flat for the element-wise multiply below.
repeat = core.RepeatVector(before_merge_nir._keras_shape[1])(soft_flat)
print repeat._keras_shape
repeat_flat = core.Flatten()(repeat)
print repeat_flat._keras_shape
# Element-wise gating of the merged features, then reshape into two rows —
# presumably one per modality (RGB / NIR); confirm against earlier layers.
reshape_now = keras.layers.multiply([repeat_flat, merge_flat])
reshape_now = core.Reshape((2, -1))(reshape_now)
outshape = reshape_now._keras_shape
# Slice each of the two rows out as its own (batch, 1, features) tensor.
layer1 = core.Lambda(lambda x: x[:, 0:1, :],
                     output_shape=lambda x:
                     (outshape[0], 1, outshape[2]))(reshape_now)
layer2 = core.Lambda(lambda x: x[:, 1:2, :],
                     output_shape=lambda x:
                     (outshape[0], 1, outshape[2]))(reshape_now)
#-------------------------------------------------------------------------------------------------------------------------------
# CONACTENATE the ends of RGB & NIR
# Sum the two gated modality rows, then restore the original 4-D image shape.
merge_rgb_nir = keras.layers.add([layer1, layer2])
print merge_rgb_nir._keras_shape
#merge_rgb_nir = keras.layers.merge([soft_dense,before_merge_rgb,before_merge_nir], mode=scalarmult)
merge_rgb_nir = core.Flatten()(merge_rgb_nir)
merge_rgb_nir = core.Reshape(
    (inshape[1], inshape[2], inshape[3]))(merge_rgb_nir)

# DECONVOLUTION Layers