def build_cnn_model(n_features: int, n_classes: int):
    """Build and compile the P1FP(C) convolutional classifier using Keras.

    The flat feature vector is reshaped to ``(1, n_features, 1)`` and passed
    through two identical convolution / max-pool / local-response-normalisation
    stages, followed by a tanh dense layer, dropout and a softmax output.

    Args:
        n_features: Length of each flat input feature vector.
        n_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``keras.Sequential`` model (SGD with an exponentially
        decaying learning rate, categorical cross-entropy loss, top-3 and
        plain accuracy metrics).
    """
    model = keras.Sequential()
    model.add(
        layers.Reshape(
            (1, n_features, 1), input_shape=(n_features,), name="input"))

    # Two identical feature-extraction stages.
    for _ in range(2):
        model.add(
            layers.Conv2D(
                128,
                12,
                activation="relu",
                kernel_regularizer="l2",
                padding="same"))
        model.add(layers.MaxPool2D(10, padding="same"))
        model.add(layers.Lambda(nn.local_response_normalization))

    # The data is flattened for the computation regardless; tflearn kept the
    # flattened result whereas Keras does not, so flatten explicitly.
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation="tanh"))
    model.add(layers.Dropout(rate=0.2))
    model.add(layers.Dense(n_classes, activation="softmax", name="target"))

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        0.05, decay_steps=1000, decay_rate=0.96)
    sgd = keras.optimizers.SGD(learning_rate=lr_schedule)
    model.compile(
        optimizer=sgd,
        loss="categorical_crossentropy",
        metrics=[keras.metrics.TopKCategoricalAccuracy(3), "accuracy"])
    return model
def classification_dnn(input_features):
    """Build an uncompiled fully connected classifier.

    The network has four tanh hidden layers (1024, 512, 256, 128 units) and a
    100-unit output layer without an activation. Every kernel is drawn from a
    normal distribution (mean 0.0, stddev 0.01) and every bias starts at zero.

    Args:
        input_features: Number of features in each input vector.

    Returns:
        A ``tf.compat.v1.keras.Sequential`` model.
    """
    weight_init = tf.compat.v1.keras.initializers.random_normal(0.0, 0.01)
    init_kwargs = {
        'kernel_initializer': weight_init,
        'bias_initializer': 'zeros',
    }

    # Only the first layer declares the input shape.
    stack = [
        keraslayers.Dense(1024,
                          activation=tf.nn.tanh,
                          input_shape=(input_features,),
                          **init_kwargs)
    ]
    for width in (512, 256, 128):
        stack.append(
            keraslayers.Dense(width, activation=tf.nn.tanh, **init_kwargs))
    # Output layer: no activation.
    stack.append(keraslayers.Dense(100, **init_kwargs))

    return tf.compat.v1.keras.Sequential(stack)
def __init__(self, output_dimension, num_mixtures, **kwargs):
    """Create the three dense sub-layers of the mixture density layer.

    Args:
        output_dimension: Dimensionality of a single mixture component.
        num_mixtures: Number of mixture components.
        **kwargs: Forwarded unchanged to the base-class constructor.
    """
    self.output_dim = output_dimension
    self.num_mix = num_mixtures
    # One value per (mixture, output dimension) pair for means and scales.
    per_component_units = self.num_mix * self.output_dim
    with tf.name_scope('MDN'):
        # Means: no activation.
        self.mdn_mus = layers.Dense(per_component_units, name='mdn_mus')
        # Scales: activation is elu_plus_one_plus_epsilon (defined elsewhere;
        # presumably keeps the values strictly positive -- TODO confirm).
        self.mdn_sigmas = layers.Dense(
            per_component_units,
            activation=elu_plus_one_plus_epsilon,
            name='mdn_sigmas')
        # Mixture weights: one logit per mixture.
        self.mdn_pi = layers.Dense(self.num_mix, name='mdn_pi')
    super(MDN, self).__init__(**kwargs)
def discriminator_network(x, data_dim, base_n_count):
    """Stack the discriminator layers on top of tensor ``x``.

    Three ReLU dense layers of width ``4x``, ``2x`` and ``1x`` ``base_n_count``
    are followed by a single sigmoid unit.

    Args:
        x: Input tensor.
        data_dim: Unused by this function.
        base_n_count: Base width from which the hidden layer sizes are derived.

    Returns:
        The output tensor of the final sigmoid dense layer.
    """
    hidden = x
    for width_multiplier in (4, 2, 1):
        hidden = layers.Dense(
            base_n_count * width_multiplier, activation='relu')(hidden)
    return layers.Dense(1, activation='sigmoid')(hidden)
def generator_network_w_label(x, labels, data_dim, label_dim, base_n_count):
    """Stack the label-conditioned generator layers on top of ``x``.

    ``labels`` is concatenated to the input, passed through three ReLU dense
    layers of width ``1x``, ``2x`` and ``4x`` ``base_n_count`` and a linear
    ``data_dim``-unit layer; the labels are then concatenated to the result.

    Args:
        x: Input tensor.
        labels: Label tensor joined to both the input and the output.
        data_dim: Number of units in the final dense layer.
        label_dim: Unused by this function.
        base_n_count: Base width from which the hidden layer sizes are derived.

    Returns:
        The generated tensor concatenated with ``labels``.
    """
    hidden = layers.concatenate([x, labels])
    for width_multiplier in (1, 2, 4):
        hidden = layers.Dense(
            base_n_count * width_multiplier, activation='relu')(hidden)
    generated = layers.Dense(data_dim)(hidden)
    return layers.concatenate([generated, labels])
def critic_network(x, data_dim, base_n_count):
    """Stack the critic layers on top of tensor ``x``.

    Three ReLU dense layers of width ``4x``, ``2x`` and ``1x`` ``base_n_count``
    are followed by a single linear unit (no activation).

    Args:
        x: Input tensor.
        data_dim: Unused by this function.
        base_n_count: Base width from which the hidden layer sizes are derived.

    Returns:
        The output tensor of the final one-unit dense layer.
    """
    hidden = x
    for width_multiplier in (4, 2, 1):
        hidden = layers.Dense(
            base_n_count * width_multiplier, activation='relu')(hidden)
    return layers.Dense(1)(hidden)
def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text.

    Args:
        params: Mapping of hyperparameters. Keys read here: 'fix_len',
            'vocab_size', 'emb_size', 'hidden_lstm_size', 'dropout_rate_lstm',
            'dropout_rate', 'random_seed', 'variational', 'n_class_in', plus
            'n_train' when 'variational' is truthy or 'reg_weight' otherwise.
        training_dr_lstm: Value passed as ``training`` when calling the LSTM
            layer.
        training_dr_ll: Value passed as ``training`` when calling the dropout
            layer that precedes the last layer.

    Returns:
        A Keras model named 'rnn' that maps the sequence input to softmax
        probabilities.
    """
    # `(n)` is just `n`; the trailing comma makes the 1-tuple that
    # `Input(shape=...)` expects.
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)

    # vocab+1 because of padding
    seq_emb = layers.Embedding(
        params['vocab_size'] + 1,
        params['emb_size'],
        input_length=params['fix_len'])(seq_input)

    lstm_out = layers.LSTM(
        params['hidden_lstm_size'],
        dropout=params['dropout_rate_lstm'])(
            seq_emb, training=training_dr_lstm)

    out = layers.Dropout(
        rate=params['dropout_rate'],
        seed=params['random_seed'])(lstm_out, training=training_dr_ll)

    if params['variational']:
        # Scale the KL loss by the number of training examples: a larger
        # training dataset depends less on the prior.
        # NOTE(review): TFP documents kernel_divergence_fn as receiving
        # (posterior, prior, sample). With the parameter names used here the
        # call below would evaluate KL(prior || posterior) -- confirm that
        # this ordering is intended before changing it.
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)

    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
def testConfigurableDenseFunctionality(self):
    """ConfigurableDense matches Dense's output shape and adds a MatMul op."""
    configurable_out = ops.ConfigurableDense(units=5)(self.inputs)
    reference_out = keras_layers.Dense(units=5)(self.inputs)
    self.assertAllEqual(configurable_out.shape, reference_out.shape)

    # The layer must have registered its Tensordot MatMul in the default graph.
    graph_op_names = [
        op.name for op in tf.get_default_graph().get_operations()
    ]
    self.assertIn('configurable_dense/Tensordot/MatMul', graph_op_names)
def __init__(self,
             numbers_of_units,
             hidden_activation=None,
             out_activation=None):
    """Constructor.

    Builds a sequential stack of dense layers and hands it to the base class
    together with the name '<num_layers>FCL' and the output size.

    Args:
      numbers_of_units: (list of int) number of units for every layer,
        including the output layer. Must contain at least one entry.
      hidden_activation: activation function applied after each hidden layer;
        ignored if there are no hidden layers.
      out_activation: activation function applied at the output layer.
    """
    num_layers = len(numbers_of_units)
    assert num_layers >= 1

    # Every layer but the last is a hidden layer.
    output_index = num_layers - 1
    dense_stack = []
    for index, units in enumerate(numbers_of_units):
        if index == output_index:
            activation = out_activation
        else:
            activation = hidden_activation
        dense_stack.append(layers.Dense(units, activation=activation))
    network = models.Sequential(dense_stack)

    super(FCLComponent, self).__init__(
        '%sFCL' % num_layers, network, [numbers_of_units[-1]])
def PersonalizedAttentivePooling(dim1, dim2, dim3, seed=0):
    """Soft alignment attention implementation.

    The value input goes through dropout and a tanh dense projection of width
    ``dim3``; the projection is dotted with the query, soft-maxed, and the
    resulting weights are used to combine the (dropped-out) values.

    Args:
        dim1 (int): first dimension of the value shape.
        dim2 (int): second dimension of the value shape.
        dim3 (int): size of the query vector.
        seed (int): seed for the Glorot-uniform kernel initializer.

    Returns:
        object: a Keras model taking ``[values, query]`` and producing the
        attention-weighted summary of the values.
    """
    values_in = keras.Input(shape=(dim1, dim2), dtype="float32")
    query_in = keras.Input(shape=(dim3,), dtype="float32")

    dropped_values = layers.Dropout(0.2)(values_in)
    projection = layers.Dense(
        dim3,
        activation="tanh",
        kernel_initializer=keras.initializers.glorot_uniform(seed=seed),
        bias_initializer=keras.initializers.Zeros(),
    )
    projected = projection(dropped_values)

    scores = layers.Dot(axes=-1)([query_in, projected])
    attention = layers.Activation("softmax")(scores)
    pooled = layers.Dot((1, 1))([dropped_values, attention])

    return keras.Model([values_in, query_in], pooled)
def get_model(cfg, encoder_inputs, encoder_outputs):
    """Attach a GRU decoder to an existing encoder and return the full model.

    Args:
        cfg: Configuration object; ``num_input_tokens``, ``latent_dim`` and
            ``num_output_tokens`` are read from it.
        encoder_inputs: Input tensor(s) of the encoder, used as the first
            model input.
        encoder_outputs: Encoder output, used as the decoder GRU's initial
            state.

    Returns:
        A Keras model mapping ``[encoder_inputs, decoder_inputs]`` to a
        per-step softmax over ``cfg.num_output_tokens`` tokens.
    """
    # The decoder receives its own token sequence (teacher forcing).
    decoder_inputs = layers.Input(shape=(None,), name='Decoder-Input')

    embedding = layers.Embedding(
        cfg.num_input_tokens,
        cfg.latent_dim,
        name='Decoder-Embedding',
        mask_zero=False)
    embedded = embedding(decoder_inputs)
    normalized_emb = layers.BatchNormalization(
        name='Decoder-Batchnorm-1')(embedded)

    decoder_gru = layers.GRU(
        cfg.latent_dim,
        return_state=True,
        return_sequences=True,
        name='Decoder-GRU')
    # The returned final state is not needed here.
    gru_sequence, _ = decoder_gru(normalized_emb, initial_state=encoder_outputs)
    normalized_seq = layers.BatchNormalization(
        name='Decoder-Batchnorm-2')(gru_sequence)

    output_layer = layers.Dense(
        cfg.num_output_tokens, activation='softmax', name='Final-Output-Dense')
    decoder_outputs = output_layer(normalized_seq)

    return models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
def get_simple_rnn_model(event_dim, is_Training, temperature=1):
    """Build a two-layer LSTM model with a temperature-scaled softmax output.

    Args:
        event_dim: Dimensionality of a single event; also the width of the
            softmax output layer.
        is_Training: When falsy the model is configured for generation: both
            LSTM layers become stateful and the input is fixed to one event
            per step with a batch size of one.
        temperature: Divisor applied to the LSTM output before the final
            dense softmax layer.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    common_args = {
        'units': 128,
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }
    # Training input shape is (None, event_dim): sequence length may differ
    # between batches, every event has `event_dim` features.
    first_lstm_args = dict(common_args, input_shape=(None, event_dim))
    second_lstm_args = dict(common_args)

    if not is_Training:
        # Generation: events are predicted one at a time, so state must be
        # carried over between calls.
        first_lstm_args.update(
            input_shape=(1, event_dim),
            batch_input_shape=(1, 1, event_dim),
            stateful=True)
        second_lstm_args['stateful'] = True

    model = keras.Sequential()
    model.add(layers.LSTM(**first_lstm_args))
    model.add(layers.LSTM(**second_lstm_args))
    model.add(layers.Lambda(lambda x: x / temperature))
    model.add(layers.Dense(units=event_dim, activation='softmax'))
    return model
def _build_userencoder(self, titleencoder, type="ini"):
    """The main function to create user encoder of LSTUR.

    Args:
        titleencoder (object): the news encoder of LSTUR.
        type (str): how the long-term user embedding is combined with the
            GRU over clicked titles. "ini" uses it as the GRU's initial
            state; "con" concatenates it with the GRU output and projects
            the result through a dense layer.

    Return:
        object: the user encoder of LSTUR.

    Raises:
        ValueError: if `type` is neither "ini" nor "con".
    """
    # Fail early with a clear message instead of an UnboundLocalError on
    # `user_present` further down.
    if type not in ("ini", "con"):
        raise ValueError(
            'type must be "ini" or "con", got {!r}'.format(type)
        )

    hparams = self.hparams
    his_input_title = keras.Input(
        shape=(hparams.his_size, hparams.title_size), dtype="int32"
    )
    user_indexes = keras.Input(shape=(1,), dtype="int32")

    # Long-term user representation: one embedding row per known user id.
    user_embedding_layer = layers.Embedding(
        len(self.train_iterator.uid2index),
        hparams.gru_unit,
        trainable=True,
        embeddings_initializer="zeros",
    )
    long_u_emb = layers.Reshape((hparams.gru_unit,))(
        user_embedding_layer(user_indexes)
    )

    click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title)

    # Both variants run the same GRU over the masked click history.
    gru = layers.GRU(
        hparams.gru_unit,
        kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
        recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
        bias_initializer=keras.initializers.Zeros(),
    )
    masked_clicks = layers.Masking(mask_value=0.0)(click_title_presents)

    if type == "ini":
        user_present = gru(masked_clicks, initial_state=[long_u_emb])
    else:  # type == "con"
        short_uemb = gru(masked_clicks)
        user_present = layers.Concatenate()([short_uemb, long_u_emb])
        user_present = layers.Dense(
            hparams.gru_unit,
            bias_initializer=keras.initializers.Zeros(),
            kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
        )(user_present)

    model = keras.Model(
        [his_input_title, user_indexes], user_present, name="user_encoder"
    )
    return model
def __init__(self, obs_size, n_actions, model=None, use_target_network=False, learning_rate=1e-3, reward_discount=0.99):
    """Set up the network, its TF1 placeholders and the loss.

    Args:
        obs_size: Shape of a single observation. It is unpacked with ``*``
            below, so it must be a sequence.
        n_actions: Number of actions; width of the default model's output
            layer.
        model: Optional pre-built Keras model. When ``None`` a small dense
            network (20 -> 10 -> n_actions) is created.
        use_target_network: When truthy a clone of ``model`` is stored as
            ``self.target_network``.
        learning_rate: Stored as ``self.lr`` and passed to
            ``self.set_learning_rate``.
        reward_discount: Stored as ``self.rd``.
    """
    self.n_actions = n_actions
    self.lr = learning_rate
    self.rd = reward_discount
    # Init model
    if model is None:
        model = models.Sequential()
        model.add(layers.Dense(20, input_shape=obs_size, activation='elu'))
        model.add(layers.Dense(10, activation='elu'))
        model.add(layers.Dense(n_actions))
        model.summary()
    self.model = model
    self.use_target_network = use_target_network
    # Placeholders fed at training time.
    # NOTE(review): `shape=(None)` is just `None` (unknown shape), not the
    # 1-tuple `(None,)` -- confirm which one is intended.
    self.state_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
    self.actions_ph = tf.placeholder(tf.int32, shape=(None))
    self.rewards_ph = tf.placeholder(tf.float32, shape=(None))
    self.next_states_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
    self.is_done_ph = tf.placeholder(tf.float32, shape=(None))
    self.trainable_variables = self.model.trainable_variables
    if use_target_network:
        self.target_network = tf.keras.models.clone_model(self.model)
    # Build the loss from the placeholders (lossf is defined elsewhere).
    self.loss = self.lossf(self.state_ph, self.actions_ph, self.rewards_ph, self.next_states_ph, self.is_done_ph)
    self.set_learning_rate(self.lr)
    # NOTE(review): called regardless of `use_target_network`; presumably the
    # method handles the disabled case itself -- verify.
    self.update_target_network()
def stp_transformation(current_image, stp_input, num_masks):
    """Apply spatial transformer predictor (STP) to previous image.

    A fresh 6-unit dense layer is created for each transformation; its output
    is offset by the identity affine parameters before being passed to
    ``spatial_transformer``.

    Args:
      current_image: previous image to be transformed.
      stp_input: hidden layer to be used for computing STN parameters.
      num_masks: number of masks; ``num_masks - 1`` transformations are
        produced.

    Returns:
      List of images transformed by the predicted STP parameters.
    """
    # Row-major 2x3 identity affine transform.
    identity_params = tf.convert_to_tensor(
        np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))

    return [
        spatial_transformer(
            current_image, Layers.Dense(6)(stp_input) + identity_params)
        for _ in range(num_masks - 1)
    ]
def cdna_transformation(current_image,
                        cdna_input,
                        num_masks=10,
                        color_channels=3,
                        dna_kernel_size=5,
                        relu_shift=1e-12):
    """Apply convolutional dynamic neural advection to previous image.

    Args:
      current_image: previous image to be transformed.
      cdna_input: hidden layer to be used for computing CDNA kernels.
      num_masks: the number of masks and hence the number of CDNA
        transformations.
      color_channels: the number of color channels in the images.
      dna_kernel_size: side length of each (square) predicted kernel.
      relu_shift: small offset subtracted before and added back after the
        ReLU applied to the raw kernels.

    Returns:
      List of images transformed by the predicted CDNA kernels.
    """
    batch_size = int(cdna_input.get_shape()[0])
    image_shape = current_image.get_shape()
    height = int(image_shape[1])
    width = int(image_shape[2])

    # Predict kernels using a linear function of the last hidden layer.
    kernels = Layers.Dense(
        dna_kernel_size * dna_kernel_size * num_masks)(cdna_input)

    # Reshape, apply the shifted ReLU, then normalize each kernel by its sum.
    kernels = tf.reshape(
        kernels,
        [batch_size, dna_kernel_size, dna_kernel_size, 1, num_masks])
    kernels = tf.nn.relu(kernels - relu_shift) + relu_shift
    kernel_sums = tf.reduce_sum(kernels, [1, 2, 3], keepdims=True)
    kernels = kernels / kernel_sums

    # The same transformation is applied to every color channel, so the color
    # channel dimension is treated as the batch dimension. The batch
    # dimension is treated as the channel dimension so that depthwise_conv2d
    # can apply a different transformation to each sample.
    kernels = tf.transpose(kernels, [1, 2, 0, 4, 3])
    kernels = tf.reshape(
        kernels, [dna_kernel_size, dna_kernel_size, batch_size, num_masks])
    # Swap the batch and channel dimensions of the image.
    swapped_image = tf.transpose(current_image, [3, 1, 2, 0])

    # Transform image.
    advected = tf.nn.depthwise_conv2d(
        swapped_image, kernels, [1, 1, 1, 1], 'SAME')

    # Transpose the dimensions back to where they belong and split per mask.
    advected = tf.reshape(
        advected, [color_channels, height, width, batch_size, num_masks])
    advected = tf.transpose(advected, [3, 1, 2, 0, 4])
    return tf.unstack(advected, axis=-1)
def build_model(
    n_classes: int,
    n_packet_features: int,
    n_meta_features: int = 7,
    dilations: bool = True,
    tag: str = "varcnn",
):
    """Build the Var-CNN model.

    The resulting model takes a single input of shape
    (n_samples, n_packet_features + n_meta_features). The meta features
    must be the rightmost (last) features in the matrix. The model
    handles separating the two types of features and reshaping them
    as necessary.

    Parameters:
    -----------
    n_classes :
        The number of classes to be predicted.

    n_packet_features :
        The number of packet features such as the number of interarrival
        times or the number of packet directions or sizes.

    n_meta_features:
        The number of meta features such as total packet counts, total
        transmission duration, etc. Zero disables the metadata branch.

    dilations :
        Selects ``dilated_basic_1d`` (True) or ``basic_1d`` (False) as the
        ResNet block.

    tag :
        Passed through to ``ResNet18``.
    """
    use_metadata = n_meta_features > 0
    resnet_block = dilated_basic_1d if dilations else basic_1d

    input_layer = keras.Input(
        shape=(n_packet_features + n_meta_features,), name="input")

    # Packet branch (dir or time ResNet): drop the trailing meta features,
    # if any, and add a channel axis.
    if use_metadata:
        packet_features = Crop(end=n_packet_features)(input_layer)
    else:
        packet_features = input_layer
    packet_features = layers.Reshape((n_packet_features, 1))(packet_features)
    combined = ResNet18(packet_features, tag, block=resnet_block)

    if use_metadata:
        # Metadata branch: a small MLP over the rightmost columns; consider
        # its output the embedding of all the metadata.
        metadata_output = Crop(start=-n_meta_features)(input_layer)
        metadata_output = layers.Dense(32)(metadata_output)
        metadata_output = layers.BatchNormalization()(metadata_output)
        metadata_output = layers.Activation('relu')(metadata_output)

        combined = layers.Concatenate()([combined, metadata_output])

        # Better to have a final fc layer when combining multiple branches.
        combined = layers.Dense(1024)(combined)
        combined = layers.BatchNormalization()(combined)
        combined = layers.Activation('relu')(combined)
        combined = layers.Dropout(0.5)(combined)

    model_output = layers.Dense(
        units=n_classes, activation='softmax', name='model_output')(combined)

    model = keras.Model(inputs=input_layer, outputs=model_output)
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=keras.optimizers.Adam(0.001))
    return model
def generator_network(x, data_dim, base_n_count):
    """Stack the generator layers on top of tensor ``x``.

    Three ReLU dense layers of width ``1x``, ``2x`` and ``4x`` ``base_n_count``
    are followed by a linear ``data_dim``-unit output layer.

    Args:
        x: Input tensor.
        data_dim: Number of units in the output layer.
        base_n_count: Base width from which the hidden layer sizes are derived.

    Returns:
        The output tensor of the final dense layer.
    """
    hidden = x
    for width_multiplier in (1, 2, 4):
        hidden = layers.Dense(
            base_n_count * width_multiplier, activation='relu')(hidden)
    return layers.Dense(data_dim)(hidden)
def ResNet50(method, num_classes, num_updates, dropout_rate):
    """Instantiates the ResNet50 architecture.

    Args:
      method: `str`, method for accounting for uncertainty. Must be one of
        ['vanilla', 'll_dropout', 'll_svi', 'dropout', 'svi',
        'dropout_nofirst'].
      num_classes: `int` number of classes for image classification.
      num_updates: integer, total steps in an epoch (for weighting the loss).
      dropout_rate: Dropout rate for ll_dropout, dropout methods.

    Returns:
      A Keras model instance.

    Raises:
      ValueError: if a dropout method is combined with a zero dropout rate,
        or a non-dropout method with a nonzero one.

    pylint: disable=invalid-name
    """
    # Determine proper input shape.
    channels_first = backend.image_data_format() == 'channels_first'
    input_shape = (3, 224, 224) if channels_first else (224, 224, 3)
    bn_axis = 1 if channels_first else 3

    is_dropout_method = method in ['dropout', 'll_dropout', 'dropout_nofirst']
    if is_dropout_method != (dropout_rate > 0.):
        raise ValueError(
            'Dropout rate should be nonzero iff a dropout method is used.'
            'Method is {}, dropout is {}.'.format(method, dropout_rate))

    use_variational_layers = method == 'svi'
    # Only the full-network dropout methods apply dropout in hidden layers.
    if method in ['dropout', 'dropout_nofirst']:
        hidden_layer_dropout = dropout_rate
    else:
        hidden_layer_dropout = 0.

    img_input = layers.Input(shape=input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    if (dropout_rate > 0.) and (method != 'dropout_nofirst'):
        x = layers.Dropout(hidden_layer_dropout)(x, training=True)
    x = layers.Conv2D(
        64, (7, 7),
        use_bias=False,
        strides=(2, 2),
        padding='valid',
        kernel_initializer='he_normal',
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name='conv1')(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON,
        name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    block_options = dict(
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    conv_block = functools.partial(conv_block_base, **block_options)
    identity_block = functools.partial(identity_block_base, **block_options)

    # (stage, filters, block names, extra kwargs for the leading conv block).
    # Each stage starts with a conv block followed by identity blocks; only
    # stage 2 passes explicit strides to its conv block.
    stage_specs = (
        (2, (64, 64, 256), 'abc', {'strides': (1, 1)}),
        (3, (128, 128, 512), 'abcd', {}),
        (4, (256, 256, 1024), 'abcdef', {}),
        (5, (512, 512, 2048), 'abc', {}),
    )
    for stage, filters, block_names, conv_kwargs in stage_specs:
        x = conv_block(
            x, 3, list(filters),
            stage=stage, block=block_names[0], **conv_kwargs)
        for block_name in block_names[1:]:
            x = identity_block(
                x, 3, list(filters), stage=stage, block=block_name)

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    if dropout_rate > 0.:
        x = layers.Dropout(dropout_rate)(x, training=True)

    if method in ['ll_svi', 'svi']:
        x = tfpl.dense_variational_v2.DenseVariational(
            units=num_classes,
            make_posterior_fn=posterior_mean_field,
            make_prior_fn=functools.partial(
                prior_trainable, num_updates=num_updates),
            use_bias=True,
            kl_weight=1. / num_updates,
            kl_use_exact=True,
            name='fc1000')(x)
    else:
        x = layers.Dense(
            num_classes,
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name='fc1000')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet50')
if USE_AMP: tf.keras.mixed_precision.experimental.set_policy('infer_float32_vars') in_id = layers.Input(shape=(MAX_SEQ_LEN, ), name="input_ids") in_mask = layers.Input(shape=(MAX_SEQ_LEN, ), name="input_masks") in_segment = layers.Input(shape=(MAX_SEQ_LEN, ), name="segment_ids") in_bert = [in_id, in_mask, in_segment] l_bert = bert_utils.BERT(fine_tune_layers=TUNE_LAYERS, bert_path=BERT_PATH, return_sequence=False, output_size=H_SIZE, debug=False)(in_bert) out_pred = layers.Dense(num_classes, activation="softmax")(l_bert) model = tf.keras.models.Model(inputs=in_bert, outputs=out_pred) # In[10]: opt = tf.keras.optimizers.Adam(lr=LEARNING_RATE) if USE_AMP: opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer( opt, "dynamic") # In[11]: model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,
def __init__(self,
             word_embedding,
             data,
             use_cudnn_lstm=False,
             plot_model_architecture=True):
    """Build and compile a siamese LSTM classifier over sentence pairs.

    Both inputs share one frozen embedding layer and one LSTM layer. The two
    LSTM outputs are merged as ``1 - |left - right|`` and classified by a
    ReLU dense layer followed by a 3-way softmax.

    Args:
        word_embedding: Embedding provider; ``embed_dim`` and
            ``get_vector(word)`` are used.
        data: Dataset object; ``vocab_size``, ``premise``, ``hypothesis``,
            ``max_len`` and ``vocab`` are read. ``vocab`` is iterated as
            ``(word, index)`` pairs.
        use_cudnn_lstm: Use ``layers.CuDNNLSTM`` instead of ``layers.LSTM``.
        plot_model_architecture: Not used by this constructor.
    """
    self.hidden_units = 300
    self.embed_model = word_embedding
    self.input_dim = word_embedding.embed_dim
    self.vocab_size = data.vocab_size
    self.left = data.premise
    self.right = data.hypothesis
    self.max_len = data.max_len
    self.dense_units = 32
    self.name = '{}_glove{}_lstm{}_dense{}'.format(
        str(int(time.time())), self.input_dim, self.hidden_units,
        self.dense_units)

    # Rows of words without a known vector stay all-zero.
    embedding_matrix = np.zeros((self.vocab_size, self.input_dim))
    for word, i in data.vocab:
        embedding_vector = self.embed_model.get_vector(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    embed = layers.Embedding(
        input_dim=self.vocab_size,
        output_dim=self.input_dim,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=self.max_len,
        mask_zero=True,
        trainable=False)

    if use_cudnn_lstm:
        lstm = layers.CuDNNLSTM(
            self.hidden_units,
            input_shape=(None, self.input_dim),
            unit_forget_bias=True,
            kernel_initializer='he_normal',
            kernel_regularizer='l2',
            name='lstm_layer')
    else:
        lstm = layers.LSTM(
            self.hidden_units,
            input_shape=(None, self.input_dim),
            unit_forget_bias=True,
            activation='relu',
            kernel_initializer='he_normal',
            kernel_regularizer='l2',
            name='lstm_layer')

    # `(n)` is just `n`; the trailing comma makes the 1-tuple that
    # `Input(shape=...)` expects.
    left_input = Input(shape=(self.max_len,), name='input_1')
    right_input = Input(shape=(self.max_len,), name='input_2')

    # The same embedding and LSTM weights encode both sentences.
    embed_left = embed(left_input)
    embed_right = embed(right_input)
    print('embed:', embed_right.shape)

    left_output = lstm(embed_left)
    right_output = lstm(embed_right)
    print('lstm:', right_output.shape)

    def l1_norm(x):
        # Element-wise similarity: 1 minus the absolute difference.
        return 1 - K.abs(x[0] - x[1])

    merged = layers.Lambda(
        function=l1_norm,
        output_shape=lambda x: x[0],
        name='L1_distance')([left_output, right_output])
    print('merged:', merged.shape)

    dense_1 = layers.Dense(self.dense_units, activation='relu')(merged)
    print('dense1:', dense_1.shape)

    output = layers.Dense(
        3, activation='softmax', name='output_layer')(dense_1)
    print('output:', output.shape)

    self.model = Model(inputs=[left_input, right_input], outputs=output)
    self.compile()
def __init__(self, Model, num_mixtures=1):
    """Gaussian mixtures initializer: turn a neural network into a GMN.

    The model is instantiated and its last layer is patched in place: three
    dense sub-layers (means, sigmas, mixture logits) are attached to it and
    its ``build``, ``call``, ``compute_output_shape`` and ``get_config``
    methods are replaced by the nested functions defined below.

    Args:
      Model: Callable returning the Keras model to convert. The model's last
        layer must expose ``units``.
      num_mixtures: Total number of Gaussians to fit the output space with.

    Returns:
      None.
    """
    self.model = Model()
    # The last layer is the one being converted; its `units` becomes the
    # per-mixture output dimension.
    layer = self.model.layers[-1]
    self.output_dim = layer.units
    layer.output_dim = layer.units
    self.num_mix = num_mixtures
    layer.num_mix = num_mixtures
    with tf.name_scope('MDN'):
        layer.mdn_mus = layers.Dense(layer.num_mix * layer.output_dim, name='mdn_mus')  # mix*output vals, no activation
        # Activation is self.elu_plus_one_plus_epsilon (defined elsewhere;
        # presumably keeps sigmas positive -- TODO confirm).
        layer.mdn_sigmas = layers.Dense(self.num_mix * self.output_dim, activation=self.elu_plus_one_plus_epsilon, name='mdn_sigmas')  # mix*output vals
        layer.mdn_pi = layers.Dense(self.num_mix, name='mdn_pi')  # mix vals, logits

    # In the functions below `self` is the patched layer (they are bound to
    # it with types.MethodType), not this wrapper object.
    def build(self, input_shape):
        """Build the three sub-layers for the given input shape."""
        with tf.name_scope('mus'):
            self.mdn_mus.build(input_shape)
        with tf.name_scope('sigmas'):
            self.mdn_sigmas.build(input_shape)
        with tf.name_scope('pis'):
            self.mdn_pi.build(input_shape)

    def call_func(self, x):
        """Return the concatenated means, sigmas and mixture logits for x."""
        with tf.name_scope('MDN'):
            mdn_out = layers.concatenate([self.mdn_mus(x), self.mdn_sigmas(x), self.mdn_pi(x)], name='mdn_outputs')
        return mdn_out

    def compute_output_shape(self, input_shape):
        """Returns output shape, showing the number of mixture parameters."""
        return (input_shape[0], (2 * self.output_dim * self.num_mix) + self.num_mix)

    def get_config(self):
        """Return the base config extended with the mixture settings."""
        config = {"output_dimension": self.output_dim, "num_mixtures": self.num_mix}
        # NOTE(review): relies on a bare name `Dense` being in scope and on
        # the patched layer being a Dense instance -- verify.
        base_config = super(Dense, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    layer.build = types.MethodType(build, layer)
    layer.call = types.MethodType(call_func, layer)
    # Expose the sub-layers' weights as the patched layer's own weights.
    # NOTE(review): these lists are captured here, at construction time --
    # confirm the sub-layers' weights are visible as expected after build.
    layer._trainable_weights = layer.mdn_mus.trainable_weights + layer.mdn_sigmas.trainable_weights + layer.mdn_pi.trainable_weights
    layer._non_trainable_weights = layer.mdn_mus.non_trainable_weights + layer.mdn_sigmas.non_trainable_weights + layer.mdn_pi.non_trainable_weights
    layer.compute_output_shape = types.MethodType(compute_output_shape, layer)
    layer.get_config = types.MethodType(get_config, layer)
def build_model(n_features: int, n_classes: int):
    """Create and return the DeepFingerprinting Model.

    The network consists of four convolutional blocks (two Conv1D layers,
    each followed by batch normalisation and an activation, then max-pooling
    and dropout), two 512-unit fully connected blocks and a softmax output.
    Block 1 uses ELU activations; blocks 2-4 use ReLU.

    Args:
        n_features: Length of each flat input feature vector.
        n_classes: Number of output classes.

    Returns:
        The compiled ``keras.Sequential`` model (Adamax optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    # Settings shared by every convolutional block.
    kernel_size = 8
    conv_stride = 1
    pool_size = 8
    pool_stride = 4
    filters_per_block = (32, 64, 128, 256)

    model = keras.Sequential()
    model.add(layers.Reshape((n_features, 1), input_shape=(n_features,)))

    for block, n_filters in enumerate(filters_per_block, start=1):
        prefix = 'block{}'.format(block)
        for conv in (1, 2):
            model.add(
                layers.Conv1D(
                    filters=n_filters,
                    kernel_size=kernel_size,
                    strides=conv_stride,
                    padding='same',
                    name='{}_conv{}'.format(prefix, conv)))
            if block == 1:
                model.add(layers.BatchNormalization(axis=-1))
                model.add(
                    layers.ELU(
                        alpha=1.0,
                        name='{}_adv_act{}'.format(prefix, conv)))
            else:
                model.add(layers.BatchNormalization())
                model.add(
                    layers.Activation(
                        'relu', name='{}_act{}'.format(prefix, conv)))
        model.add(
            layers.MaxPooling1D(
                pool_size=pool_size,
                strides=pool_stride,
                padding='same',
                name=prefix + '_pool'))
        model.add(layers.Dropout(0.1, name=prefix + '_dropout'))

    model.add(layers.Flatten(name='flatten'))

    # Two fully connected blocks that differ only in their dropout rate.
    for fc_index, fc_dropout in ((1, 0.7), (2, 0.5)):
        fc_name = 'fc{}'.format(fc_index)
        model.add(
            layers.Dense(
                512,
                kernel_initializer=initializers.glorot_uniform(seed=0),
                name=fc_name))
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu', name=fc_name + '_act'))
        model.add(layers.Dropout(fc_dropout, name=fc_name + '_dropout'))

    model.add(
        layers.Dense(
            n_classes,
            kernel_initializer=initializers.glorot_uniform(seed=0),
            name='fc3'))
    model.add(layers.Activation('softmax', name="softmax"))

    optimizer = keras.optimizers.Adamax(
        lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(
        loss="categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"])
    return model
#testing = testing.prefetch(tf.data.experimental.AUTOTUNE)

# Small convolutional classifier for 32x32 RGB images with 100 classes.
model = tf.compat.v1.keras.Sequential()
model.add(
    layers.Conv2D(filters=64,
                  kernel_size=4,
                  strides=2,
                  padding='valid',
                  use_bias=True,
                  input_shape=(32, 32, 3)))
model.add(layers.BatchNormalization())
model.add(layers.Activation(tf.nn.leaky_relu))
model.add(layers.Conv2D(128, 4, 2, 'valid', use_bias=True))
model.add(layers.BatchNormalization())
model.add(layers.Activation(tf.nn.leaky_relu))
model.add(layers.Conv2D(256, 1, 1, 'valid', use_bias=True))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(256, 1, 1, 'valid', use_bias=True))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='softmax'))

# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(optimizer='Adam',
              loss=tf.compat.v1.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

print(model)
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

model.fit(xtrain, ytrain, batch_size=64, epochs=50, verbose=1, shuffle=True)

test_loss, test_accuracy = model.evaluate(xtest, ytest, verbose=1)
print(test_accuracy)
print(test_loss)
def call(self, inputs, mode="train"):
    """
    Forward function for pixel advection network.

    Unrolls the encoder / conv-LSTM / decoder stack over every time step
    except the last one, producing one predicted image and one predicted
    robot state per step.

    Parameters:
        inputs: input dictionary including "image", "r_state" and "action";
            each entry is indexed by time step ("image" and "action" are
            iterated with their last element dropped, "r_state" is only read
            at index 0)
        mode: specify training or validating/testing; it is only used to
            derive the local ``isTrain``, which is never read in this method
    Return:
        gen_images: list of generated images (one per processed time step)
        gen_states: list of generated states (one per processed time step)
    """
    ##### preparations #####
    # get dimensions/global steps
    global_step = tf.cast(v1.train.get_or_create_global_step(), tf.float32)
    # NOTE(review): isTrain is assigned here but not used anywhere below.
    isTrain = True if mode == "train" else False
    # static shape of the first frame; the first four dimensions are unpacked
    # as batch, height, width and colour channels
    batch_size, image_height, image_width, color_ch = inputs["image"][
        0].get_shape().as_list()[0:4]
    state_dim = inputs["r_state"][0].get_shape().as_list()[1]
    # placeholder for generated robot states and images
    gen_states, gen_images = [], []
    # initial r state will use ground truth
    current_r_state = inputs["r_state"][0]
    # placeholder for conv-lstm states
    lstm_state1, lstm_state2, lstm_state3, lstm_state4 = None, None, None, None
    lstm_state5, lstm_state6, lstm_state7 = None, None, None
    # get number of ground truth images used for each mini-batch:
    # round(batch_size * k / (k + exp(global_step / k))), which shrinks as
    # global_step grows (assumes self.k > 0 -- TODO confirm)
    num_ground_truth = tf.cast(
        tf.round(
            tf.cast(batch_size, tf.float32) *
            (self.k / (self.k + tf.exp(global_step / self.k)))), tf.int32)

    ###### begin time-step loop (total_len - 1 steps) ######
    for image, action in zip(inputs["image"][:-1], inputs["action"][:-1]):
        ##### sampling and updating values #####
        # reuse parameters after the first step (None on the first
        # iteration, True once at least one image has been generated)
        reuse = None if not bool(gen_images) else True
        # warm start(use ground truth frames) in first context_len steps
        done_warm_start = len(gen_images) > self.context_len - 1
        # if using context frames (during warm start), always use ground truth input
        # else, if not explicitly specified by "use_predict_frame", choose to use generated image
        # or ground truth input based on sampling function
        if self.use_predict_frame and done_warm_start:
            current_image = gen_images[-1]
        elif done_warm_start:
            current_image = self.scheduled_sample(image, gen_images[-1], batch_size, num_ground_truth)
        else:
            current_image = image
        # concat state and action, always use ground truth action, but use current state
        current_state_action = tf.concat([action, current_r_state], axis=1)

        ##### begin U-net #####
        # NOTE(review): every Layers.* object below is constructed anew on
        # each loop iteration; confirm that weights are shared across time
        # steps as intended by the `reuse` flag.
        # 1st conv
        with v1.variable_scope("conv1", reuse=reuse):
            enc0 = Layers.Conv2D(self.layer_ch_specs[0], kernel_size=(5, 5), strides=(2, 2), padding="same")(current_image)
            enc0 = Layers.LayerNormalization()(enc0)
        # 1st conv lstm
        with v1.variable_scope("conv_lstm1", reuse=reuse):
            hidden1, lstm_state1 = basic_conv_lstm_cell(
                enc0, lstm_state1, self.layer_ch_specs[1])
            hidden1 = Layers.LayerNormalization()(hidden1)
        # 2nd conv lstm
        with v1.variable_scope("conv_lstm2", reuse=reuse):
            hidden2, lstm_state2 = basic_conv_lstm_cell(
                hidden1, lstm_state2, self.layer_ch_specs[2])
            hidden2 = Layers.LayerNormalization()(hidden2)
            # stride-2 convolution (second downsampling step)
            enc1 = Layers.Conv2D(self.layer_ch_specs[2], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                (hidden2)
        # 3rd conv lstm
        with v1.variable_scope("conv_lstm3", reuse=reuse):
            hidden3, lstm_state3 = basic_conv_lstm_cell(
                enc1, lstm_state3, self.layer_ch_specs[3])
            hidden3 = Layers.LayerNormalization()(hidden3)
        # 4th conv lstm
        with v1.variable_scope("conv_lstm4", reuse=reuse):
            hidden4, lstm_state4 = basic_conv_lstm_cell(
                hidden3, lstm_state4, self.layer_ch_specs[4])
            hidden4 = Layers.LayerNormalization()(hidden4)
            # stride-2 convolution (third downsampling step)
            enc2 = Layers.Conv2D(self.layer_ch_specs[4], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                (hidden4)
            # Pass in state and action.
            # The concatenated vector is reshaped to state_dim * 2 channels,
            # which presumes the action has the same width as the robot
            # state -- TODO confirm against the data pipeline.
            smear = tf.reshape(current_state_action, [batch_size, 1, 1, state_dim * 2])
            # broadcast the vector over the spatial grid of enc2
            smear = tf.tile(
                smear, [1, int(enc2.get_shape()[1]), int(enc2.get_shape()[2]), 1])
            if self.use_state:
                enc2 = tf.concat(axis=3, values=[enc2, smear])
            # 1x1 convolution applied whether or not the state was appended
            enc3 = Layers.Conv2D(self.layer_ch_specs[4], kernel_size=(1, 1), strides=(1, 1), padding="same")\
                (enc2)
        # 5th conv lstm
        with v1.variable_scope("conv_lstm5", reuse=reuse):
            hidden5, lstm_state5 = basic_conv_lstm_cell(
                enc3, lstm_state5, self.layer_ch_specs[5])
            hidden5 = Layers.LayerNormalization()(hidden5)
            # stride-2 transposed convolution (first upsampling step)
            enc4 = Layers.Conv2DTranspose(self.layer_ch_specs[5], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                (hidden5)
        # 6th conv lstm
        with v1.variable_scope("conv_lstm6", reuse=reuse):
            hidden6, lstm_state6 = basic_conv_lstm_cell(
                enc4, lstm_state6, self.layer_ch_specs[6])
            hidden6 = Layers.LayerNormalization()(hidden6)
            # Skip connection.
            hidden6 = tf.concat(axis=3, values=[hidden6, enc1])  # both 16x16
            enc5 = Layers.Conv2DTranspose(self.layer_ch_specs[6], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                (hidden6)
        # 7th conv lstm
        with v1.variable_scope("conv_lstm7", reuse=reuse):
            hidden7, lstm_state7 = basic_conv_lstm_cell(
                enc5, lstm_state7, self.layer_ch_specs[7])  # 32x32
            hidden7 = Layers.LayerNormalization()(hidden7)
            # Skip connection.
            hidden7 = tf.concat(axis=3, values=[hidden7, enc0])  # both 32x32
            enc6 = Layers.Conv2DTranspose(self.layer_ch_specs[7], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                (hidden7)
            enc6 = Layers.LayerNormalization()(enc6)

        ###### motion transform part #####
        # NOTE(review): if none of self.dna / self.cdna / self.stp is truthy,
        # `transformed` is never assigned and the zip() over it further down
        # fails with an unbound-local error.
        # dna
        if self.dna:
            from video_prediction.models.building_blocks import dna_transformation
            with v1.variable_scope("dna", reuse=reuse):
                if self.num_mask != 1:
                    raise ValueError(
                        'Only one mask is supported for DNA model.')
                # one output channel per kernel element (dna_kernel_size ** 2)
                dna_input = Layers.Conv2DTranspose(self.dna_kernel_size ** 2, kernel_size=(1, 1), strides=(1, 1),\
                    padding="same")(enc6)
                transformed = [dna_transformation(current_image, dna_input, dna_kernel_size=self.dna_kernel_size, \
                    relu_shift=self.relu_shift)]
        # cdna
        elif self.cdna:
            from video_prediction.models.building_blocks import cdna_transformation
            with v1.variable_scope("cdna", reuse=reuse):
                last_hidden_input = Layers.Conv2DTranspose(color_ch, kernel_size=(1, 1), strides=(1, 1),\
                    padding="same")(enc6)
                # first candidate: an image produced directly from enc6
                transformed = [
                    keras.activations.sigmoid(last_hidden_input)
                ]
                # flatten the bottleneck features as input to the transform
                cdna_input = tf.reshape(hidden5, [batch_size, -1])
                transformed += cdna_transformation(current_image, cdna_input, num_masks=self.num_mask, \
                    color_channels=color_ch, dna_kernel_size=self.dna_kernel_size, relu_shift=self.relu_shift)
        # stp
        elif self.stp:
            # NOTE(review): this assert always fails, so the branch is
            # effectively disabled (unless Python runs with -O, which strips
            # asserts and lets the code below execute).
            assert (0)
            from video_prediction.models.building_blocks import stp_transformation
            with v1.variable_scope("stp", reuse=reuse):
                last_hidden_input = Layers.Conv2DTranspose(color_ch, kernel_size=(1, 1), strides=(1, 1),\
                    padding="same")(enc6)
                transformed = [
                    keras.activations.sigmoid(last_hidden_input)
                ]
                stp_input = tf.reshape(hidden5, [batch_size, -1])
                stp_input = Layers.Dense(100)(stp_input)
                transformed += stp_transformation(current_image, stp_input, self.num_mask)

        # compute mask
        with v1.variable_scope("mask", reuse=reuse):
            mask = Layers.Conv2DTranspose(self.num_mask + 1, kernel_size=(1, 1), strides=(1, 1), padding="same")\
                (enc6)
            # softmax over the num_mask + 1 channels of every pixel, then
            # restore the image layout
            mask = tf.reshape(
                tf.nn.softmax(tf.reshape(mask, [-1, self.num_mask + 1])),
                [batch_size, image_height, image_width, self.num_mask + 1])
            #layers.append(("softmax_mask", mask))
            mask_list = tf.split(axis=3, num_or_size_splits=self.num_mask + 1, value=mask)

        # mask output
        # first mask applies to current_image
        new_gen_image = mask_list[0] * current_image
        # remaining masks weight the transformed candidates; note that the
        # loop variable rebinds the name `mask`
        for layer, mask in zip(transformed, mask_list[1:]):
            new_gen_image += layer * mask
        gen_images.append(new_gen_image)

        ###### compute new r state #####
        # next state is predicted from the (action, current state) vector
        new_gen_r_state = Layers.Dense(state_dim)(current_state_action)
        gen_states.append(new_gen_r_state)
        # update current state
        current_r_state = new_gen_r_state
    return gen_images, gen_states
def stack_layers(inputs, net_layers, kernel_initializer='glorot_uniform'):
    """Builds the architecture of the network by applying each layer specified
    in net_layers to inputs.

    Args:
        inputs: a dict containing input_types and input_placeholders for each
            key and value pair, respectively.
        net_layers: a list of dicts containing all layers to be used in the
            network, where each dict describes one such layer. Each dict
            requires the key 'type'; all other keys are dependent on the
            layer type ('size' for dense layers, 'channels' and 'kernel' for
            'Conv2D', 'pool_size' for 'MaxPooling2D', 'rate' for 'Dropout').
            The optional keys 'name' and 'l2_reg' are honoured where the
            layer type supports them.
        kernel_initializer: initialization configuration passed to keras
            (see keras initializers). Only applied to dense layers.

    Returns:
        outputs: a dict formatted in much the same way as inputs. It contains
        input_types and output_tensors for each key and value pair,
        respectively, where output_tensors are the outputs of the
        input_placeholders in inputs after each layer in net_layers is
        applied. The same layer instance is applied to every entry, and the
        inputs dict itself is left unmodified.

    Raises:
        KeyError: if a layer description lacks 'type' or a key required by
            its type.
        ValueError: if a layer description has an unknown 'type'.
    """
    dense_activations = (
        'softplus', 'softsign', 'softmax', 'tanh', 'sigmoid', 'relu', 'selu'
    )

    # shallow copy so the caller's dict is never mutated
    outputs = dict(inputs)

    for layer in net_layers:
        # A missing or falsy 'l2_reg' (None, 0, 0.0) means "no regularizer".
        # It is normalised to None because a raw number is not a valid
        # kernel_regularizer argument.
        l2_strength = layer.get('l2_reg')
        regularizer = l2(l2_strength) if l2_strength else None

        layer_type = layer['type']
        layer_name = layer.get('name')

        # create the layer
        if layer_type in dense_activations:
            # dense layer whose activation is named by the layer type
            new_layer = layers.Dense(
                layer['size'],
                activation=layer_type,
                kernel_initializer=kernel_initializer,
                kernel_regularizer=regularizer,
                name=layer_name)
        elif layer_type == 'None':
            # the string 'None' selects a dense layer without activation
            new_layer = layers.Dense(
                layer['size'],
                kernel_initializer=kernel_initializer,
                kernel_regularizer=regularizer,
                name=layer_name)
        elif layer_type == 'Conv2D':
            new_layer = layers.Conv2D(
                layer['channels'],
                kernel_size=layer['kernel'],
                activation='relu',
                data_format='channels_last',
                kernel_regularizer=regularizer,
                name=layer_name)
        elif layer_type == 'BatchNormalization':
            new_layer = layers.BatchNormalization(name=layer_name)
        elif layer_type == 'MaxPooling2D':
            # NOTE(review): Conv2D above is built with 'channels_last' while
            # pooling uses 'channels_first'; kept unchanged -- confirm this
            # mismatch is intentional.
            new_layer = layers.MaxPooling2D(
                pool_size=layer['pool_size'],
                data_format='channels_first',
                name=layer_name)
        elif layer_type == 'Dropout':
            new_layer = layers.Dropout(layer['rate'], name=layer_name)
        elif layer_type == 'Flatten':
            new_layer = layers.Flatten(name=layer_name)
        else:
            raise ValueError("Invalid layer type '{}'".format(layer_type))

        # apply the layer to each input in inputs
        for key in outputs:
            outputs[key] = new_layer(outputs[key])

    return outputs
def __init__(self,
             obs_size,
             action_size,
             actor_model=None,
             critic_model=None,
             use_target_network=False,
             learning_rate=1e-3,
             reward_discount=0.99,
             tau=0.001):
    """Create the actor/critic networks, graph placeholders and train ops.

    Args:
        obs_size: observation shape; it is unpacked with ``*obs_size`` for
            the placeholders, so it must be an iterable of dimensions.
        action_size: size of the action vector (last placeholder dimension
            and width of the default actor's output layer).
        actor_model: optional pre-built actor; a small dense network with a
            tanh output is created when omitted.
        critic_model: optional pre-built critic taking [state, action]; a
            small dense network with a single output is created when
            omitted.
        use_target_network: when true, structural clones of the critic and
            the actor are stored as target networks.
        learning_rate: learning rate of the critic's Adam optimizer; the
            actor's optimizer uses a tenth of it.
        reward_discount: stored as ``self.rd``.
        tau: stored as ``self.tau``.

    Besides populating attributes, this runs the global variable
    initializer through ``sess``, a name that is not defined in this
    method and must be resolvable from the enclosing scope.
    """
    # plain hyper-parameter bookkeeping
    self.obs_size, self.action_size = obs_size, action_size
    self.use_target_network = use_target_network
    self.lr, self.rd, self.tau = learning_rate, reward_discount, tau

    # default actor: three 16-unit relu layers followed by a tanh action head
    if actor_model is None:
        actor_model = models.Sequential()
        actor_model.add(
            layers.Dense(16, input_shape=obs_size, activation='relu'))
        for _ in range(2):
            actor_model.add(layers.Dense(16, activation='relu'))
        actor_model.add(
            layers.Dense(action_size, name='action', activation='tanh'))
        actor_model.summary()
    self.actor_model = actor_model

    # default critic: state and action are concatenated and fed through
    # three 32-unit relu layers into a single-unit output
    if critic_model is None:
        critic_inputs = [
            layers.Input(shape=obs_size),
            layers.Input(shape=action_size),
        ]
        features = layers.Concatenate()(critic_inputs)
        for _ in range(3):
            features = layers.Dense(32, activation='relu')(features)
        q_value = layers.Dense(1, name='q-value')(features)
        critic_model = models.Model(inputs=critic_inputs, outputs=q_value)
        critic_model.summary()
    self.critic_model = critic_model

    # optional target copies (critic first, then actor)
    if use_target_network:
        clone = tf.keras.models.clone_model
        self.target_network_critic = clone(self.critic_model)
        self.target_network_actor = clone(self.actor_model)

    # graph inputs for one batch of transitions
    batched_obs_shape = (None, *obs_size)
    self.state_ph = tf.placeholder(tf.float32, shape=batched_obs_shape)
    self.actions_ph = tf.placeholder(tf.float32, shape=(None, action_size))
    # `(None)` is plain None rather than a 1-tuple, i.e. no shape is given
    self.rewards_ph = tf.placeholder(tf.float32, shape=(None))
    self.next_states_ph = tf.placeholder(tf.float32, shape=batched_obs_shape)
    self.is_done_ph = tf.placeholder(tf.float32, shape=(None))

    # critic objective and the quantity the actor maximises
    self.loss = self.q_loss(self.state_ph, self.actions_ph, self.rewards_ph,
                            self.next_states_ph, self.is_done_ph)
    expected_reward = self.action_expected_reward(self.state_ph)

    # critic minimises the loss over its own variables only
    critic_optimizer = tf.train.AdamOptimizer(learning_rate)
    self.train_critic_step = critic_optimizer.minimize(
        self.loss, var_list=self.critic_model.trainable_variables)

    # actor maximises the expected reward (minimises its negation) with a
    # ten times smaller learning rate
    actor_optimizer = tf.train.AdamOptimizer(learning_rate / 10)
    self.train_actor_step = actor_optimizer.minimize(
        -expected_reward, var_list=self.actor_model.trainable_variables)

    sess.run(tf.global_variables_initializer())