Code Example #1
def build_cnn_model(n_features: int, n_classes: int):
    """Build the P1FP(C) model using Keras."""
    model = keras.Sequential()
    model.add(layers.Reshape((1, n_features, 1), input_shape=(n_features, ),
              name="input"))

    model.add(layers.Conv2D(
        128, 12, activation="relu", kernel_regularizer="l2", padding="same"))
    model.add(layers.MaxPool2D(10, padding="same"))
    model.add(layers.Lambda(nn.local_response_normalization))

    model.add(layers.Conv2D(
        128, 12, activation="relu", kernel_regularizer="l2", padding="same"))
    model.add(layers.MaxPool2D(10, padding="same"))
    model.add(layers.Lambda(nn.local_response_normalization))

    # The computation flattens the input regardless; tflearn retained the
    # flattened result, whereas Keras does not, hence the explicit Flatten.
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation="tanh"))
    model.add(layers.Dropout(rate=0.2))
    model.add(layers.Dense(n_classes, activation="softmax", name="target"))

    learning_rate = keras.optimizers.schedules.ExponentialDecay(
        0.05, decay_steps=1000, decay_rate=0.96)
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=[keras.metrics.TopKCategoricalAccuracy(3), "accuracy"])

    return model
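A minimal usage sketch with placeholder sizes, assuming keras, layers, and nn are the tensorflow.keras, tensorflow.keras.layers, and tf.nn modules this example implies:

model = build_cnn_model(n_features=5000, n_classes=100)  # hypothetical sizes
model.summary()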
Code Example #2
def classification_dnn(input_features):
    # Creating the initializer
    initializer = tf.compat.v1.keras.initializers.random_normal(0.0, 0.01)
    model = tf.compat.v1.keras.Sequential([
        keraslayers.Dense(1024,
                          activation=tf.nn.tanh,
                          input_shape=(input_features, ),
                          kernel_initializer=initializer,
                          bias_initializer='zeros'),
        keraslayers.Dense(512,
                          activation=tf.nn.tanh,
                          kernel_initializer=initializer,
                          bias_initializer='zeros'),
        keraslayers.Dense(256,
                          activation=tf.nn.tanh,
                          kernel_initializer=initializer,
                          bias_initializer='zeros'),
        keraslayers.Dense(128,
                          activation=tf.nn.tanh,
                          kernel_initializer=initializer,
                          bias_initializer='zeros'),
        keraslayers.Dense(100,
                          kernel_initializer=initializer,
                          bias_initializer='zeros')
    ])
    return model
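A hypothetical usage sketch; the final Dense(100) layer has no activation, so the model emits logits and a from_logits loss is appropriate:

model = classification_dnn(600)  # 600 is a placeholder feature count
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))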
Code Example #3
 def __init__(self, output_dimension, num_mixtures, **kwargs):
     self.output_dim = output_dimension
     self.num_mix = num_mixtures
     with tf.name_scope('MDN'):
         self.mdn_mus = layers.Dense(self.num_mix * self.output_dim, name='mdn_mus')  # mix*output vals, no activation
         self.mdn_sigmas = layers.Dense(self.num_mix * self.output_dim, activation=elu_plus_one_plus_epsilon, name='mdn_sigmas')  # mix*output vals exp activation
         self.mdn_pi = layers.Dense(self.num_mix, name='mdn_pi')  # mix vals, logits
     super(MDN, self).__init__(**kwargs)
Code Example #4
File: GAN_171103.py Project: sheston/tabularGANs
def discriminator_network(x, data_dim, base_n_count):
    x = layers.Dense(base_n_count * 4, activation='relu')(x)
    # x = layers.Dropout(0.1)(x)
    x = layers.Dense(base_n_count * 2, activation='relu')(x)
    # x = layers.Dropout(0.1)(x)
    x = layers.Dense(base_n_count, activation='relu')(x)
    x = layers.Dense(1, activation='sigmoid')(x)
    # x = layers.Dense(1)(x)
    return x
Code Example #5
File: GAN_171103.py Project: sheston/tabularGANs
def generator_network_w_label(x, labels, data_dim, label_dim, base_n_count):
    x = layers.concatenate([x, labels])
    x = layers.Dense(base_n_count * 1, activation='relu')(x)  # 1
    x = layers.Dense(base_n_count * 2, activation='relu')(x)  # 2
    x = layers.Dense(base_n_count * 4, activation='relu')(x)
    # x = layers.Dense(base_n_count*4, activation='relu')(x) # extra
    # x = layers.Dense(base_n_count*4, activation='relu')(x) # extra
    x = layers.Dense(data_dim)(x)
    x = layers.concatenate([x, labels])
    return x
Code Example #6
File: GAN_171103.py Project: sheston/tabularGANs
def critic_network(x, data_dim, base_n_count):
    x = layers.Dense(base_n_count * 4, activation='relu')(x)
    # x = layers.Dropout(0.1)(x)
    x = layers.Dense(base_n_count * 2, activation='relu')(x)  # 2
    # x = layers.Dropout(0.1)(x)
    x = layers.Dense(base_n_count * 1, activation='relu')(x)  # 1
    # x = layers.Dense(base_n_count*4, activation='relu')(x) # extra
    # x = layers.Dense(base_n_count*4, activation='relu')(x) # extra
    # x = layers.Dense(1, activation='sigmoid')(x)
    x = layers.Dense(1)(x)
    return x
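The three functions above only assemble layer graphs; a hypothetical wiring into standalone Keras models (placeholder sizes, with layers/models taken from tensorflow.keras) might look like the sketch below. The linear-output critic suits a Wasserstein loss, while the sigmoid discriminator suits a standard GAN loss:

from tensorflow.keras import layers, models

data_dim, base_n_count = 30, 128  # placeholder sizes
d_in = layers.Input(shape=(data_dim,))
discriminator = models.Model(
    d_in, discriminator_network(d_in, data_dim, base_n_count))
c_in = layers.Input(shape=(data_dim,))
critic = models.Model(c_in, critic_network(c_in, data_dim, base_n_count))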
Code Example #7
def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text."""
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(params['vocab_size'] + 1,
                               params['emb_size'],
                               input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(params['hidden_lstm_size'],
                           dropout=params['dropout_rate_lstm'])(
                               seq_emb, training=training_dr_lstm)
    out = layers.Dropout(rate=params['dropout_rate'],
                         seed=params['random_seed'])(lstm_out,
                                                     training=training_dr_ll)
    if params['variational']:
        # scale kl loss by number of training examples.
        # larger training dataset depends less on prior
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)
    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
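A hypothetical hyperparameter dict exercising the non-variational branch (all values are placeholders; n_train is only needed when variational is true, and layers/models/regularizers are assumed to be the usual module-level Keras imports):

params = {
    'fix_len': 40, 'vocab_size': 10000, 'emb_size': 128,
    'hidden_lstm_size': 64, 'dropout_rate_lstm': 0.2,
    'dropout_rate': 0.3, 'random_seed': 42,
    'variational': False, 'n_class_in': 2, 'reg_weight': 1e-4,
}
model = rnn_model(params)
model.summary()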
Code Example #8
 def testConfigurableDenseFunctionality(self):
     out = ops.ConfigurableDense(units=5)(self.inputs)
     expected = keras_layers.Dense(units=5)(self.inputs)
     self.assertAllEqual(out.shape, expected.shape)
     self.assertIn(
         'configurable_dense/Tensordot/MatMul',
         [op.name for op in tf.get_default_graph().get_operations()])
Code Example #9
    def __init__(self,
                 numbers_of_units,
                 hidden_activation=None,
                 out_activation=None):
        """Constructor.

    Args:
      numbers_of_units: (list of int) number of hidden units for every layer
        (including the output layer)
      hidden_activation: activation function to apply after each hidden layer,
        ignored if there are no hidden layers.
      out_activation: activation function to apply at the output layer.
    """

        num_layers = len(numbers_of_units)
        assert num_layers >= 1

        activations = [hidden_activation] * (num_layers - 1) + [out_activation]

        network = models.Sequential([
            layers.Dense(units, activation=activation)
            for units, activation in zip(numbers_of_units, activations)
        ])

        super(FCLComponent, self).__init__('%sFCL' % num_layers, network,
                                           [numbers_of_units[-1]])
Code Example #10
def PersonalizedAttentivePooling(dim1, dim2, dim3, seed=0):
    """Soft alignment attention implementation.

    Attributes:
        dim1 (int): first dimension of value shape.
        dim2 (int): second dimension of value shape.
        dim3 (int): dimension of the query.

    Returns:
        object: weighted summary of inputs value.
    """
    vecs_input = keras.Input(shape=(dim1, dim2), dtype="float32")
    query_input = keras.Input(shape=(dim3, ), dtype="float32")

    user_vecs = layers.Dropout(0.2)(vecs_input)
    user_att = layers.Dense(
        dim3,
        activation="tanh",
        kernel_initializer=keras.initializers.glorot_uniform(seed=seed),
        bias_initializer=keras.initializers.Zeros(),
    )(user_vecs)
    user_att2 = layers.Dot(axes=-1)([query_input, user_att])
    user_att2 = layers.Activation("softmax")(user_att2)
    user_vec = layers.Dot((1, 1))([user_vecs, user_att2])

    model = keras.Model([vecs_input, query_input], user_vec)
    return model
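A hypothetical shape walkthrough: with 50 candidate vectors of size 400 and a 200-dimensional query, the attention weights have shape (batch, 50) and the pooled output is a single 400-dimensional vector:

att_pool = PersonalizedAttentivePooling(dim1=50, dim2=400, dim3=200)
# inputs:  vecs (batch, 50, 400), query (batch, 200)
# output:  attention-weighted sum over the 50 vectors -> (batch, 400)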
Code Example #11
def get_model(cfg, encoder_inputs, encoder_outputs):

    decoder_inputs = layers.Input(shape=(None, ),
                                  name='Decoder-Input')  # for teacher forcing

    dec_emb = layers.Embedding(cfg.num_input_tokens,
                               cfg.latent_dim,
                               name='Decoder-Embedding',
                               mask_zero=False)(decoder_inputs)

    dec_bn = layers.BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)

    decoder_gru = layers.GRU(cfg.latent_dim,
                             return_state=True,
                             return_sequences=True,
                             name='Decoder-GRU')

    decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=encoder_outputs)

    x = layers.BatchNormalization(
        name='Decoder-Batchnorm-2')(decoder_gru_output)
    decoder_dense = layers.Dense(cfg.num_output_tokens,
                                 activation='softmax',
                                 name='Final-Output-Dense')

    decoder_outputs = decoder_dense(x)

    model = models.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    return model
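A hypothetical wiring with a minimal GRU encoder whose final state seeds the decoder (sizes are placeholders; layers is assumed to be tensorflow.keras.layers):

from types import SimpleNamespace
from tensorflow.keras import layers

cfg = SimpleNamespace(num_input_tokens=5000, num_output_tokens=5000,
                      latent_dim=300)
encoder_inputs = layers.Input(shape=(None,), name='Encoder-Input')
enc_emb = layers.Embedding(cfg.num_input_tokens, cfg.latent_dim)(encoder_inputs)
_, encoder_state = layers.GRU(cfg.latent_dim, return_state=True)(enc_emb)
model = get_model(cfg, encoder_inputs, encoder_state)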
Code Example #12
def get_simple_rnn_model(event_dim, is_Training, temperature=1):
    # input_shape: (None,         : different sequence lengths (per batch; every sequence in one batch does have the same dimension)
    #               EVENT_DIM)    : dimensionality of one event
    layer_one_args = {
        'units': 128,
        'input_shape': (None, event_dim),
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }
    layer_two_args = {
        'units': 128,
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }
    # for generating
    if not is_Training:
        # we predict one by one event
        layer_one_args['input_shape'] = (1, event_dim)
        layer_one_args['batch_input_shape'] = (1, 1, event_dim)
        layer_one_args['stateful'] = True
        layer_two_args['stateful'] = True

    model = keras.Sequential()
    model.add(layers.LSTM(**layer_one_args))
    # second LSTM layer
    model.add(layers.LSTM(**layer_two_args))
    model.add(layers.Lambda(lambda x: x / temperature))
    model.add(layers.Dense(units=event_dim, activation='softmax'))

    return model
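Because the generation-time variant is stateful with batch size 1, a common pattern is to train one instance and copy its weights into a sampling instance (event_dim and temperature below are placeholders):

train_model = get_simple_rnn_model(event_dim=130, is_Training=True)
sample_model = get_simple_rnn_model(event_dim=130, is_Training=False,
                                    temperature=0.8)
sample_model.set_weights(train_model.get_weights())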
Code Example #13
File: lstur.py Project: l294265421/recommenders
    def _build_userencoder(self, titleencoder, type="ini"):
        """The main function to create user encoder of LSTUR.

        Args:
            titleencoder (object): the news encoder of LSTUR.

        Return:
            object: the user encoder of LSTUR.
        """
        hparams = self.hparams
        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )
        user_indexes = keras.Input(shape=(1,), dtype="int32")

        user_embedding_layer = layers.Embedding(
            len(self.train_iterator.uid2index),
            hparams.gru_unit,
            trainable=True,
            embeddings_initializer="zeros",
        )

        long_u_emb = layers.Reshape((hparams.gru_unit,))(
            user_embedding_layer(user_indexes)
        )
        click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title)

        if type == "ini":
            user_present = layers.GRU(
                hparams.gru_unit,
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                bias_initializer=keras.initializers.Zeros(),
            )(
                layers.Masking(mask_value=0.0)(click_title_presents),
                initial_state=[long_u_emb],
            )
        elif type == "con":
            short_uemb = layers.GRU(
                hparams.gru_unit,
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                bias_initializer=keras.initializers.Zeros(),
            )(layers.Masking(mask_value=0.0)(click_title_presents))

            user_present = layers.Concatenate()([short_uemb, long_u_emb])
            user_present = layers.Dense(
                hparams.gru_unit,
                bias_initializer=keras.initializers.Zeros(),
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            )(user_present)

        model = keras.Model(
            [his_input_title, user_indexes], user_present, name="user_encoder"
        )
        return model
Code Example #14
    def __init__(self,
                 obs_size,
                 n_actions,
                 model=None,
                 use_target_network=False,
                 learning_rate=1e-3,
                 reward_discount=0.99):
        self.n_actions = n_actions
        self.lr = learning_rate
        self.rd = reward_discount

        # Init model
        if model is None:
            model = models.Sequential()
            model.add(layers.Dense(20, input_shape=obs_size, activation='elu'))
            model.add(layers.Dense(10, activation='elu'))
            model.add(layers.Dense(n_actions))
            model.summary()
        self.model = model
        self.use_target_network = use_target_network

        self.state_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
        self.actions_ph = tf.placeholder(tf.int32, shape=(None))
        self.rewards_ph = tf.placeholder(tf.float32, shape=(None))
        self.next_states_ph = tf.placeholder(tf.float32,
                                             shape=(None, *obs_size))
        self.is_done_ph = tf.placeholder(tf.float32, shape=(None))

        self.trainable_variables = self.model.trainable_variables
        if use_target_network:
            self.target_network = tf.keras.models.clone_model(self.model)

        self.loss = self.lossf(self.state_ph, self.actions_ph, self.rewards_ph,
                               self.next_states_ph, self.is_done_ph)
        self.set_learning_rate(self.lr)
        self.update_target_network()
Code Example #15
def stp_transformation(current_image, stp_input, num_masks):
    """Apply spatial transformer predictor (STP) to previous image.
    Args:
        current_image: previous image to be transformed.
        stp_input: hidden layer to be used for computing STN parameters.
        num_masks: number of masks and hence the number of STP transformations.
    Returns:
        List of images transformed by the predicted STP parameters.
    """

    identity_params = tf.convert_to_tensor(
        np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
    transformed = []
    for i in range(num_masks - 1):
        params = Layers.Dense(6)(stp_input) + identity_params
        transformed.append(spatial_transformer(current_image, params))

    return transformed
Code Example #16
def cdna_transformation(current_image, cdna_input, num_masks=10, color_channels=3, 
    dna_kernel_size=5, relu_shift=1e-12):
    """Apply convolutional dynamic neural advection to previous image.
    Args:
        current_image: previous image to be transformed.
        cdna_input: hidden layer to be used for computing CDNA kernels.
        num_masks: the number of masks and hence the number of CDNA transformations.
        color_channels: the number of color channels in the images.
    Returns:
        List of images transformed by the predicted CDNA kernels.
    """
    batch_size = int(cdna_input.get_shape()[0])
    height = int(current_image.get_shape()[1])
    width = int(current_image.get_shape()[2])

    # Predict kernels using linear function of last hidden layer.
    cdna_kerns = Layers.Dense(dna_kernel_size * dna_kernel_size * num_masks)(cdna_input)

    # Reshape and normalize.
    cdna_kerns = tf.reshape(
        cdna_kerns, [batch_size, dna_kernel_size, dna_kernel_size, 1, num_masks])
    cdna_kerns = tf.nn.relu(cdna_kerns - relu_shift) + relu_shift
    norm_factor = tf.reduce_sum(cdna_kerns, [1, 2, 3], keepdims=True)
    cdna_kerns /= norm_factor

    # Treat the color channel dimension as the batch dimension since the same
    # transformation is applied to each color channel.
    # Treat the batch dimension as the channel dimension so that
    # depthwise_conv2d can apply a different transformation to each sample.
    cdna_kerns = tf.transpose(cdna_kerns, [1, 2, 0, 4, 3])
    cdna_kerns = tf.reshape(cdna_kerns, [dna_kernel_size, dna_kernel_size, batch_size, num_masks])
    # Swap the batch and channel dimensions.
    current_image = tf.transpose(current_image, [3, 1, 2, 0])

    # Transform image.
    transformed = tf.nn.depthwise_conv2d(current_image, cdna_kerns, [1, 1, 1, 1], 'SAME')

    # Transpose the dimensions to where they belong.
    transformed = tf.reshape(transformed, [color_channels, height, width, batch_size, num_masks])
    transformed = tf.transpose(transformed, [3, 1, 2, 0, 4])
    transformed = tf.unstack(transformed, axis=-1)
    return transformed
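A quick eager-mode shape check under assumed sizes (Layers aliased to tf.keras.layers; all shapes are placeholders):

import tensorflow as tf
Layers = tf.keras.layers

current_image = tf.zeros([8, 64, 64, 3])  # (batch, height, width, channels)
cdna_input = tf.zeros([8, 128])           # flattened hidden layer
transformed = cdna_transformation(current_image, cdna_input, num_masks=10)
# len(transformed) == 10; each element has shape (8, 64, 64, 3)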
Code Example #17
def build_model(
    n_classes: int,
    n_packet_features: int,
    n_meta_features: int = 7,
    dilations: bool = True,
    tag: str = "varcnn",
):
    """Build the Var-CNN model.

    The resulting model takes a single input of shape
    (n_samples, n_packet_features + n_meta_features). The meta features
    must be the rightmost (last) features in the matrix.  The model
    handles separating the two types of features and reshaping them
    as necessary.

    Parameters:
    -----------
    n_classes :
        The number of classes to be predicted.

    n_packet_features :
        The number of packet features such as the number of interarrival
        times or the number of packet directions or sizes.

    n_meta_features:
        The number of meta features such as total packet counts, total
        transmission duration, etc.
    """
    use_metadata = n_meta_features > 0

    # Constructs dir or time ResNet
    input_layer = keras.Input(
        shape=(n_packet_features + n_meta_features, ), name="input")

    layer = (Crop(end=n_packet_features)(input_layer)
             if use_metadata else input_layer)
    layer = layers.Reshape((n_packet_features, 1))(layer)
    output_layer = ResNet18(
        layer, tag, block=(dilated_basic_1d if dilations else basic_1d))

    concat_params = [output_layer]
    combined = concat_params[0]

    # Construct MLP for metadata
    if use_metadata:
        metadata_output = Crop(start=-n_meta_features)(input_layer)
        # consider this the embedding of all the metadata
        metadata_output = layers.Dense(32)(metadata_output)
        metadata_output = layers.BatchNormalization()(
            metadata_output)
        metadata_output = layers.Activation('relu')(metadata_output)

        concat_params.append(metadata_output)
        combined = layers.Concatenate()(concat_params)

    # Better to have final fc layer if combining multiple models
    if len(concat_params) > 1:
        combined = layers.Dense(1024)(combined)
        combined = layers.BatchNormalization()(combined)
        combined = layers.Activation('relu')(combined)
        combined = layers.Dropout(0.5)(combined)

    model_output = layers.Dense(units=n_classes, activation='softmax',
                                name='model_output')(combined)

    model = keras.Model(inputs=input_layer, outputs=model_output)
    model.compile(
        loss='categorical_crossentropy', metrics=['accuracy'],
        optimizer=keras.optimizers.Adam(0.001))

    return model
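A hypothetical call; Crop, ResNet18, dilated_basic_1d, and basic_1d are assumed to be defined elsewhere in the same module:

model = build_model(n_classes=100, n_packet_features=5000)  # placeholder sizes
model.summary()  # expects input shape (None, 5007) with the default 7 meta features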
Code Example #18
File: GAN_171103.py Project: sheston/tabularGANs
def generator_network(x, data_dim, base_n_count):
    x = layers.Dense(base_n_count, activation='relu')(x)
    x = layers.Dense(base_n_count * 2, activation='relu')(x)
    x = layers.Dense(base_n_count * 4, activation='relu')(x)
    x = layers.Dense(data_dim)(x)
    return x
Code Example #19
def ResNet50(method, num_classes, num_updates, dropout_rate):
    """Instantiates the ResNet50 architecture.

  Args:
    method: `str`, method for accounting for uncertainty. Must be one of
      ['vanilla', 'll_dropout', 'll_svi', 'dropout', 'svi', 'dropout_nofirst']
    num_classes: `int` number of classes for image classification.
    num_updates: integer, total steps in an epoch (for weighting the loss)
    dropout_rate: Dropout rate for ll_dropout, dropout methods.

  Returns:
      A Keras model instance.
  """
    # pylint: disable=invalid-name

    # Determine proper input shape
    if backend.image_data_format() == 'channels_first':
        input_shape = (3, 224, 224)
        bn_axis = 1
    else:
        input_shape = (224, 224, 3)
        bn_axis = 3

    if (method in ['dropout', 'll_dropout', 'dropout_nofirst'
                   ]) != (dropout_rate > 0.):
        raise ValueError(
            'Dropout rate should be nonzero iff a dropout method is used. '
            'Method is {}, dropout is {}.'.format(method, dropout_rate))

    use_variational_layers = method == 'svi'
    hidden_layer_dropout = dropout_rate if method in [
        'dropout', 'dropout_nofirst'
    ] else 0.

    img_input = layers.Input(shape=input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    if (dropout_rate > 0.) and (method != 'dropout_nofirst'):
        x = layers.Dropout(hidden_layer_dropout)(x, training=True)
    x = layers.Conv2D(64, (7, 7),
                      use_bias=False,
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    conv_block = functools.partial(
        conv_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    identity_block = functools.partial(
        identity_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    if dropout_rate > 0.:
        x = layers.Dropout(dropout_rate)(x, training=True)

    if method in ['ll_svi', 'svi']:

        x = tfpl.dense_variational_v2.DenseVariational(
            units=num_classes,
            make_posterior_fn=posterior_mean_field,
            make_prior_fn=functools.partial(prior_trainable,
                                            num_updates=num_updates),
            use_bias=True,
            kl_weight=1. / num_updates,
            kl_use_exact=True,
            name='fc1000')(x)
    else:
        x = layers.Dense(num_classes,
                         kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                         bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                         name='fc1000')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet50')
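A hypothetical instantiation of the deterministic baseline; per the check above, dropout_rate must be zero unless a dropout method is chosen, and the module-level constants and block helpers are assumed to be defined:

model = ResNet50(method='vanilla', num_classes=1000,
                 num_updates=10000, dropout_rate=0.)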
Code Example #20
if USE_AMP:
    tf.keras.mixed_precision.experimental.set_policy('infer_float32_vars')

in_id = layers.Input(shape=(MAX_SEQ_LEN, ), name="input_ids")
in_mask = layers.Input(shape=(MAX_SEQ_LEN, ), name="input_masks")
in_segment = layers.Input(shape=(MAX_SEQ_LEN, ), name="segment_ids")

in_bert = [in_id, in_mask, in_segment]

l_bert = bert_utils.BERT(fine_tune_layers=TUNE_LAYERS,
                         bert_path=BERT_PATH,
                         return_sequence=False,
                         output_size=H_SIZE,
                         debug=False)(in_bert)

out_pred = layers.Dense(num_classes, activation="softmax")(l_bert)

model = tf.keras.models.Model(inputs=in_bert, outputs=out_pred)

# In[10]:

opt = tf.keras.optimizers.Adam(lr=LEARNING_RATE)

if USE_AMP:
    opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        opt, "dynamic")

# In[11]:

model.compile(loss="sparse_categorical_crossentropy",
              optimizer=opt)
Code Example #21
    def __init__(self,
                 word_embedding,
                 data,
                 use_cudnn_lstm=False,
                 plot_model_architecture=True):
        self.hidden_units = 300
        self.embed_model = word_embedding
        self.input_dim = word_embedding.embed_dim
        self.vocab_size = data.vocab_size
        self.left = data.premise
        self.right = data.hypothesis
        self.max_len = data.max_len
        self.dense_units = 32
        self.name = '{}_glove{}_lstm{}_dense{}'.format(str(int(time.time())),
                                                       self.input_dim,
                                                       self.hidden_units,
                                                       self.dense_units)

        embedding_matrix = np.zeros((self.vocab_size, self.input_dim))
        for word, i in data.vocab:
            embedding_vector = self.embed_model.get_vector(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        embed = layers.Embedding(
            input_dim=self.vocab_size,
            output_dim=self.input_dim,
            embeddings_initializer=Constant(embedding_matrix),
            input_length=self.max_len,
            mask_zero=True,
            trainable=False)
        #embed.trainable=False

        if use_cudnn_lstm:
            lstm = layers.CuDNNLSTM(self.hidden_units,
                                    input_shape=(None, self.input_dim),
                                    unit_forget_bias=True,
                                    kernel_initializer='he_normal',
                                    kernel_regularizer='l2',
                                    name='lstm_layer')
        else:
            lstm = layers.LSTM(self.hidden_units,
                               input_shape=(None, self.input_dim),
                               unit_forget_bias=True,
                               activation='relu',
                               kernel_initializer='he_normal',
                               kernel_regularizer='l2',
                               name='lstm_layer')
        left_input = Input(shape=(self.max_len,), name='input_1')
        right_input = Input(shape=(self.max_len,), name='input_2')

        embed_left = embed(left_input)
        embed_right = embed(right_input)

        print('embed:', embed_right.shape)

        left_output = lstm(embed_left)
        right_output = lstm(embed_right)
        print('lstm:', right_output.shape)
        l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
        merged = layers.Lambda(function=l1_norm,
                               output_shape=lambda x: x[0],
                               name='L1_distance')([left_output, right_output])
        #merged = layers.concatenate([left_output, right_output])
        #lstm_2 = layers.LSTM(hidden_units, unit_forget_bias=True,
        #                      activation = 'relu', kernel_regularizer='l2', name='lstm_layer2' )(merged)
        print('merged:', merged.shape)
        dense_1 = layers.Dense(self.dense_units, activation='relu')(merged)
        print('dense1:', dense_1.shape)
        output = layers.Dense(3, activation='softmax',
                              name='output_layer')(dense_1)
        print('output:', output.shape)
        self.model = Model(inputs=[left_input, right_input], outputs=output)

        self.compile()
Code Example #22
    def __init__(self, Model, num_mixtures=1):
        """Gaussian_Mixtures initializer. Turns a neural network into a GMN.

        Args:
            Model: Input Keras Model.
            num_mixtures: the total number of Gaussians to fit the output space with.

        Returns:
            Nothing.

        """

        self.model = Model()

        layer = self.model.layers[-1]

        self.output_dim = layer.units
        layer.output_dim = layer.units
        self.num_mix = num_mixtures
        layer.num_mix = num_mixtures
        with tf.name_scope('MDN'):
            layer.mdn_mus = layers.Dense(
                layer.num_mix * layer.output_dim,
                name='mdn_mus')  # mix*output vals, no activation
            layer.mdn_sigmas = layers.Dense(
                self.num_mix * self.output_dim,
                activation=self.elu_plus_one_plus_epsilon,
                name='mdn_sigmas')  # mix*output vals exp activation
            layer.mdn_pi = layers.Dense(self.num_mix,
                                        name='mdn_pi')  # mix vals, logits

        def build(self, input_shape):
            with tf.name_scope('mus'):
                self.mdn_mus.build(input_shape)
            with tf.name_scope('sigmas'):
                self.mdn_sigmas.build(input_shape)
            with tf.name_scope('pis'):
                self.mdn_pi.build(input_shape)

        def call_func(self, x):
            with tf.name_scope('MDN'):
                mdn_out = layers.concatenate(
                    [self.mdn_mus(x),
                     self.mdn_sigmas(x),
                     self.mdn_pi(x)],
                    name='mdn_outputs')
            return mdn_out

        def compute_output_shape(self, input_shape):
            """Returns output shape, showing the number of mixture parameters."""
            return (input_shape[0],
                    (2 * self.output_dim * self.num_mix) + self.num_mix)

        def get_config(self):
            config = {
                "output_dimension": self.output_dim,
                "num_mixtures": self.num_mix
            }
            base_config = super(Dense, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

        layer.build = types.MethodType(build, layer)
        layer.call = types.MethodType(call_func, layer)
        layer._trainable_weights = layer.mdn_mus.trainable_weights + layer.mdn_sigmas.trainable_weights + layer.mdn_pi.trainable_weights
        layer._non_trainable_weights = layer.mdn_mus.non_trainable_weights + layer.mdn_sigmas.non_trainable_weights + layer.mdn_pi.non_trainable_weights
        layer.compute_output_shape = types.MethodType(compute_output_shape,
                                                      layer)
        layer.get_config = types.MethodType(get_config, layer)
Code Example #23
File: dfnet.py Project: jpcsmith/wf-tools
def build_model(n_features: int, n_classes: int):
    """Create and return the DeepFingerprinting Model."""
    model = keras.Sequential()
    # Block1
    filter_num = ['None', 32, 64, 128, 256]
    kernel_size = ['None', 8, 8, 8, 8]
    conv_stride_size = ['None', 1, 1, 1, 1]
    pool_stride_size = ['None', 4, 4, 4, 4]
    pool_size = ['None', 8, 8, 8, 8]

    model.add(layers.Reshape((n_features, 1), input_shape=(n_features, )))
    model.add(
        layers.Conv1D(filters=filter_num[1],
                      kernel_size=kernel_size[1],
                      strides=conv_stride_size[1],
                      padding='same',
                      name='block1_conv1'))
    model.add(layers.BatchNormalization(axis=-1))
    model.add(layers.ELU(alpha=1.0, name='block1_adv_act1'))
    model.add(
        layers.Conv1D(filters=filter_num[1],
                      kernel_size=kernel_size[1],
                      strides=conv_stride_size[1],
                      padding='same',
                      name='block1_conv2'))
    model.add(layers.BatchNormalization(axis=-1))
    model.add(layers.ELU(alpha=1.0, name='block1_adv_act2'))
    model.add(
        layers.MaxPooling1D(pool_size=pool_size[1],
                            strides=pool_stride_size[1],
                            padding='same',
                            name='block1_pool'))
    model.add(layers.Dropout(0.1, name='block1_dropout'))

    model.add(
        layers.Conv1D(filters=filter_num[2],
                      kernel_size=kernel_size[2],
                      strides=conv_stride_size[2],
                      padding='same',
                      name='block2_conv1'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='block2_act1'))

    model.add(
        layers.Conv1D(filters=filter_num[2],
                      kernel_size=kernel_size[2],
                      strides=conv_stride_size[2],
                      padding='same',
                      name='block2_conv2'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='block2_act2'))
    model.add(
        layers.MaxPooling1D(pool_size=pool_size[2],
                            strides=pool_stride_size[2],
                            padding='same',
                            name='block2_pool'))
    model.add(layers.Dropout(0.1, name='block2_dropout'))

    model.add(
        layers.Conv1D(filters=filter_num[3],
                      kernel_size=kernel_size[3],
                      strides=conv_stride_size[3],
                      padding='same',
                      name='block3_conv1'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='block3_act1'))
    model.add(
        layers.Conv1D(filters=filter_num[3],
                      kernel_size=kernel_size[3],
                      strides=conv_stride_size[3],
                      padding='same',
                      name='block3_conv2'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='block3_act2'))
    model.add(
        layers.MaxPooling1D(pool_size=pool_size[3],
                            strides=pool_stride_size[3],
                            padding='same',
                            name='block3_pool'))
    model.add(layers.Dropout(0.1, name='block3_dropout'))

    model.add(
        layers.Conv1D(filters=filter_num[4],
                      kernel_size=kernel_size[4],
                      strides=conv_stride_size[4],
                      padding='same',
                      name='block4_conv1'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='block4_act1'))
    model.add(
        layers.Conv1D(filters=filter_num[4],
                      kernel_size=kernel_size[4],
                      strides=conv_stride_size[4],
                      padding='same',
                      name='block4_conv2'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='block4_act2'))
    model.add(
        layers.MaxPooling1D(pool_size=pool_size[4],
                            strides=pool_stride_size[4],
                            padding='same',
                            name='block4_pool'))
    model.add(layers.Dropout(0.1, name='block4_dropout'))

    model.add(layers.Flatten(name='flatten'))
    model.add(
        layers.Dense(512,
                     kernel_initializer=initializers.glorot_uniform(seed=0),
                     name='fc1'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='fc1_act'))

    model.add(layers.Dropout(0.7, name='fc1_dropout'))

    model.add(
        layers.Dense(512,
                     kernel_initializer=initializers.glorot_uniform(seed=0),
                     name='fc2'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu', name='fc2_act'))

    model.add(layers.Dropout(0.5, name='fc2_dropout'))

    model.add(
        layers.Dense(n_classes,
                     kernel_initializer=initializers.glorot_uniform(seed=0),
                     name='fc3'))
    model.add(layers.Activation('softmax', name="softmax"))
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adamax(lr=0.002,
                                                    beta_1=0.9,
                                                    beta_2=0.999,
                                                    epsilon=1e-08,
                                                    decay=0.0),
                  metrics=["accuracy"])

    return model
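A hypothetical call with placeholder website-fingerprinting sizes:

model = build_model(n_features=5000, n_classes=95)
model.summary()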
Code Example #24
#testing = testing.prefetch(tf.data.experimental.AUTOTUNE)

model = tf.compat.v1.keras.Sequential()
model.add(
    layers.Conv2D(filters=64,
                  kernel_size=4,
                  strides=2,
                  padding='valid',
                  use_bias=True,
                  input_shape=(32, 32, 3)))
model.add(layers.BatchNormalization())
model.add(layers.Activation(tf.nn.leaky_relu))
model.add(layers.Conv2D(128, 4, 2, 'valid', use_bias=True))
model.add(layers.BatchNormalization())
model.add(layers.Activation(tf.nn.leaky_relu))
model.add(layers.Conv2D(256, 1, 1, 'valid', use_bias=True))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(256, 1, 1, 'valid', use_bias=True))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='softmax'))

model.compile(optimizer='Adam',
              loss=tf.compat.v1.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
print(model)
print(model.summary())
model.fit(xtrain, ytrain, batch_size=64, epochs=50, verbose=1, shuffle=True)

test_loss, test_accuracy = model.evaluate(xtest, ytest, verbose=1)
print(test_accuracy)
print(test_loss)
Code Example #25
    def call(self, inputs, mode="train"):
        """
        Forward function for pixel advection network
        Parameters:
            inputs:         input dictionary including "image", "r_state" and "action"
              mode:         specify training or validating/testing
        Return:
            gen_images:     list of generated images
            gen_states:     list of generated states
        """
        ##### preparations #####
        # get dimensions/global steps
        global_step = tf.cast(v1.train.get_or_create_global_step(), tf.float32)
        isTrain = mode == "train"
        batch_size, image_height, image_width, color_ch = inputs["image"][
            0].get_shape().as_list()[0:4]
        state_dim = inputs["r_state"][0].get_shape().as_list()[1]

        # placeholder for generated robot states and images
        gen_states, gen_images = [], []

        # initial r state will use ground truth
        current_r_state = inputs["r_state"][0]

        # placeholder for conv-lstm states
        lstm_state1, lstm_state2, lstm_state3, lstm_state4 = None, None, None, None
        lstm_state5, lstm_state6, lstm_state7 = None, None, None

        # get number of ground truth images used for each mini-batch
        num_ground_truth = tf.cast(
            tf.round(
                tf.cast(batch_size, tf.float32) *
                (self.k / (self.k + tf.exp(global_step / self.k)))), tf.int32)

        ###### begin time-step loop (total_len - 1 steps) ######
        for image, action in zip(inputs["image"][:-1], inputs["action"][:-1]):

            ##### sampling and updating values #####
            # reuse parameters after the first step
            reuse = None if not bool(gen_images) else True

            # warm start (use ground-truth frames) in the first context_len steps
            done_warm_start = len(gen_images) > self.context_len - 1

            # if using context frames (during warm start), always use ground truth input
            # else, if not explicitly specified by "use_predict_frame", choose to use generated image
            # or ground truth input based on sampling function
            if self.use_predict_frame and done_warm_start:
                current_image = gen_images[-1]
            elif done_warm_start:
                current_image = self.scheduled_sample(image, gen_images[-1],
                                                      batch_size,
                                                      num_ground_truth)
            else:
                current_image = image

            # concat state and action, always use ground truth action, but use current state
            current_state_action = tf.concat([action, current_r_state], axis=1)

            ##### begin U-net #####
            # 1st conv
            with v1.variable_scope("conv1", reuse=reuse):
                enc0 = Layers.Conv2D(self.layer_ch_specs[0],
                                     kernel_size=(5, 5),
                                     strides=(2, 2),
                                     padding="same")(current_image)
                enc0 = Layers.LayerNormalization()(enc0)

            # 1st conv lstm
            with v1.variable_scope("conv_lstm1", reuse=reuse):
                hidden1, lstm_state1 = basic_conv_lstm_cell(
                    enc0, lstm_state1, self.layer_ch_specs[1])
                hidden1 = Layers.LayerNormalization()(hidden1)

            # 2nd conv lstm
            with v1.variable_scope("conv_lstm2", reuse=reuse):
                hidden2, lstm_state2 = basic_conv_lstm_cell(
                    hidden1, lstm_state2, self.layer_ch_specs[2])
                hidden2 = Layers.LayerNormalization()(hidden2)
                enc1 = Layers.Conv2D(self.layer_ch_specs[2], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                            (hidden2)
            # 3rd conv lstm
            with v1.variable_scope("conv_lstm3", reuse=reuse):
                hidden3, lstm_state3 = basic_conv_lstm_cell(
                    enc1, lstm_state3, self.layer_ch_specs[3])
                hidden3 = Layers.LayerNormalization()(hidden3)

            # 4th conv lstm
            with v1.variable_scope("conv_lstm4", reuse=reuse):
                hidden4, lstm_state4 = basic_conv_lstm_cell(
                    hidden3, lstm_state4, self.layer_ch_specs[4])
                hidden4 = Layers.LayerNormalization()(hidden4)
                enc2 = Layers.Conv2D(self.layer_ch_specs[4], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                            (hidden4)
                # Pass in state and action.
                smear = tf.reshape(current_state_action,
                                   [batch_size, 1, 1, state_dim * 2])
                smear = tf.tile(
                    smear,
                    [1,
                     int(enc2.get_shape()[1]),
                     int(enc2.get_shape()[2]), 1])

                if self.use_state:
                    enc2 = tf.concat(axis=3, values=[enc2, smear])
                enc3 = Layers.Conv2D(self.layer_ch_specs[4], kernel_size=(1, 1), strides=(1, 1), padding="same")\
                            (enc2)

            # 5th conv lstm
            with v1.variable_scope("conv_lstm5", reuse=reuse):
                hidden5, lstm_state5 = basic_conv_lstm_cell(
                    enc3, lstm_state5, self.layer_ch_specs[5])
                hidden5 = Layers.LayerNormalization()(hidden5)
                enc4 = Layers.Conv2DTranspose(self.layer_ch_specs[5], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                    (hidden5)

            # 6th conv lstm
            with v1.variable_scope("conv_lstm6", reuse=reuse):
                hidden6, lstm_state6 = basic_conv_lstm_cell(
                    enc4, lstm_state6, self.layer_ch_specs[6])
                hidden6 = Layers.LayerNormalization()(hidden6)
                # Skip connection.
                hidden6 = tf.concat(axis=3, values=[hidden6,
                                                    enc1])  # both 16x16
                enc5 = Layers.Conv2DTranspose(self.layer_ch_specs[6], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                    (hidden6)

            # 7th conv lstm
            with v1.variable_scope("conv_lstm7", reuse=reuse):
                hidden7, lstm_state7 = basic_conv_lstm_cell(
                    enc5, lstm_state7, self.layer_ch_specs[7])  # 32x32
                hidden7 = Layers.LayerNormalization()(hidden7)

                # Skip connection.
                hidden7 = tf.concat(axis=3, values=[hidden7,
                                                    enc0])  # both 32x32
                enc6 = Layers.Conv2DTranspose(self.layer_ch_specs[7], kernel_size=(3, 3), strides=(2, 2), padding="same")\
                    (hidden7)
                enc6 = Layers.LayerNormalization()(enc6)

            ###### motion transform part #####
            # dna
            if self.dna:
                from video_prediction.models.building_blocks import dna_transformation
                with v1.variable_scope("dna", reuse=reuse):
                    if self.num_mask != 1:
                        raise ValueError(
                            'Only one mask is supported for DNA model.')
                    dna_input = Layers.Conv2DTranspose(self.dna_kernel_size ** 2, kernel_size=(1, 1), strides=(1, 1),\
                        padding="same")(enc6)
                    transformed = [dna_transformation(current_image, dna_input, dna_kernel_size=self.dna_kernel_size, \
                        relu_shift=self.relu_shift)]
            # cdna
            elif self.cdna:
                from video_prediction.models.building_blocks import cdna_transformation
                with v1.variable_scope("cdna", reuse=reuse):
                    last_hidden_input = Layers.Conv2DTranspose(color_ch, kernel_size=(1, 1), strides=(1, 1),\
                        padding="same")(enc6)
                    transformed = [
                        keras.activations.sigmoid(last_hidden_input)
                    ]
                    cdna_input = tf.reshape(hidden5, [batch_size, -1])
                    transformed += cdna_transformation(current_image, cdna_input, num_masks=self.num_mask, \
                        color_channels=color_ch, dna_kernel_size=self.dna_kernel_size, relu_shift=self.relu_shift)
            # stp
            elif self.stp:
                assert (0)
                from video_prediction.models.building_blocks import stp_transformation
                with v1.variable_scope("stp", reuse=reuse):
                    last_hidden_input = Layers.Conv2DTranspose(color_ch, kernel_size=(1, 1), strides=(1, 1),\
                        padding="same")(enc6)
                    transformed = [
                        keras.activations.sigmoid(last_hidden_input)
                    ]
                    stp_input = tf.reshape(hidden5, [batch_size, -1])
                    stp_input = Layers.Dense(100)(stp_input)
                    transformed += stp_transformation(current_image, stp_input,
                                                      self.num_mask)

            # compute mask
            with v1.variable_scope("mask", reuse=reuse):
                mask = Layers.Conv2DTranspose(self.num_mask + 1, kernel_size=(1, 1), strides=(1, 1), padding="same")\
                    (enc6)
                mask = tf.reshape(
                    tf.nn.softmax(tf.reshape(mask, [-1, self.num_mask + 1])),
                    [batch_size, image_height, image_width, self.num_mask + 1])
                #layers.append(("softmax_mask", mask))
                mask_list = tf.split(axis=3,
                                     num_or_size_splits=self.num_mask + 1,
                                     value=mask)

            # mask output
            # first mask applies to current_image
            new_gen_image = mask_list[0] * current_image
            for layer, mask in zip(transformed, mask_list[1:]):
                new_gen_image += layer * mask

            gen_images.append(new_gen_image)

            ###### compute new r state #####
            new_gen_r_state = Layers.Dense(state_dim)(current_state_action)
            gen_states.append(new_gen_r_state)
            # update current state
            current_r_state = new_gen_r_state

        return gen_images, gen_states
Code Example #26
def stack_layers(inputs, net_layers, kernel_initializer='glorot_uniform'):
  """Builds the architecture of the network by applying each layer specified in net_layers to inputs.

  Args:
    inputs: a dict containing input_types and input_placeholders for each key
      and value pair, respectively.
    net_layers:  a list of dicts containing all layers to be used in the
      network, where each dict describes one such layer. each dict requires the
      key 'type'. all other keys are dependent on the layer type.
    kernel_initializer: initialization configuration passed to keras (see keras
      initializers).

  Returns:
    outputs: a dict formatted in much the same way as inputs. it
      contains input_types and output_tensors for each key and value pair,
      respectively, where output_tensors are the outputs of the
      input_placeholders in inputs after each layer in net_layers is applied.
  """
  outputs = dict()

  for key in inputs:
    outputs[key] = inputs[key]

  for layer in net_layers:
    # check for l2_reg argument
    l2_reg = layer.get('l2_reg')
    if l2_reg:
      l2_reg = l2(layer['l2_reg'])

    # create the layer
    if layer['type'] in [
        'softplus', 'softsign', 'softmax', 'tanh', 'sigmoid', 'relu', 'selu'
    ]:
      l = layers.Dense(
          layer['size'],
          activation=layer['type'],
          kernel_initializer=kernel_initializer,
          kernel_regularizer=l2_reg,
          name=layer.get('name'))
    elif layer['type'] == 'None':
      l = layers.Dense(
          layer['size'],
          kernel_initializer=kernel_initializer,
          kernel_regularizer=l2_reg,
          name=layer.get('name'))
    elif layer['type'] == 'Conv2D':
      l = layers.Conv2D(
          layer['channels'],
          kernel_size=layer['kernel'],
          activation='relu',
          data_format='channels_last',
          kernel_regularizer=l2_reg,
          name=layer.get('name'))
    elif layer['type'] == 'BatchNormalization':
      l = layers.BatchNormalization(name=layer.get('name'))
    elif layer['type'] == 'MaxPooling2D':
      l = layers.MaxPooling2D(
          pool_size=layer['pool_size'],
          data_format='channels_first',
          name=layer.get('name'))
    elif layer['type'] == 'Dropout':
      l = layers.Dropout(layer['rate'], name=layer.get('name'))
    elif layer['type'] == 'Flatten':
      l = layers.Flatten(name=layer.get('name'))
    else:
      raise ValueError("Invalid layer type '{}'".format(layer['type']))

    # apply the layer to each input in inputs
    for k in outputs:
      outputs[k] = l(outputs[k])

  return outputs
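A hypothetical two-layer MLP spec in the dict format stack_layers expects (l2 is assumed to be imported from the Keras regularizers in the original module):

from tensorflow.keras import layers

net_layers = [
    {'type': 'relu', 'size': 256, 'l2_reg': 1e-4, 'name': 'hidden'},
    {'type': 'softmax', 'size': 10, 'name': 'probs'},
]
inputs = {'x': layers.Input(shape=(784,))}
outputs = stack_layers(inputs, net_layers)  # outputs['x'] is the softmax tensor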
Code Example #27
    def __init__(self,
                 obs_size,
                 action_size,
                 actor_model=None,
                 critic_model=None,
                 use_target_network=False,
                 learning_rate=1e-3,
                 reward_discount=0.99,
                 tau=0.001):
        self.obs_size = obs_size
        self.action_size = action_size
        self.use_target_network = use_target_network
        self.lr = learning_rate
        self.rd = reward_discount
        self.tau = tau

        # Create models if not provided
        if actor_model is None:
            actor_model = models.Sequential()
            actor_model.add(
                layers.Dense(16, input_shape=obs_size, activation='relu'))
            actor_model.add(layers.Dense(16, activation='relu'))
            actor_model.add(layers.Dense(16, activation='relu'))
            actor_model.add(
                layers.Dense(action_size, name='action', activation='tanh'))
            actor_model.summary()
        self.actor_model = actor_model

        if critic_model is None:
            state_input = layers.Input(shape=obs_size)
            action_input = layers.Input(shape=action_size)
            all_input = layers.Concatenate()([state_input, action_input])
            h1 = layers.Dense(32, activation='relu')(all_input)
            h2 = layers.Dense(32, activation='relu')(h1)
            h3 = layers.Dense(32, activation='relu')(h2)
            output = layers.Dense(1, name='q-value')(h3)
            critic_model = models.Model(inputs=[state_input, action_input],
                                        outputs=output)
            critic_model.summary()
        self.critic_model = critic_model

        if use_target_network:
            self.target_network_critic = tf.keras.models.clone_model(
                self.critic_model)
            self.target_network_actor = tf.keras.models.clone_model(
                self.actor_model)

        self.state_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
        self.actions_ph = tf.placeholder(tf.float32, shape=(None, action_size))
        self.rewards_ph = tf.placeholder(tf.float32, shape=(None))
        self.next_states_ph = tf.placeholder(tf.float32,
                                             shape=(None, *obs_size))
        self.is_done_ph = tf.placeholder(tf.float32, shape=(None))

        self.loss = self.q_loss(self.state_ph, self.actions_ph,
                                self.rewards_ph, self.next_states_ph,
                                self.is_done_ph)
        aer = self.action_expected_reward(self.state_ph)

        self.train_critic_step = tf.train.AdamOptimizer(
            learning_rate).minimize(
                self.loss, var_list=self.critic_model.trainable_variables)
        self.train_actor_step = tf.train.AdamOptimizer(
            learning_rate / 10).minimize(
                -aer, var_list=self.actor_model.trainable_variables)
        sess.run(tf.global_variables_initializer())