Example #1
class BatchNormalization(Template):

    @Template.init_name_scope
    def __init__(self, input_shape):
        '''
        REFERENCE:
            Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
        PARAMS:
            input_shape (list): shape of the input, excluding the batch dimension

        # To use this normalization, run the update ops below so that the moving mean and variance are updated during training
        from tensorflow.python.framework import ops
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
        with ops.control_dependencies(update_ops):
            train_op = optimizer.minimize(train_cost_sb)
        '''
        self.bn = TFBatchNorm()
        self.bn.build(input_shape=[None] + list(input_shape))

    def _train_fprop(self, state_below):
        return self.bn.apply(state_below, training=True)

    def _test_fprop(self, state_below):
        return self.bn.apply(state_below, training=False)
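
For context, TFBatchNorm here is presumably an alias for tf.layers.BatchNormalization (TF 1.x); a minimal standalone sketch of the same build/apply pattern under that assumption:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# assumption: TFBatchNorm aliases tf.layers.BatchNormalization (TF 1.x layers API)
TFBatchNorm = tf.layers.BatchNormalization

x = tf.placeholder(tf.float32, shape=[None, 32])  # illustrative 32-dim input
bn = TFBatchNorm()
bn.build(input_shape=[None, 32])

train_out = bn.apply(x, training=True)    # normalizes with batch statistics
test_out = bn.apply(x, training=False)    # normalizes with the moving averages

# the moving statistics are only refreshed when the UPDATE_OPS collection is run,
# as shown with ops.control_dependencies(update_ops) in the docstring above
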
Example #2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import BatchNormalization


def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[2]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(BatchNormalization())
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    return model
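
A quick usage sketch for this builder, training on toy data (the random data and hyperparameter values below are purely illustrative):

import numpy as np

# toy regression data matching the default input_shape=[2]
X = np.random.rand(1000, 2).astype("float32")
y = (X[:, 0] + 2.0 * X[:, 1]).reshape(-1, 1)

model = build_model(n_hidden=2, n_neurons=50, learning_rate=1e-2)
history = model.fit(X, y, epochs=5, validation_split=0.2)
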
Example #3
import tensorflow as tf
from tensorflow.keras.layers import (Activation, BatchNormalization, Conv2D,
                                     Dense, Flatten, Input, MaxPooling2D)


def createModel():
    image_size = IMAGE_SIZE

    image_input = Input(shape=(image_size, image_size, 3), name='input_layer')

    conv_1 = Conv2D(filters=64, kernel_size=(3, 3),
                    use_bias=False)(image_input)
    conv_1_normalized = BatchNormalization()(conv_1)
    conv_1_activation = Activation('relu')(conv_1_normalized)
    conv_1_pooled = MaxPooling2D(padding='same')(conv_1_activation)

    conv_2 = Conv2D(filters=128, kernel_size=(3, 3),
                    use_bias=False)(conv_1_pooled)
    conv_2_normalized = BatchNormalization()(conv_2)
    conv_2_activation = Activation('relu')(conv_2_normalized)
    conv_2_pooled = MaxPooling2D(padding='same')(conv_2_activation)

    conv_3 = Conv2D(filters=128, kernel_size=(3, 3),
                    use_bias=False)(conv_2_pooled)
    conv_3_normalized = BatchNormalization()(conv_3)
    conv_3_activation = Activation('relu')(conv_3_normalized)
    conv_3_pooled = MaxPooling2D(padding='same')(conv_3_activation)

    conv_4 = Conv2D(filters=256, kernel_size=(3, 3),
                    use_bias=False)(conv_3_pooled)
    conv_4_normalized = BatchNormalization()(conv_4)
    conv_4_activation = Activation('relu')(conv_4_normalized)
    conv_4_pooled = MaxPooling2D(padding='same')(conv_4_activation)

    conv_5 = Conv2D(filters=512, kernel_size=(3, 3),
                    use_bias=False)(conv_4_pooled)
    conv_5_normalized = BatchNormalization()(conv_5)
    conv_5_activation = Activation('relu')(conv_5_normalized)
    conv_5_pooled = MaxPooling2D(padding='same')(conv_5_activation)

    conv_flattened = Flatten()(conv_5_pooled)

    dense_layer_1 = Dense(512, use_bias=False)(conv_flattened)
    dense_normalized = BatchNormalization()(dense_layer_1)
    dense_activation = Activation('relu')(dense_normalized)

    output = Dense(43, activation='softmax',
                   name='output_layer')(dense_activation)

    model = tf.keras.Model(inputs=image_input, outputs=[output])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(1e-3),
                  loss={'output_layer': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    model.summary()
    return model
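
A sketch of how createModel could be exercised. IMAGE_SIZE is defined elsewhere in the source module, so the value below is an assumption (with five valid 3x3 convolutions and five poolings it must be at least about 63), as are the random images and 43-class labels:

import numpy as np
import tensorflow as tf

IMAGE_SIZE = 64  # assumed value; the real constant lives in the surrounding module

images = np.random.rand(8, IMAGE_SIZE, IMAGE_SIZE, 3).astype("float32")
labels = tf.keras.utils.to_categorical(np.random.randint(0, 43, size=8),
                                       num_classes=43)

model = createModel()
model.fit(images, labels, batch_size=8, epochs=1)
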
Example #4
    def __init_var__(self, state_below):
        scope_ = tf.get_default_graph().get_name_scope()
        self.bn = TFBatchNorm(
            axis=self.axis,
            momentum=self.momentum,
            epsilon=self.epsilon,
            center=self.center,
            scale=self.scale,
            beta_initializer=self.beta_initializer,
            gamma_initializer=self.gamma_initializer,
            moving_mean_initializer=self.moving_mean_initializer,
            moving_variance_initializer=self.moving_variance_initializer,
            beta_regularizer=self.beta_regularizer,
            gamma_regularizer=self.gamma_regularizer,
            beta_constraint=self.beta_constraint,
            gamma_constraint=self.gamma_constraint,
            renorm=self.renorm,
            renorm_clipping=self.renorm_clipping,
            renorm_momentum=self.renorm_momentum,
            fused=self.fused,
            name=str(scope_))
        input_shape = [int(dim) for dim in state_below.shape[1:]]
        self.bn.build(input_shape=[None] + list(input_shape))
Example #5
    def build(self, inputs_shape):
        if inputs_shape[1].value is None:
            raise ValueError(
                "Expected inputs.shape[-1] to be known, saw shape: %s" %
                inputs_shape)

        input_depth = inputs_shape[1].value
        h_depth = self._num_units if self._num_proj is None else self._num_proj
        maybe_partitioner = (partitioned_variables.fixed_size_partitioner(
            self._num_unit_shards)
                             if self._num_unit_shards is not None else None)

        if self._normalize_in_to_hidden or self._normalize_cell:
            if self._normalize_config is None:
                self._normalize_config = {
                    'center': False,
                    'scale': True,
                    'gamma_initializer':
                        init_ops.constant_initializer(0.1, dtype=self.dtype)
                }
            else:
                self._normalize_config['center'] = False

        if not self._normalize_in_to_hidden or self._normalize_in_together:
            self._kernel = self.add_variable(
                _WEIGHTS_VARIABLE_NAME,
                shape=[input_depth + h_depth, 4 * self._num_units],
                initializer=self._initializer,
                partitioner=maybe_partitioner)
            if self._normalize_in_to_hidden:
                self._bn = BatchNormalization(**self._normalize_config)
        else:
            # kernel for the inputs; the hidden-state kernel is created below as _kernel_m
            self._kernel_i = self.add_variable(
                "i_scope/%s" % _WEIGHTS_VARIABLE_NAME,
                shape=[input_depth, 4 * self._num_units],
                initializer=self._initializer,
                partitioner=maybe_partitioner)
            with vs.variable_scope(None, "i_scope"):
                self._bn_i = BatchNormalization(**self._normalize_config)

            self._kernel_m = self.add_variable(
                "m_scope/%s" % _WEIGHTS_VARIABLE_NAME,
                shape=[h_depth, 4 * self._num_units],
                initializer=self._initializer,
                partitioner=maybe_partitioner)
            with vs.variable_scope(None, "m_scope"):
                self._bn_m = BatchNormalization(**self._normalize_config)

        self._bias = self.add_variable(
            _BIAS_VARIABLE_NAME,
            shape=[4 * self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype))

        if self._normalize_cell:
            # copy so that flipping 'center' does not mutate the shared config dict
            self._normalize_config_cell = dict(self._normalize_config)
            self._normalize_config_cell['center'] = True
            self._bn_c = BatchNormalization(**self._normalize_config_cell)

        if self._use_peepholes:
            self._w_f_diag = self.add_variable("w_f_diag",
                                               shape=[self._num_units],
                                               initializer=self._initializer)
            self._w_i_diag = self.add_variable("w_i_diag",
                                               shape=[self._num_units],
                                               initializer=self._initializer)
            self._w_o_diag = self.add_variable("w_o_diag",
                                               shape=[self._num_units],
                                               initializer=self._initializer)

        if self._num_proj is not None:
            maybe_proj_partitioner = (
                partitioned_variables.fixed_size_partitioner(
                    self._num_proj_shards)
                if self._num_proj_shards is not None else None)
            self._proj_kernel = self.add_variable(
                "projection/%s" % _WEIGHTS_VARIABLE_NAME,
                shape=[self._num_units, self._num_proj],
                initializer=self._initializer,
                partitioner=maybe_proj_partitioner)

        self.built = True
Example #6
class BatchNormalization(BaseLayer):
    @BaseLayer.init_name_scope
    def __init__(self,
                 axis=-1,
                 momentum=0.99,
                 epsilon=1e-3,
                 center=True,
                 scale=True,
                 beta_initializer=init_ops.zeros_initializer(),
                 gamma_initializer=init_ops.ones_initializer(),
                 moving_mean_initializer=init_ops.zeros_initializer(),
                 moving_variance_initializer=init_ops.ones_initializer(),
                 beta_regularizer=None,
                 gamma_regularizer=None,
                 beta_constraint=None,
                 gamma_constraint=None,
                 renorm=False,
                 renorm_clipping=None,
                 renorm_momentum=0.99,
                 fused=None):
        '''
        Reference:
            Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
            http://arxiv.org/abs/1502.03167

        Args:
            axis: Integer, the axis that should be normalized (typically the features
                axis). For instance, after a `Conv2D` layer with
                `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
            momentum: Momentum for the moving average.
            epsilon: Small float added to variance to avoid dividing by zero.
            center: If True, add offset of `beta` to normalized tensor. If False, `beta`
                is ignored.
            scale: If True, multiply by `gamma`. If False, `gamma` is
                not used. When the next layer is linear (also e.g. `nn.relu`), this can be
                disabled since the scaling can be done by the next layer.
            beta_initializer: Initializer for the beta weight.
            gamma_initializer: Initializer for the gamma weight.
            moving_mean_initializer: Initializer for the moving mean.
            moving_variance_initializer: Initializer for the moving variance.
            beta_regularizer: Optional regularizer for the beta weight.
            gamma_regularizer: Optional regularizer for the gamma weight.
            beta_constraint: An optional projection function to be applied to the `beta`
                weight after being updated by an `Optimizer` (e.g. used to implement
                norm constraints or value constraints for layer weights). The function
                must take as input the unprojected variable and must return the
                projected variable (which must have the same shape). Constraints are
                not safe to use when doing asynchronous distributed training.
            gamma_constraint: An optional projection function to be applied to the
                `gamma` weight after being updated by an `Optimizer`.
            renorm: Whether to use Batch Renormalization
                (https://arxiv.org/abs/1702.03275). This adds extra variables during
                training. The inference is the same for either value of this parameter.
            renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
                scalar `Tensors` used to clip the renorm correction. The correction
                `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
                `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
                dmax are set to inf, 0, inf, respectively.
            renorm_momentum: Momentum used to update the moving means and standard
                deviations with renorm. Unlike `momentum`, this affects training
                and should be neither too small (which would add noise) nor too large
                (which would give stale estimates). Note that `momentum` is still applied
                to get the means and variances for inference.
            fused: if `True`, use a faster, fused implementation if possible.
                If `None`, use the system recommended implementation.

        Note:
            >>> # To use this normalization, run the update ops below so that the moving mean and variance are updated during training
            >>> from tensorflow.python.framework import ops
            >>> optimizer = tf.train.AdamOptimizer(learning_rate)
            >>> update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
            >>> with ops.control_dependencies(update_ops):
            >>>     train_op = optimizer.minimize(train_cost_sb)

        '''
        self.axis = axis
        self.momentum = momentum
        self.epsilon = epsilon
        self.center = center
        self.scale = scale
        self.beta_initializer = beta_initializer
        self.gamma_initializer = gamma_initializer
        self.moving_mean_initializer = moving_mean_initializer
        self.moving_variance_initializer = moving_variance_initializer
        self.beta_regularizer = beta_regularizer
        self.gamma_regularizer = gamma_regularizer
        self.beta_constraint = beta_constraint
        self.gamma_constraint = gamma_constraint
        self.renorm = renorm
        self.renorm_clipping = renorm_clipping
        self.renorm_momentum = renorm_momentum
        self.fused = fused

    @BaseLayer.init_name_scope
    def __init_var__(self, state_below):
        scope_ = tf.get_default_graph().get_name_scope()
        self.bn = TFBatchNorm(
            axis=self.axis,
            momentum=self.momentum,
            epsilon=self.epsilon,
            center=self.center,
            scale=self.scale,
            beta_initializer=self.beta_initializer,
            gamma_initializer=self.gamma_initializer,
            moving_mean_initializer=self.moving_mean_initializer,
            moving_variance_initializer=self.moving_variance_initializer,
            beta_regularizer=self.beta_regularizer,
            gamma_regularizer=self.gamma_regularizer,
            beta_constraint=self.beta_constraint,
            gamma_constraint=self.gamma_constraint,
            renorm=self.renorm,
            renorm_clipping=self.renorm_clipping,
            renorm_momentum=self.renorm_momentum,
            fused=self.fused,
            name=str(scope_))
        input_shape = [int(dim) for dim in state_below.shape[1:]]
        self.bn.build(input_shape=[None] + list(input_shape))

    def _train_fprop(self, state_below):
        return self.bn.apply(state_below, training=True)

    def _test_fprop(self, state_below):
        return self.bn.apply(state_below, training=False)
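
As a concrete illustration of the renorm_clipping argument documented above (the clipping bounds are arbitrary and only for illustration):

import tensorflow as tf

# hypothetical clipping bounds; the keys follow the docstring above
renorm_clipping = {
    'rmax': tf.constant(3.0),       # r is clipped to [rmin, rmax]
    'rmin': tf.constant(1.0 / 3.0),
    'dmax': tf.constant(5.0),       # d is clipped to [-dmax, dmax]
}

bn_layer = BatchNormalization(renorm=True,
                              renorm_clipping=renorm_clipping,
                              renorm_momentum=0.99)
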
Example #7
def main():
    print("Loading samples and labels")
    samples, labels, _ = load_files("data")
    print("Loaded {} samples".format(samples.shape[0]))

    sequence_dim = 100
    print("Converting to sequences of length {}".format(sequence_dim))
    samples, labels = make_sequences(samples, labels, sequence_dim)

    print("Number of samples from sequences: {}".format(samples.shape[0]))

    lb = LabelBinarizer()
    labels = lb.fit_transform(labels)

    # flatten each sequence so the Decision Tree receives 2-D input
    flatSamples = samples.reshape(samples.shape[0], -1)
    (trainSamples, testSamples, trainLabels,
     testLabels) = train_test_split(flatSamples,
                                    labels,
                                    test_size=0.25,
                                    random_state=42)

    print("=" * 20)
    print("Building DecisionTree model")
    model = DecisionTreeClassifier()
    model.fit(trainSamples, trainLabels)
    treeResults = model.predict(testSamples)
    print(
        confusion_matrix(testLabels.argmax(axis=1),
                         treeResults.argmax(axis=1)))
    print(
        classification_report(testLabels.argmax(axis=1),
                              treeResults.argmax(axis=1)))
    treeAcc = accuracy_score(testLabels.argmax(axis=1),
                             treeResults.argmax(axis=1))
    print("Accuracy Tree: {:.2f}".format(treeAcc))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1),
                          treeResults.argmax(axis=1))))

    print("=" * 20)
    print("Building CNN model")

    (trainSamples, testSamples, trainLabels,
     testLabels) = train_test_split(samples,
                                    labels,
                                    test_size=0.25,
                                    random_state=42)
    inputShape = (samples.shape[1], samples.shape[2])
    model = Sequential()
    model.add(Conv1D(32, 10, padding="same", input_shape=inputShape))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv1D(64, 10, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Conv1D(128, 10, padding="same"))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))
    model.add(Flatten(input_shape=inputShape))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(64, activation='sigmoid'))
    model.add(Dense(labels.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer="adam",
                  metrics=['accuracy'])

    EPOCHS = 10
    BATCH = 128
    model.fit(trainSamples,
              trainLabels,
              batch_size=BATCH,
              epochs=EPOCHS,
              validation_data=(testSamples, testLabels))

    cnnResults = model.predict(testSamples)

    print(
        confusion_matrix(testLabels.argmax(axis=1), cnnResults.argmax(axis=1)))
    print(
        classification_report(testLabels.argmax(axis=1),
                              cnnResults.argmax(axis=1),
                              target_names=lb.classes_))
    print("CNN Accuracy: {:.2f}".format(
        accuracy_score(testLabels.argmax(axis=1), cnnResults.argmax(axis=1))))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1),
                          cnnResults.argmax(axis=1))))
    input("")
Example #8
def batchnorm(state_below, input_shape):
    bn = TFBatchNorm()
    bn.build(input_shape=[None] + list(input_shape))
    return bn.apply(state_below, training=True)
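
A minimal graph-mode usage sketch for this helper, again assuming TFBatchNorm is the TF 1.x tf.layers.BatchNormalization and that the moving statistics are maintained through the UPDATE_OPS collection as the docstrings above describe:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.placeholder(tf.float32, shape=[None, 64])  # illustrative 64-dim features
normalized = batchnorm(x, input_shape=[64])       # always uses training=True here

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # hypothetical loss/optimizer, just to show where the dependency belongs
    train_op = tf.train.AdamOptimizer(1e-3).minimize(tf.reduce_mean(normalized))
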
Example #9
    def __init__(self,
                 caption_max_length,
                 vocabulary_size,
                 dropout_rate,
                 start_encoding,
                 image_features_dimensions,
                 embedding_size=512,
                 hidden_size=1024,
                 alpha_reg=.005,
                 use_max_sampler=True):
        """ arguments """
        self.caption_max_length = caption_max_length
        self.vocabulary_size = vocabulary_size
        self.dropout_rate = dropout_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.image_features_dimensions = image_features_dimensions
        """ the input captions for teacher forcing during training (these are ignored in inference mode)"""
        self.input_captions = Input(name="input_captions",
                                    shape=(caption_max_length, 1))
        self.input_image_normalize = BatchNormalization(
            momentum=.95, name="shatt_image_batch_normalize")
        """ initial image projection layer """
        self.image_projector_shape = Reshape(target_shape=(embedding_size, ))
        self.image_projector = Dense(embedding_size, use_bias=False)
        """ attention layer (attend) """
        self.state_projector = Dense(embedding_size,
                                     activation=None,
                                     name="shatt_image_projector")
        self.state_projector_add = Add(name="shatt_image_projection_sum")
        self.state_projector_activation = Activation(
            "relu", name="shatt_image_projection")

        spatial_kernel = Dense(1,
                               activation=None,
                               name="shatt_image_attention_kernel")
        self.spatial_reductor = TimeDistributed(spatial_kernel,
                                                name="shatt_image_reduction")
        self.spatial_flatten = Flatten(name="shatt_image_attention_flatten")
        self.spatial_attention = Activation("softmax",
                                            name="shatt_image_attention")
        self.spatial_attention_feature = Dot(
            axes=(1, 1), name="shatt_image_attention_feature")

        self.attention_regularizer = AlphaRegularization(
            alpha_reg,
            caption_max_length,
            image_features_dimensions,
            name="shatt_image_attention_regularizer")
        """ select layer (beta) """
        self.image_context_attention = Dense(
            1, activation="sigmoid", name="shatt_image_context_attention")
        self.image_context_attention_feature = Multiply(
            name="shatt_image_context_attention_feature")
        """ decode layer (tell) """
        self.decode_state_dropout = Dropout(rate=dropout_rate,
                                            name="shatt_decode_state_dropout")
        self.decode_state_predictor = Dense(
            embedding_size,
            activation=None,
            name="shatt_decode_state_predictor")
        self.decode_attention_predictor = Dense(
            embedding_size,
            activation=None,
            use_bias=False,
            name="shatt_decode_attention_predictor")
        self.decode_combiner = Add(name="shatt_decode_caption_embedding")
        self.decode_caption_predictor = Dense(
            vocabulary_size + 1,
            "softmax",
            name="shatt_decode_caption_predictor")
        self.decode_caption_sampler = SamplingLayer(
            use_argmax=use_max_sampler, name="shatt_decode_caption_sampling")
        """ embedding layer """
        self.embedding = Embedding(
            input_dim=vocabulary_size + 1,  # b.c. of padding value 0
            output_dim=embedding_size,
            mask_zero=False,
            name="shatt_word_embeddings")
        self.embedding_flatten_layer = Flatten(
            name="shatt_cell_embedding_flatten")
        """ recurrent layer """
        self.lstm = LSTMCell(hidden_size, name="shatt_internal_lstm")
        self.lstm_input_layer = Concatenate(name="shatt_cell_lstm_inputs")
        """ zero like layer to dynamically initialize the previous caption in the first time step based on the batch size"""
        self.zeros_layer = Lambda(
            lambda x: K.ones_like(x, dtype="float32") * start_encoding,
            name="shatt_cell_caption_initial")
        """ reshaping layer for the output, so that they can be concatenated easily """
        self.output_reshape_caption_layer = Reshape(
            target_shape=(1, 1), name="shatt_cell_outputs_caption_reshape")
        self.output_reshape_probs_layer = Reshape(
            target_shape=(1, vocabulary_size + 1),
            name="shatt_cell_outputs_probs_reshape")
        self.output_concatenate_layer = Concatenate(
            axis=1, name="shatt_cell_outputs_concatenate")
        """ reshaping layer for the output, so that they can be concatenated easily """
        self.output_attention_reshape_layer = Reshape(
            target_shape=(1, -1), name="shatt_cell_outputs_attention_reshape")
        self.output_attention_concatenate_layer = Concatenate(
            axis=1, name="shatt_cell_outputs_attention_concatenate")
        """ initalizer layers """
        self.input_features_reductor = ReduceMean(
            axis=1, name="average_image_features")
        self.generator_state_initializer = Dense(
            hidden_size, "tanh", name="initial_generator_state")
        self.generator_context_initializer = Dense(
            hidden_size, "tanh", name="initial_generator_context")