class BatchNormalization(Template):

    @Template.init_name_scope
    def __init__(self, input_shape):
        '''
        REFERENCE:
            Batch Normalization: Accelerating Deep Network Training by
            Reducing Internal Covariate Shift

        PARAMS:
            input_shape (list): shape of the input; the batch dimension is not needed

        # To use this normalization, apply the update ops below to update the mean and variance
        from tensorflow.python.framework import ops
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
        with ops.control_dependencies(update_ops):
            train_op = optimizer.minimize(train_cost_sb)
        '''
        self.bn = TFBatchNorm()
        self.bn.build(input_shape=[None] + list(input_shape))

    def _train_fprop(self, state_below):
        return self.bn.apply(state_below, training=True)

    def _test_fprop(self, state_below):
        return self.bn.apply(state_below, training=False)
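# A minimal usage sketch for the wrapper above, assuming a TF 1.x graph. The
# layer's _train_fprop is invoked directly purely for illustration (in the full
# framework it would normally be driven by the model's forward pass); the
# placeholder shape, learning rate and squared-error cost are all hypothetical.
import tensorflow as tf
from tensorflow.python.framework import ops

x_ph = tf.placeholder(tf.float32, shape=[None, 128])
bn_layer = BatchNormalization(input_shape=[128])
y_train = bn_layer._train_fprop(x_ph)                # normalized with batch statistics
train_cost_sb = tf.reduce_mean(tf.square(y_train))   # hypothetical training cost

optimizer = tf.train.AdamOptimizer(1e-3)
update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
with ops.control_dependencies(update_ops):           # also run the moving mean/variance updates
    train_op = optimizer.minimize(train_cost_sb)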
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[2]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(BatchNormalization())
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    optimizer = tf.keras.optimizers.SGD(lr=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    return model
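# Hedged usage sketch for build_model() above, assuming `keras` is tf.keras.
# The synthetic two-feature regression data below is invented purely so the
# call is runnable; it matches the default input_shape=[2].
import numpy as np

X_train = np.random.rand(1000, 2).astype("float32")
y_train = X_train.sum(axis=1, keepdims=True)
X_valid = np.random.rand(200, 2).astype("float32")
y_valid = X_valid.sum(axis=1, keepdims=True)

model = build_model(n_hidden=2, n_neurons=50, learning_rate=1e-3)
history = model.fit(X_train, y_train, epochs=20,
                    validation_data=(X_valid, y_valid))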
def createModel():
    image_size = IMAGE_SIZE
    image_input = Input(shape=(image_size, image_size, 3), name='input_layer')

    conv_1 = Conv2D(filters=64, kernel_size=(3, 3), use_bias=False)(image_input)
    conv_1_normalized = BatchNormalization()(conv_1)
    conv_1_activation = Activation('relu')(conv_1_normalized)
    conv_1_pooled = MaxPooling2D(padding='same')(conv_1_activation)

    conv_2 = Conv2D(filters=128, kernel_size=(3, 3), use_bias=False)(conv_1_pooled)
    conv_2_normalized = BatchNormalization()(conv_2)
    conv_2_activation = Activation('relu')(conv_2_normalized)
    conv_2_pooled = MaxPooling2D(padding='same')(conv_2_activation)

    conv_3 = Conv2D(filters=128, kernel_size=(3, 3), use_bias=False)(conv_2_pooled)
    conv_3_normalized = BatchNormalization()(conv_3)
    conv_3_activation = Activation('relu')(conv_3_normalized)
    conv_3_pooled = MaxPooling2D(padding='same')(conv_3_activation)

    conv_4 = Conv2D(filters=256, kernel_size=(3, 3), use_bias=False)(conv_3_pooled)
    conv_4_normalized = BatchNormalization()(conv_4)
    conv_4_activation = Activation('relu')(conv_4_normalized)
    conv_4_pooled = MaxPooling2D(padding='same')(conv_4_activation)

    conv_5 = Conv2D(filters=512, kernel_size=(3, 3), use_bias=False)(conv_4_pooled)
    conv_5_normalized = BatchNormalization()(conv_5)
    conv_5_activation = Activation('relu')(conv_5_normalized)
    conv_5_pooled = MaxPooling2D(padding='same')(conv_5_activation)

    conv_flattened = Flatten()(conv_5_pooled)
    dense_layer_1 = Dense(512, use_bias=False)(conv_flattened)
    dense_normalized = BatchNormalization()(dense_layer_1)
    dense_activation = Activation('relu')(dense_normalized)
    output = Dense(43, activation='softmax', name='output_layer')(dense_activation)

    model = tf.keras.Model(inputs=image_input, outputs=[output])
    model.compile(optimizer=rmsprop(1e-3),
                  loss={'output_layer': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    model.summary()
    return model
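# Hypothetical call to createModel(). The random images and 43-way one-hot
# labels below are stand-ins so the snippet is runnable; real data loading is
# project-specific, and IMAGE_SIZE is assumed to be defined (e.g. 32).
import numpy as np

train_images = np.random.rand(256, IMAGE_SIZE, IMAGE_SIZE, 3).astype("float32")
train_labels = np.eye(43)[np.random.randint(0, 43, size=256)]

model = createModel()
model.fit(train_images, train_labels, batch_size=64, epochs=1)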
def __init_var__(self, state_below):
    scope_ = tf.get_default_graph().get_name_scope()
    self.bn = TFBatchNorm(axis=self.axis,
                          momentum=self.momentum,
                          epsilon=self.epsilon,
                          center=self.center,
                          scale=self.scale,
                          beta_initializer=self.beta_initializer,
                          gamma_initializer=self.gamma_initializer,
                          moving_mean_initializer=self.moving_mean_initializer,
                          moving_variance_initializer=self.moving_variance_initializer,
                          beta_regularizer=self.beta_regularizer,
                          gamma_regularizer=self.gamma_regularizer,
                          beta_constraint=self.beta_constraint,
                          gamma_constraint=self.gamma_constraint,
                          renorm=self.renorm,
                          renorm_clipping=self.renorm_clipping,
                          renorm_momentum=self.renorm_momentum,
                          fused=self.fused,
                          name=str(scope_))
    input_shape = [int(dim) for dim in state_below.shape[1:]]
    self.bn.build(input_shape=[None] + list(input_shape))
def build(self, inputs_shape):
    if inputs_shape[1].value is None:
        raise ValueError(
            "Expected inputs.shape[-1] to be known, saw shape: %s" % inputs_shape)

    input_depth = inputs_shape[1].value
    h_depth = self._num_units if self._num_proj is None else self._num_proj
    maybe_partitioner = (
        partitioned_variables.fixed_size_partitioner(self._num_unit_shards)
        if self._num_unit_shards is not None
        else None)

    if self._normalize_in_to_hidden or self._normalize_cell:
        if self._normalize_config is None:
            self._normalize_config = {
                'center': False,
                'scale': True,
                'gamma_initializer': init_ops.constant_initializer(0.1, dtype=self.dtype)
            }
        else:
            self._normalize_config['center'] = False

    if not self._normalize_in_to_hidden or self._normalize_in_together:
        # single kernel over the concatenated [input, hidden] state
        self._kernel = self.add_variable(
            _WEIGHTS_VARIABLE_NAME,
            shape=[input_depth + h_depth, 4 * self._num_units],
            initializer=self._initializer,
            partitioner=maybe_partitioner)
        if self._normalize_in_to_hidden:
            self._bn = BatchNormalization(**self._normalize_config)
    else:
        # input-to-gates kernel; renamed from _kernel_m so it is not
        # overwritten by the hidden-to-gates kernel created below
        self._kernel_i = self.add_variable(
            "i_scope/%s" % _WEIGHTS_VARIABLE_NAME,
            shape=[input_depth, 4 * self._num_units],
            initializer=self._initializer,
            partitioner=maybe_partitioner)
        with vs.variable_scope(None, "i_scope"):
            self._bn_i = BatchNormalization(**self._normalize_config)
        self._kernel_m = self.add_variable(
            "m_scope/%s" % _WEIGHTS_VARIABLE_NAME,
            shape=[h_depth, 4 * self._num_units],
            initializer=self._initializer,
            partitioner=maybe_partitioner)
        with vs.variable_scope(None, "m_scope"):
            self._bn_m = BatchNormalization(**self._normalize_config)

    self._bias = self.add_variable(
        _BIAS_VARIABLE_NAME,
        shape=[4 * self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    if self._normalize_cell:
        # copy so that enabling 'center' for the cell BN does not mutate the
        # shared config used for the input/hidden normalizations
        self._normalize_config_cell = dict(self._normalize_config)
        self._normalize_config_cell['center'] = True
        self._bn_c = BatchNormalization(**self._normalize_config_cell)

    if self._use_peepholes:
        self._w_f_diag = self.add_variable("w_f_diag", shape=[self._num_units],
                                           initializer=self._initializer)
        self._w_i_diag = self.add_variable("w_i_diag", shape=[self._num_units],
                                           initializer=self._initializer)
        self._w_o_diag = self.add_variable("w_o_diag", shape=[self._num_units],
                                           initializer=self._initializer)

    if self._num_proj is not None:
        maybe_proj_partitioner = (
            partitioned_variables.fixed_size_partitioner(self._num_proj_shards)
            if self._num_proj_shards is not None
            else None)
        self._proj_kernel = self.add_variable(
            "projection/%s" % _WEIGHTS_VARIABLE_NAME,
            shape=[self._num_units, self._num_proj],
            initializer=self._initializer,
            partitioner=maybe_proj_partitioner)

    self.built = True
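# A reading of the branching above, sketched for clarity (not part of the
# original file): when input and hidden contributions are normalized
# separately, each keeps its own kernel and BatchNormalization instance, so the
# gate pre-activations are roughly
#     gates = bn_i(x @ W_i) + bn_m(h @ W_m) + b
# whereas the combined case uses a single kernel over the concatenated [x, h]
# with at most one BatchNormalization:
#     gates = bn([x, h] @ W) + b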
class BatchNormalization(BaseLayer):

    @BaseLayer.init_name_scope
    def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, center=True, scale=True,
                 beta_initializer=init_ops.zeros_initializer(),
                 gamma_initializer=init_ops.ones_initializer(),
                 moving_mean_initializer=init_ops.zeros_initializer(),
                 moving_variance_initializer=init_ops.ones_initializer(),
                 beta_regularizer=None,
                 gamma_regularizer=None,
                 beta_constraint=None,
                 gamma_constraint=None,
                 renorm=False,
                 renorm_clipping=None,
                 renorm_momentum=0.99,
                 fused=None):
        '''
        Reference:
            Batch Normalization: Accelerating Deep Network Training by Reducing
            Internal Covariate Shift
            http://arxiv.org/abs/1502.03167

        Args:
            axis: Integer, the axis that should be normalized (typically the
                features axis). For instance, after a `Conv2D` layer with
                `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
            momentum: Momentum for the moving average.
            epsilon: Small float added to variance to avoid dividing by zero.
            center: If True, add offset of `beta` to normalized tensor.
                If False, `beta` is ignored.
            scale: If True, multiply by `gamma`. If False, `gamma` is not used.
                When the next layer is linear (also e.g. `nn.relu`), this can be
                disabled since the scaling can be done by the next layer.
            beta_initializer: Initializer for the beta weight.
            gamma_initializer: Initializer for the gamma weight.
            moving_mean_initializer: Initializer for the moving mean.
            moving_variance_initializer: Initializer for the moving variance.
            beta_regularizer: Optional regularizer for the beta weight.
            gamma_regularizer: Optional regularizer for the gamma weight.
            beta_constraint: An optional projection function to be applied to the
                `beta` weight after being updated by an `Optimizer` (e.g. used to
                implement norm constraints or value constraints for layer weights).
                The function must take as input the unprojected variable and must
                return the projected variable (which must have the same shape).
                Constraints are not safe to use when doing asynchronous
                distributed training.
            gamma_constraint: An optional projection function to be applied to the
                `gamma` weight after being updated by an `Optimizer`.
            renorm: Whether to use Batch Renormalization
                (https://arxiv.org/abs/1702.03275). This adds extra variables
                during training. The inference is the same for either value of
                this parameter.
            renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax'
                to scalar `Tensors` used to clip the renorm correction. The
                correction `(r, d)` is used as
                `corrected_value = normalized_value * r + d`, with `r` clipped to
                [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, dmax
                are set to inf, 0, inf, respectively.
            renorm_momentum: Momentum used to update the moving means and standard
                deviations with renorm. Unlike `momentum`, this affects training
                and should be neither too small (which would add noise) nor too
                large (which would give stale estimates). Note that `momentum` is
                still applied to get the means and variances for inference.
            fused: If `True`, use a faster, fused implementation if possible.
                If `None`, use the system-recommended implementation.

        Note:
            >>> # To use this normalization, apply the update ops below to update the mean and variance
            >>> from tensorflow.python.framework import ops
            >>> optimizer = tf.train.AdamOptimizer(learning_rate)
            >>> update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
            >>> with ops.control_dependencies(update_ops):
            >>>     train_op = optimizer.minimize(train_cost_sb)
        '''
        self.axis = axis
        self.momentum = momentum
        self.epsilon = epsilon
        self.center = center
        self.scale = scale
        self.beta_initializer = beta_initializer
        self.gamma_initializer = gamma_initializer
        self.moving_mean_initializer = moving_mean_initializer
        self.moving_variance_initializer = moving_variance_initializer
        self.beta_regularizer = beta_regularizer
        self.gamma_regularizer = gamma_regularizer
        self.beta_constraint = beta_constraint
        self.gamma_constraint = gamma_constraint
        self.renorm = renorm
        self.renorm_clipping = renorm_clipping
        self.renorm_momentum = renorm_momentum
        self.fused = fused

    @BaseLayer.init_name_scope
    def __init_var__(self, state_below):
        scope_ = tf.get_default_graph().get_name_scope()
        self.bn = TFBatchNorm(axis=self.axis,
                              momentum=self.momentum,
                              epsilon=self.epsilon,
                              center=self.center,
                              scale=self.scale,
                              beta_initializer=self.beta_initializer,
                              gamma_initializer=self.gamma_initializer,
                              moving_mean_initializer=self.moving_mean_initializer,
                              moving_variance_initializer=self.moving_variance_initializer,
                              beta_regularizer=self.beta_regularizer,
                              gamma_regularizer=self.gamma_regularizer,
                              beta_constraint=self.beta_constraint,
                              gamma_constraint=self.gamma_constraint,
                              renorm=self.renorm,
                              renorm_clipping=self.renorm_clipping,
                              renorm_momentum=self.renorm_momentum,
                              fused=self.fused,
                              name=str(scope_))
        input_shape = [int(dim) for dim in state_below.shape[1:]]
        self.bn.build(input_shape=[None] + list(input_shape))

    def _train_fprop(self, state_below):
        return self.bn.apply(state_below, training=True)

    def _test_fprop(self, state_below):
        return self.bn.apply(state_below, training=False)
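# Illustrative sketch only: constructing the layer above for a channels_first
# Conv2D output (axis=1). __init_var__ and the fprop methods are called
# directly here just to show the train/test behaviour; in the surrounding
# framework they would normally be driven by the model's own train/test
# forward passes. The placeholder shape is an arbitrary assumption.
import tensorflow as tf

conv_out = tf.placeholder(tf.float32, shape=[None, 64, 28, 28])  # NCHW feature map
bn = BatchNormalization(axis=1, momentum=0.9, epsilon=1e-5)
bn.__init_var__(conv_out)                 # builds the wrapped TFBatchNorm
train_out = bn._train_fprop(conv_out)     # normalizes with batch statistics
test_out = bn._test_fprop(conv_out)       # normalizes with the moving averages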
def main():
    print("Loading samples and labels")
    samples, labels, _ = load_files("data")
    print("Loaded {} samples".format(samples.shape[0]))

    sequence_dim = 100
    print("Converting to sequences of length {}".format(sequence_dim))
    samples, labels = make_sequences(samples, labels, sequence_dim)
    print("Number of samples from sequences: {}".format(samples.shape[0]))

    lb = LabelBinarizer()
    labels = lb.fit_transform(labels)

    # flattened samples for Decision Tree
    flatSamples = samples.reshape(samples.shape[0], -1)

    # tree!
    (trainSamples, testSamples, trainLabels, testLabels) = train_test_split(
        flatSamples, labels, test_size=0.25, random_state=42)
    print("=" * 20)
    print("Building DecisionTree model")
    model = DecisionTreeClassifier()
    model.fit(trainSamples, trainLabels)
    treeResults = model.predict(testSamples)
    print(confusion_matrix(testLabels.argmax(axis=1), treeResults.argmax(axis=1)))
    print(classification_report(testLabels.argmax(axis=1), treeResults.argmax(axis=1)))
    treeAcc = accuracy_score(testLabels.argmax(axis=1), treeResults.argmax(axis=1))
    print("Accuracy Tree: {:.2f}".format(treeAcc))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1), treeResults.argmax(axis=1))))

    print("=" * 20)
    print("Building CNN model")
    (trainSamples, testSamples, trainLabels, testLabels) = train_test_split(
        samples, labels, test_size=0.25, random_state=42)
    inputShape = (samples.shape[1], samples.shape[2])

    model = Sequential()
    model.add(Conv1D(32, 10, padding="same", input_shape=inputShape))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Conv1D(64, 10, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Conv1D(128, 10, padding="same"))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))
    model.add(Flatten(input_shape=inputShape))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(64, activation='sigmoid'))
    model.add(Dense(labels.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer="adam",
                  metrics=['accuracy'])

    EPOCHS = 10
    BATCH = 128
    model.fit(trainSamples, trainLabels, batch_size=BATCH, epochs=EPOCHS,
              validation_data=(testSamples, testLabels))
    cnnResults = model.predict(testSamples)
    print(confusion_matrix(testLabels.argmax(axis=1), cnnResults.argmax(axis=1)))
    print(classification_report(testLabels.argmax(axis=1), cnnResults.argmax(axis=1),
                                target_names=lb.classes_))
    print("CNN Accuracy: {:.2f}".format(
        accuracy_score(testLabels.argmax(axis=1), cnnResults.argmax(axis=1))))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1), cnnResults.argmax(axis=1))))
    input("")
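# Assumed script entry point (not shown in the excerpt above) so that main()
# runs when the file is executed directly.
if __name__ == "__main__":
    main()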
# Assumed TF 1.x import path for the wrapped layer used below.
from tensorflow.python.layers.normalization import BatchNormalization as TFBatchNorm


def batchnorm(state_below, input_shape):
    bn = TFBatchNorm()
    bn.build(input_shape=[None] + list(input_shape))
    return bn.apply(state_below, training=True)
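# Minimal TF 1.x sketch of calling batchnorm() above; the placeholder shape
# [None, 64] is an arbitrary assumption. Note that the helper always applies
# the layer with training=True, so it normalizes with batch statistics even at
# inference time.
import tensorflow as tf

x_ph = tf.placeholder(tf.float32, shape=[None, 64])
y_bn = batchnorm(x_ph, input_shape=[64])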
def __init__(self, caption_max_length, vocabulary_size, dropout_rate,
             start_encoding, image_features_dimensions, embedding_size=512,
             hidden_size=1024, alpha_reg=.005, use_max_sampler=True):
    """ arguments """
    self.caption_max_length = caption_max_length
    self.vocabulary_size = vocabulary_size
    self.dropout_rate = dropout_rate
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.image_features_dimensions = image_features_dimensions

    """ the input captions for teacher forcing during training
        (these are ignored in inference mode) """
    self.input_captions = Input(name="input_captions",
                                shape=(caption_max_length, 1))
    self.input_image_normalize = BatchNormalization(
        momentum=.95, name="shatt_image_batch_normalize")

    """ initial image projection layer """
    self.image_projector_shape = Reshape(target_shape=(embedding_size, ))
    self.image_projector = Dense(embedding_size, use_bias=False)

    """ attention layer (attend) """
    self.state_projector = Dense(embedding_size, activation=None,
                                 name="shatt_image_projector")
    self.state_projector_add = Add(name="shatt_image_projection_sum")
    self.state_projector_activation = Activation(
        "relu", name="shatt_image_projection")
    spatial_kernel = Dense(1, activation=None,
                           name="shatt_image_attention_kernel")
    self.spatial_reductor = TimeDistributed(spatial_kernel,
                                            name="shatt_image_reduction")
    self.spatial_flatten = Flatten(name="shatt_image_attention_flatten")
    self.spatial_attention = Activation("softmax", name="shatt_image_attention")
    self.spatial_attention_feature = Dot(
        axes=(1, 1), name="shatt_image_attention_feature")
    self.attention_regularizer = AlphaRegularization(
        alpha_reg, caption_max_length, image_features_dimensions,
        name="shatt_image_attention_regularizer")

    """ select layer (beta) """
    self.image_context_attention = Dense(
        1, activation="sigmoid", name="shatt_image_context_attention")
    self.image_context_attention_feature = Multiply(
        name="shatt_image_context_attention_feature")

    """ decode layer (tell) """
    self.decode_state_dropout = Dropout(rate=dropout_rate,
                                        name="shatt_decode_state_dropout")
    self.decode_state_predictor = Dense(
        embedding_size, activation=None, name="shatt_decode_state_predictor")
    self.decode_attention_predictor = Dense(
        embedding_size, activation=None, use_bias=False,
        name="shatt_decode_attention_predictor")
    self.decode_combiner = Add(name="shatt_decode_caption_embedding")
    self.decode_caption_predictor = Dense(
        vocabulary_size + 1, "softmax", name="shatt_decode_caption_predictor")
    self.decode_caption_sampler = SamplingLayer(
        use_argmax=use_max_sampler, name="shatt_decode_caption_sampling")

    """ embedding layer """
    self.embedding = Embedding(
        input_dim=vocabulary_size + 1,  # because of padding value 0
        output_dim=embedding_size,
        mask_zero=False,
        name="shatt_word_embeddings")
    self.embedding_flatten_layer = Flatten(
        name="shatt_cell_embedding_flatten")

    """ recurrent layer """
    self.lstm = LSTMCell(hidden_size, name="shatt_internal_lstm")
    self.lstm_input_layer = Concatenate(name="shatt_cell_lstm_inputs")

    """ zero-like layer to dynamically initialize the previous caption in the
        first time step based on the batch size """
    self.zeros_layer = Lambda(
        lambda x: K.ones_like(x, dtype="float32") * start_encoding,
        name="shatt_cell_caption_initial")

    """ reshaping layers for the caption outputs, so that they can be concatenated easily """
    self.output_reshape_caption_layer = Reshape(
        target_shape=(1, 1), name="shatt_cell_outputs_caption_reshape")
    self.output_reshape_probs_layer = Reshape(
        target_shape=(1, vocabulary_size + 1),
        name="shatt_cell_outputs_probs_reshape")
    self.output_concatenate_layer = Concatenate(
        axis=1, name="shatt_cell_outputs_concatenate")

    """ reshaping layers for the attention outputs, so that they can be concatenated easily """
    self.output_attention_reshape_layer = Reshape(
        target_shape=(1, -1), name="shatt_cell_outputs_attention_reshape")
    self.output_attention_concatenate_layer = Concatenate(
        axis=1, name="shatt_cell_outputs_attention_concatenate")

    """ initializer layers """
    self.input_features_reductor = ReduceMean(
        axis=1, name="average_image_features")
    self.generator_state_initializer = Dense(
        hidden_size, "tanh", name="initial_generator_state")
    self.generator_context_initializer = Dense(
        hidden_size, "tanh", name="initial_generator_context")