def build(self, input_shape):
    """Build the convolution, the batch norm and the optional quantization state.

    Args:
        input_shape: Shape of the layer input, forwarded to the parent build
            and used to derive the shape the batch norm is built on.
    """
    # The parent build is responsible for the trainable self.kernel weights.
    super(_ConvBatchNorm2D, self).build(input_shape)

    # The batch norm build is responsible for the trainable gamma and beta
    # weights; it is built on the output shape of the convolution.
    conv_output_shape = self.compute_output_shape(input_shape)
    self.batchnorm.build(conv_output_shape)

    if not self.is_quantized:
        return

    def _frozen_range_var(var_name, value):
        # Non-trainable variable starting at a constant value.
        return self.add_variable(
            var_name,
            initializer=initializers.Constant(value),
            trainable=False)

    self._weight_min_var = _frozen_range_var('weight_min', -6.0)
    self._weight_max_var = _frozen_range_var('weight_max', 6.0)

    self.optimizer_step = self.add_weight(
        'optimizer_step',
        initializer=initializers.Constant(-1),
        dtype=dtypes.int32,
        trainable=False)

    # Replace the plain post activation by its quantize-aware wrapper.
    self.post_activation = quantize_aware_activation.QuantizeAwareActivation(
        self.post_activation,
        self.activation_quantizer,
        self.optimizer_step,
        self)
def build(self, input_shape):
    """Create the kernel/bias weights and their matching sigma weights.

    Args:
        input_shape: Shape of the layer input; must have at least two
            entries. Its last entry is stored as ``self.input_dim``.
    """
    assert len(input_shape) >= 2
    self.input_dim = input_shape[-1]

    kernel_shape = (self.input_dim, self.units)
    bias_shape = (self.units, )

    self.kernel = self.add_weight(
        shape=kernel_shape,
        initializer=self.kernel_initializer,
        name='kernel',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)
    # The sigma weights start at the constant self.sigma_init.
    self.sigma_kernel = self.add_weight(
        shape=kernel_shape,
        initializer=initializers.Constant(value=self.sigma_init),
        name='sigma_kernel')

    if not self.use_bias:
        self.bias = None
        # NOTE(review): sigma_bias is not assigned on this path, only
        # epsilon_bias is - confirm that nothing reads sigma_bias when
        # use_bias is False.
        self.epsilon_bias = None
    else:
        self.bias = self.add_weight(
            shape=bias_shape,
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)
        self.sigma_bias = self.add_weight(
            shape=bias_shape,
            initializer=initializers.Constant(value=self.sigma_init),
            name='sigma_bias')

    # Noise is not sampled here (a sample_noise() call was left disabled).
    super(NoisyDense, self).build(input_shape)
def _build_for_quantization(self):
    """All Keras build() logic for quantization for fused layers.

    Does nothing unless ``self.is_quantized`` is truthy. Otherwise creates
    the weight range variables, the optimizer step counter and the
    activation range variables, in that order.
    """
    if not self.is_quantized:
        return

    def _frozen_range_var(var_name, value):
        # Non-trainable variable starting at a constant value.
        return self.add_variable(
            var_name,
            initializer=initializers.Constant(value),
            trainable=False)

    self._weight_min_var = _frozen_range_var('weight_min', -6.0)
    self._weight_max_var = _frozen_range_var('weight_max', 6.0)

    self.optimizer_step = self.add_weight(
        'optimizer_step',
        initializer=initializers.Constant(-1),
        dtype=dtypes.int32,
        trainable=False)

    # TODO(alanchiao): re-explore if we can handle this with
    # QuantizeAwareActivation.
    self._activation_min_var = _frozen_range_var('activation_min', -6.0)
    self._activation_max_var = _frozen_range_var('activation_max', 6.0)
def build(self, input_shape):
    """Build the wrapper and create range variables for weights and activations.

    Args:
        input_shape: Shape of the layer input, forwarded to the parent build.
    """
    super(QuantizeEmulateWrapper, self).build(input_shape)

    def _frozen_range_var(var_name, value):
        # Non-trainable variable starting at a constant value.
        return self.add_variable(
            var_name,
            initializer=initializers.Constant(value),
            trainable=False)

    # For each of the quantizable weights, construct the necessary variables.
    # TODO(alanchiao): when validated, add per-channel as parameter, which
    # affects shape and other factors.
    lower_bounds, upper_bounds = [], []
    for kernel in self.layer.get_quantizable_weights():
        lower = _frozen_range_var('weight_min', -6.0)
        upper = _frozen_range_var('weight_max', 6.0)

        self._unquantized_kernels.append(kernel)
        lower_bounds.append(lower)
        upper_bounds.append(upper)

        # set_quantizable_weights on the wrapped layer removes the
        # unquantized kernel from its _trainable_weights. Adding it to the
        # wrapper's _trainable_weights ensures it still gets gradient updates.
        self._trainable_weights.append(kernel)

    self._weight_vars = list(
        zip(self._unquantized_kernels, lower_bounds, upper_bounds))

    self._min_activation = _frozen_range_var('activation_min', -6.0)
    self._max_activation = _frozen_range_var('activation_max', 6.0)
def _add_range_weights(self, layer, name):
    """Add non-trainable ``<name>_min`` / ``<name>_max`` weights to ``layer``.

    Args:
        layer: Object whose ``add_weight`` method creates the weights.
        name: Prefix of the two weight names.

    Returns:
        A list ``[min_weight, max_weight]``; the weights start at the
        constants -6.0 and 6.0 respectively.
    """
    suffix_and_start = (('_min', -6.0), ('_max', 6.0))
    return [
        layer.add_weight(
            name + suffix,
            initializer=initializers.Constant(start),
            trainable=False)
        for suffix, start in suffix_and_start
    ]
def _add_range_weights(self, name):
    """Add non-trainable ``<name>_min`` / ``<name>_max`` weights to the wrapper.

    Args:
        name: Prefix of the two weight names.

    Returns:
        A tuple ``(min_var, max_var)`` created through
        ``self.quantize_wrapper.add_weight``; they start at the constants
        -6.0 and 6.0 respectively.
    """
    def _frozen(suffix, start):
        return self.quantize_wrapper.add_weight(
            name + suffix,
            initializer=initializers.Constant(start),
            trainable=False)

    lower = _frozen('_min', -6.0)
    upper = _frozen('_max', 6.0)
    return lower, upper
def build(self, tensor_shape, name, layer):
    """Add non-trainable min/max weights sized by the last tensor dimension.

    Args:
        tensor_shape: Shape of the tensor the range belongs to; only its
            last entry is used, as the length of both weights.
        name: Prefix of the two weight names.
        layer: Object whose ``add_weight`` method creates the weights.

    Returns:
        A list ``[min_weight, max_weight]``; the weights start at the
        constants -6.0 and 6.0 respectively.
    """
    range_shape = (tensor_shape[-1],)
    suffix_and_start = (('_min', -6.0), ('_max', 6.0))
    return [
        layer.add_weight(
            name + suffix,
            shape=range_shape,
            initializer=initializers.Constant(start),
            trainable=False)
        for suffix, start in suffix_and_start
    ]
def test_forward_works_with_mask(numpy_crf):
    """Training loss of the CRF layer under a mask matches the numpy reference."""
    emissions = np.array([
        [[0, 0, 0.5, 0.5, 0.2], [0, 0, 0.3, 0.3, 0.1], [0, 0, 0.9, 10, 1]],
        [[0, 0, 0.2, 0.5, 0.2], [0, 0, 3, 0.3, 0.1], [0, 0, 0.9, 1, 1]],
    ])
    chain = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0],
    ])
    boundary = np.array([0.1, 0.2, 0.3, 0.4, 0.6])
    gold_tags = np.array([
        [2, 3, 4],
        [3, 2, 2],
    ])

    # Use the CRF layer with fixed transitions to compute the log likelihood.
    crf_layer = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(chain),
        use_boundary=True,
        boundary_initializer=initializers.Constant(boundary),
        name="crf_layer"
    )

    # Use a non-trivial mask.
    step_mask = np.array([
        [1, 1, 1],
        [1, 1, 0],
    ])

    loss_fn = ConditionalRandomFieldLoss()

    model = Sequential()
    stages = (
        layers.Input(shape=(3, 5)),
        MockMasking(mask_shape=(2, 3), mask_value=step_mask),
        crf_layer,
    )
    for stage in stages:
        model.add(stage)
    model.compile('adam', loss={"crf_layer": loss_fn})

    result = model.train_on_batch(emissions, gold_tags)

    reference = numpy_crf(emissions, step_mask, chain, boundary, boundary)
    # The reference log likelihood is divided by -2 before the comparison.
    expected = reference.compute_log_likehood(gold_tags) / -2

    assert result == approx(expected)
def _strip_clustering_wrapper(layer):
    """Return the layer wrapped by a ClusterWeights wrapper, with weights restored.

    Args:
        layer: Any layer. Anything that is not a
            ``cluster_wrapper.ClusterWeights`` instance is returned unchanged.

    Returns:
        ``layer.layer`` with its weights re-created from ``layer.restore``
        when ``layer`` is a clustering wrapper, otherwise ``layer`` itself.
    """
    if not isinstance(layer, cluster_wrapper.ClusterWeights):
        return layer

    wrapped = layer.layer
    if not hasattr(wrapped, '_batch_input_shape') and \
            hasattr(layer, '_batch_input_shape'):
        wrapped._batch_input_shape = layer._batch_input_shape

    # Both weight lists are reset so that the newly created weights end up
    # in a guaranteed order.
    wrapped._trainable_weights = []
    wrapped._non_trainable_weights = []

    # Positions are used as keys because layer.gone_variables holds integers.
    for position in range(len(layer.restore)):
        name, weight = layer.restore[position]
        # Entries of removed (clustered) variables hold the updater created
        # earlier, which has to be called; other entries (e.g. bias) hold a
        # valid reference to the tensor itself.
        source = weight() if position in layer.gone_variables else weight
        # k.batch_get_value gives a physical copy of the array, which is
        # needed to initialize a new tensor.
        new_weight_value = k.batch_get_value([source])[0]
        # NOTE(review): every restored weight is created with trainable=True,
        # including ones that may have been non-trainable - confirm intended.
        wrapped.add_weight(
            name=name,
            shape=new_weight_value.shape,
            initializer=initializers.Constant(new_weight_value),
            trainable=True)

    # With all weights filled in, the underlying layer is fully autonomous
    # from its wrapper.
    return wrapped
def build(self, input_shape):
    """Build the wrapper and the quantization state for weights, activations, output.

    Args:
        input_shape: Shape of the layer input; forwarded to the parent build
            and used to compute the wrapped layer's output shape.
    """
    super(QuantizeWrapper, self).build(input_shape)

    self.optimizer_step = self.add_weight(
        'optimizer_step',
        initializer=initializers.Constant(-1),
        dtype=dtypes.int32,
        trainable=False)

    provider = self.quantize_provider

    self._weight_vars = []
    weight_pairs = provider.get_weights_and_quantizers(self.layer)
    for weight, quantizer in weight_pairs:
        var_name = self._weight_name(weight.name)
        min_var, max_var = quantizer.build(weight.shape, var_name, self)
        self._weight_vars.append((weight, quantizer, min_var, max_var))
        # Needed to ensure unquantized weights get trained as part of the
        # wrapper.
        self._trainable_weights.append(weight)

    self._quantize_activations = []
    activation_pairs = provider.get_activations_and_quantizers(self.layer)
    for activation, quantizer in activation_pairs:
        wrapped_activation = quantize_aware_activation.QuantizeAwareActivation(
            activation, quantizer, self.optimizer_step, self)
        self._quantize_activations.append(wrapped_activation)

    self._output_quantizers = provider.get_output_quantizers(self.layer)
    if self._output_quantizers:
        # Only the first output quantizer is built.
        output_shape = self.layer.compute_output_shape(input_shape)
        self._output_min_max = self._output_quantizers[0].build(
            output_shape, 'output', self)
def __init__(self, bias=-3, **kwargs):
    """Set up the constant bias initializer and the merge layers.

    Args:
        bias: Value wrapped in a ``Constant`` initializer and stored as
            ``self.bias``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(HighwayNetStep, self).__init__(**kwargs)

    # self.bias holds an initializer object, not a weight.
    bias_initializer = initializers.Constant(value=bias)
    self.bias = bias_initializer

    # Two separate Multiply layers and one Add layer.
    self.multiply1 = Multiply()
    self.multiply2 = Multiply()
    self.add = Add()
def _train_CNN_Glove(self,
                     X_train,
                     y_train,
                     epochs=5,
                     batch_size=64,
                     learning_rate=0.001,
                     regularization=0.01):
    """Train a 1-D CNN on top of a frozen, pre-computed embedding matrix.

    Args:
        X_train: Input sequences.
        y_train: Target sequences; flattened to compute balanced class
            weights.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        learning_rate: Learning rate of the Adam optimizer.
        regularization: Accepted for interface compatibility; this method
            does not use it.

    Returns:
        None. The fitted model is stored in ``self.model`` and the
        per-epoch metrics (``History.history``) in ``self.history``.
    """
    flatten_y = [category for sublist in y_train for category in sublist]
    classes = np.unique(flatten_y)
    # Keyword arguments: recent scikit-learn releases reject positional
    # `classes` / `y`, and the keywords also work on older releases.
    weights = class_weight.compute_class_weight('balanced',
                                                classes=classes,
                                                y=flatten_y)
    # Keras expects a {class: weight} mapping. The original call passed the
    # imported `class_weight` module instead of the computed weights.
    # NOTE(review): confirm the labels in y_train match what Keras expects
    # as class indices for this model's 8-unit output.
    class_weights = dict(zip(classes.tolist(), weights.tolist()))

    optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    embedding_matrix = self.create_embedding_matrix()

    model = models.Sequential()
    # The embedding layer is initialised from the matrix and kept frozen.
    model.add(
        Embedding(
            input_dim=self.max_word_count,
            output_dim=100,
            embeddings_initializer=initializers.Constant(embedding_matrix),
            input_length=self.max_sequence_len,
            trainable=False))
    model.add(
        Conv1D(filters=300,
               kernel_size=3,
               padding='valid',
               activation='relu',
               strides=1))
    model.add(GlobalMaxPool1D())
    model.add(Dense(8, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optim,
                  metrics=[BinaryAccuracy()])

    history = model.fit(X_train,
                        y_train,
                        class_weight=class_weights,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=0.25,
                        verbose=self.verbose,
                        callbacks=[
                            EarlyStopping(monitor='val_loss',
                                          patience=3,
                                          min_delta=0.0001)
                        ])

    self.model = model
    self.history = history.history
def test_viterbi_tags(numpy_crf):
    """Tags predicted by the CRF layer equal the numpy reference decoding."""
    emissions = np.array([
        [[0, 0, 0.5, 0.5, 0.2], [0, 0, 0.3, 0.3, 0.1], [0, 0, 0.9, 10, 1]],
        [[0, 0, 0.2, 0.5, 0.2], [0, 0, 3, 0.3, 0.1], [0, 0, 0.9, 1, 1]],
    ])
    chain = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0],
    ])
    boundary = np.array([0.1, 0.2, 0.3, 0.4, 0.6])

    # Use the CRF layer with fixed transitions.
    crf_layer = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(chain),
        use_boundary=True,
        boundary_initializer=initializers.Constant(boundary),
        name="crf_layer"
    )

    step_mask = np.array([
        [1, 1, 1],
        [1, 1, 0],
    ])

    loss_fn = ConditionalRandomFieldLoss()

    model = Sequential()
    stages = (
        layers.Input(shape=(3, 5)),
        MockMasking(mask_shape=(2, 3), mask_value=step_mask),
        crf_layer,
    )
    for stage in stages:
        model.add(stage)
    model.compile('adam', loss={"crf_layer": loss_fn})

    result = model.predict(emissions)

    reference = numpy_crf(emissions, step_mask, chain, boundary, boundary)
    # decode() returns a pair; only its first element is compared.
    expected, _ = reference.decode()

    np.testing.assert_equal(result, expected)
def setUp(self):
    """Create the shared logits, tags, transition scores and CRF layer."""
    super().setUp()

    logits = np.array([
        [[0, 0, 0.5, 0.5, 0.2], [0, 0, 0.3, 0.3, 0.1], [0, 0, 0.9, 10, 1]],
        [[0, 0, 0.2, 0.5, 0.2], [0, 0, 3, 0.3, 0.1], [0, 0, 0.9, 1, 1]],
    ])
    tags = np.array([
        [2, 3, 4],
        [3, 2, 2],
    ])
    transitions = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0],
    ])

    self.logits = logits
    self.tags = tags
    self.transitions = transitions
    self.transitions_from_start = np.array([0.1, 0.2, 0.3, 0.4, 0.6])
    self.transitions_to_end = np.array([-0.1, -0.2, 0.3, -0.4, -0.4])

    # Use the CRF layer with fixed transitions to compute the log likelihood.
    # No boundary initializers are passed to the constructor; the two
    # boundary weights are attached by hand right below.
    self.crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(self.transitions),
        use_boundary=True,
        name="crf_layer"
    )

    boundary_shape = (self.crf.units,)
    self.crf.left_boundary = self.crf.add_weight(
        shape=boundary_shape,
        name="left_boundary",
        initializer=initializers.Constant(self.transitions_from_start),
    )
    self.crf.right_boundary = self.crf.add_weight(
        shape=boundary_shape,
        name="right_boundary",
        initializer=initializers.Constant(self.transitions_to_end),
    )
def build(self, input_shape):
    """Create the kernel and bias weights from a three-entry input shape.

    Args:
        input_shape: Sequence unpacked as (batch, input_dim, input_atoms);
            the first entry is ignored.
    """
    _, input_dim, input_atoms = input_shape

    kernel_shape = [
        *self.kernel_size,
        input_atoms,
        self.output_dim * self.output_atoms,
    ]
    bias_shape = [self.output_dim, self.output_atoms]

    self.kernel = self.add_weight(
        name="kernel",
        initializer=initializers.VarianceScaling(scale=0.1),
        shape=kernel_shape)
    self.bias = self.add_weight(
        name="bias",
        initializer=initializers.Constant(value=0.1),
        shape=bias_shape)

    # Remember the input geometry on the instance.
    self.input_dim = input_dim
    self.input_atoms = input_atoms
def build(self, input_shape):
    """Create the range variables for pre- and post-activation values.

    The pre-activation pair is only created when
    ``self._requires_pre_quant()`` is truthy; the post-activation pair is
    always created.

    Args:
        input_shape: Not used by this method.
    """
    def _frozen_range_var(var_name, value):
        # Non-trainable variable starting at a constant value.
        return self.add_variable(
            var_name,
            initializer=initializers.Constant(value),
            trainable=False)

    if self._requires_pre_quant():
        self._min_pre_activation = _frozen_range_var(
            'min_pre_activation', -6.0)
        self._max_pre_activation = _frozen_range_var(
            'max_pre_activation', 6.0)

    self._min_post_activation = _frozen_range_var('min_post_activation', -6.0)
    self._max_post_activation = _frozen_range_var('max_post_activation', 6.0)
def build(self, input_shape):
    """Record the input spec and create the fixed, non-trainable mask weight.

    Args:
        input_shape: Shape of the layer input; converted to a plain tuple
            through ``tf.TensorShape`` before use.
    """
    static_shape = tuple(tf.TensorShape(input_shape).as_list())
    self.input_spec = [tf.keras.layers.InputSpec(shape=static_shape)]
    self.input_dim = static_shape[-1]

    # The mask is stored as a weight whose value comes from self.mask_value.
    mask_initializer = initializers.Constant(self.mask_value)
    self.mask = self.add_weight(
        shape=self.mask_shape,
        name='transition_constraint_mask',
        initializer=mask_initializer,
        trainable=False)

    # Setting self.built = True directly would be an alternative.
    super(MockMasking, self).build(static_shape)
def __init__(self,
             rank,
             filters,
             kernel_size,
             strides=1,
             padding='valid',
             data_format=None,
             dilation_rate=1,
             activation=None,
             is_mc=True,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             trainable=True,
             name=None,
             **kwargs):
    """Normalize and store the convolution configuration.

    Args:
        rank: Rank of the convolution; used to normalize the tuple-valued
            arguments and to set the expected input rank (``rank + 2``).
        filters: Stored as ``self.filters``.
        kernel_size: Int or tuple, normalized to a tuple of length ``rank``.
        strides: Int or tuple, normalized to a tuple of length ``rank``.
        padding: Padding mode, normalized by ``conv_utils``.
        data_format: Data format, normalized by ``conv_utils``.
        dilation_rate: Int or tuple, normalized to a tuple of length ``rank``.
        activation: Activation identifier resolved by ``activations.get``.
        is_mc: Flag stored as a boolean tensor in ``self.is_mc``.
        use_bias: Stored as ``self.use_bias``.
        kernel_initializer: Resolved by ``initializers.get``.
        bias_initializer: Resolved by ``initializers.get``.
        kernel_regularizer: Resolved by ``regularizers.get``.
        bias_regularizer: Resolved by ``regularizers.get``.
        activity_regularizer: Resolved by ``regularizers.get`` and passed to
            the parent constructor.
        kernel_constraint: Resolved by ``constraints.get``.
        bias_constraint: Resolved by ``constraints.get``.
        trainable: Passed to the parent constructor.
        name: Passed to the parent constructor.
        **kwargs: Passed to the parent constructor.

    Raises:
        ValueError: If ``padding`` is ``'causal'`` and the instance is not a
            ``Conv1D`` or ``SeparableConv1D``.
    """
    super(VWNConv, self).__init__(
        trainable=trainable,
        name=name,
        activity_regularizer=regularizers.get(activity_regularizer),
        **kwargs)
    self.rank = rank
    self.filters = filters
    self.kernel_size = conv_utils.normalize_tuple(kernel_size, rank,
                                                  'kernel_size')
    self.strides = conv_utils.normalize_tuple(strides, rank, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    if (self.padding == 'causal'
            and not isinstance(self, (Conv1D, SeparableConv1D))):
        # The message previously ran the two literals together without a
        # space and had unbalanced backticks.
        raise ValueError('Causal padding is only supported for `Conv1D` '
                         'and `SeparableConv1D`.')
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, rank,
                                                    'dilation_rate')
    self.is_mc = tf.cast(is_mc, dtype=tf.bool)
    self.activation = activations.get(activation)
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
    self.input_spec = InputSpec(ndim=self.rank + 2)
    # Added on top of the standard convolution arguments.
    # NOTE(review): the meaning of "a" is not documented here - confirm, and
    # consider exposing this initializer as a constructor argument.
    self.a_initializer = initializers.Constant(1e-04)
def build(self, input_shape):
    """Create the halting weights and the time penalty constant.

    Args:
        input_shape: Shape of the layer input; must have exactly three
            entries, the last one being the model dimension.

    Returns:
        Whatever the parent ``build`` returns.
    """
    assert len(input_shape) == 3
    d_model = int(input_shape[-1])

    halting_kernel = self.add_weight(
        name='halting_kernel',
        shape=(d_model, 1),
        initializer='glorot_uniform',
        trainable=True)
    self.act_weights['halting_kernel'] = halting_kernel

    halting_biases = self.add_weight(
        name='halting_biases',
        shape=(1, ),
        initializer=initializers.Constant(0.1),
        trainable=True)
    self.act_weights['halting_biases'] = halting_biases

    # The time penalty is a backend constant, not a weight.
    time_penalty = K.constant(self.time_penalty, dtype=K.floatx())
    self.act_weights['time_penalty_t'] = time_penalty

    return super().build(input_shape)
def build(self, input_shape):
    """Build the wrapper and create mask, threshold and step variables.

    Args:
        input_shape: Shape of the layer input, forwarded to the parent build.
    """
    super(PruneLowMagnitude, self).build(input_shape)

    self.prunable_weights = self.layer.get_prunable_weights()
    mean_aggregation = tf_variables.VariableAggregation.MEAN

    # For each of the prunable weights, add mask and threshold variables.
    weight_mask_threshold = []
    for weight in self.prunable_weights:
        mask = self.add_variable(
            'mask',
            shape=weight.shape,
            initializer=initializers.get('ones'),
            dtype=weight.dtype,
            trainable=False,
            aggregation=mean_aggregation)
        threshold = self.add_variable(
            'threshold',
            shape=[],
            initializer=initializers.get('zeros'),
            dtype=weight.dtype,
            trainable=False,
            aggregation=mean_aggregation)
        weight_mask_threshold.append((weight, mask, threshold))
    self.pruning_vars = weight_mask_threshold

    # Add a scalar tracking the number of updates to the wrapped layer.
    self.pruning_step = self.add_variable(
        'pruning_step',
        shape=[],
        initializer=initializers.Constant(-1),
        dtype=dtypes.int64,
        trainable=False)

    def training_step_fn():
        return self.pruning_step

    # Create the pruning object.
    self.pruning_obj = pruning_impl.Pruning(
        training_step_fn=training_step_fn,
        pruning_vars=self.pruning_vars,
        pruning_schedule=self.pruning_schedule,
        block_size=self.block_size,
        block_pooling_type=self.block_pooling_type)
def build(self, input_shape):
    """Create the halting kernel, the halting biases and the time penalty.

    Args:
        input_shape: Shape of the layer input; must have exactly three
            entries, the last one being the model dimension.

    Returns:
        Whatever the parent ``build`` returns.
    """
    assert len(input_shape) == 3
    _, _, d_model = input_shape
    # A non-int dimension is unwrapped through its `.value` attribute.
    d_model = d_model if isinstance(d_model, int) else d_model.value

    self.halting_kernel = self.add_weight(
        name='halting_kernel',
        shape=(d_model, 1),
        initializer='glorot_uniform',
        trainable=True)
    self.halting_biases = self.add_weight(
        name='halting_biases',
        shape=(1,),
        initializer=initializers.Constant(0.1),
        trainable=True)

    # The time penalty is a backend constant, not a weight.
    self.time_penalty_t = K.constant(self.time_penalty, dtype=K.floatx())

    return super().build(input_shape)
def test_unmasked_constrained_viterbi_tags(self):
    """Decoded tags respect the transition constraints when no mask is used.

    All chain and boundary scores are ones, so only the logits and the
    constraints differ between candidate paths.
    """
    # TODO: using BILUO tag scheme instead of BIO.
    # So that, transition from tags to end can be tested.
    raw_constraints = np.array([
        # O B-X I-X B-Y I-Y start end
        [1, 1, 0, 1, 0, 0, 1],  # O
        [1, 1, 1, 1, 0, 0, 1],  # B-X
        [1, 1, 1, 1, 0, 0, 1],  # I-X
        [1, 1, 0, 1, 1, 0, 1],  # B-Y
        [1, 1, 0, 1, 1, 0, 1],  # I-Y
        [1, 1, 0, 1, 0, 0, 0],  # start
        [0, 0, 0, 0, 0, 0, 0],  # end
    ])
    # Allowed transitions as [from, to] index pairs.
    constraints = np.argwhere(raw_constraints > 0).tolist()

    transitions = np.ones([5, 5])
    transitions_from_start = np.ones(5)
    transitions_to_end = np.ones(5)

    logits = np.array([
        [  # constraint transition from start to tags
            # O B-X I-X B-Y I-Y
            [0., .1, 1., 0., 0.],
            [0., 0., 1., 0., 0.],
            [0., 0., 1., 0., 0.]
        ],
        [  # constraint transition from tags to tags
            # O B-X I-X B-Y I-Y
            [0., 1., 0., 0., 0.],
            [0., 0., .1, 1., 0.],
            [0., 0., 1., 0., 0.]
        ]
    ])

    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        transition_constraint=constraints,
        name="crf_layer"
    )
    # Use the uniform boundary scores defined above. The original read the
    # setUp attributes (self.transitions_from_start / _to_end) here and
    # left the two locals unused.
    crf.left_boundary = crf.add_weight(
        shape=(5,),
        name="left_boundary",
        initializer=initializers.Constant(transitions_from_start),
    )
    crf.right_boundary = crf.add_weight(
        shape=(5,),
        name="right_boundary",
        initializer=initializers.Constant(transitions_to_end),
    )

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    for layer in model.layers:
        print(layer.get_config())
        # Keyed by weight name rather than by the variable object, which may
        # not be hashable.
        print({
            weight.name: value
            for weight, value in zip(layer.weights, layer.get_weights())
        })

    viterbi_tags = model.predict(logits)

    # Now the tags should respect the constraints
    expected_tags = [
        [1, 2, 2],  # B-X I-X I-X
        [1, 2, 2]  # B-X I-X I-X
    ]

    # assert_equal raises on mismatch and returns None on success, so it
    # must not be wrapped in `assert` (that would fail a passing comparison).
    np.testing.assert_equal(viterbi_tags, expected_tags)
def test_masked_viterbi_decode():
    """Decoding under a right-padding mask yields 0 at the masked steps."""
    transitions = np.ones([5, 5])

    logits = np.array([
        [
            # O B-X I-X B-Y I-Y
            [0., 1., 0., 0., 0.],
            [0., 0., 1., 0., 0.],
            [0., 0., 1., 0., 0.]
        ],
        [
            # O B-X I-X B-Y I-Y
            [0., 1., 0., 0., 0.],
            [0., 1., 0., 0., 0.],
            [0., 1., 0., 0., 0.]
        ]
    ])

    # TODO: this test case is right padding mask only
    # due to the underline crf function only support sequence length
    mask = np.array([
        [1, 1, 0],
        [1, 1, 0]
    ])

    # No boundary initializers are passed to the layer.
    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        name="crf_layer"
    )

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    result = model.predict(logits)

    # The last step of both sequences is masked out.
    expected = [
        [1, 2, 0],  # B-X I-X NA
        [1, 1, 0]  # B-X B-X NA
    ]

    # assert_equal raises on mismatch and returns None on success, so it
    # must not be wrapped in `assert` (that would fail a passing comparison).
    np.testing.assert_equal(result, expected)
def test_constrained_viterbi_tags(self):
    """Decoded tags respect the transition constraints under a mask."""
    constraints = {(0, 0), (0, 1),
                   (1, 1), (1, 2),
                   (2, 2), (2, 3),
                   (3, 3), (3, 4),
                   (4, 4), (4, 0)}

    # Add the transitions to the end tag
    # and from the start tag.
    for i in range(5):
        constraints.add((5, i))
        constraints.add((i, 6))

    mask = np.array([
        [1, 1, 1],
        [1, 1, 0]
    ])

    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(self.transitions),
        use_boundary=True,
        transition_constraint=constraints,
        name="crf_layer"
    )
    # The boundary weights are attached by hand from the setUp fixtures.
    crf.left_boundary = crf.add_weight(
        shape=(5,),
        name="left_boundary",
        initializer=initializers.Constant(self.transitions_from_start),
    )
    crf.right_boundary = crf.add_weight(
        shape=(5,),
        name="right_boundary",
        initializer=initializers.Constant(self.transitions_to_end),
    )

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    for layer in model.layers:
        print(layer.get_config())
        # Keyed by weight name rather than by the variable object, which may
        # not be hashable.
        print({
            weight.name: value
            for weight, value in zip(layer.weights, layer.get_weights())
        })

    viterbi_tags = model.predict(self.logits)

    # Now the tags should respect the constraints
    expected_tags = [
        [2, 3, 3],
        [2, 3, 0]
    ]
    # Per the original author, without working constraints the result
    # would be:
    # [
    #     [2, 4, 3],
    #     [2, 3, 0]
    # ]

    # assert_equal raises on mismatch and returns None on success, so it
    # must not be wrapped in `assert` (that would fail a passing comparison).
    np.testing.assert_equal(viterbi_tags, expected_tags)
def build(self, input_shape):
    """Build the wrapper and create the clustering state of the wrapped layer.

    For every (weight_name, weight) pair returned by
    ``self.layer.get_clusterable_weights()`` this method:
      * records the weight's position in ``self.layer.weights`` in
        ``self.gone_variables``;
      * creates a trainable ``cluster_centroids_tf`` weight of shape
        ``(self.number_of_clusters,)``;
      * creates the lookup implementation in ``self.clustering_impl``;
      * creates a non-trainable int32 ``pulling_indices_tf`` weight;
      * appends the pair to ``self.clustered_vars``.

    Finally ``self.restore`` is filled, in the order of
    ``self.layer.weights``, with ``(name, weight)`` pairs for untouched
    weights and ``(name, updater)`` pairs for clustered ones, where calling
    ``updater()`` returns the current clustered weight.

    Args:
        input_shape: Shape of the layer input, forwarded to the parent build.
    """
    super(ClusterWeights, self).build(input_shape)

    clusterable_weights = self.layer.get_clusterable_weights()

    # Map automatically assigned TF variable name (e.g. 'dense/kernel:0') to
    # provided human readable name (e.g. as in Dense(10).kernel)
    clusterable_weights_to_variables = {}

    for weight_name, weight in clusterable_weights:
        # If a variable appears in this loop, then it is going to be removed
        # from self._trainable_weights. We need to memorise what variables
        # are going away so that later we are able to restore them. We have
        # to do this to maintain the original order of the weights in the
        # underlying layer. Incorrect order results in the incorrect OPs
        # weights configurations.

        # We can be sure that weight will be found in this array since the
        # variable is either in the self._trainable_weights or in
        # self._non_trainable_weights and self.weights is the result of
        # concatenation of those arrays
        original_index = self.layer.weights.index(weight)
        self.gone_variables.append(original_index)

        # Again, not sure if this is needed. Leaving for now.
        clusterable_weights_to_variables[self._weight_name(
            weight.name)] = weight_name

        # Build initial cluster centroids for a given tensor. Factory returns
        # a class and we init an object immediately
        centroid_initializer = clustering_centroids.CentroidsInitializerFactory.get_centroid_initializer(
            self.cluster_centroids_init)(weight, self.number_of_clusters)

        cluster_centroids = centroid_initializer.get_cluster_centroids()

        # Use k.batch_get_value since we need to initialize the variables
        # with an initial value taken from a Tensor object
        # For each weight there is a different set of cluster centroids
        self.cluster_centroids_tf[weight_name] = self.add_weight(
            'cluster_centroids_tf',
            shape=(self.number_of_clusters, ),
            dtype=weight.dtype,
            trainable=True,
            initializer=initializers.Constant(
                value=k.batch_get_value([cluster_centroids])[0]))

        # There are vectorised implementations of look-ups, we use a new one
        # for different number of dimensions.
        clustering_impl_cls = clustering_registry.ClusteringLookupRegistry(
        ).get_clustering_impl(self.layer, weight_name)
        self.clustering_impl[weight_name] = clustering_impl_cls(
            self.cluster_centroids_tf[weight_name])

        # We find the nearest cluster centroids and store them so that ops
        # can build their weights upon it. These indices are calculated once
        # and stored forever. We use them to make look-ups from
        # self.cluster_centroids_tf
        pulling_indices = self.clustering_impl[
            weight_name].get_pulling_indices(weight)
        self.pulling_indices_tf[weight_name] = self.add_weight(
            'pulling_indices_tf',
            shape=pulling_indices.shape,
            dtype=tf.int32,
            trainable=False,
            initializer=initializers.Constant(
                value=k.batch_get_value([pulling_indices])[0]))

        # We store these pairs to easily update these variables later on
        self.clustered_vars.append((weight_name, weight))

    # We use currying here to get an updater which can be triggered at any
    # time in future and it would return the latest version of clustered
    # weights. The weight name is bound per call, so every updater keeps
    # its own name.
    def get_updater(for_weight_name):
        def fn():
            return self.clustering_impl[
                for_weight_name].get_clustered_weight(
                    self.pulling_indices_tf[for_weight_name])

        return fn

    # This will allow us to restore the order of weights later
    # This loop stores pairs of weight names and how to restore them
    for ct, weight in enumerate(self.layer.weights):
        name = self._weight_name(weight.name)
        if ct in self.gone_variables:
            # Again, not sure if this is needed
            weight_name = clusterable_weights_to_variables[name]
            self.restore.append((name, get_updater(weight_name)))
        else:
            self.restore.append((name, weight))