def _map_variables(inputter_fn, vars_fn):
    """Remap all vocabulary-indexed variables selected by ``vars_fn``.

    Builds the old→new vocabulary index mapping from the two inputters'
    vocabulary files, then copies each old variable's rows into the new
    model's variable along its vocabulary axis.  Every touched variable is
    recorded in the enclosing ``updated_variables`` list.

    Args:
      inputter_fn: Callable returning the inputter of a model (used to read
        the vocabulary file of both the current model and ``new_model``).
      vars_fn: Callable returning ``(variables, vocab_axes)`` for a model.

    Returns:
      The list of variables of ``new_model`` selected by ``vars_fn``.
    """
    mapping, _ = vocab.get_mapping(
        inputter_fn(self).vocabulary_file,
        inputter_fn(new_model).vocabulary_file,
    )
    old_vars, vocab_axes = vars_fn(self)
    new_vars, _ = vars_fn(new_model)
    # When both optimizers are available, their slot variables (e.g. Adam
    # moments) are remapped together with the weights; the condition is
    # loop-invariant so it is hoisted out of the loop.
    remap_slots = optimizer is not None and new_optimizer is not None
    for old_var, new_var, axis in zip(old_vars, new_vars, vocab_axes):
        if remap_slots:
            changed = vocab.update_variable_and_slots(
                old_var,
                new_var,
                optimizer,
                new_optimizer,
                mapping,
                vocab_axis=axis,
            )
        else:
            changed = [vocab.update_variable(old_var, new_var, mapping, vocab_axis=axis)]
        updated_variables.extend(changed)
    return new_vars
def testVocabMappingReplace(self):
    """Check 'replace' mode: the new vocabulary order is kept as-is and
    new-only words map to -1 (no old row to copy from)."""
    old_path = self._saveVocab("old", ["1", "2", "3", "4"])
    new_path = self._saveVocab("new", ["1", "6", "5", "3", "7"])
    mapping, new_vocab = vocab_lib.get_mapping(old_path, new_path, "replace")
    expected_size = 5 + 1  # new + <unk>
    self.assertEqual(expected_size, len(mapping))
    self.assertAllEqual([0, -1, -1, 2, -1, 4], mapping)
    self.assertAllEqual(["1", "6", "5", "3", "7"], new_vocab.words)
def testVocabMappingMerge(self):
    """Check 'merge' mode: all old words keep their positions, then new-only
    words are appended (mapping -1 since they have no old row)."""
    old_path = self._saveVocab("old", ["1", "2", "3", "4"])
    new_path = self._saveVocab("new", ["1", "6", "3", "5", "7"])
    mapping, new_vocab = vocab_lib.get_mapping(old_path, new_path, "merge")
    expected_size = 4 + 5 - 2 + 1  # old + new - common + <unk>
    self.assertEqual(expected_size, len(mapping))
    self.assertAllEqual([0, 1, 2, 3, -1, -1, -1, 4], mapping)
    self.assertAllEqual(["1", "2", "3", "4", "6", "5", "7"], new_vocab.words)
def transfer_weights(self, new_model, new_optimizer=None, optimizer=None, ignore_weights=None):
    """Transfer vocabulary-dependent weights to ``new_model``, remapping rows
    across the old and new vocabularies, then delegate the remaining weights
    to the parent implementation.

    Args:
      new_model: The model receiving the weights.
      new_optimizer: Optional optimizer of ``new_model`` whose slot variables
        should be remapped alongside the weights.
      optimizer: Optional optimizer of this model.
      ignore_weights: Optional list of variables of ``new_model`` that the
        parent transfer should skip, in addition to those updated here.

    Returns:
      Whatever the parent ``transfer_weights`` returns.
    """
    updated_variables = []

    def _map_variable(mapping, var_a, var_b, axis=0):
        # Copy var_a's rows into var_b along the vocabulary axis; when both
        # optimizers are given, remap their slot variables (e.g. Adam
        # moments) as well.
        if new_optimizer is not None and optimizer is not None:
            variables = vocab.update_variable_and_slots(
                var_a,
                var_b,
                optimizer,
                new_optimizer,
                mapping,
                vocab_axis=axis,
            )
        else:
            variables = [
                vocab.update_variable(var_a, var_b, mapping, vocab_axis=axis)
            ]
        updated_variables.extend(variables)

    source_mapping, _ = vocab.get_mapping(
        self.features_inputter.vocabulary_file,
        new_model.features_inputter.vocabulary_file,
    )
    target_mapping, _ = vocab.get_mapping(
        self.labels_inputter.vocabulary_file,
        new_model.labels_inputter.vocabulary_file,
    )

    _map_variable(
        source_mapping,
        self.features_inputter.embedding,
        new_model.features_inputter.embedding,
    )
    _map_variable(
        target_mapping,
        self.decoder.output_layer.bias,
        new_model.decoder.output_layer.bias,
    )
    # Shared embeddings/projections are aliases of variables already mapped
    # above, so only remap them when they are distinct variables.
    if not EmbeddingsSharingLevel.share_input_embeddings(self.share_embeddings):
        _map_variable(
            target_mapping,
            self.labels_inputter.embedding,
            new_model.labels_inputter.embedding,
        )
    if not EmbeddingsSharingLevel.share_target_embeddings(self.share_embeddings):
        _map_variable(
            target_mapping,
            self.decoder.output_layer.kernel,
            new_model.decoder.output_layer.kernel,
            axis=1,  # output kernel is (depth, vocab): vocabulary on axis 1
        )

    # BUG FIX: the caller-provided ignore_weights was previously discarded;
    # merge it with the variables updated here so the parent transfer skips
    # both sets (matches the sibling SILTransformer implementation).
    return super().transfer_weights(
        new_model,
        new_optimizer=new_optimizer,
        optimizer=optimizer,
        ignore_weights=updated_variables
        + (list(ignore_weights) if ignore_weights is not None else []),
    )
def transfer_weights(
    self,
    new_model: "SILTransformer",
    new_optimizer: Any = None,
    optimizer: Any = None,
    ignore_weights: Optional[List[tf.Variable]] = None,
):
    """Transfer vocabulary-dependent weights to ``new_model``, remapping rows
    across the old and new vocabularies, then delegate the rest to the
    ``SequenceToSequence`` base implementation.

    Args:
      new_model: The model receiving the weights.
      new_optimizer: Optional optimizer of ``new_model`` whose slot variables
        are remapped alongside the weights.
      optimizer: Optional optimizer of this model.
      ignore_weights: Optional variables of ``new_model`` that the base
        transfer should skip, in addition to those updated here.

    Returns:
      Whatever the base ``transfer_weights`` returns.
    """
    remapped: List[tf.Variable] = []

    def _remap(mapping, source_var, target_var, axis=0):
        # Copy source_var's rows into target_var along the vocabulary axis;
        # with both optimizers present, slot variables are remapped too.
        if optimizer is not None and new_optimizer is not None:
            changed = update_variable_and_slots(
                source_var,
                target_var,
                optimizer,
                new_optimizer,
                mapping,
                vocab_axis=axis,
            )
        else:
            changed = [update_variable(source_var, target_var, mapping, vocab_axis=axis)]
        remapped.extend(changed)

    source_mapping, _ = get_mapping(
        self.features_inputter.vocabulary_file,
        new_model.features_inputter.vocabulary_file,
    )
    target_mapping, _ = get_mapping(
        self.labels_inputter.vocabulary_file,
        new_model.labels_inputter.vocabulary_file,
    )

    _remap(
        source_mapping,
        self.features_inputter.embedding,
        new_model.features_inputter.embedding,
    )
    _remap(
        target_mapping,
        self.decoder.output_layer.bias,
        new_model.decoder.output_layer.bias,
    )
    # Shared embeddings/projections alias variables already handled above,
    # so only remap them when they are distinct variables.
    if not EmbeddingsSharingLevel.share_input_embeddings(self.share_embeddings):
        _remap(
            target_mapping,
            self.labels_inputter.embedding,
            new_model.labels_inputter.embedding,
        )
    if not EmbeddingsSharingLevel.share_target_embeddings(self.share_embeddings):
        _remap(
            target_mapping,
            self.decoder.output_layer.kernel,
            new_model.decoder.output_layer.kernel,
            axis=1,  # output kernel is (depth, vocab): vocabulary on axis 1
        )

    extra = ignore_weights if ignore_weights is not None else []
    # Deliberately skip the immediate SequenceToSequence override and call
    # its base implementation with everything we already updated excluded.
    return super(SequenceToSequence, self).transfer_weights(
        new_model,
        new_optimizer=new_optimizer,
        optimizer=optimizer,
        ignore_weights=remapped + extra,
    )