コード例 #1
0
 def test_embeddings_extraction_layer_custom_position_embeddings(self):
     """Check that position embeddings are added to the object embeddings.

     Each output vector must equal the embedding picked by the object's type
     and id plus the position embedding picked by the object's position.
     """
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         use_special_token_embeddings=True,
         use_position_embeddings=True,
     )
     layer = EmbeddingsExtractionLayer(config)
     object_ids = tf.constant([[1, 0], [2, 0]], dtype=tf.int32)
     object_types = tf.constant(
         [
             [ObjectType.RELATION.value, ObjectType.ENTITY.value],
             [ObjectType.ENTITY.value, ObjectType.SPECIAL_TOKEN.value],
         ],
         dtype=tf.int32,
     )
     positions = tf.constant([[0, 2], [1, 0]], dtype=tf.int32)
     outputs = layer({
         "object_ids": object_ids,
         "object_types": object_types,
         "positions": positions,
     })
     position_table = layer.position_embeddings_layer.position_embeddings

     # Sample 0, slot 0: relation with id 1 at position 0.
     expected = layer.relation_embeddings[1] + position_table[0]
     self.assertAllEqual(expected, outputs[0, 0])

     # Sample 0, slot 1: entity with id 0 at position 2.
     expected = layer.entity_embeddings[0] + position_table[2]
     self.assertAllEqual(expected, outputs[0, 1])

     # Sample 1, slot 0: entity with id 2 at position 1.
     expected = layer.entity_embeddings[2] + position_table[1]
     self.assertAllEqual(expected, outputs[1, 0])

     # Sample 1, slot 1: special token with id 0 at position 0.
     expected = layer.special_token_embeddings[0] + position_table[0]
     self.assertAllEqual(expected, outputs[1, 1])
コード例 #2
0
def _create_embeddings_config(
    entity_embeddings_path=gin.REQUIRED,
    relation_embeddings_path=gin.REQUIRED,
    position_embeddings_path=gin.REQUIRED,
    special_token_embeddings_path=gin.REQUIRED,
):
    """Build an EmbeddingsConfig from the training dataset and saved arrays.

    The entity and relation counts are read from a training dataset created
    through `_create_inference_dataset`. Each pretrained table is read with
    `np.load` when its path is given and left as None otherwise.

    Args:
        entity_embeddings_path: File with pretrained entity embeddings, or
            None to pass no pretrained entity embeddings.
        relation_embeddings_path: File with pretrained relation embeddings,
            or None.
        position_embeddings_path: File with pretrained position embeddings,
            or None.
        special_token_embeddings_path: File with pretrained special token
            embeddings, or None.

    Returns:
        The EmbeddingsConfig assembled from the dataset counts and the
        loaded (or missing) pretrained tables.
    """

    def _load_or_none(path):
        # A path of None means "no pretrained values for this table".
        return None if path is None else np.load(path)

    training_dataset = _create_inference_dataset(
        DatasetType.TRAINING,
        shuffle_dataset=False,
        batch_size=1,
        prefetched_samples=10,
        repeat_dataset=False,
    )
    # Files are read in the same order as the parameters are declared.
    entity_table, relation_table, position_table, special_token_table = [
        _load_or_none(path)
        for path in (
            entity_embeddings_path,
            relation_embeddings_path,
            position_embeddings_path,
            special_token_embeddings_path,
        )
    ]
    return EmbeddingsConfig(
        entities_count=training_dataset.entities_count,
        relations_count=training_dataset.relations_count,
        pretrained_entity_embeddings=entity_table,
        pretrained_relation_embeddings=relation_table,
        pretrained_position_embeddings=position_table,
        pretrained_special_token_embeddings=special_token_table,
    )
コード例 #3
0
 def test_embeddings_extraction_layer_non_trainable(self):
     """Entity and relation embeddings are frozen when training is off."""
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         trainable_embeddings=False,
     )
     layer = EmbeddingsExtractionLayer(config)
     entity_trainable = layer.entity_embeddings.trainable
     self.assertFalse(entity_trainable)
     relation_trainable = layer.relation_embeddings.trainable
     self.assertFalse(relation_trainable)
コード例 #4
0
 def test_embeddings_extraction_layer_position_embeddings_trainable(self):
     """The position embeddings layer is trainable when configured so."""
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         use_position_embeddings=True,
         position_embeddings_trainable=True,
     )
     layer = EmbeddingsExtractionLayer(config)
     position_layer = layer.position_embeddings_layer
     self.assertTrue(position_layer.trainable)
コード例 #5
0
 def test_embeddings_extraction_layer_special_token_embeddings_not_used(self):
     """Without special tokens the special token table has zero rows."""
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         use_special_token_embeddings=False,
     )
     layer = EmbeddingsExtractionLayer(config)
     expected_shape = (0, 4)
     self.assertEqual(expected_shape, layer.special_token_embeddings.shape)
コード例 #6
0
 def test_embeddings_extraction_layer_pretrained_relation_embeddings(self):
     """Relation embeddings equal the pretrained array given in the config."""
     pretrained = np.array([[2.0, 3.0, 1.0], [3.0, 4.0, 5.0]])
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         pretrained_relation_embeddings=pretrained,
     )
     layer = EmbeddingsExtractionLayer(config)
     self.assertAllEqual(pretrained, layer.relation_embeddings)
コード例 #7
0
 def setUp(self):
     """Prepare gin bindings, sample batches, submodels and a model config.

     Binds the neighbour dataset templates and the stacked transformer
     encoder through gin, draws the first batch of a CombinedMaskedDataset
     in each training mode, and creates two TransformerSoftmaxModel
     submodels together with the default HierarchicalTransformerModelConfig.
     """
     # NOTE(review): super().setUp() is not invoked here; confirm the base
     # test case does not depend on it.
     gin.enter_interactive_mode()
     # The share_encoder_parameters binding used to be listed twice with the
     # same value; one copy is enough.
     gin.parse_config("""
         InputNeighboursDataset.dataset_id = "input_neighbours"
         InputNeighboursDataset.max_neighbours_count = 1
         InputNeighboursDataset.mask_source_entity_pbty = 0.5
         OutputNeighboursDataset.dataset_id = "output_neighbours"
         OutputNeighboursDataset.max_neighbours_count = 2
         OutputNeighboursDataset.mask_source_entity_pbty = 0.25
         
         StackedTransformerEncodersLayer.layers_count = 3
         StackedTransformerEncodersLayer.attention_heads_count = 4
         StackedTransformerEncodersLayer.attention_head_dimension = 5
         StackedTransformerEncodersLayer.pointwise_hidden_layer_dimension = 4
         StackedTransformerEncodersLayer.dropout_rate = 0.5
         StackedTransformerEncodersLayer.share_encoder_parameters = False
         StackedTransformerEncodersLayer.encoder_layer_type = %TransformerEncoderLayerType.PRE_LAYER_NORM
     """)

     def first_batch(dataset_id, training_mode):
         # The two datasets differ only in their id and training mode; a
         # fresh template list is passed to each of them.
         combined_dataset = CombinedMaskedDataset(
             dataset_templates=[
                 InputNeighboursDataset, OutputNeighboursDataset
             ],
             dataset_type=DatasetType.TRAINING,
             data_directory=self.DATASET_PATH,
             shuffle_dataset=True,
             batch_size=4,
             inference_mode=False,
             dataset_id=dataset_id,
             training_mode=training_mode,
         )
         return next(iter(combined_dataset.samples))

     self.independent_losses_samples = first_batch(
         "independent_losses_dataset",
         CombinedMaskedDatasetTrainingMode.INDEPENDENT_LOSSES,
     )
     self.joint_loss_samples = first_batch(
         "joint_loss_dataset",
         CombinedMaskedDatasetTrainingMode.JOINT_LOSS,
     )

     embeddings_config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=6,
         use_special_token_embeddings=True,
     )
     transformer_config = TransformerSoftmaxModelConfig(
         use_pre_normalization=True, pre_dropout_rate=0.5)
     # Two separate submodel instances built from the same configuration.
     self.submodel1 = TransformerSoftmaxModel(embeddings_config,
                                              transformer_config)
     self.submodel2 = TransformerSoftmaxModel(embeddings_config,
                                              transformer_config)
     self.default_model_config = HierarchicalTransformerModelConfig(
         dropout_rate=0.5, layers_count=2)
コード例 #8
0
 def test_using_edge_only_in_reduction_layer(self):
     """Check output shape and parameter count with edge-only reduction."""
     model = TransformerBinaryModel(
         EmbeddingsConfig(
             entities_count=3,
             relations_count=2,
             embeddings_dimension=4,
         ),
         use_only_edge_in_reduction_layer=True,
     )
     predictions = model(self.inputs)
     expected_shape = (2, 1)
     self.assertAllEqual(expected_shape, predictions.shape)
     expected_params_count = 441
     self.assertEqual(expected_params_count, model.count_params())
コード例 #9
0
 def test_embeddings_extraction_layer_position_embeddings_fourier_transform(
         self):
     """The Fourier series flag is passed to the position embeddings layer."""
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         use_position_embeddings=True,
         use_fourier_series_in_position_embeddings=True,
     )
     layer = EmbeddingsExtractionLayer(config)
     position_layer = layer.position_embeddings_layer
     self.assertTrue(position_layer.use_fourier_series)
コード例 #10
0
 def test_embeddings_extraction_layer_pretrained_position_embeddings(self):
     """Position embeddings equal the pretrained array after building."""
     pretrained = np.array([
         [2.0, 3.0, 1.0],
         [3.0, 4.0, 5.0],
         [2.0, 5.0, 1.0],
     ])
     config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=4,
         use_position_embeddings=True,
         pretrained_position_embeddings=pretrained,
     )
     layer = EmbeddingsExtractionLayer(config)
     # The position layer is built explicitly before its table is read.
     layer.position_embeddings_layer.build(input_shape=(3, 3))
     actual = layer.position_embeddings_layer.position_embeddings
     self.assertAllEqual(pretrained, actual)
コード例 #11
0
 def test_model(self):
     """The model maps a batch of two five-object samples to shape (2, 4)."""
     model = TransformerTranseModel(
         EmbeddingsConfig(
             entities_count=3,
             relations_count=2,
             embeddings_dimension=4,
         ),
         ConvModelConfig(include_reduce_dim_layer=False),
     )
     # Both samples use the same sequence of object types.
     type_pattern = [
         object_type.value for object_type in (
             ObjectType.ENTITY,
             ObjectType.RELATION,
             ObjectType.ENTITY,
             ObjectType.ENTITY,
             ObjectType.RELATION,
         )
     ]
     object_ids = tf.constant([[0, 0, 1, 1, 0], [1, 1, 2, 0, 1]],
                              dtype=tf.int32)
     object_types = tf.constant([type_pattern, type_pattern],
                                dtype=tf.int32)
     outputs = model({
         "object_ids": object_ids,
         "object_types": object_types,
     })
     self.assertAllEqual((2, 4), outputs.shape)
コード例 #12
0
 def setUp(self):
     """Load one training batch and create the embeddings and model configs.

     Stores the first batch of a MaskedEntityOfEdgeDataset as
     `self.model_inputs`, and keeps an EmbeddingsConfig and a
     ConvEModelConfig for the tests to use.
     """
     # NOTE(review): super().setUp() is not invoked here; confirm the base
     # test case does not depend on it.
     training_dataset = MaskedEntityOfEdgeDataset(
         dataset_id="dataset1",
         inference_mode=False,
         dataset_type=DatasetType.TRAINING,
         data_directory=self.DATASET_PATH,
         shuffle_dataset=False,
         batch_size=5,
     )
     sample_iterator = iter(training_dataset.samples)
     self.model_inputs = next(sample_iterator)

     embeddings_options = dict(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=6,
         use_special_token_embeddings=True,
     )
     self.embeddings_config = EmbeddingsConfig(**embeddings_options)

     conv_e_options = dict(
         embeddings_width=3,
         input_dropout_rate=0.5,
         conv_layer_filters=32,
         conv_layer_kernel_size=2,
         conv_dropout_rate=0.5,
         hidden_dropout_rate=0.5,
     )
     self.model_config = ConvEModelConfig(**conv_e_options)
コード例 #13
0
 def setUp(self):
     """Reset gin, bind the encoder settings and prepare inputs and configs.

     Clears the gin configuration, binds the StackedTransformerEncodersLayer
     parameters, stores the first batch of a MaskedEntityOfEdgeDataset as
     `self.model_inputs`, and keeps an EmbeddingsConfig and a default
     TransformerSoftmaxModelConfig for the tests to use.
     """
     # NOTE(review): super().setUp() is not invoked here; confirm the base
     # test case does not depend on it.
     gin.clear_config()
     # The share_encoder_parameters binding used to be listed twice with the
     # same value; one copy is enough.
     gin.parse_config("""
         StackedTransformerEncodersLayer.layers_count = 3
         StackedTransformerEncodersLayer.attention_heads_count = 4
         StackedTransformerEncodersLayer.attention_head_dimension = 5
         StackedTransformerEncodersLayer.pointwise_hidden_layer_dimension = 4
         StackedTransformerEncodersLayer.dropout_rate = 0.5
         StackedTransformerEncodersLayer.share_encoder_parameters = False
         StackedTransformerEncodersLayer.encoder_layer_type = %TransformerEncoderLayerType.PRE_LAYER_NORM
     """)
     dataset = MaskedEntityOfEdgeDataset(
         dataset_id="dataset1",
         inference_mode=False,
         dataset_type=DatasetType.TRAINING,
         data_directory=self.DATASET_PATH,
         shuffle_dataset=False,
         batch_size=5,
     )
     self.model_inputs = next(iter(dataset.samples))
     self.embeddings_config = EmbeddingsConfig(
         entities_count=3,
         relations_count=2,
         embeddings_dimension=6,
         use_special_token_embeddings=True,
     )
     self.default_model_config = TransformerSoftmaxModelConfig(
         use_pre_normalization=True,
         pre_dropout_rate=0.5,
     )