Code example #1
0
    def __init__(
        self,
        config: Config,
        dataset: Dataset,
        configuration_key=None,
        init_for_load_only=False,
    ):
        """Wrap a base model built over a doubled relation vocabulary.

        A shallow copy of *dataset* reporting twice as many relations is used
        to create the base model; this model then shares the base model's
        scorer and embedders instead of creating its own.
        """
        self._init_configuration(config, configuration_key)

        # Create the base model against a shallow dataset copy whose relation
        # count is doubled.
        doubled = dataset.shallow_copy()
        doubled._num_relations = 2 * dataset.num_relations()
        wrapped = KgeModel.create(
            config=config,
            dataset=doubled,
            configuration_key=self.configuration_key + ".base_model",
            init_for_load_only=init_for_load_only,
        )

        # Initialize this model: reuse the base model's scorer and skip
        # creating embedders of our own.
        super().__init__(
            config=config,
            dataset=dataset,
            scorer=wrapped.get_scorer(),
            create_embedders=False,
            init_for_load_only=init_for_load_only,
        )
        self._base_model = wrapped
        # TODO change entity_embedder assignment to sub and obj embedders when
        # support for that is added
        self._entity_embedder = wrapped.get_s_embedder()
        self._relation_embedder = wrapped.get_p_embedder()
Code example #2
0
    def __init__(self, config: Config, configuration_key: str,
                 dataset: Dataset):
        """Read negative-sampling options and prepare filtering indexes."""
        super().__init__(config, configuration_key)

        # Per-slot settings, indexed by the S/P/O slot constants.
        self.num_samples = torch.zeros(3, dtype=torch.int)
        self.filter_positives = torch.zeros(3, dtype=torch.bool)
        self.vocabulary_size = torch.zeros(3, dtype=torch.int)

        self.shared = self.get_option("shared")
        self.with_replacement = self.get_option("with_replacement")
        if not (self.with_replacement or self.shared):
            raise ValueError(
                "Without replacement sampling is only supported when "
                "shared negative sampling is enabled.")

        # Split used for filtering true positives; falls back to the
        # training split when unset.
        self.filtering_split = config.get("negative_sampling.filtering.split")
        if self.filtering_split == "":
            self.filtering_split = config.get("train.split")

        for slot in SLOTS:
            slot_str = SLOT_STR[slot]
            self.num_samples[slot] = self.get_option(f"num_samples.{slot_str}")
            self.filter_positives[slot] = self.get_option(
                f"filtering.{slot_str}")
            if slot == P:
                self.vocabulary_size[slot] = dataset.num_relations()
            else:
                self.vocabulary_size[slot] = dataset.num_entities()
            # Build the filtering index eagerly if needed and missing;
            # otherwise every worker would recreate it over and over.
            if self.filter_positives[slot]:
                pair = ("po", "so", "sp")[slot]
                dataset.index(f"{self.filtering_split}_{pair}_to_{slot_str}")

        if any(self.filter_positives):
            if self.shared:
                raise ValueError(
                    "Filtering is not supported when shared negative sampling is enabled."
                )
            self.check_option("filtering.implementation",
                              ["standard", "fast", "fast_if_available"])
            self.filter_implementation = self.get_option(
                "filtering.implementation")
        self.dataset = dataset

        # Auto configuration: a negative subject/object count defaults to its
        # counterpart slot; anything still unset becomes zero.
        for slot, fallback in ((S, O), (P, None), (O, S)):
            if self.num_samples[slot] < 0:
                if fallback is not None and self.num_samples[fallback] > 0:
                    self.num_samples[slot] = self.num_samples[fallback]
                else:
                    self.num_samples[slot] = 0
Code example #3
0
    def __init__(
        self,
        config: Config,
        dataset: Dataset,
        scorer: Union[RelationalScorer, type],
        initialize_embedders=True,
        configuration_key=None,
    ):
        """Initialize a KGE model: embedders plus a relational scorer.

        Args:
            config: global configuration object.
            dataset: dataset providing entity/relation vocabulary sizes.
            scorer: either a ready ``RelationalScorer`` instance or the scorer
                class itself, which is then instantiated here.
            initialize_embedders: when False, the caller is responsible for
                setting ``_entity_embedder`` and ``_relation_embedder``.
            configuration_key: key under which this model's options live.
        """
        super().__init__(config, dataset, configuration_key)

        # TODO support different embedders for subjects and objects

        #: Embedder used for entities (both subject and objects)
        self._entity_embedder: KgeEmbedder

        #: Embedder used for relations
        self._relation_embedder: KgeEmbedder

        if initialize_embedders:
            self._entity_embedder = KgeEmbedder.create(
                config,
                dataset,
                self.configuration_key + ".entity_embedder",
                dataset.num_entities(),
            )

            #: Embedder used for relations
            self._relation_embedder = KgeEmbedder.create(
                config,
                dataset,
                self.configuration_key + ".relation_embedder",
                dataset.num_relations(),
            )

        #: Scorer
        self._scorer: RelationalScorer
        # isinstance (rather than ``type(scorer) == type``) also recognizes
        # scorer classes defined with a metaclass such as ABCMeta, which the
        # exact-type comparison would misclassify as instances.
        if isinstance(scorer, type):
            # scorer is the type of the scorer to use; call its constructor
            self._scorer = scorer(config=config,
                                  dataset=dataset,
                                  configuration_key=self.configuration_key)
        else:
            self._scorer = scorer
Code example #4
0
    def __init__(
        self,
        config: Config,
        dataset: Dataset,
        scorer: Union[RelationalScorer, type],
        create_embedders=True,
        configuration_key=None,
        init_for_load_only=False,
    ):
        """Initialize a KGE model: embedders, optional pretrained init, scorer.

        Args:
            config: global configuration object.
            dataset: dataset providing entity/relation vocabulary sizes.
            scorer: either a ready ``RelationalScorer`` instance or the scorer
                class itself, which is then instantiated here.
            create_embedders: when False, the caller is responsible for
                setting ``_entity_embedder`` and ``_relation_embedder``.
            configuration_key: key under which this model's options live.
            init_for_load_only: skip loading pretrained embeddings because the
                model's parameters are about to be overwritten from a
                checkpoint anyway.
        """
        super().__init__(config, dataset, configuration_key)

        # TODO support different embedders for subjects and objects

        #: Embedder used for entities (both subject and objects)
        self._entity_embedder: KgeEmbedder

        #: Embedder used for relations
        self._relation_embedder: KgeEmbedder

        if create_embedders:
            self._entity_embedder = KgeEmbedder.create(
                config,
                dataset,
                self.configuration_key + ".entity_embedder",
                dataset.num_entities(),
                init_for_load_only=init_for_load_only,
            )

            #: Embedder used for relations
            self._relation_embedder = KgeEmbedder.create(
                config,
                dataset,
                self.configuration_key + ".relation_embedder",
                dataset.num_relations(),
                init_for_load_only=init_for_load_only,
            )

            if not init_for_load_only:
                self._init_pretrained_embedders()

        #: Scorer
        self._scorer: RelationalScorer
        # isinstance (rather than ``type(scorer) == type``) also recognizes
        # scorer classes defined with a metaclass such as ABCMeta, which the
        # exact-type comparison would misclassify as instances.
        if isinstance(scorer, type):
            # scorer is the type of the scorer to use; call its constructor
            self._scorer = scorer(
                config=config, dataset=dataset, configuration_key=self.configuration_key
            )
        else:
            self._scorer = scorer

    def _init_pretrained_embedders(self):
        """Initialize embedders from pretrained checkpoints, if configured.

        Reads ``entity_embedder.pretrain.model_filename`` and
        ``relation_embedder.pretrain.model_filename``; for each non-empty
        filename, loads the referenced checkpoint and initializes the
        corresponding embedder from the pretrained model.

        Raises:
            ValueError: if the pretrained entity model uses distinct subject
                and object embedders (only shared embeddings are supported).
        """
        pretrained_entities_filename = ""
        pretrained_relations_filename = ""
        if self.has_option("entity_embedder.pretrain.model_filename"):
            pretrained_entities_filename = self.get_option(
                "entity_embedder.pretrain.model_filename"
            )
        if self.has_option("relation_embedder.pretrain.model_filename"):
            pretrained_relations_filename = self.get_option(
                "relation_embedder.pretrain.model_filename"
            )

        def load_pretrained_model(
            pretrained_filename: str,
        ) -> Optional[KgeModel]:
            # Load the full pretrained model; empty filename means "none".
            if pretrained_filename != "":
                self.config.log(
                    f"Initializing with embeddings stored in "
                    f"{pretrained_filename}"
                )
                checkpoint = load_checkpoint(pretrained_filename)
                return KgeModel.create_from(checkpoint)
            return None

        pretrained_entities_model = load_pretrained_model(
            pretrained_entities_filename
        )
        # When both filenames agree, reuse the loaded model instead of
        # loading the same checkpoint twice.
        if pretrained_entities_filename == pretrained_relations_filename:
            pretrained_relations_model = pretrained_entities_model
        else:
            pretrained_relations_model = load_pretrained_model(
                pretrained_relations_filename
            )
        if pretrained_entities_model is not None:
            if (
                pretrained_entities_model.get_s_embedder()
                != pretrained_entities_model.get_o_embedder()
            ):
                raise ValueError(
                    "Can only initialize with pre-trained models having "
                    "identical subject and object embeddings."
                )
            self._entity_embedder.init_pretrained(
                pretrained_entities_model.get_s_embedder()
            )
        if pretrained_relations_model is not None:
            self._relation_embedder.init_pretrained(
                pretrained_relations_model.get_p_embedder()
            )
Code example #5
0
    def __init__(
        self,
        config: Config,
        dataset: Dataset,
        scorer: Union[RelationalScorer, type],
        create_embedders=True,
        configuration_key=None,
        init_for_load_only=False,
        parameter_client=None,
        max_partition_entities=0,
    ):
        """Initialize a KGE model variant with distributed-training hooks.

        Compared to the plain model constructor, this one additionally takes
        a ``parameter_client`` and ``max_partition_entities``, which are only
        used by the (currently disabled) inline embedder-creation branch and
        the pretrained-embedding loading below.
        """
        super().__init__(config, dataset, configuration_key)

        # TODO support different embedders for subjects and objects

        #: Embedder used for entities (both subject and objects)
        self._entity_embedder: KgeEmbedder

        #: Embedder used for relations
        self._relation_embedder: KgeEmbedder

        if create_embedders:
            # Embedder construction is delegated to a helper in this variant.
            self._create_embedders(init_for_load_only)
        elif False:
            # NOTE(review): this whole branch is dead code (``elif False``),
            # apparently disabled on purpose; kept for reference. It created
            # the embedders inline with distributed-parameter-server options.
            #if self.get_option("create_complete"):
            #    embedding_layer_size = dataset.num_entities()
            if config.get("job.distributed.entity_sync_level") == "partition" and max_partition_entities != 0:
                # With partition-level syncing, only a partition's worth of
                # entity embeddings is held locally.
                embedding_layer_size =max_partition_entities
            else:
                embedding_layer_size = self._calc_embedding_layer_size(config, dataset)
            config.log(f"creating entity_embedder with {embedding_layer_size} keys")
            self._entity_embedder = KgeEmbedder.create(
                config=config,
                dataset=dataset,
                configuration_key=self.configuration_key + ".entity_embedder",
                #dataset.num_entities(),
                vocab_size=embedding_layer_size,
                init_for_load_only=init_for_load_only,
                parameter_client=parameter_client,
                lapse_offset=0,
                complete_vocab_size=dataset.num_entities()
            )

            #: Embedder used for relations
            # Relation keys are stored after the entity keys, hence the
            # lapse_offset of num_entities().
            num_relations = dataset.num_relations()
            self._relation_embedder = KgeEmbedder.create(
                config,
                dataset,
                self.configuration_key + ".relation_embedder",
                num_relations,
                init_for_load_only=init_for_load_only,
                parameter_client=parameter_client,
                lapse_offset=dataset.num_entities(),
                complete_vocab_size=dataset.num_relations(),
            )

            # Only one worker (presumably the lowest-ranked one — confirm
            # against get_min_rank) loads pretrained embeddings.
            if not init_for_load_only and parameter_client.rank == get_min_rank(config):
                # load pretrained embeddings
                pretrained_entities_filename = ""
                pretrained_relations_filename = ""
                if self.has_option("entity_embedder.pretrain.model_filename"):
                    pretrained_entities_filename = self.get_option(
                        "entity_embedder.pretrain.model_filename"
                    )
                if self.has_option("relation_embedder.pretrain.model_filename"):
                    pretrained_relations_filename = self.get_option(
                        "relation_embedder.pretrain.model_filename"
                    )

                def load_pretrained_model(
                    pretrained_filename: str,
                ) -> Optional[KgeModel]:
                    # Load the full pretrained model; empty filename means
                    # "no pretrained initialization".
                    if pretrained_filename != "":
                        self.config.log(
                            f"Initializing with embeddings stored in "
                            f"{pretrained_filename}"
                        )
                        checkpoint = load_checkpoint(pretrained_filename)
                        return KgeModel.create_from(checkpoint, parameter_client=parameter_client)
                    return None

                pretrained_entities_model = load_pretrained_model(
                    pretrained_entities_filename
                )
                # Reuse the loaded model when both filenames agree to avoid
                # loading the same checkpoint twice.
                if pretrained_entities_filename == pretrained_relations_filename:
                    pretrained_relations_model = pretrained_entities_model
                else:
                    pretrained_relations_model = load_pretrained_model(
                        pretrained_relations_filename
                    )
                if pretrained_entities_model is not None:
                    # Only pretrained models with a shared subject/object
                    # embedder are supported.
                    if (
                        pretrained_entities_model.get_s_embedder()
                        != pretrained_entities_model.get_o_embedder()
                    ):
                        raise ValueError(
                            "Can only initialize with pre-trained models having "
                            "identical subject and object embeddings."
                        )
                    self._entity_embedder.init_pretrained(
                        pretrained_entities_model.get_s_embedder()
                    )
                if pretrained_relations_model is not None:
                    self._relation_embedder.init_pretrained(
                        pretrained_relations_model.get_p_embedder()
                    )

        #: Scorer
        self._scorer: RelationalScorer
        if type(scorer) == type:
            # scorer is type of the scorer to use; call its constructor
            self._scorer = scorer(
                config=config, dataset=dataset, configuration_key=self.configuration_key
            )
        else:
            self._scorer = scorer