Example #1
    def __init__(self, config, dataset, configuration_key, vocab_size):
        super().__init__(config, dataset, configuration_key)

        # initialize base_embedder
        if self.configuration_key + ".base_embedder.type" not in config.options:
            config.set(
                self.configuration_key + ".base_embedder.type",
                self.get_option("base_embedder.type"),
            )
        self.base_embedder = KgeEmbedder.create(
            config, dataset, self.configuration_key + ".base_embedder",
            vocab_size)

        # initialize projection
        if self.dim < 0:
            self.dim = self.base_embedder.dim
        self.dropout = self.get_option("dropout")
        self.normalize = self.check_option("normalize", ["", "L2"])
        self.regularize = self.check_option("regularize", ["", "l1", "l2"])
        self.projection = torch.nn.Linear(self.base_embedder.dim,
                                          self.dim,
                                          bias=False)
        self.initialize(
            self.projection.weight.data,
            self.get_option("initialize"),
            self.get_option("initialize_args"),
        )
Example #2
    def __init__(self,
                 config,
                 dataset,
                 configuration_key,
                 vocab_size,
                 init_for_load_only=False):
        super().__init__(config,
                         dataset,
                         configuration_key,
                         init_for_load_only=init_for_load_only)

        # initialize base_embedder
        if self.configuration_key + ".base_embedder.type" not in config.options:
            config.set(
                self.configuration_key + ".base_embedder.type",
                self.get_option("base_embedder.type"),
            )
        self.base_embedder = KgeEmbedder.create(
            config, dataset, self.configuration_key + ".base_embedder",
            vocab_size)

        # initialize projection
        if self.dim < 0:
            self.dim = self.base_embedder.dim
        self.dropout = self.get_option("dropout")
        self.regularize = self.check_option("regularize", ["", "lp"])
        self.projection = torch.nn.Linear(self.base_embedder.dim,
                                          self.dim,
                                          bias=False)
        if not init_for_load_only:
            self._init_embeddings(self.projection.weight.data)
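
Examples #1 and #2 both stack a projection on top of a base embedder: the base embedder produces vectors of size base_embedder.dim and a bias-free torch.nn.Linear maps them to the target dim, with dim < 0 meaning "inherit the base dimension". Below is a minimal, self-contained sketch of that pattern in plain PyTorch; the class name ProjectionSketch, the toy lookup table used as the base embedder, and the Xavier initialization are illustrative assumptions, not part of the library.

import torch

class ProjectionSketch(torch.nn.Module):
    """Illustrative stand-in for the projection step in Examples #1-2."""

    def __init__(self, base_dim: int, dim: int = -1, dropout: float = 0.0):
        super().__init__()
        # dim < 0 means "inherit the base embedder's dimensionality"
        self.dim = base_dim if dim < 0 else dim
        self.base = torch.nn.Embedding(100, base_dim)  # toy base embedder
        self.projection = torch.nn.Linear(base_dim, self.dim, bias=False)
        self.dropout = torch.nn.Dropout(dropout)
        torch.nn.init.xavier_uniform_(self.projection.weight.data)

    def embed(self, indexes: torch.Tensor) -> torch.Tensor:
        # base lookup -> linear projection -> dropout
        return self.dropout(self.projection(self.base(indexes)))

# usage
sketch = ProjectionSketch(base_dim=128, dim=64)
print(sketch.embed(torch.tensor([0, 1, 2])).shape)  # torch.Size([3, 64])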
Example #3
    def init_pretrained(self, pretrained_embedder: KgeEmbedder) -> None:
        (
            self_intersect_ind,
            pretrained_intersect_ind,
        ) = self._intersect_ids_with_pretrained_embedder(pretrained_embedder)
        # copy the pretrained rows for all ids shared by both vocabularies
        self._embeddings.weight[
            torch.from_numpy(self_intersect_ind)
            .to(self._embeddings.weight.device)
            .long()
        ] = pretrained_embedder.embed(
            torch.from_numpy(pretrained_intersect_ind)
        ).to(self._embeddings.weight.device)
Example #4
    def init_pretrained(self, pretrained_embedder: KgeEmbedder) -> None:
        (
            self_intersect_ind,
            pretrained_intersect_ind,
        ) = self._intersect_ids_with_pretrained_embedder(pretrained_embedder)
        # copy the pretrained rows for all ids shared by both vocabularies
        self._embeddings.weight[
            torch.from_numpy(self_intersect_ind)
            .to(self._embeddings.weight.device)
            .long()
        ] = pretrained_embedder.embed(
            torch.from_numpy(pretrained_intersect_ind)
        ).to(self._embeddings.weight.device)
        if ("relation_embedder" in self.configuration_key
                and "reciprocal_relations_model" in self.configuration_key):
            # reciprocal relations live in the second half of the embedding
            # matrix; copy their pretrained rows with the same index shift
            nbr_rel_this = int(self._embeddings.weight.shape[0] / 2)
            nbr_rel_pretrained = int(
                pretrained_embedder._embeddings.weight.shape[0] / 2)
            self._embeddings.weight[
                torch.from_numpy(self_intersect_ind + nbr_rel_this)
                .to(self._embeddings.weight.device)
                .long()
            ] = pretrained_embedder.embed(
                torch.from_numpy(pretrained_intersect_ind + nbr_rel_pretrained)
            ).to(self._embeddings.weight.device)
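
Examples #3 and #4 copy pretrained vectors into the rows whose ids occur in both vocabularies; Example #4 additionally copies the reciprocal-relation rows stored in the second half of the embedding matrix. Below is a stripped-down sketch of the row-copy step with plain tensors, assuming the id intersection is computed with numpy.intersect1d; the helper name copy_pretrained_rows is hypothetical and this is not the library's _intersect_ids_with_pretrained_embedder.

import numpy as np
import torch

def copy_pretrained_rows(weight: torch.Tensor,
                         pretrained_weight: torch.Tensor,
                         self_ids: np.ndarray,
                         pretrained_ids: np.ndarray) -> None:
    """Copy rows of pretrained_weight into weight for ids present in both."""
    # positions of the common ids in each embedding matrix
    _, self_idx, pre_idx = np.intersect1d(
        self_ids, pretrained_ids, return_indices=True)
    with torch.no_grad():  # plain copy, no gradient tracking
        weight[torch.from_numpy(self_idx).long()] = pretrained_weight[
            torch.from_numpy(pre_idx).long()
        ].to(weight.device)

# usage: 5 ids here, 4 ids in the pretrained model, 3 shared
weight = torch.zeros(5, 2)
pretrained = torch.arange(8, dtype=torch.float).view(4, 2)
copy_pretrained_rows(weight, pretrained,
                     self_ids=np.array([10, 11, 12, 13, 14]),
                     pretrained_ids=np.array([11, 12, 14, 99]))
print(weight)  # rows 1, 2, 4 now hold the pretrained vectors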
Example #5
    def init_pretrained(self, pretrained_embedder: KgeEmbedder) -> None:
        (
            self_intersect_ind,
            pretrained_intersect_ind,
        ) = self._intersect_ids_with_pretrained_embedder(pretrained_embedder)
        pretrained_embeddings = pretrained_embedder.embed(
            torch.from_numpy(pretrained_intersect_ind))
        # push the pretrained rows to the parameter server; each row is padded
        # with zero columns for the extra slots (optimizer_dim + unnecessary_dim)
        # that are stored next to the embedding values
        self.parameter_client.push(
            torch.from_numpy(self_intersect_ind) + self.lapse_offset,
            torch.cat(
                (
                    pretrained_embeddings,
                    torch.zeros(
                        (len(pretrained_embeddings),
                         self.optimizer_dim + self.unnecessary_dim),
                        dtype=pretrained_embeddings.dtype,
                    ),
                ),
                dim=1,
            ),
        )
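
In Example #5 the pretrained vectors are not written into a local tensor but pushed to a parameter server, with each row padded by zero-filled columns (optimizer_dim + unnecessary_dim) that live next to the embedding values. The sketch below only mimics the payload construction with plain torch; build_push_payload is a hypothetical helper, and parameter_client.push / lapse_offset belong to the distributed setup and appear only in a comment.

import torch

def build_push_payload(pretrained_embeddings: torch.Tensor,
                       extra_dim: int) -> torch.Tensor:
    """Append zero-filled columns (e.g. optimizer-state slots) to each row."""
    padding = torch.zeros(
        (len(pretrained_embeddings), extra_dim),
        dtype=pretrained_embeddings.dtype,
    )
    return torch.cat((pretrained_embeddings, padding), dim=1)

# usage: 3 embeddings of dim 4, padded with 2 extra columns per row
payload = build_push_payload(torch.randn(3, 4), extra_dim=2)
print(payload.shape)  # torch.Size([3, 6])
# the keys would then be shifted by the server-side offset before pushing, e.g.
# parameter_client.push(torch.from_numpy(self_intersect_ind) + lapse_offset, payload)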
Example #6
    def penalty(self, **kwargs) -> List[Tensor]:
        # TODO factor out to a utility method
        # Avoid calling lookup embedder penalty and instead call KgeEmbedder penalty
        result = KgeEmbedder.penalty(self, **kwargs)
        if self.regularize == "" or self.get_option(
                "regularize_weight") == 0.0:
            pass
        elif self.regularize == "lp":
            p = (self.get_option("regularize_args.p")
                 if self.has_option("regularize_args.p") else 2)
            regularize_weight = self._get_regularize_weight()
            if not self.get_option("regularize_args.weighted"):
                # unweighted Lp regularization
                parameters = self._embeddings_all()
                result += [(
                    f"{self.configuration_key}.L{p}_penalty",
                    (regularize_weight / p * parameters.norm(p=p)**p).sum(),
                )]
            else:
                # weighted Lp regularization
                unique_indexes, counts = torch.unique(kwargs["indexes"],
                                                      return_counts=True)
                parameters = self._embed(unique_indexes)
                if p % 2 == 1:
                    parameters = torch.abs(parameters)
                result += [(
                    f"{self.configuration_key}.L{p}_penalty",
                    (regularize_weight / p *
                     (parameters**p * counts.float().view(-1, 1))).sum()
                    # In contrast to unweighted Lp regularization, rescaling by
                    # number of triples/indexes is necessary here so that penalty
                    # term is correct in expectation
                    / len(kwargs["indexes"]),
                )]
        else:  # unknown regularization
            raise ValueError(f"Invalid value regularize={self.regularize}")

        return result
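
Example #6 computes weight / p * ||theta||_p^p, either over all embeddings (unweighted) or only over the rows used in the batch, scaled by how often each index occurs and rescaled by the batch size so the penalty is correct in expectation. A self-contained sketch of the two terms with plain torch follows; the function names lp_penalty and weighted_lp_penalty are illustrative and the KgeEmbedder.penalty bookkeeping is omitted.

import torch

def lp_penalty(parameters: torch.Tensor, weight: float, p: int) -> torch.Tensor:
    """Unweighted Lp penalty: weight / p * sum_i |theta_i|^p."""
    return weight / p * parameters.norm(p=p) ** p

def weighted_lp_penalty(embeddings: torch.Tensor,
                        indexes: torch.Tensor,
                        weight: float,
                        p: int) -> torch.Tensor:
    """Lp penalty over the rows used in a batch, weighted by usage counts."""
    unique_indexes, counts = torch.unique(indexes, return_counts=True)
    params = embeddings[unique_indexes]
    if p % 2 == 1:  # odd p: take absolute values first
        params = torch.abs(params)
    penalty = (weight / p * (params ** p * counts.float().view(-1, 1))).sum()
    # rescale by the batch size so the expectation matches the unweighted term
    return penalty / len(indexes)

# usage
emb = torch.randn(10, 4)
idx = torch.tensor([0, 0, 3, 7])
print(lp_penalty(emb, weight=0.1, p=2),
      weighted_lp_penalty(emb, idx, weight=0.1, p=2))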
Example #7
    def __init__(
        self,
        config: Config,
        dataset: Dataset,
        configuration_key: str,
        vocab_size: int,
        init_for_load_only=False,
    ):
        super().__init__(config,
                         dataset,
                         configuration_key,
                         init_for_load_only=init_for_load_only)

        # read config
        self.config.check("train.trace_level", ["batch", "epoch"])
        self.vocab_size = vocab_size

        if self.get_option("modalities")[0] != "struct":
            raise ValueError("DKRL assumes that struct is the first modality")

        # set relation embedder dim
        # fixes the problem that for the search, the relation and entity embedder
        # dims have to be set with a single config
        # CAREFUL: THIS ASSUMES THAT THE ENTITY EMBEDDER IS CREATED FIRST
        rel_emb_conf_key = configuration_key.replace("entity_embedder",
                                                     "relation_embedder")
        if configuration_key == rel_emb_conf_key:
            raise ValueError("Cannot set the relation embedding size")
        config.set(f"{rel_emb_conf_key}.dim", self.dim)

        # create embedder for each modality
        self.embedder = torch.nn.ModuleDict()
        for modality in self.get_option("modalities"):
            # if the dim of a modality embedder is < 0, set it to the parent embedder dim
            # e.g., when using DKRL the text embedding dim should equal the entity
            # embedding dim, whereas with LiteralE the text embedding dim can vary
            if self.get_option(f"{modality}.dim") < 0:
                config.set(f"{self.configuration_key}.{modality}.dim",
                           self.dim)

            embedder = KgeEmbedder.create(
                config,
                dataset,
                f"{self.configuration_key}.{modality}",
                vocab_size=self.vocab_size,
                init_for_load_only=init_for_load_only)
            self.embedder[modality] = embedder

        # HACK
        # kwargs["indexes"] is set to None if the dkrl_embedder has
        # regularize_args.weighted set to False.
        # If a child embedder has regularize_args.weighted set to True,
        # it then tries to access kwargs["indexes"], which leads to an error.

        # Set regularize_args.weighted to True if it is set for the struct embedder
        if self.embedder["struct"].get_option("regularize_args.weighted"):
            config.set(self.configuration_key + ".regularize_args.weighted",
                       True)

        # TODO negative dropout is tolerated here because Ax searches may propose it for now
        dropout = self.get_option("dropout")
        if dropout < 0:
            if config.get("train.auto_correct"):
                config.log("Setting {}.dropout to 0, "
                           "was set to {}.".format(configuration_key, dropout))
                dropout = 0
        self.dropout = torch.nn.Dropout(dropout)
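
Example #7 keeps one child embedder per modality in a torch.nn.ModuleDict and forces every modality to the shared output dim. The sketch below shows that container shape with plain PyTorch modules; the class name MultiModalEmbedderSketch, the modality names, and combining modalities by summation are illustrative assumptions rather than the library's exact DKRL scoring path.

import torch

class MultiModalEmbedderSketch(torch.nn.Module):
    """One embedder per modality, all producing vectors of a shared dim."""

    def __init__(self, vocab_size: int, dim: int, modalities=("struct", "text")):
        super().__init__()
        self.embedder = torch.nn.ModuleDict({
            # here every modality is a plain lookup table of the shared dim;
            # in DKRL the text modality would be a CNN over entity descriptions
            modality: torch.nn.Embedding(vocab_size, dim)
            for modality in modalities
        })
        self.dropout = torch.nn.Dropout(0.0)

    def embed(self, indexes: torch.Tensor) -> torch.Tensor:
        # combine modalities; summation is one simple choice
        stacked = torch.stack(
            [emb(indexes) for emb in self.embedder.values()], dim=0
        )
        return self.dropout(stacked.sum(dim=0))

# usage
sketch = MultiModalEmbedderSketch(vocab_size=50, dim=16)
print(sketch.embed(torch.tensor([1, 2, 3])).shape)  # torch.Size([3, 16])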