def __init__(self, config, dataset, configuration_key, vocab_size):
    super().__init__(config, dataset, configuration_key)

    # initialize base_embedder
    if self.configuration_key + ".base_embedder.type" not in config.options:
        config.set(
            self.configuration_key + ".base_embedder.type",
            self.get_option("base_embedder.type"),
        )
    self.base_embedder = KgeEmbedder.create(
        config, dataset, self.configuration_key + ".base_embedder", vocab_size
    )

    # initialize projection
    if self.dim < 0:
        self.dim = self.base_embedder.dim
    self.dropout = self.get_option("dropout")
    self.normalize = self.check_option("normalize", ["", "L2"])
    self.regularize = self.check_option("regularize", ["", "l1", "l2"])
    self.projection = torch.nn.Linear(self.base_embedder.dim, self.dim, bias=False)
    self.initialize(
        self.projection.weight.data,
        self.get_option("initialize"),
        self.get_option("initialize_args"),
    )

def __init__(
    self, config, dataset, configuration_key, vocab_size, init_for_load_only=False
):
    super().__init__(
        config, dataset, configuration_key, init_for_load_only=init_for_load_only
    )

    # initialize base_embedder
    if self.configuration_key + ".base_embedder.type" not in config.options:
        config.set(
            self.configuration_key + ".base_embedder.type",
            self.get_option("base_embedder.type"),
        )
    self.base_embedder = KgeEmbedder.create(
        config, dataset, self.configuration_key + ".base_embedder", vocab_size
    )

    # initialize projection
    if self.dim < 0:
        self.dim = self.base_embedder.dim
    self.dropout = self.get_option("dropout")
    self.regularize = self.check_option("regularize", ["", "lp"])
    self.projection = torch.nn.Linear(self.base_embedder.dim, self.dim, bias=False)
    if not init_for_load_only:
        self._init_embeddings(self.projection.weight.data)

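
# A minimal, self-contained sketch (not part of the library code above) of the
# pattern this projection embedder sets up: base embeddings are mapped through a
# bias-free linear layer to the target dimension, with dropout applied afterwards.
# All class names, parameters, and sizes below are illustrative assumptions, not
# the library's API.
import torch


class ProjectionSketch(torch.nn.Module):
    """Illustrative only: a base embedding table followed by a bias-free projection."""

    def __init__(self, vocab_size: int, base_dim: int, dim: int, dropout: float = 0.0):
        super().__init__()
        self.base = torch.nn.Embedding(vocab_size, base_dim)  # stands in for base_embedder
        self.projection = torch.nn.Linear(base_dim, dim, bias=False)
        self.dropout = torch.nn.Dropout(dropout)

    def embed(self, indexes: torch.Tensor) -> torch.Tensor:
        # project base embeddings to the target dimension, then apply dropout
        return self.dropout(self.projection(self.base(indexes)))


# usage: embed three ids with a 128-d base embedder projected down to 64 dims
sketch = ProjectionSketch(vocab_size=1000, base_dim=128, dim=64, dropout=0.1)
vecs = sketch.embed(torch.tensor([0, 5, 17]))  # shape: (3, 64)
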
def init_pretrained(self, pretrained_embedder: KgeEmbedder) -> None:
    (
        self_intersect_ind,
        pretrained_intersect_ind,
    ) = self._intersect_ids_with_pretrained_embedder(pretrained_embedder)
    self._embeddings.weight[
        torch.from_numpy(self_intersect_ind)
        .to(self._embeddings.weight.device)
        .long()
    ] = pretrained_embedder.embed(torch.from_numpy(pretrained_intersect_ind)).to(
        self._embeddings.weight.device
    )

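
# The init_pretrained variants above and below rely on
# _intersect_ids_with_pretrained_embedder, whose implementation is not shown in
# this listing. Below is a hypothetical standalone version of that intersection
# step, assuming both embedders expose their vocabularies as lists of string ids;
# np.intersect1d returns the positions of the shared ids in each vocabulary.
import numpy as np


def intersect_ids(self_ids, pretrained_ids):
    """Hypothetical helper (an assumption, not the library's actual code):
    positions of the shared ids in each vocabulary."""
    _, self_ind, pretrained_ind = np.intersect1d(
        np.array(self_ids), np.array(pretrained_ids), return_indices=True
    )
    return self_ind, pretrained_ind


# usage
self_ind, pre_ind = intersect_ids(["e1", "e2", "e3"], ["e3", "e1", "e9"])
# self_ind holds the positions of the shared ids ("e1", "e3") in the first
# vocabulary, pre_ind their positions in the second
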
def init_pretrained(self, pretrained_embedder: KgeEmbedder) -> None:
    (
        self_intersect_ind,
        pretrained_intersect_ind,
    ) = self._intersect_ids_with_pretrained_embedder(pretrained_embedder)
    self._embeddings.weight[
        torch.from_numpy(self_intersect_ind)
        .to(self._embeddings.weight.device)
        .long()
    ] = pretrained_embedder.embed(torch.from_numpy(pretrained_intersect_ind)).to(
        self._embeddings.weight.device
    )
    if (
        "relation_embedder" in self.configuration_key
        and "reciprocal_relations_model" in self.configuration_key
    ):
        nbr_rel_this = int(self._embeddings.weight.shape[0] / 2)
        nbr_rel_pretrained = int(pretrained_embedder._embeddings.weight.shape[0] / 2)
        self._embeddings.weight[
            torch.from_numpy(self_intersect_ind + nbr_rel_this)
            .to(self._embeddings.weight.device)
            .long()
        ] = pretrained_embedder.embed(
            torch.from_numpy(pretrained_intersect_ind + nbr_rel_pretrained)
        ).to(self._embeddings.weight.device)

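
# In the reciprocal-relations case above, the relation table stores the forward
# relations in rows [0, nbr_rel) and their inverses in rows [nbr_rel, 2 * nbr_rel),
# so pretrained rows are copied a second time with an offset of half of each table.
# A standalone illustration of that offset with made-up sizes and indices:
import torch

nbr_rel_this, nbr_rel_pretrained, dim = 4, 3, 2
this_table = torch.zeros(2 * nbr_rel_this, dim)
pretrained_table = torch.randn(2 * nbr_rel_pretrained, dim)

# shared relation: row 1 in this table corresponds to row 0 in the pretrained table
self_ind = torch.tensor([1])
pre_ind = torch.tensor([0])

# copy the forward relation ...
this_table[self_ind] = pretrained_table[pre_ind]
# ... and its inverse, which lives in the second half of each table
this_table[self_ind + nbr_rel_this] = pretrained_table[pre_ind + nbr_rel_pretrained]
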
def init_pretrained(self, pretrained_embedder: KgeEmbedder) -> None:
    (
        self_intersect_ind,
        pretrained_intersect_ind,
    ) = self._intersect_ids_with_pretrained_embedder(pretrained_embedder)
    pretrained_embeddings = pretrained_embedder.embed(
        torch.from_numpy(pretrained_intersect_ind)
    )
    self.parameter_client.push(
        torch.from_numpy(self_intersect_ind) + self.lapse_offset,
        torch.cat(
            (
                pretrained_embeddings,
                torch.zeros(
                    (
                        len(pretrained_embeddings),
                        self.optimizer_dim + self.unnecessary_dim,
                    ),
                    dtype=pretrained_embeddings.dtype,
                ),
            ),
            dim=1,
        ),
    )

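
# The distributed variant above pushes one row per shared entity to a parameter
# server: the pretrained embedding values are concatenated with zero-initialized
# columns, which, judging from the attribute names, are reserved for optimizer
# state and padding. A standalone sketch of building such rows with made-up
# sizes; all names here are illustrative assumptions.
import torch

dim, optimizer_dim, padding_dim = 4, 4, 0  # optimizer-state width depends on the optimizer
pretrained = torch.randn(3, dim)           # embeddings of three shared entities

rows = torch.cat(
    (
        pretrained,
        torch.zeros(
            (len(pretrained), optimizer_dim + padding_dim), dtype=pretrained.dtype
        ),
    ),
    dim=1,
)
# rows.shape == (3, dim + optimizer_dim + padding_dim); rows like these are what
# would be pushed to the parameter server at the entities' key offsets
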
def penalty(self, **kwargs) -> List[Tensor]:
    # TODO factor out to a utility method
    # Avoid calling lookup embedder penalty and instead call KgeEmbedder penalty
    result = KgeEmbedder.penalty(self, **kwargs)
    if self.regularize == "" or self.get_option("regularize_weight") == 0.0:
        pass
    elif self.regularize == "lp":
        p = (
            self.get_option("regularize_args.p")
            if self.has_option("regularize_args.p")
            else 2
        )
        regularize_weight = self._get_regularize_weight()
        if not self.get_option("regularize_args.weighted"):
            # unweighted Lp regularization
            parameters = self._embeddings_all()
            result += [
                (
                    f"{self.configuration_key}.L{p}_penalty",
                    (regularize_weight / p * parameters.norm(p=p) ** p).sum(),
                )
            ]
        else:
            # weighted Lp regularization
            unique_indexes, counts = torch.unique(
                kwargs["indexes"], return_counts=True
            )
            parameters = self._embed(unique_indexes)
            if p % 2 == 1:
                parameters = torch.abs(parameters)
            result += [
                (
                    f"{self.configuration_key}.L{p}_penalty",
                    (
                        regularize_weight
                        / p
                        * (parameters ** p * counts.float().view(-1, 1))
                    ).sum()
                    # In contrast to unweighted Lp regularization, rescaling by
                    # number of triples/indexes is necessary here so that penalty
                    # term is correct in expectation
                    / len(kwargs["indexes"]),
                )
            ]
    else:  # unknown regularization
        raise ValueError(f"Invalid value regularize={self.regularize}")
    return result

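
# A self-contained sketch of the two regularization branches above: the unweighted
# Lp penalty is computed once over the whole embedding table, while the weighted
# variant penalizes each embedding in proportion to how often its id occurs in a
# batch and divides by the number of indexes so the penalty is correct in
# expectation. The table size, weight, and indexes below are illustrative.
import torch

p, regularize_weight = 2, 0.01
embeddings = torch.nn.Embedding(10, 4)
indexes = torch.tensor([0, 0, 3, 7, 3, 3])  # ids as they occur in a batch

# unweighted: penalize the whole table once
unweighted = regularize_weight / p * embeddings.weight.norm(p=p) ** p

# weighted: penalize each embedding proportionally to its occurrence count,
# rescaled by the number of indexes
unique_indexes, counts = torch.unique(indexes, return_counts=True)
params = embeddings(unique_indexes)
if p % 2 == 1:
    params = torch.abs(params)  # odd p needs absolute values
weighted = (
    regularize_weight / p * (params ** p * counts.float().view(-1, 1))
).sum() / len(indexes)
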
def __init__(
    self,
    config: Config,
    dataset: Dataset,
    configuration_key: str,
    vocab_size: int,
    init_for_load_only=False,
):
    super().__init__(
        config, dataset, configuration_key, init_for_load_only=init_for_load_only
    )

    # read config
    self.config.check("train.trace_level", ["batch", "epoch"])
    self.vocab_size = vocab_size

    if self.get_option("modalities")[0] != "struct":
        raise ValueError("DKRL assumes that struct is the first modality")

    # set relation embedder dim
    # fixes the problem that, for the search, relation and entity embedder dims
    # have to be set with a single config
    # CAREFUL: THIS ASSUMES THAT THE ENTITY EMBEDDER IS CREATED FIRST
    rel_emb_conf_key = configuration_key.replace(
        "entity_embedder", "relation_embedder"
    )
    if configuration_key == rel_emb_conf_key:
        raise ValueError("Cannot set the relation embedding size")
    config.set(f"{rel_emb_conf_key}.dim", self.dim)

    # create an embedder for each modality
    self.embedder = torch.nn.ModuleDict()
    for modality in self.get_option("modalities"):
        # if the dim of a modality embedder is < 0, set it to the parent embedder
        # dim; e.g. when using DKRL, the text embedding dim should equal the
        # embedding dim, but when using LiteralE, the text embedding dim can vary
        if self.get_option(f"{modality}.dim") < 0:
            config.set(f"{self.configuration_key}.{modality}.dim", self.dim)
        embedder = KgeEmbedder.create(
            config,
            dataset,
            f"{self.configuration_key}.{modality}",
            vocab_size=self.vocab_size,
            init_for_load_only=init_for_load_only,
        )
        self.embedder[modality] = embedder

    # HACK
    # kwargs["indexes"] is set to None if the dkrl_embedder has
    # regularize_args.weighted set to False. If the child embedder has
    # regularize_args.weighted set to True, it tries to access kwargs["indexes"],
    # which leads to an error. Set regularize_args.weighted to True if it is set
    # for the struct embedder.
    if self.embedder["struct"].get_option("regularize_args.weighted"):
        config.set(self.configuration_key + ".regularize_args.weighted", True)

    # TODO handle negative dropout; it is used with Ax searches for now
    dropout = self.get_option("dropout")
    if dropout < 0:
        if config.get("train.auto_correct"):
            config.log(
                "Setting {}.dropout to 0, was set to {}.".format(
                    configuration_key, dropout
                )
            )
            dropout = 0
    self.dropout = torch.nn.Dropout(dropout)

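
# The multimodal __init__ above only constructs one embedder per modality; how the
# modality embeddings are combined is not shown in this listing. The sketch below
# assumes, purely for illustration, that the combined embedding is the elementwise
# sum of the per-modality embeddings followed by dropout; the class and all names
# are hypothetical, and the actual combination is model-specific.
import torch


class MultimodalSketch(torch.nn.Module):
    """Hypothetical combination of per-modality embedders (illustration only)."""

    def __init__(self, vocab_size: int, dim: int, modalities=("struct", "text"),
                 dropout: float = 0.0):
        super().__init__()
        # one plain embedding table per modality stands in for the per-modality embedder
        self.embedder = torch.nn.ModuleDict(
            {m: torch.nn.Embedding(vocab_size, dim) for m in modalities}
        )
        self.dropout = torch.nn.Dropout(dropout)

    def embed(self, indexes: torch.Tensor) -> torch.Tensor:
        # assumed combination: elementwise sum over modalities
        combined = sum(e(indexes) for e in self.embedder.values())
        return self.dropout(combined)


# usage
sketch = MultimodalSketch(vocab_size=100, dim=8)
vecs = sketch.embed(torch.tensor([0, 1]))  # shape: (2, 8)
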