def __init__(self, elements, nodes, emb_size, compact_dst=True):
    """Initialise the base embedder, the ``nn.Module`` part, the scorer and the layers.

    Args:
        elements: Forwarded to ``ElementEmbedderBase.__init__``.
        nodes: Forwarded to ``ElementEmbedderBase.__init__``.
        emb_size: Width of each embedding vector; also forwarded to
            ``Scorer.__init__``.
        compact_dst: Forwarded unchanged to ``ElementEmbedderBase.__init__``.
    """
    # The base initialiser runs first because `self.elements` and
    # `self.element_lookup` are read below (presumably populated by it --
    # confirm against ElementEmbedderBase).
    ElementEmbedderBase.__init__(
        self, elements=elements, nodes=nodes, compact_dst=compact_dst
    )
    # nn.Module must be initialised before any sub-module is assigned to self.
    nn.Module.__init__(self)

    distinct_dst = self.elements["dst"].unique()
    Scorer.__init__(
        self,
        num_embs=len(distinct_dst),
        emb_size=emb_size,
        src2dst=self.element_lookup,
    )

    self.emb_size = emb_size

    # One embedding row per distinct `emb_id`, plus a LayerNorm over the
    # embedding width.
    vocab_size = self.elements["emb_id"].unique().size
    self.embed = nn.Embedding(vocab_size, emb_size)
    self.norm = nn.LayerNorm(emb_size)
def sample_negative(self, size, ids=None, strategy="closest"):
    """Draw ``size`` negative samples.

    Args:
        size: Number of negatives expected back.
        ids: Ids for which the closest negatives are looked up. Required and
            non-empty whenever the "closest" path is taken; not used by the
            ``"w2v"`` path.
        strategy: ``"w2v"`` delegates to
            ``ElementEmbedderBase.sample_negative``. Any other value uses
            ``Scorer.sample_closest_negative``, unless ``self.scorer_index``
            is ``None``, in which case the ``"w2v"`` path is used as well.

    Returns:
        The result of the selected sampler, unchanged.

    Raises:
        ValueError: If the "closest" path is taken and ``ids`` is ``None``
            or empty.
        AssertionError: If the sampler did not return exactly ``size`` items.
    """
    if strategy == "w2v" or self.scorer_index is None:
        negative = ElementEmbedderBase.sample_negative(self, size)
    else:
        # Fail early with a clear message instead of an opaque
        # `len(None)` TypeError or a ZeroDivisionError in `size // len(ids)`.
        # `len()` is used rather than truthiness because `ids` may be an
        # array-like whose truth value is ambiguous.
        if ids is None or len(ids) == 0:
            raise ValueError(
                "sample_negative: non-empty `ids` are required for "
                f"strategy={strategy!r}"
            )
        # NOTE(review): `k` is floor-divided, so `size` presumably has to be a
        # multiple of `len(ids)` for the length check below to hold -- confirm
        # against Scorer.sample_closest_negative.
        negative = Scorer.sample_closest_negative(self, ids, k=size // len(ids))
    assert len(negative) == size, (
        f"expected {size} negative samples, got {len(negative)}"
    )
    return negative
def __init__(self, elements, nodes, emb_size, num_buckets=5000, max_len=100, gram_size=3):
    """Initialise the base embedder, ``nn.Module``, the scorer and the subword tables.

    Args:
        elements: Forwarded to ``ElementEmbedderBase.__init__`` and to
            ``self.init_subwords``.
        nodes: Forwarded to ``ElementEmbedderBase.__init__``.
        emb_size: Embedding width; stored on the instance and forwarded to
            ``Scorer.__init__``.
        num_buckets: Forwarded to ``self.init_subwords``.
        max_len: Forwarded to ``self.init_subwords``.
        gram_size: Stored as ``self.gram_size``.
    """
    # This variant always passes compact_dst=False to the base initialiser.
    ElementEmbedderBase.__init__(
        self, elements=elements, nodes=nodes, compact_dst=False
    )
    nn.Module.__init__(self)

    distinct_dst = self.elements["dst"].unique()
    Scorer.__init__(
        self,
        num_embs=len(distinct_dst),
        emb_size=emb_size,
        src2dst=self.element_lookup,
    )

    # Both attributes are set before init_subwords runs (presumably it reads
    # them -- confirm against init_subwords).
    self.gram_size = gram_size
    self.emb_size = emb_size

    self.init_subwords(elements, num_buckets=num_buckets, max_len=max_len)
def sample_negative(self, size, ids=None, strategy="w2v"):
    """Draw ``size`` negative samples.

    Args:
        size: Number of negatives expected back.
        ids: Ids for which the closest negatives are looked up. Required and
            non-empty for any strategy other than ``"w2v"``; not used by the
            ``"w2v"`` path.
        strategy: ``"w2v"`` (default) delegates to
            ``ElementEmbedderBase.sample_negative``; any other value uses
            ``Scorer.sample_closest_negative``.

    Returns:
        The result of the selected sampler, unchanged.

    Raises:
        ValueError: If a non-``"w2v"`` strategy is requested and ``ids`` is
            ``None`` or empty.
        AssertionError: If the sampler did not return exactly ``size`` items.
    """
    # TODO switch to w2v?
    if strategy == "w2v":
        negative = ElementEmbedderBase.sample_negative(self, size)
    else:
        # Fail early with a clear message instead of an opaque
        # `len(None)` TypeError or a ZeroDivisionError in `size // len(ids)`.
        # `len()` is used rather than truthiness because `ids` may be an
        # array-like whose truth value is ambiguous.
        if ids is None or len(ids) == 0:
            raise ValueError(
                "sample_negative: non-empty `ids` are required for "
                f"strategy={strategy!r}"
            )
        # NOTE(review): `k` is floor-divided, so `size` presumably has to be a
        # multiple of `len(ids)` for the length check below to hold -- confirm
        # against Scorer.sample_closest_negative.
        negative = Scorer.sample_closest_negative(self, ids, k=size // len(ids))
    assert len(negative) == size, (
        f"expected {size} negative samples, got {len(negative)}"
    )
    return negative
def sample_negative(self, size, ids=None, strategy="closest"):
    """Draw ``size`` negative samples and return them as a ``torch.LongTensor``.

    Args:
        size: Number of negatives expected back.
        ids: Ids for which the closest negatives are looked up. Required and
            non-empty for any strategy other than ``"w2v"``; not used by the
            ``"w2v"`` path.
        strategy: ``"w2v"`` delegates to
            ``ElementEmbedderBase.sample_negative``; any other value
            (including the default) uses ``Scorer.sample_closest_negative``.

    Returns:
        ``torch.LongTensor`` built from the sampled negatives.

    Raises:
        ValueError: If a non-``"w2v"`` strategy is requested and ``ids`` is
            ``None`` or empty.
        AssertionError: If the sampler did not return exactly ``size`` items.
    """
    # TODO: try other distributions (e.g. random.choices over the candidates
    # returned by Scorer.sample_closest_negative).
    if strategy == "w2v":
        negative = ElementEmbedderBase.sample_negative(self, size)
    else:
        # Fail early with a clear message instead of an opaque
        # `len(None)` TypeError or a ZeroDivisionError in `size // len(ids)`.
        # `len()` is used rather than truthiness because `ids` may be an
        # array-like whose truth value is ambiguous.
        if ids is None or len(ids) == 0:
            raise ValueError(
                "sample_negative: non-empty `ids` are required for "
                f"strategy={strategy!r}"
            )
        # NOTE(review): `k` is floor-divided, so `size` presumably has to be a
        # multiple of `len(ids)` for the length check below to hold -- confirm
        # against Scorer.sample_closest_negative.
        negative = Scorer.sample_closest_negative(self, ids, k=size // len(ids))
    assert len(negative) == size, (
        f"expected {size} negative samples, got {len(negative)}"
    )
    return torch.LongTensor(negative)
def __init__(self, elements, nodes, compact_dst=True, dst_to_global=True, emb_size=None,
             device="cpu", method="inner_prod", nn_index="brute", ns_groups=None):
    """Initialise the base embedder and the scorer.

    Args:
        elements: Forwarded to ``ElementEmbedderBase.__init__``.
        nodes: Forwarded to ``ElementEmbedderBase.__init__``.
        compact_dst: Forwarded to ``ElementEmbedderBase.__init__``.
        dst_to_global: Forwarded to ``ElementEmbedderBase.__init__``.
        emb_size: Embedding width forwarded to ``Scorer.__init__``. It has a
            ``None`` default in the signature but must be supplied.
        device: Forwarded to ``Scorer.__init__``.
        method: Forwarded to ``Scorer.__init__``.
        nn_index: Forwarded to ``Scorer.__init__`` as ``index_backend``.
        ns_groups: Forwarded to ``Scorer.__init__``.

    Raises:
        ValueError: If ``emb_size`` is ``None``.
    """
    # Explicit raise rather than `assert`: assert statements are removed
    # under `python -O`, which would let `None` reach Scorer.__init__.
    if emb_size is None:
        raise ValueError("emb_size must be provided (got None)")

    # The base initialiser runs first because `self.elements` and
    # `self.element_lookup` are read below (presumably populated by it --
    # confirm against ElementEmbedderBase).
    ElementEmbedderBase.__init__(
        self,
        elements=elements,
        nodes=nodes,
        compact_dst=compact_dst,
        dst_to_global=dst_to_global,
    )
    Scorer.__init__(
        self,
        num_embs=len(self.elements["dst"].unique()),
        emb_size=emb_size,
        src2dst=self.element_lookup,
        device=device,
        method=method,
        index_backend=nn_index,
        ns_groups=ns_groups,
    )
def prepare_index(self):
    """Call ``self.set_embed()`` and then build the index via ``Scorer.prepare_index``."""
    # set_embed() runs before the scorer builds its index (presumably so the
    # index reflects the current embeddings -- confirm against set_embed).
    self.set_embed()
    Scorer.prepare_index(self)