def __init__(self, W, corpus, power=0.75, sample_size=5):
    """Set up the negative-sampling loss machinery.

    Builds one UnigramSampler over ``corpus`` plus ``sample_size + 1``
    pairs of (EmbeddingDot, SigmoidWithLoss) layers: index 0 handles the
    positive example, indices 1..sample_size handle the negatives.
    All EmbeddingDot layers share the same weight matrix ``W``.
    """
    self.sample_size = sample_size
    self.sampler = UnigramSampler(corpus, power, sample_size)

    # One layer pair per example: 1 positive + sample_size negatives.
    n_pairs = sample_size + 1
    self.loss_layers = [SigmoidWithLoss() for _ in range(n_pairs)]
    self.embed_dot_layers = [EmbeddingDot(W) for _ in range(n_pairs)]

    # Collect every layer's params/grads so an optimizer sees them all.
    self.params, self.grads = [], []
    for dot_layer in self.embed_dot_layers:
        self.params += dot_layer.params
        self.grads += dot_layer.grads
class NegativeSamplingLoss:
    """Word2vec output-side loss using negative sampling.

    Instead of a full softmax over the vocabulary, scores the true target
    word (label 1) and ``sample_size`` sampled negative words (label 0)
    with sigmoid + cross-entropy, summing the losses.
    """

    def __init__(self, W, corpus, power=0.75, sample_size=5):
        """Build the sampler and one (EmbeddingDot, SigmoidWithLoss) pair
        per example: index 0 is the positive example, 1..sample_size the
        negatives. All EmbeddingDot layers share the weight matrix W.
        """
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [
            EmbeddingDot(W) for _ in range(sample_size + 1)
        ]
        # Aggregate all layers' params/grads for the optimizer.
        self.params, self.grads = [], []
        for layer in self.embed_dot_layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, h, target):
        """Return the summed loss for the positive target and sampled negatives.

        h      : hidden-layer output, shape (batch_size, hidden_dim)
        target : correct word ids, shape (batch_size,)
        """
        batch_size = target.shape[0]
        negative_sample = self.sampler.get_negative_sample(target)

        # Positive example: the true target word gets label 1.
        score = self.embed_dot_layers[0].forward(h, target)
        correct_label = np.ones(batch_size, dtype=np.int32)
        loss = self.loss_layers[0].forward(score, correct_label)

        # Negative examples: each sampled word gets label 0.
        # BUGFIX: the previous version had no loop — it referenced an
        # undefined `i`/`negative_label` and skipped the positive loss.
        negative_label = np.zeros(batch_size, dtype=np.int32)
        for i in range(self.sample_size):
            negative_target = negative_sample[:, i]
            score = self.embed_dot_layers[1 + i].forward(h, negative_target)
            loss += self.loss_layers[1 + i].forward(score, negative_label)

        return loss

    def backward(self, dout=1):
        """Backpropagate through every loss/embed-dot pair.

        The hidden vector h fed every branch in forward, so its gradients
        from all branches are summed.
        """
        dh = 0
        for l0, l1 in zip(self.loss_layers, self.embed_dot_layers):
            dscore = l0.backward(dout)
            dh += l1.backward(dscore)
        return dh
# Demo: sampling with numpy.random.choice.
# Draw 5 distinct items (no replacement) uniformly from `words`.
print(np.random.choice(words, size=5, replace=False))

# Draw a single item according to an explicit probability distribution.
p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
print(np.random.choice(words, p=p))

# Raising probabilities to the 0.75 power and renormalizing flattens the
# distribution slightly, boosting the chance of rare words (word2vec trick).
p = [0.7, 0.29, 0.01]
new_p = np.power(p, 0.75)
new_p /= np.sum(new_p)
print(new_p)

import sys
sys.path.append('..')  # make the book's chapter packages importable
from ch04.negative_sampling_layer import UnigramSampler

# Demo: draw `sample_size` negative word ids per target from the
# (0.75-powered) unigram distribution of this toy corpus.
corpus = np.array([0, 1, 2, 3, 4, 1, 2, 3])
power = 0.75
sample_size = 2
sampler = UnigramSampler(corpus, power, sample_size)
target = np.array([1, 3, 0])
negative_sample = sampler.get_negative_sample(target)
print(negative_sample)


class NegativeSamplingLoss:
    # NOTE(review): this definition appears truncated by the chunking —
    # __init__ stops right after initializing params/grads and the loop
    # that collects each embed_dot layer's params/grads (present in the
    # full version elsewhere in this file) is missing here. Confirm
    # against the complete source before relying on this copy.
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        """Build the unigram sampler plus one (SigmoidWithLoss,
        EmbeddingDot) pair per example: index 0 for the positive target,
        indices 1..sample_size for the sampled negatives."""
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [
            EmbeddingDot(W) for _ in range(sample_size + 1)
        ]
        self.params, self.grads = [], []