def __init__(self, dataset, cs):
    # `args` is assumed to be a module-level argparse namespace.
    self.window = args.window
    #self.bs = args.batchsize
    self.ns = args.negative_size
    self.dataset = dataset
    # Unigram counts raised to the 3/4 power, as in word2vec.
    self.sampler = walker_alias.WalkerAlias(np.power(cs, 0.75))

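# A minimal sketch (not from the original script) of how such a sampler is
# consumed: WalkerAlias.sample takes a shape and returns an integer array of
# word ids drawn in proportion to the counts**0.75 weights.
import numpy as np
from chainer.utils import walker_alias

cs = [10, 5, 1]                        # toy unigram counts
sampler = walker_alias.WalkerAlias(np.power(cs, 0.75))
negatives = sampler.sample((4, 5))     # 5 negative ids for each of 4 targets
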
def __init__(self, in_size, counts, sample_size):
    super(BlackOut, self).__init__()
    vocab_size = len(counts)
    p = numpy.array(counts, dtype=numpy.float32)
    self.sampler = walker_alias.WalkerAlias(p)
    self.sample_size = sample_size
    # Log of the (unnormalized) proposal distribution; the epsilon
    # guards against log(0) for zero-count words.
    self.log_q = -numpy.log(p + 1e-8)
    with self.init_scope():
        self.W = variable.Parameter(shape=(vocab_size, in_size))

def __init__(self, in_size, counts, sample_size, power=0.75):
    self.sample_size = sample_size
    p = numpy.array(counts, numpy.float32)
    p = numpy.power(p, p.dtype.type(power))
    self.sampler = walker_alias.WalkerAlias(p)
    vocab_size = len(counts)
    self.W = numpy.zeros((vocab_size, in_size), dtype=numpy.float32)
    # Gradient buffer, initialized to NaN so uninitialized reads are obvious.
    self.gW = numpy.full_like(self.W, numpy.nan)

def __init__(self, in_size, counts, sample_size, power=0.75):
    vocab_size = len(counts)
    super(NegativeSampling, self).__init__(W=(vocab_size, in_size))
    self.W.data.fill(0)
    self.sample_size = sample_size
    power = numpy.float32(power)
    p = numpy.array(counts, power.dtype)
    numpy.power(p, power, p)
    self.sampler = walker_alias.WalkerAlias(p)

def __init__(self, in_size, counts, sample_size, power=0.75):
    super(NegativeSampling, self).__init__()
    vocab_size = len(counts)
    self.sample_size = sample_size
    power = numpy.float32(power)
    p = numpy.array(counts, power.dtype)
    numpy.power(p, power, p)
    self.sampler = walker_alias.WalkerAlias(p)
    with self.init_scope():
        self.W = variable.Parameter(0, (vocab_size, in_size))

def __init__(self, in_size, counts, sample_size, power=0.75, dtype=None):
    super(NegativeSampling, self).__init__()
    dtype = chainer.get_dtype(dtype)
    vocab_size = len(counts)
    self.sample_size = sample_size
    power = dtype.type(power)
    p = numpy.array(counts, dtype)
    numpy.power(p, power, p)
    self.sampler = walker_alias.WalkerAlias(p)
    with self.init_scope():
        self.W = variable.Parameter(0, (vocab_size, in_size))

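# A hedged construction/usage sketch, assuming the variant above is the
# chainer.links.NegativeSampling link (toy values are illustrative):
import numpy
import chainer.links as L

counts = [100, 50, 10, 1]                           # unigram counts per word id
link = L.NegativeSampling(in_size=8, counts=counts, sample_size=3)
x = numpy.random.rand(2, 8).astype(numpy.float32)   # batch of input embeddings
t = numpy.array([0, 2], dtype=numpy.int32)          # positive word ids
loss = link(x, t)                                   # summed sampled loss
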
def __init__(self, in_size, counts, sample_size):
    vocab_size = len(counts)
    super(BlackOut, self).__init__(W=(vocab_size, in_size))
    p = numpy.array(counts, dtype=numpy.float32)
    self.sampler = walker_alias.WalkerAlias(p)
    self.sample_size = sample_size

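# Similarly for BlackOut, a hedged sketch assuming chainer.links.BlackOut;
# its W parameter starts out NaN-initialized, so it is seeded here before use:
import numpy
import chainer.links as L

counts = [100, 50, 10, 1]
link = L.BlackOut(in_size=8, counts=counts, sample_size=3)
link.W.data[...] = numpy.random.rand(4, 8).astype(numpy.float32)
x = numpy.random.rand(2, 8).astype(numpy.float32)
t = numpy.array([1, 3], dtype=numpy.int32)
loss = link(x, t)   # BlackOut loss over target + sampled candidates
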
for word in line.split():
    if word not in word2index:
        ind = len(word2index)
        word2index[word] = ind
        index2word[ind] = word
        counts[ind] = 0
    counts[word2index[word]] += 1   # was `= +1`, which reset every count to 1
    dataset.append(word2index[word])
n_vocab = len(word2index)
datasize = len(dataset)
print("num_of_vocab, datasize : ({0}, {1})".format(n_vocab, datasize))
cs = [counts[w] for w in range(len(counts))]
power = np.float32(0.75)
p = np.array(cs, power.dtype)
np.power(p, power, p)  # p **= 0.75 in place, as in the snippets above
# negative-sample generator (a probability distribution over word ids)
sampler = walker_alias.WalkerAlias(p)

# define model
class MyW2V2(chainer.Chain):  # inherits from chainer.Chain
    def __init__(self, v, m):
        super(MyW2V2, self).__init__(  # superclass initialization
            embed=L.EmbedID(v, m),     # v words, m-dimensional embeddings
        )

    def __call__(self, xb, eb, sampler, ngs):
        # xb: target word ids, eb: context embeddings,
        # sampler: sampling function (e.g. WalkerAlias.sample), ngs: number of negatives
        loss = None
        for i in range(len(xb)):
            x = Variable(np.array([xb[i]], dtype=np.int32))
            e = eb[i]
            ls = F.negative_sampling(e, x, self.embed.W, sampler, ngs)
            loss = ls if loss is None else loss + ls
        return loss

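# A minimal, self-contained sketch of the F.negative_sampling call used in
# __call__ above (toy sizes; not from the original script). Note that the
# sampler argument is a sampling function, typically WalkerAlias.sample:
import numpy as np
import chainer.functions as F
from chainer import Variable
from chainer.utils import walker_alias

W = Variable(np.random.rand(5, 4).astype(np.float32))   # output word vectors
e = Variable(np.random.rand(1, 4).astype(np.float32))   # one input embedding
t = np.array([2], dtype=np.int32)                       # positive word id
sampler = walker_alias.WalkerAlias(np.array([4., 3., 2., 1., 1.]) ** 0.75)
loss = F.negative_sampling(e, t, W, sampler.sample, 3)  # 3 negative samples
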
if args.gpu >= 0:
    cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

#optimizer = optimizers.Adam()
optimizer = optimizers.AdaGrad()
#optimizer = optimizers.SGD()
optimizer.setup(model)
#optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

#====================
# model learning
#====================
sampler = walker_alias.WalkerAlias(np.power(cs, 0.75))
ng_size = args.negative_size
n_data = len(text_data)
n_win = args.window
bs = args.batchsize
for epoch in tqdm(range(args.epoch)):
    # shuffle all positions that have a full context window on both sides
    indexes = np.arange(n_win, n_data - n_win)
    np.random.shuffle(indexes)
    for n in range(0, len(indexes), bs):
        index = indexes[n:n + bs]
        context = []
        sentiment = []

def _make_sampler(dataset: DataSet) -> walker_alias.WalkerAlias:
    # Unigram counts over the corpus, smoothed by the 3/4 power.
    _, counts = np.unique(dataset.data, return_counts=True)
    counts = np.power(counts, 0.75)
    return walker_alias.WalkerAlias(counts)

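# A hedged usage sketch for _make_sampler; DataSet is assumed to expose an
# integer-id array as `.data` (the toy stand-in below is illustrative only):
import numpy as np
from types import SimpleNamespace

toy = SimpleNamespace(data=np.array([0, 1, 1, 2, 2, 2], dtype=np.int32))
sampler = _make_sampler(toy)          # duck-types the DataSet interface
negatives = sampler.sample((3,))      # three ids; frequent words more likely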