Example #1
    def __init__(self, dataset, cs):
        self.window = args.window          # context window size
        # self.bs = args.batchsize
        self.ns = args.negative_size       # negative samples per target word

        self.dataset = dataset
        # unigram counts raised to the 0.75 power, the usual word2vec smoothing
        self.sampler = walker_alias.WalkerAlias(np.power(cs, 0.75))
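For orientation, here is a minimal self-contained sketch of the sampler itself; the counts below are made up, and only the WalkerAlias constructor and its sample method are the actual Chainer API:

import numpy as np
from chainer.utils import walker_alias

counts = [10, 3, 2, 1]                     # hypothetical unigram counts
p = np.power(np.array(counts, np.float32), 0.75)
sampler = walker_alias.WalkerAlias(p)      # builds the alias table once
negatives = sampler.sample((8, 5))         # 8 rows of 5 sampled word ids
print(negatives.shape)                     # (8, 5)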
Example #2
    def __init__(self, in_size, counts, sample_size):
        super(BlackOut, self).__init__()
        vocab_size = len(counts)
        p = numpy.array(counts, dtype=numpy.float32)
        self.sampler = walker_alias.WalkerAlias(p)
        self.sample_size = sample_size
        # log of the unnormalized sampling probabilities; 1e-8 avoids log(0)
        self.log_q = -numpy.log(p + 1e-8)
        with self.init_scope():
            self.W = variable.Parameter(shape=(vocab_size, in_size))
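A hedged usage sketch for the chainer.links.BlackOut link this initializer belongs to; the sizes and counts are made up, and W is filled by hand here because the parameter may start uninitialized:

import numpy as np
import chainer.links as L

link = L.BlackOut(in_size=50, counts=[8, 5, 3, 1], sample_size=2)
link.W.data[...] = 0.1 * np.random.randn(4, 50).astype(np.float32)

x = np.random.randn(6, 50).astype(np.float32)     # batch of hidden vectors
t = np.array([0, 1, 2, 3, 0, 1], dtype=np.int32)  # target word ids
loss = link(x, t)                                 # scalar BlackOut loss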
Example #3
    def __init__(self, in_size, counts, sample_size, power=0.75):
        self.sample_size = sample_size
        p = numpy.array(counts, numpy.float32)
        p = numpy.power(p, p.dtype.type(power))   # smooth the unigram distribution
        self.sampler = walker_alias.WalkerAlias(p)

        vocab_size = len(counts)
        self.W = numpy.zeros((vocab_size, in_size)).astype(numpy.float32)
        self.gW = numpy.full_like(self.W, numpy.nan)  # gradient buffer, NaN until computed
Example #4
    def __init__(self, in_size, counts, sample_size, power=0.75):
        vocab_size = len(counts)
        super(NegativeSampling, self).__init__(W=(vocab_size, in_size))
        self.W.data.fill(0)

        self.sample_size = sample_size
        power = numpy.float32(power)
        p = numpy.array(counts, power.dtype)
        numpy.power(p, power, p)   # in-place: p = p ** power
        self.sampler = walker_alias.WalkerAlias(p)
Example #5
    def __init__(self, in_size, counts, sample_size, power=0.75):
        super(NegativeSampling, self).__init__()
        vocab_size = len(counts)
        self.sample_size = sample_size
        power = numpy.float32(power)
        p = numpy.array(counts, power.dtype)
        numpy.power(p, power, p)
        self.sampler = walker_alias.WalkerAlias(p)

        with self.init_scope():
            self.W = variable.Parameter(0, (vocab_size, in_size))
Example #6
    def __init__(self, in_size, counts, sample_size, power=0.75, dtype=None):
        super(NegativeSampling, self).__init__()
        dtype = chainer.get_dtype(dtype)   # fall back to the global default dtype
        vocab_size = len(counts)
        self.sample_size = sample_size
        power = dtype.type(power)
        p = numpy.array(counts, dtype)
        numpy.power(p, power, p)           # in-place smoothing of the unigram counts
        self.sampler = walker_alias.WalkerAlias(p)

        with self.init_scope():
            self.W = variable.Parameter(0, (vocab_size, in_size))  # zero-initialized
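The variants above all prepare the same smoothed sampler for chainer.links.NegativeSampling. A hedged usage sketch, with made-up sizes and counts:

import numpy as np
import chainer.links as L

link = L.NegativeSampling(in_size=100, counts=[5, 4, 3, 2, 1], sample_size=2)
x = np.random.randn(3, 100).astype(np.float32)  # batch of input vectors
t = np.array([0, 1, 2], dtype=np.int32)         # positive (target) word ids
loss = link(x, t)   # scalar loss; negatives are drawn internally via WalkerAlias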
Example #7
    def __init__(self, in_size, counts, sample_size):
        vocab_size = len(counts)
        super(BlackOut, self).__init__(W=(vocab_size, in_size))
        p = numpy.array(counts, dtype=numpy.float32)
        self.sampler = walker_alias.WalkerAlias(p)
        self.sample_size = sample_size
Example #8
        for word in line.split():
            if word not in word2index:
                ind = len(word2index)
                word2index[word] = ind
                index2word[ind] = word
            counts[word2index[word]] += 1   # increment the unigram count (counts assumed to be a defaultdict(int))
            dataset.append(word2index[word])

n_vocab = len(word2index)
datasize = len(dataset)
print("num_of_vocab, datasize : ({0}, {1})".format(n_vocab, datasize))

cs = [counts[w] for w in range(len(counts))]
power = np.float32(0.75)
p = np.array(cs, power.dtype)
np.power(p, power, p)   # apply the smoothing exponent (otherwise `power` is unused)
sampler = walker_alias.WalkerAlias(p)  # negative-sample generator (probability distribution)

# define model

class MyW2V2(chainer.Chain):  # inherits from chainer.Chain
    def __init__(self, v, m):
        super(MyW2V2, self).__init__(  # initialize the parent Chain
            embed=L.EmbedID(v, m),     # v-word vocabulary, m-dimensional embeddings
        )
    def __call__(self, xb, eb, sampler, ngs):
        # xb: words, eb: distributed representations,
        # sampler: negative-sample generator, ngs: number of negative samples
        loss = None
        for i in range(len(xb)):
            x = Variable(np.array([xb[i]], dtype=np.int32))
            e = eb[i]
            ls = F.negative_sampling(e, x, self.embed.W, sampler, ngs)
            loss = ls if loss is None else loss + ls   # accumulate per-word losses
        return loss
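For reference, a minimal stand-alone call to F.negative_sampling with made-up values; note that the function expects a sampling callable, typically WalkerAlias.sample:

import numpy as np
import chainer.functions as F
from chainer.utils import walker_alias

sampler = walker_alias.WalkerAlias(np.power([10.0, 3.0, 2.0, 1.0], 0.75))
W = np.zeros((4, 8), dtype=np.float32)         # output word matrix
e = np.random.randn(1, 8).astype(np.float32)   # one input embedding
t = np.array([2], dtype=np.int32)              # its target word id
loss = F.negative_sampling(e, t, W, sampler.sample, 5)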
Example #9
if args.gpu >= 0:
    cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

#optimizer = optimizers.Adam()
optimizer = optimizers.AdaGrad()
#optimizer = optimizers.SGD()
optimizer.setup(model)
#optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

#====================
# model learning
#====================

sampler = walker_alias.WalkerAlias(np.power(cs, 0.75))  # smoothed unigram sampler
ng_size = args.negative_size
n_data = len(text_data)
n_win = args.window
bs = args.batchsize

for epoch in tqdm(range(args.epoch)):
    indexes = np.arange(n_win, n_data-n_win)
    np.random.shuffle(indexes)

    for n in range(0, len(indexes), bs):
        index = indexes[n:n+bs]

        context = []
        sentiment = []
Example #10
def _make_sampler(dataset: DataSet) -> walker_alias.WalkerAlias:
    _, counts = np.unique(dataset.data, return_counts=True)
    counts = np.power(counts, 0.75)   # word2vec-style smoothing
    return walker_alias.WalkerAlias(counts)
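A hedged sketch of calling _make_sampler; DataSet is assumed to expose a data array of integer ids, so a SimpleNamespace stands in for it here. Note that the sampled values index the sorted unique ids, which matches the original ids only when they run contiguously from 0:

import numpy as np
from types import SimpleNamespace

dataset = SimpleNamespace(data=np.array([0, 0, 1, 2, 2, 2]))  # stand-in DataSet
sampler = _make_sampler(dataset)
print(sampler.sample((4,)))   # four ids drawn with probability ∝ count**0.75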