Example 1
 def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
     sty_ids, aut_ids, wrd_ids = move(self.xp, rsty_ids, raut_ids, rwrd_ids)
     pivot_idx = next(move(self.xp, rwrd_ids[window: -window]))
     pivot = F.embed_id(pivot_idx, self.sampler.W)
     sty_at_pivot = rsty_ids[window: -window]
     aut_at_pivot = raut_ids[window: -window]
     sty = self.mixture_sty(next(move(self.xp, sty_at_pivot)))
     aut = self.mixture_aut(next(move(self.xp, aut_at_pivot)))
     loss = 0.0
     start, end = window, rwrd_ids.shape[0] - window
     context = sty + aut + F.dropout(pivot, self.dropout_ratio)
     for frame in range(-window, window + 1):
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Predict word given context and pivot word
         # The target starts before the pivot
         targetidx = rwrd_ids[start + frame: end + frame]
         sty_at_target = rsty_ids[start + frame: end + frame]
         aut_at_target = raut_ids[start + frame: end + frame]
         sty_is_same = sty_at_target == sty_at_pivot
         aut_is_same = aut_at_target == aut_at_pivot
         # Randomly dropout words (default is to never do this)
         rand = np.random.uniform(0, 1, sty_is_same.shape[0])
         mask = (rand > self.word_dropout_ratio).astype('bool')
         sty_and_aut_are_same = np.logical_and(sty_is_same, aut_is_same)
         weight = np.logical_and(sty_and_aut_are_same, mask).astype('int32')
         # If weight is 1.0 then targetidx
         # If weight is 0.0 then -1
         targetidx = targetidx * weight + -1 * (1 - weight)
         target, = move(self.xp, targetidx)
         loss = self.sampler(context, target)
         loss.backward()
     return loss.data
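The weight trick in Example 1 is easy to miss: a target that sits outside the pivot's story/author scope (or that is randomly dropped) gets mapped to -1, the ignore label that Chainer's NegativeSampling skips. A small standalone numpy sketch with made-up values:

    import numpy as np

    # Made-up values for one small batch of pivot positions.
    targetidx = np.array([28, 9, 2094, 16], dtype='int32')
    in_scope = np.array([True, True, False, True])   # same story AND author as the pivot?
    mask = np.ones(4, dtype=bool)                     # word dropout disabled (ratio 0.0)
    weight = np.logical_and(in_scope, mask).astype('int32')

    # weight == 1 keeps the target; weight == 0 replaces it with the ignore label -1,
    # so the sampler contributes no loss for that position.
    targetidx = targetidx * weight + -1 * (1 - weight)
    assert targetidx.tolist() == [28, 9, -1, 16]      # the out-of-scope target is ignored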
Example 2
 def fit_partial(self, rdoc_ids, rword_indices, window=5):
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)
     pivot_idx = next(move(self.xp, rword_indices[window:-window]))
     pivot = F.embed_id(pivot_idx, self.sampler.W)
     doc_at_pivot = rdoc_ids[window:-window]
     doc = self.mixture(next(move(self.xp, doc_at_pivot)))
     loss = 0.0
     start, end = window, rword_indices.shape[0] - window
     context = (F.dropout(doc, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     for frame in range(-window, window + 1):
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Predict word given context and pivot word
         # The target starts before the pivot
         targetidx = rword_indices[start + frame:end + frame]
         doc_at_target = rdoc_ids[start + frame:end + frame]
         doc_is_same = doc_at_target == doc_at_pivot
         rand = np.random.uniform(0, 1, doc_is_same.shape[0])
         mask = (rand > self.word_dropout_ratio).astype('bool')
         weight = np.logical_and(doc_is_same, mask).astype('int32')
         # If weight is 1.0 then targetidx
         # If weight is 0.0 then -1
         targetidx = targetidx * weight + -1 * (1 - weight)
         target, = move(self.xp, targetidx)
         loss = self.sampler(context, target)
         loss.backward()
     return loss.data
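The slicing in Example 2 is the heart of the batching: the pivots are the words that keep a full window on both sides, and shifting the same slice by frame pairs each pivot with its neighbour at that offset (which is why "the target starts before the pivot" for negative frames). A toy numpy check, using array positions as stand-in word ids so the relationship is easy to see:

    import numpy as np

    rword_indices = np.arange(12)   # toy flattened word stream: value == position
    window = 2
    start, end = window, rword_indices.shape[0] - window

    pivots = rword_indices[window:-window]   # words with a full window on both sides
    for frame in range(-window, window + 1):
        if frame == 0:
            continue                          # skip the pivot itself
        targets = rword_indices[start + frame:end + frame]
        # each pivot is paired with the word `frame` positions away
        assert np.all(targets == pivots + frame)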
Example 3
    def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
        sty_ids, aut_ids, wrd_ids = move(self.xp, rsty_ids, raut_ids, rwrd_ids)
        pivot_idx = next(move(self.xp, rwrd_ids[window: -window]))
        pivot = F.embed_id(pivot_idx, self.sampler.W)
        sty_at_pivot = rsty_ids[window: -window]
        aut_at_pivot = raut_ids[window: -window]
        sty = self.mixture_sty(next(move(self.xp, sty_at_pivot)))
#        aut = self.mixture_aut(next(move(self.xp, aut_at_pivot)))
        loss = 0.0
        start, end = window, rwrd_ids.shape[0] - window
        context = F.dropout(pivot, self.dropout_ratio) # + aut + sty
        for frame in range(-window, window + 1):
            # Skip predicting the current pivot
            if frame == 0:
                continue
            # Predict word given context and pivot word
            # The target starts before the pivot
            targetidx = rwrd_ids[start + frame: end + frame]
            sty_at_target = rsty_ids[start + frame: end + frame]
#            aut_at_target = raut_ids[start + frame: end + frame]
            sty_is_same = sty_at_target == sty_at_pivot
#            aut_is_same = aut_at_target == aut_at_pivot
            # Randomly dropout words (default is to never do this)
            rand = np.random.uniform(0, 1, sty_is_same.shape[0])
            mask = (rand > self.word_dropout_ratio).astype('bool')
#            sty_and_aut_are_same = np.logical_and(sty_is_same, aut_is_same)
#            weight = np.logical_and(sty_and_aut_are_same, mask).astype('int32')
            # If weight is 1.0 then targetidx
            # If weight is 0.0 then -1
            targetidx = targetidx # * weight + -1 * (1 - weight)
            target, = move(self.xp, targetidx)
            loss = self.sampler(context, target)
            loss.backward()
        return loss.data
Example 4
 def fit_partial(self, rdoc_ids, rword_indices, window=5):
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)
     pivot_idx = next(move(self.xp, rword_indices[window: -window]))
     pivot = F.embed_id(pivot_idx, self.sampler.W)
     doc_at_pivot = rdoc_ids[window: -window]
     doc = self.mixture(next(move(self.xp, doc_at_pivot)))
     loss = 0.0
     start, end = window, rword_indices.shape[0] - window
     context = (F.dropout(doc, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     for frame in range(-window, window + 1):
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Predict word given context and pivot word
         # The target starts before the pivot
         targetidx = rword_indices[start + frame: end + frame]
         doc_at_target = rdoc_ids[start + frame: end + frame]
         doc_is_same = doc_at_target == doc_at_pivot
         rand = np.random.uniform(0, 1, doc_is_same.shape[0])
         mask = (rand > self.word_dropout_ratio).astype('bool')
         weight = np.logical_and(doc_is_same, mask).astype('int32')
         # If weight is 1.0 then targetidx
         # If weight is 0.0 then -1
         targetidx = targetidx * weight + -1 * (1 - weight)
         target, = move(self.xp, targetidx)
         loss = self.sampler(context, target)
         loss.backward()
     return loss.data
Example 5
 def forward(self, doc, wrd, window=5):
     doc, wrd = utils.move(self.xp, doc, wrd)
     proportions = self.proportions(doc)
     ld = dirichlet_likelihood(self.proportions.W)
     context = F.matmul(F.softmax(proportions), self.factors())
     loss = self.loss_func(context, wrd)
     return loss, ld
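Example 5 adds a dirichlet_likelihood term as a prior on the document-topic weights. The helper itself is not shown in these listings, but mathematically it is the (unnormalized) log-density of a symmetric Dirichlet evaluated at the softmax of the weights; alpha below 1 rewards sparse topic mixtures. A minimal numpy sketch of that quantity (the library's actual signature and sign convention may differ):

    import numpy as np

    def dirichlet_log_prior(weights, alpha=None):
        """Unnormalized log p(theta | alpha) for a symmetric Dirichlet,
        where theta = softmax(weights) are per-document topic proportions."""
        n_topics = weights.shape[1]
        if alpha is None:
            alpha = 1.0 / n_topics
        # numerically stable log-softmax over topics
        m = weights.max(axis=1, keepdims=True)
        log_theta = (weights - m) - np.log(np.exp(weights - m).sum(axis=1, keepdims=True))
        return ((alpha - 1.0) * log_theta).sum()

    weights = np.random.randn(3, 10)   # 3 documents, 10 topics
    print(dirichlet_log_prior(weights))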
Example 6
 def forward(self, ids, bow):
     bow, ids = utils.move(self.xp, bow, ids)
     proportions = self.proportions(ids)
     ld = dirichlet_likelihood(proportions)
     doc = F.matmul(F.softmax(proportions), self.factors())
     logp = F.dropout(self.embedding(doc))
     # loss = -F.sum(bow * F.log_softmax(logp))
     sources, targets, counts = [], [], []
     lpi = F.sum(bow * F.log_softmax(logp), axis=1)
     loss = -F.sum(lpi)
     return loss, ld
Example 7
 def forward(self, ids, bow):
     bow, ids = utils.move(self.xp, bow, ids)
     proportions = self.proportions(ids)
     ld = dirichlet_likelihood(proportions)
     doc = F.matmul(F.softmax(proportions), self.factors())
     logp = F.dropout(self.embedding(doc))
     # loss = -F.sum(bow * F.log_softmax(logp))
     sources, targets, counts = [], [], []
     lpi = F.sum(bow * F.log_softmax(logp), axis=1)
     loss = -F.sum(lpi)
     return loss, ld
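Examples 6 and 7 swap the per-token sampler for a bag-of-words objective: -F.sum(bow * F.log_softmax(logp)) is a count-weighted cross-entropy over the vocabulary. A quick numpy check of that identity with made-up counts and logits:

    import numpy as np

    bow = np.array([[2., 0., 1., 0.]])        # word counts for one document (vocab of 4)
    logp = np.array([[1.0, -0.5, 0.3, 0.0]])  # unnormalized log-probabilities

    # log-softmax over the vocabulary
    m = logp.max(axis=1, keepdims=True)
    log_softmax = (logp - m) - np.log(np.exp(logp - m).sum(axis=1, keepdims=True))

    loss = -(bow * log_softmax).sum()
    # the same loss written token by token: 2 * -log p(word 0) + 1 * -log p(word 2)
    per_token = 2 * -log_softmax[0, 0] + 1 * -log_softmax[0, 2]
    assert np.isclose(loss, per_token)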
Example 8
 def fit_partial(self, rdoc_ids, rword_indices, window=5,
                 update_words=False, update_topics=True):
 """ Function where all the training happens. Word vector training,
 topic vector training and the topic distribution is updated 
 """
 
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)
     pivot_idx = next(move(self.xp, rword_indices[window: -window]))
     pivot = F.embed_id(pivot_idx, self.sampler.W)
     if not update_words:
         pivot.unchain_backward()
     doc_at_pivot = rdoc_ids[window: -window]
     doc = self.mixture(next(move(self.xp, doc_at_pivot)),
                        update_only_docs=not update_topics)
     loss = 0.0
     start, end = window, rword_indices.shape[0] - window
     context = (F.dropout(doc, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     for frame in range(-window, window + 1):
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Predict word given context and pivot word
         # The target starts before the pivot
         targetidx = rword_indices[start + frame: end + frame]
         doc_at_target = rdoc_ids[start + frame: end + frame]
         doc_is_same = doc_at_target == doc_at_pivot
         rand = np.random.uniform(0, 1, doc_is_same.shape[0])
         mask = (rand > self.word_dropout_ratio).astype('bool')
         weight = np.logical_and(doc_is_same, mask).astype('int32')
         # If weight is 1.0 then targetidx
         # If weight is 0.0 then -1
         targetidx = targetidx * weight + -1 * (1 - weight)
         target, = move(self.xp, targetidx)
         loss = self.sampler(context, target)
         loss.backward()
         if not update_words:
             # Wipe out any gradient accumulation on word vectors
             self.sampler.W.grad *= 0.0
     return loss.data
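Example 8 freezes one part of the model at a time: unchain_backward() cuts the frozen branch out of the graph, and any gradient that still accumulates on the frozen parameter is wiped after backward(). A minimal sketch of the same pattern, using hypothetical link names (frozen, trainable) rather than the example's own class:

    import numpy as np
    import chainer
    import chainer.functions as F
    import chainer.links as L

    model = chainer.Chain()
    with model.init_scope():
        model.frozen = L.EmbedID(10, 4)     # e.g. pretrained word vectors
        model.trainable = L.EmbedID(3, 4)   # e.g. document weights

    word_vec = model.frozen(np.array([1, 2], dtype='int32'))
    word_vec.unchain_backward()             # stop gradients flowing into the frozen link
    context = word_vec + model.trainable(np.array([0, 0], dtype='int32'))
    loss = F.sum(context * context)

    model.cleargrads()
    loss.backward()
    # belt and braces: wipe anything that still reached the frozen parameter
    if model.frozen.W.grad is not None:
        model.frozen.W.grad *= 0.0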
Example 9
 def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
     doc_idx, usr_idx, wrd_idx = move(self.xp, rsty_ids, raut_ids, rwrd_ids)
     pivot = self.embed(next(move(self.xp, rwrd_ids[window: -window])))
     sty_at_pivot = rsty_ids[window: -window]
     aut_at_pivot = raut_ids[window: -window]
     sty = self.mixture_stories(next(move(self.xp, sty_at_pivot)))
     aut = self.mixture_authors(next(move(self.xp, aut_at_pivot)))
     start, end = window, rwrd_ids.shape[0] - window
     context = (F.dropout(sty, self.dropout_ratio) +
                F.dropout(aut, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     n_frame = 2 * window
     # Precompute all neg samples since they're indep of frame
     size = context.data.shape[0]
     samples = self.sampler.sampler.sample((self.n_samples * n_frame, size))
     samples = chainer.cuda.cupy.split(samples.ravel(), n_frame)
     sources = []
     targets = []
     weights = []
     for frame in range(-window, window + 1):
         # Predict word given context and pivot word
         # The target starts before the pivot
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Here we're creating a weight mask. We don't want to
         # predict tokens that are outside this document or user
         # scope.
         wrd_at_target = rwrd_ids[start + frame: end + frame]
         sty_at_target = rsty_ids[start + frame: end + frame]
         aut_at_target = raut_ids[start + frame: end + frame]
         sty_is_same = sty_at_target == sty_at_pivot
         usr_is_same = aut_at_target == aut_at_pivot
         is_same = sty_is_same & usr_is_same
         weight, = move(self.xp, is_same.astype('float32'))
         target, = move(self.xp, wrd_at_target)
         sources.append(context)
         targets.append(target)
         weights.append(weight)
         sample, = move(self.xp, samples.pop())
         targets.append(sample)
         for _ in range(self.n_samples):
             # Note that the context is now negative
             sources.append(-context)
             weights.append(weight)
     sources = F.concat(sources, axis=0)
     targets = F.concat(targets, axis=0)
     weights = F.concat(weights, axis=0)
     loss = self.loss(sources, targets, weights)
     return loss
Example 10
 def fit_partial(self, rdoc_ids, rword_indices, window=5):
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)
     pivot = self.embed(next(move(self.xp, rword_indices[window:-window])))
     doc_at_pivot = rdoc_ids[window:-window]
     doc = self.mixture(next(move(self.xp, doc_at_pivot)))
     loss = 0.0
     start, end = window, rword_indices.shape[0] - window
     context = (F.dropout(doc, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     n_frame = 2 * window
     # Precompute all neg samples since they're indep of frame
     size = context.data.shape[0]
     samples = self.sampler.sampler.sample((self.n_samples * n_frame, size))
     samples = chainer.cuda.cupy.split(samples.ravel(), n_frame)
     sources = []
     targets = []
     weights = []
     for frame in range(-window, window + 1):
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Predict word given context and pivot word
         # The target starts before the pivot
         targetidx = rword_indices[start + frame:end + frame]
         doc_at_target = rdoc_ids[start + frame:end + frame]
         doc_is_same = doc_at_target == doc_at_pivot
         rand = np.random.uniform(0, 1, doc_is_same.shape[0])
         mask = (rand > self.word_dropout_ratio).astype('bool')
         weight = np.logical_and(doc_is_same, mask)
         weight, = move(self.xp, weight.astype('float32'))
         target, = move(self.xp, targetidx)
         sources.append(context)
         targets.append(target)
         weights.append(weight)
         sample, = move(self.xp, samples.pop())
         targets.append(sample)
         for _ in range(self.n_samples):
             # Note that the context is now negative
             sources.append(-context)
             weights.append(weight)
     sources = F.concat(sources, axis=0)
     targets = F.concat(targets, axis=0)
     weights = F.concat(weights, axis=0)
     loss = self.loss(sources, targets, weights)
     return loss
Example 11
 def fit_partial(self, rdoc_ids, rword_indices, window=5):
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)
     pivot = self.embed(next(move(self.xp, rword_indices[window: -window])))
     doc_at_pivot = rdoc_ids[window: -window]
     doc = self.mixture(next(move(self.xp, doc_at_pivot)))
     loss = 0.0
     start, end = window, rword_indices.shape[0] - window
     context = (F.dropout(doc, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     n_frame = 2 * window
     # Precompute all neg samples since they're indep of frame
     size = context.data.shape[0]
     samples = self.sampler.sampler.sample((self.n_samples * n_frame, size))
     samples = chainer.cuda.cupy.split(samples.ravel(), n_frame)
     sources = []
     targets = []
     weights = []
     for frame in range(-window, window + 1):
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Predict word given context and pivot word
         # The target starts before the pivot
         targetidx = rword_indices[start + frame: end + frame]
         doc_at_target = rdoc_ids[start + frame: end + frame]
         doc_is_same = doc_at_target == doc_at_pivot
         rand = np.random.uniform(0, 1, doc_is_same.shape[0])
         mask = (rand > self.word_dropout_ratio).astype('bool')
         weight = np.logical_and(doc_is_same, mask)
         weight, = move(self.xp, weight.astype('float32'))
         target, = move(self.xp, targetidx)
         sources.append(context)
         targets.append(target)
         weights.append(weight)
         sample, = move(self.xp, samples.pop())
         targets.append(sample)
         for _ in range(self.n_samples):
             # Note that the context is now negative
             sources.append(-context)
             weights.append(weight)
     sources = F.concat(sources, axis=0)
     targets = F.concat(targets, axis=0)
     weights = F.concat(weights, axis=0)
     loss = self.loss(sources, targets, weights)
     return loss
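Examples 9, 10 and 11 batch the skip-gram loss differently from the earlier examples: instead of calling the sampler once per frame, they collect one positive row plus n_samples negated-context rows per frame and make a single loss call at the end. A small numpy sketch of the bookkeeping with made-up sizes shows why the concatenated sources, targets and weights stay aligned:

    import numpy as np

    size, dim = 4, 3            # tokens in the batch slice, embedding width
    n_samples, n_frame = 2, 4   # negatives per token, context positions

    context = np.ones((size, dim))
    sources, targets, weights = [], [], []
    for frame in range(n_frame):
        target = np.arange(size)                        # one positive target per token
        sample = np.zeros(n_samples * size, dtype=int)  # precomputed negatives for this frame
        weight = np.ones(size)

        sources.append(context)        # positive rows use the context as-is
        targets.append(target)
        weights.append(weight)
        targets.append(sample)         # negatives reuse the (negated) context
        for _ in range(n_samples):
            sources.append(-context)
            weights.append(weight)

    sources = np.concatenate(sources, axis=0)
    targets = np.concatenate(targets, axis=0)
    weights = np.concatenate(weights, axis=0)
    # every frame contributes (1 + n_samples) * size rows to each list
    assert len(sources) == len(targets) == len(weights) == (1 + n_samples) * size * n_frame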
Example 12
    def fit_partial(self,
                    rdoc_ids,
                    rword_indices,
                    window=5,
                    update_only_docs=False,
                    word2vec_only=False,
                    update_only_docs_topics=False):
        """ Compact indices of chunk words, from flattened
            (Pdb) len(rword_indices) -> 4096, batch size
            (Pdb) rword_indices.max() -> 4874, max word compact # in this chunk

            The document ids that the chunk words belong to (max 1660), from doc_ids:
            (Pdb) len(rdoc_ids) -> 4096, batch size
            (Pdb) rdoc_ids.max() -> 1660, max doc id in this chunk
        """

        if update_only_docs_topics:
            update_only_docs = False

        # Note that self.xp is the numpy module. The move function uses the
        # following statement to convert both rdoc_ids and rword_indices into
        # Chainer Variables:
        # ---> yield Variable(xp.asarray(arg, dtype='float32'))
        #
        # so doc_ids and word_indices are just Variable wrappers around rdoc_ids
        # and rword_indices.
        # (Pdb) len(doc_ids.data) -> 4096
        # (Pdb) len(word_indices.data) -> 4096
        #
        # Note that doc_ids NOT IN USE
        doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)

        # pivot_idx is a Variable wrapper of rword_indices[window: -window]
        # (Pdb) len(pivot_idx.data) -> 4086; note that window is 5
        pivot_idx = next(move(self.xp, rword_indices[window:-window]))

        # (Pdb) pivot.data.shape -> (4086, 300)
        # Again the batch size is 4096, and the window slice is [5:-5]
        pivot = F.embed_id(pivot_idx, self.sampler.W)

        # max compact word index < compacted vocabulary size (4891)
        assert pivot_idx.data.max() < self.sampler.W.shape[0]

        # Note that we need to fine-tune the word2vec vectors from GoogleNews: we
        # never trained word2vec on twenty_newsgroups, so the context-word
        # prediction does not work well at the beginning.
        if update_only_docs or update_only_docs_topics:
            pivot.unchain_backward()

        # (Pdb) window -> 5
        # (Pdb) len(doc_at_pivot) -> 4086, 10 less than rdoc_ids
        # (Pdb) doc_at_pivot.max() -> 1660
        doc_at_pivot = rdoc_ids[window:-window]
        doc = self.mixture(next(move(self.xp, doc_at_pivot)),
                           update_only_docs=update_only_docs)
        if word2vec_only:
            doc.unchain_backward()
        loss = 0.0

        # (Pdb) start -> 5
        # (Pdb) rword_indices.shape[0] -> 4096
        # (Pdb) end -> 4091
        start, end = window, rword_indices.shape[0] - window

        # (Pdb) context.data.shape -> (4086, 300)
        if not update_only_docs_topics:
            context = (F.dropout(doc, self.dropout_ratio) +
                       F.dropout(pivot, self.dropout_ratio))
        else:
            context = F.dropout(doc, self.dropout_ratio)

        # from -5 to 5, that is:
        # Given the context vector (pivot word vector + doc-topic vector), predict
        # each target word in the window frame.
        # Note that we do this for every pivot in the whole batch.
        for frame in tqdm(range(-window, window + 1)):
            # Skip predicting the current pivot
            if frame == 0 and not update_only_docs_topics:
                continue

            # Predict word given context and pivot word
            # The target starts before the pivot.
            #
            # Initial round:
            # (Pdb) start + frame -> 5 + -5 -> 0
            # (Pdb) end + frame -> 4091 + -5 -> 4086
            #
            # Word compact indices
            targetidx = rword_indices[start + frame:end + frame]

            # Word's document IDs
            doc_at_target = rdoc_ids[start + frame:end + frame]

            # Since we flatten everything, words from all the different documents
            # end up in one array, so we need to make sure we only predict words
            # that lie in the same document as the pivot.
            #
            # Note that doc_at_pivot  is rdoc_ids[window:-window], i.e. rdoc_ids[5:4091],
            # and       doc_at_target is rdoc_ids[0:4086] in the first round.
            #
            # (Pdb) doc_is_same -> array([ True,  True,  True, ...,  True,  True,  True])
            # (Pdb) len(doc_is_same) -> 4086
            doc_is_same = doc_at_target == doc_at_pivot

            # Generate masks that exclude <SKIP> (compact id 0) and OOV (compact id 1)
            mask_SKIP = targetidx != np.array([0])
            mask_OOV = targetidx != np.array([1])
            assert True in mask_SKIP and True in mask_OOV

            # Generate drop-out mask
            # (Pdb) rand -> array([0.7982769 , 0.12706805, 0.77982534, ..., 0.69266078])
            rand = np.random.uniform(0, 1, doc_is_same.shape[0])
            # (Pdb) mask -> array([ True,  True,  True, ...,  True,  True,  True])
            mask = (rand > self.word_dropout_ratio).astype('bool')

            # (Pdb) weight -> array([1, 1, 1, ..., 1, 1, 1], dtype=int32)
            weight = np.logical_and(doc_is_same, mask)
            weight = np.logical_and(weight, mask_SKIP)
            weight = np.logical_and(weight, mask_OOV).astype('int32')

            # targetidx = target word indices
            # If weight is 1 then keep targetidx
            # If weight is 0 then use -1, the ignore label (not the <SKIP> token,
            # which has compact index 0)
            # (Pdb) targetidx -> array([  28,    9, 2094, ...,   16, 1357,   16])
            #
            # Note that this is skip-gram, from pivot word -> target context words
            # See NegativeSampling below for ignore label -1.
            chainer_nce_ignore_label = -1
            targetidx = targetidx * weight + chainer_nce_ignore_label * (
                1 - weight)
            target, = move(self.xp, targetidx)

            # context, word_vec + docu-topic_vec, -> target words in context
            #
            # (Pdb) context.shape -> (4086, 300), dtype('float32')
            # (Pdb) weight.shape -> (4086,), dtype('int32')
            # (Pdb) targetidx.shape -> (4086,), dtype('int64')
            # (Pdb) target.shape -> (4086,), dtype('int32')
            # (Pdb) pivot_idx.shape -> (4086,), dtype('int32')
            # (Pdb) pivot.shape -> (4086, 300), dtype('float32')
            #
            # REF
            # self.sampler.__call__ =
            # negative_sampling.negative_sampling(
            #       x, t, self.W, self.sampler.sample, self.sample_size,
            #       reduce='sum')
            # here:
            # context -> x (~chainer.Variable): Input of the weight matrix multiplication.
            # target -> t (~chainer.Variable): Batch of ground truth labels.
            # GoogleNews Embedding -> self.sampler.W.data
            # L.NegativeSampling -> sampler
            #
            # returns loss value, sum of all losses on the whole batchsize data.
            #
            # Source (https://github.com/chainer/chainer/blob/v3.4.0/chainer/functions/loss/negative_sampling.py#L315)
            # NegativeSamplingFunction(function_node.FunctionNode):
            #       ignore_label = -1
            #       target as t -- self.sampler.W --> w
            #       context as x OP w --> loss
            # note that (Pdb) self.sampler.W.data.shape -> (4891, 300)
            #
            # DEBUG
            # b chainer/functions/loss/negative_sampling.py:48
            loss = self.sampler(context, target)
            loss.backward()

            if update_only_docs or update_only_docs_topics:
                # Wipe out any gradient accumulation on word vectors
                # self.sampler.W.grad *= 0.0
                self.sampler.W.cleargrad()
            if word2vec_only and self.mixture.weights.W.grad is not None:
                assert self.mixture.weights.W.grad.min() == 0.0
                assert self.mixture.weights.W.grad.max() == 0.0
            if word2vec_only and self.mixture.factors.W.grad is not None:
                assert self.mixture.factors.W.grad.min() == 0.0
                assert self.mixture.factors.W.grad.max() == 0.0

        return loss.data
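All of these examples lean on a small move helper, and the comments in Example 12 quote its key line (yield Variable(xp.asarray(arg, dtype='float32'))). Because it is a generator, callers either iterate it, call next(move(...)) for a single array, or unpack with a trailing comma (target, = move(...)). A minimal sketch of such a helper, assuming it also casts integer index arrays to 'int32' as the Pdb dtypes above suggest:

    from chainer import Variable

    def move(xp, *args):
        """Yield each array wrapped as a Chainer Variable on the backend xp
        (numpy for CPU, cupy for GPU)."""
        for arg in args:
            if 'int' in str(arg.dtype):
                yield Variable(xp.asarray(arg, dtype='int32'))
            else:
                yield Variable(xp.asarray(arg, dtype='float32'))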
Example 13
 def observe(self, bow):
     bow, = utils.move(self.xp, bow * 1.0)
     sample, kl = self.encode(bow)
     rec = self.decode(sample, bow)
     return rec, kl