Example #1
    def test_mask_t(self):
        batsize = 5
        lstm = q.LSTMCell(9, 10)
        x_t = torch.randn(batsize, 9)
        mask_t = torch.tensor([1, 1, 0, 1, 0])
        c_tm1 = torch.randn(1, 10)
        h_tm1 = torch.randn(1, 10)
        lstm.c_0 = q.val(c_tm1).v
        lstm.y_0 = q.val(h_tm1).v
        y_t = lstm(x_t, mask_t=mask_t)
        self.assertEqual((batsize, 10), y_t.detach().numpy().shape)

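        # rows where mask_t == 0 (indices 2 and 4) must keep the initial state h_tm1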
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
        self.assertFalse(
            np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))

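        # the stored recurrent state lstm.y_tm1 should likewise be preserved at masked positions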
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(),
                        lstm.y_tm1[2].detach().numpy()))
        self.assertFalse(
            np.allclose(h_tm1[0].detach().numpy(),
                        lstm.y_tm1[1].detach().numpy()))
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(),
                        lstm.y_tm1[4].detach().numpy()))
Example #2
    def test_zoneout(self):
        batsize = 5
        lstm = q.LSTMCell(9, 10, zoneout=0.5)
        x_t = torch.randn(batsize, 9)
        c_tm1 = torch.randn(1, 10)
        y_tm1 = torch.randn(1, 10)
        lstm.c_0 = q.val(c_tm1).v
        lstm.y_0 = q.val(y_tm1).v
        y_t = lstm(x_t)
        self.assertEqual((5, 10), y_t.detach().numpy().shape)

        self.assertEqual(lstm.training, True)
        lstm.train(False)
        self.assertEqual(lstm.training, False)

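        # in eval mode zoneout is deterministic: two passes from the same reset state must match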
        lstm.rec_reset()
        pred1 = lstm(x_t)
        lstm.rec_reset()
        pred2 = lstm(x_t)

        self.assertTrue(
            np.allclose(pred1.detach().numpy(),
                        pred2.detach().numpy()))

        lstm.train(True)
        self.assertEqual(lstm.training, True)

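        # in training mode zoneout drops state units stochastically, so two passes should differ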
        lstm.rec_reset()
        pred1 = lstm(x_t)
        lstm.rec_reset()
        pred2 = lstm(x_t)

        self.assertFalse(
            np.allclose(pred1.detach().numpy(),
                        pred2.detach().numpy()))
Example #3
    def __init__(self,
                 outdic,
                 gen_out,
                 inpdic=None,
                 gen_zero=None,
                 gen_outD=None,
                 **kw):
        """
                :param outdic:          output dictionary, must contain all tokens in inpdic and gen_out.D
                :param gen_prob_comp:   module to compute probability of generating vs pointing
                                        must produce (batsize, 1) shapes
                :param gen_out:         module to compute generation scores.
                                            must have a dictionary accessible as ".D".
                                            must produce unnormalized scores (no softmax)
                :param inpdic:          input dictionary (for pointer)
                :param gen_zero:        None or set of tokens for which the gen_out's prob will be set to zero.
                                        All tokens should occur in inpdic (or their score will always be zero)
                :param gen_outD:        if set, gen_out must not have a ".D"
                :param kw:
                """
        super(PointerGeneratorOut, self).__init__(**kw)
        self.gen_out = gen_out
        self.D = outdic
        self.gen_outD = self.gen_out.D if gen_outD is None else gen_outD
        self.outsize = max(outdic.values()) + 1
        self.gen_to_out = q.val(
            torch.zeros(1, max(self.gen_outD.values()) + 1,
                        dtype=torch.int64)).v
        # --> where in out to scatter every element of the gen
        self.gen_zero_mask = None if gen_zero is None else \
            q.val(torch.ones_like(self.gen_to_out, dtype=torch.float32)).v
        # (1, genvocsize), integer ids in outvoc, one-to-one mapping
        # if symbol in gendic is not in outdic, throws error
        for k, v in self.gen_outD.items():
            if k in outdic:
                self.gen_to_out[0, v] = outdic[k]
                if gen_zero is not None:
                    if k in gen_zero:
                        self.gen_zero_mask[0, v] = 0
            else:
                raise q.SumTingWongException(
                    "symbols in gen_outD must be in outdic, but \"{}\" isn't".
                    format(k))

        self.inp_to_out = q.val(
            torch.zeros(max(inpdic.values()) + 1, dtype=torch.int64)).v
        # --> where in out to scatter every element of the inp
        # (1, inpvocsize), integer ids in outvoc, one-to-one mapping
        # if symbol in inpdic is not in outdic, throws error
        for k, v in inpdic.items():
            if k in outdic:
                self.inp_to_out[v] = outdic[k]
            else:
                raise q.SumTingWongException(
                    "symbols in inpdic must be in outdic, but \"{}\" isn't".
                    format(k))
        self.sm = torch.nn.Softmax(-1)
        self._reset()
        self.check()
Example #4
    def test_mask_t(self):
        batsize = 5
        gru = q.GRUCell(9, 10)
        x_t = torch.randn(batsize, 9)
        mask_t = torch.tensor([1, 1, 0, 1, 0])
        h_tm1 = torch.randn(1, 10)
        gru.h_0 = q.val(h_tm1).v
        y_t = gru(x_t, mask_t=mask_t)
        self.assertEqual((batsize, 10), y_t.detach().numpy().shape)

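        # masked rows (mask_t == 0, i.e. indices 2 and 4) must keep the initial hidden state h_tm1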
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
        self.assertFalse(
            np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))

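        # the cell's stored state gru.h_tm1 is preserved at masked positions as well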
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(),
                        gru.h_tm1[2].detach().numpy()))
        self.assertFalse(
            np.allclose(h_tm1[0].detach().numpy(),
                        gru.h_tm1[1].detach().numpy()))
        self.assertTrue(
            np.allclose(h_tm1[0].detach().numpy(),
                        gru.h_tm1[4].detach().numpy()))
Example #5
    def test_zoneout(self):
        batsize = 5
        gru = q.GRUCell(9, 10, zoneout=0.5)
        x_t = torch.randn(batsize, 9)
        h_tm1 = torch.randn(1, 10)
        gru.h_0 = q.val(h_tm1).v
        y_t = gru(x_t)
        self.assertEqual((5, 10), y_t.detach().numpy().shape)

        self.assertEqual(gru.training, True)
        gru.train(False)
        self.assertEqual(gru.training, False)

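        # in eval mode zoneout is deterministic: repeated passes from the same reset state must agree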
        gru.rec_reset()
        pred1 = gru(x_t)
        gru.rec_reset()
        pred2 = gru(x_t)

        self.assertTrue(
            np.allclose(pred1.detach().numpy(),
                        pred2.detach().numpy()))

        gru.train(True)
        self.assertEqual(gru.training, True)

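        # in training mode zoneout is stochastic, so repeated passes should differ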
        gru.rec_reset()
        pred1 = gru(x_t)
        gru.rec_reset()
        pred2 = gru(x_t)

        self.assertFalse(
            np.allclose(pred1.detach().numpy(),
                        pred2.detach().numpy()))
Example #6
    def test_lstm_shapes(self):
        batsize = 5
        lstm = q.LSTMCell(9, 10)
        x_t = torch.randn(batsize, 9)
        c_tm1 = torch.randn(1, 10)
        y_tm1 = torch.randn(1, 10)
        lstm.c_0 = q.val(c_tm1).v
        lstm.y_0 = q.val(y_tm1).v

        y_t = lstm(x_t)
        self.assertEqual((5, 10), y_t.detach().numpy().shape)

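        # without a mask, the stored state y_tm1 equals the latest output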
        self.assertTrue(
            np.allclose(lstm.y_tm1.detach().numpy(),
                        y_t.detach().numpy()))

        q.rec_reset(lstm)
Example #7
    def test_gru_shapes(self):
        batsize = 5
        gru = q.GRUCell(9, 10)
        x_t = torch.randn(batsize, 9)
        h_tm1 = torch.randn(1, 10)
        gru.h_0 = q.val(h_tm1).v
        y_t = gru(x_t)
        self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
Example #8
    def __init__(self, wordlinout, wdic, **kw):
        D = wordlinout.D
        # assert (self.raretoken in D)  # must have rareid in D to map extra words to it
        # assert(wordlinout.raretoken in wdic)
        super(AdaptedWordLinout, self).__init__(wdic, **kw)
        self.inner = wordlinout

        self.cosnorm = wordlinout.cosnorm

        rareid_new2old = D[
            wordlinout.raretoken] if wordlinout.raretoken in D else 0
        rareid_old2new = wdic[self.raretoken] if self.raretoken in wdic else 0

        self.new_to_old_d = {
            v: D[k] if k in D else rareid_new2old
            for k, v in wdic.items()
        }
        # mapping from new indexes (wdic) to old indexes (wordlinout)
        self.old_to_new_d = {
            v: wdic[k] if k in wdic else rareid_old2new
            for k, v in D.items()
        }
        # mapping from old indexes (wordlinout) to new indexes (wdic)

        numnew = max(self.new_to_old_d.keys()) + 1
        numold = max(self.old_to_new_d.keys()) + 1

        new_to_old = np.zeros((numnew, ), dtype="int64")
        for i in range(new_to_old.shape[0]):
            j = self.new_to_old_d[
                i] if i in self.new_to_old_d else rareid_new2old
            new_to_old[i] = j
        self.new_to_old = q.val(
            new_to_old).v  # for every new dic word id, contains old dic id
        # index in new dic contains idx value of old dic
        # --> used to slice from matrix in old idxs to get matrix in new idxs

        old_to_new = np.zeros((numold, ), dtype="int64")
        for i in range(old_to_new.shape[0]):
            j = self.old_to_new_d[
                i] if i in self.old_to_new_d else rareid_old2new
            old_to_new[i] = j
        self.old_to_new = q.val(
            old_to_new).v  # for every old dic word id, contains new dic id
Example #9
    def __init__(self,
                 dim=50,
                 worddic=None,
                 keepvanilla=None,
                 path=None,
                 gradfracs=(1., 1.),
                 **kw):
        """
        :param dim:         embedding dimension
        :param worddic:     which words to create embeddings for, must map from strings to ids
        :param keepvanilla: set of words which will be kept in the vanilla set of vectors
                            even if they occur in pretrained embeddings
        :param path:        where to load pretrained word vectors from
        :param gradfracs:   tuple (vanilla_frac, pretrained_frac)
        :param kw:
        """
        super(PartiallyPretrainedWordEmb, self).__init__(dim=dim,
                                                         worddic=worddic,
                                                         **kw)
        path = self._get_path(dim, path=path)
        value, wdic = self.loadvalue(path,
                                     dim,
                                     indim=None,
                                     worddic=None,
                                     maskid=None,
                                     rareid=None)
        value = torch.tensor(value)
        self.mixmask = q.val(np.zeros((len(self.D), ), dtype="float32")).v

        for k, v in self.D.items():
            if k in wdic and (keepvanilla is None or k not in keepvanilla):
                self.embedding.weight[v, :] = value[wdic[k], :]
                self.mixmask[v] = 1

        self.embedding.weight = torch.nn.Parameter(self.embedding.weight)

        self.gradfrac_vanilla, self.gradfrac_pretrained = gradfracs

        def apply_gradfrac(grad):
            if self.gradfrac_vanilla != 1.:
                grad = grad * (
                    (1 - self.mixmask.unsqueeze(1)) *
                    q.v(self.gradfrac_vanilla) + self.mixmask.unsqueeze(1))
            if self.gradfrac_pretrained != 1.:
                grad = grad * (
                    self.mixmask.unsqueeze(1) * q.v(self.gradfrac_pretrained) +
                    (1 - self.mixmask.unsqueeze(1)))
            return grad

        self.embedding.weight.register_hook(apply_gradfrac)
Example #10
    def __init__(self, wordemb, wdic, **kw):
        D = wordemb.D
        # assert(wordemb.raretoken in D)     # must have rareid in D to map extra words to it
        super(AdaptedWordEmb, self).__init__(wdic, **kw)
        self.inner = wordemb

        rareid = D[wordemb.raretoken] if wordemb.raretoken in D else 0

        # maps all idx from wdic (new) to idx in wordemb.D (old)
        # maps words from wdic (new) that are missing in wordemb.D (old)
        #   to wordemb.D's rare id

        self.ad = {v: D[k] if k in D else rareid for k, v in wdic.items()}

        valval = np.ones((max(self.ad.keys()) + 1, ), dtype="int64")
        for i in range(valval.shape[0]):
            valval[i] = self.ad[i] if i in self.ad else rareid
        self.adb = q.val(valval).v
Example #11
    def __init__(self, base, override, which=None, whichnot=None, **kw):
        super(OverriddenWordVecBase, self).__init__(base.D)
        self.base = base
        self.over = override.adapt(base.D)
        self.vecdim = self.base.vecdim
        assert (not (which is not None and whichnot is not None))
        numout = max(base.D.values()) + 1
        whichnot = set() if whichnot is None else whichnot  # keep an explicitly passed whichnot

        overridemask_val = np.zeros((numout, ), dtype="float32")
        if which is None:  # which: list of words to override
            for k, v in base.D.items():  # for all symbols in base dic
                if k in override.D and k not in whichnot:  # if also in override dic
                    overridemask_val[v] = 1
        else:
            for k in which:
                if k in override.D:  # TODO: if k from which is missing from base.D
                    overridemask_val[base.D[k]] = 1
        self.overridemask = q.val(overridemask_val).v
Example #12
    def __init__(self,
                 data=None,
                 computer=None,
                 worddic=None,
                 bias=False,
                 cosnorm=False):
        """
        WordLinout that computes the weight matrix of the Linear transformation dynamically
        based on provided data and computer.
        :param data:    numpy array, 2D or more, one symbol data per row. Automatically wrapped so watch the dtype
        :param computer: module that builds vectors for rows of data
        :param worddic: token dictionary from token to id
        :param bias: (optional) use bias (not computed)
        """
        super(ComputedWordLinout, self).__init__(worddic)
        self.data = q.val(torch.tensor(data)).v
        self.computer = computer
        # TODO: batches for computer???

        wdvals = list(worddic.values())
        assert (min(wdvals) >= 0)  # word ids must be positive

        # extract maskid and rareid from worddic
        maskid = worddic[self.masktoken] if self.masktoken in worddic else None
        rareid = worddic[self.raretoken] if self.raretoken in worddic else None

        self.outdim = max(worddic.values()) + 1
        self.cosnorm = cosnorm
        if cosnorm and bias:
            print("disabling bias because cosnorm")
            bias = False
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(self.outdim))
        else:
            self.register_parameter("bias", None)
        self.reset_parameters()
        self.base_weight = None  # zero weight