def test_mask_t(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10)
    x_t = torch.randn(batsize, 9)
    mask_t = torch.tensor([1, 1, 0, 1, 0])
    c_tm1 = torch.randn(1, 10)
    h_tm1 = torch.randn(1, 10)
    lstm.c_0 = q.val(c_tm1).v
    lstm.y_0 = q.val(h_tm1).v
    y_t = lstm(x_t, mask_t=mask_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # positions where mask_t == 0 must keep the previous state h_tm1 ...
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))
    # ... both in the output and in the stored recurrent state
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[4].detach().numpy()))
def test_zoneout(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10, zoneout=0.5)
    x_t = torch.randn(batsize, 9)
    c_tm1 = torch.randn(1, 10)
    y_tm1 = torch.randn(1, 10)
    lstm.c_0 = q.val(c_tm1).v
    lstm.y_0 = q.val(y_tm1).v
    y_t = lstm(x_t)
    self.assertEqual((5, 10), y_t.detach().numpy().shape)
    self.assertEqual(lstm.training, True)
    # in eval mode, zoneout must be deterministic
    lstm.train(False)
    self.assertEqual(lstm.training, False)
    lstm.rec_reset()
    pred1 = lstm(x_t)
    lstm.rec_reset()
    pred2 = lstm(x_t)
    self.assertTrue(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
    # in training mode, zoneout is stochastic, so repeated runs differ
    lstm.train(True)
    self.assertEqual(lstm.training, True)
    lstm.rec_reset()
    pred1 = lstm(x_t)
    lstm.rec_reset()
    pred2 = lstm(x_t)
    self.assertFalse(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
def __init__(self, outdic, gen_out, inpdic=None, gen_zero=None,
             gen_outD=None, **kw):
    """
    :param outdic:        output dictionary, must contain all tokens in inpdic and gen_out.D
    :param gen_prob_comp: module to compute probability of generating vs pointing,
                          must produce (batsize, 1) shapes
    :param gen_out:       module to compute generation scores.
                          must have a dictionary accessible as ".D".
                          must produce unnormalized scores (no softmax)
    :param inpdic:        input dictionary (for pointer)
    :param gen_zero:      None or set of tokens for which the gen_out's prob will be set to zero.
                          All tokens should occur in inpdic (or their score will always be zero)
    :param gen_outD:      if set, gen_out must not have a ".D"
    :param kw:
    """
    super(PointerGeneratorOut, self).__init__(**kw)
    self.gen_out = gen_out
    self.D = outdic
    self.gen_outD = self.gen_out.D if gen_outD is None else gen_outD
    self.outsize = max(outdic.values()) + 1

    # --> where in out to scatter every element of the gen
    self.gen_to_out = q.val(torch.zeros(1, max(self.gen_outD.values()) + 1,
                                        dtype=torch.int64)).v
    self.gen_zero_mask = None if gen_zero is None else \
        q.val(torch.ones_like(self.gen_to_out, dtype=torch.float32)).v

    # (1, genvocsize), integer ids in outvoc, one-to-one mapping
    # if symbol in gendic is not in outdic, throws error
    for k, v in self.gen_outD.items():
        if k in outdic:
            self.gen_to_out[0, v] = outdic[k]
            if gen_zero is not None:
                if k in gen_zero:
                    self.gen_zero_mask[0, v] = 0
        else:
            raise q.SumTingWongException(
                "symbols in gen_outD must be in outdic, but \"{}\" isn't".format(k))

    # --> where in out to scatter every element of the inp
    self.inp_to_out = q.val(torch.zeros(max(inpdic.values()) + 1,
                                        dtype=torch.int64)).v

    # (1, inpvocsize), integer ids in outvoc, one-to-one mapping
    # if symbol in inpdic is not in outdic, throws error
    for k, v in inpdic.items():
        if k in outdic:
            self.inp_to_out[v] = outdic[k]
        else:
            raise q.SumTingWongException(
                "symbols in inpdic must be in outdic, but \"{}\" isn't".format(k))

    self.sm = torch.nn.Softmax(-1)
    self._reset()
    self.check()
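# Hedged usage sketch (assumption: the library is importable as `q`, e.g. `import qelos as q`,
# and exposes PointerGeneratorOut). It only illustrates how the three dictionaries relate:
# outdic covers every output token, gen_out.D the generatable tokens, inpdic the copyable
# (pointer) tokens. The tiny GenOut scorer below is hypothetical, not part of the library.
def _pointer_generator_out_example():
    import torch
    import qelos as q

    outdic = {"<mask>": 0, "<rare>": 1, "a": 2, "b": 3, "c": 4}
    gendic = {"<mask>": 0, "<rare>": 1, "a": 2, "b": 3}      # generatable subset of outdic
    inpdic = {"<mask>": 0, "<rare>": 1, "b": 2, "c": 3}      # pointable subset of outdic

    class GenOut(torch.nn.Module):       # hypothetical scorer with a ".D"
        def __init__(self, indim, D):
            super(GenOut, self).__init__()
            self.D = D
            self.lin = torch.nn.Linear(indim, max(D.values()) + 1)

        def forward(self, x):
            return self.lin(x)           # unnormalized scores, no softmax

    return q.PointerGeneratorOut(outdic, GenOut(8, gendic), inpdic=inpdic)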
def test_mask_t(self):
    batsize = 5
    gru = q.GRUCell(9, 10)
    x_t = torch.randn(batsize, 9)
    mask_t = torch.tensor([1, 1, 0, 1, 0])
    h_tm1 = torch.randn(1, 10)
    gru.h_0 = q.val(h_tm1).v
    y_t = gru(x_t, mask_t=mask_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # positions where mask_t == 0 must keep the previous state h_tm1 ...
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))
    # ... both in the output and in the stored recurrent state
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), gru.h_tm1[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), gru.h_tm1[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), gru.h_tm1[4].detach().numpy()))
def test_zoneout(self):
    batsize = 5
    gru = q.GRUCell(9, 10, zoneout=0.5)
    x_t = torch.randn(batsize, 9)
    h_tm1 = torch.randn(1, 10)
    gru.h_0 = q.val(h_tm1).v
    y_t = gru(x_t)
    self.assertEqual((5, 10), y_t.detach().numpy().shape)
    self.assertEqual(gru.training, True)
    # in eval mode, zoneout must be deterministic
    gru.train(False)
    self.assertEqual(gru.training, False)
    gru.rec_reset()
    pred1 = gru(x_t)
    gru.rec_reset()
    pred2 = gru(x_t)
    self.assertTrue(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
    # in training mode, zoneout is stochastic, so repeated runs differ
    gru.train(True)
    self.assertEqual(gru.training, True)
    gru.rec_reset()
    pred1 = gru(x_t)
    gru.rec_reset()
    pred2 = gru(x_t)
    self.assertFalse(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
def test_lstm_shapes(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10)
    x_t = torch.randn(batsize, 9)
    c_tm1 = torch.randn(1, 10)
    y_tm1 = torch.randn(1, 10)
    lstm.c_0 = q.val(c_tm1).v
    lstm.y_0 = q.val(y_tm1).v
    y_t = lstm(x_t)
    self.assertEqual((5, 10), y_t.detach().numpy().shape)
    self.assertTrue(np.allclose(lstm.y_tm1.detach().numpy(), y_t.detach().numpy()))
    q.rec_reset(lstm)
def test_gru_shapes(self):
    batsize = 5
    gru = q.GRUCell(9, 10)
    x_t = torch.randn(batsize, 9)
    h_tm1 = torch.randn(1, 10)
    gru.h_0 = q.val(h_tm1).v
    y_t = gru(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
def __init__(self, wordlinout, wdic, **kw):
    D = wordlinout.D
    # assert (self.raretoken in D)            # must have rareid in D to map extra words to it
    # assert (wordlinout.raretoken in wdic)
    super(AdaptedWordLinout, self).__init__(wdic, **kw)
    self.inner = wordlinout
    self.cosnorm = wordlinout.cosnorm

    rareid_new2old = D[wordlinout.raretoken] if wordlinout.raretoken in D else 0
    rareid_old2new = wdic[self.raretoken] if self.raretoken in wdic else 0

    # mapping from new indexes (wdic) to old indexes (wordlinout)
    self.new_to_old_d = {v: D[k] if k in D else rareid_new2old
                         for k, v in wdic.items()}
    # mapping from old indexes (wordlinout) to new indexes (wdic)
    self.old_to_new_d = {v: wdic[k] if k in wdic else rareid_old2new
                         for k, v in D.items()}

    numnew = max(self.new_to_old_d.keys()) + 1
    numold = max(self.old_to_new_d.keys()) + 1

    new_to_old = np.zeros((numnew,), dtype="int64")
    for i in range(new_to_old.shape[0]):
        j = self.new_to_old_d[i] if i in self.new_to_old_d else rareid_new2old
        new_to_old[i] = j
    # for every new dic word id, contains old dic id
    # index in new dic contains idx value of old dic
    # --> used to slice from matrix in old idxs to get matrix in new idxs
    self.new_to_old = q.val(new_to_old).v

    old_to_new = np.zeros((numold,), dtype="int64")
    for i in range(old_to_new.shape[0]):
        j = self.old_to_new_d[i] if i in self.old_to_new_d else rareid_old2new
        old_to_new[i] = j
    # for every old dic word id, contains new dic id
    self.old_to_new = q.val(old_to_new).v
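# Hedged sketch (standalone, not part of the class above): illustrates what the new_to_old
# index vector is for. Indexing the old weight matrix with new_to_old yields a matrix laid
# out in new-dictionary order; words missing from the old dictionary fall back to the rare
# id (0 here). All names below are illustrative only.
def _adapted_linout_mapping_example():
    import numpy as np
    old_D = {"<rare>": 0, "a": 1, "b": 2}            # plays the role of wordlinout.D
    new_D = {"<rare>": 0, "b": 1, "c": 2}            # plays the role of wdic; "c" is unknown to old_D
    rareid_new2old = old_D["<rare>"]
    new_to_old = np.zeros((max(new_D.values()) + 1,), dtype="int64")
    for k, v in new_D.items():
        new_to_old[v] = old_D.get(k, rareid_new2old)
    old_weight = np.arange(6, dtype="float32").reshape(3, 2)   # (oldvocsize, dim)
    new_weight = old_weight[new_to_old]                        # (newvocsize, dim), new-dic order
    return new_to_old, new_weight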
def __init__(self, dim=50, worddic=None, keepvanilla=None, path=None,
             gradfracs=(1., 1.), **kw):
    """
    :param dim:         embedding dimension
    :param worddic:     which words to create embeddings for, must map from strings to ids
    :param keepvanilla: set of words which will be kept in the vanilla set of vectors
                        even if they occur in pretrained embeddings
    :param path:        where to load pretrained word vectors from
    :param gradfracs:   tuple (vanilla_frac, pretrained_frac)
    :param kw:
    """
    super(PartiallyPretrainedWordEmb, self).__init__(dim=dim, worddic=worddic, **kw)
    path = self._get_path(dim, path=path)
    value, wdic = self.loadvalue(path, dim, indim=None, worddic=None,
                                 maskid=None, rareid=None)
    value = torch.tensor(value)
    self.mixmask = q.val(np.zeros((len(self.D),), dtype="float32")).v

    for k, v in self.D.items():
        if k in wdic and (keepvanilla is None or k not in keepvanilla):
            self.embedding.weight[v, :] = value[wdic[k], :]
            self.mixmask[v] = 1

    self.embedding.weight = torch.nn.Parameter(self.embedding.weight)

    self.gradfrac_vanilla, self.gradfrac_pretrained = gradfracs

    def apply_gradfrac(grad):
        if self.gradfrac_vanilla != 1.:
            grad = grad * ((1 - self.mixmask.unsqueeze(1)) * q.v(self.gradfrac_vanilla)
                           + self.mixmask.unsqueeze(1))
        if self.gradfrac_pretrained != 1.:
            grad = grad * (self.mixmask.unsqueeze(1) * q.v(self.gradfrac_pretrained)
                           + (1 - self.mixmask.unsqueeze(1)))
        return grad

    self.embedding.weight.register_hook(apply_gradfrac)
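# Hedged sketch (standalone, not the class above): shows the mechanism the constructor
# relies on — a `register_hook` on the embedding weight that rescales gradient rows.
# Rows flagged in `mixmask` (pretrained) get one factor, the remaining (vanilla) rows
# another. All names and values below are illustrative only.
def _gradfrac_hook_example():
    import torch
    weight = torch.nn.Parameter(torch.randn(4, 3))
    mixmask = torch.tensor([0., 1., 0., 1.])        # 1 = pretrained row
    vanilla_frac, pretrained_frac = 1.0, 0.1

    def scale_grad(grad):
        m = mixmask.unsqueeze(1)                    # (4, 1), broadcasts over the embedding dim
        return grad * (m * pretrained_frac + (1 - m) * vanilla_frac)

    weight.register_hook(scale_grad)
    weight.sum().backward()
    return weight.grad        # gradients of pretrained rows are scaled by 0.1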
def __init__(self, wordemb, wdic, **kw):
    D = wordemb.D
    # assert (wordemb.raretoken in D)   # must have rareid in D to map extra words to it
    super(AdaptedWordEmb, self).__init__(wdic, **kw)
    self.inner = wordemb

    rareid = D[wordemb.raretoken] if wordemb.raretoken in D else 0

    # maps all idx from wdic (new) to idx in wordemb.D (old)
    # maps words from wdic (new) that are missing in wordemb.D (old)
    #   to wordemb.D's rare id
    self.ad = {v: D[k] if k in D else rareid for k, v in wdic.items()}

    valval = np.ones((max(self.ad.keys()) + 1,), dtype="int64")
    for i in range(valval.shape[0]):
        valval[i] = self.ad[i] if i in self.ad else rareid
    self.adb = q.val(valval).v
def __init__(self, base, override, which=None, whichnot=None, **kw):
    super(OverriddenWordVecBase, self).__init__(base.D)
    self.base = base
    self.over = override.adapt(base.D)
    self.vecdim = self.base.vecdim
    assert (not (which is not None and whichnot is not None))
    numout = max(base.D.values()) + 1
    whichnot = set() if whichnot is None else whichnot

    overridemask_val = np.zeros((numout,), dtype="float32")
    if which is None:                                       # which: list of words to override
        for k, v in base.D.items():                         # for all symbols in base dic
            if k in override.D and k not in whichnot:       # if also in override dic
                overridemask_val[v] = 1
    else:
        for k in which:
            if k in override.D:     # TODO: if k from which is missing from base.D
                overridemask_val[base.D[k]] = 1
    self.overridemask = q.val(overridemask_val).v
def __init__(self, data=None, computer=None, worddic=None, bias=False, cosnorm=False):
    """
    WordLinout that computes the weight matrix of the Linear transformation dynamically,
    based on the provided data and computer.
    :param data:     numpy array, 2D or more, one symbol's data per row.
                     Automatically wrapped, so watch the dtype
    :param computer: module that builds vectors for rows of data
    :param worddic:  token dictionary from token to id
    :param bias:     (optional) use bias (not computed)
    :param cosnorm:  (optional) use cosine-normalized scores (disables bias)
    """
    super(ComputedWordLinout, self).__init__(worddic)
    self.data = q.val(torch.tensor(data)).v
    self.computer = computer
    # TODO: batches for computer???

    wdvals = list(worddic.values())
    assert (min(wdvals) >= 0)   # word ids must be non-negative

    # extract maskid and rareid from worddic
    maskid = worddic[self.masktoken] if self.masktoken in worddic else None
    rareid = worddic[self.raretoken] if self.raretoken in worddic else None

    self.outdim = max(worddic.values()) + 1
    self.cosnorm = cosnorm
    if cosnorm and bias:
        print("disabling bias because cosnorm")
        bias = False
    if bias:
        self.bias = nn.Parameter(torch.FloatTensor(self.outdim))
    else:
        self.register_parameter("bias", None)
    self.reset_parameters()
    self.base_weight = None     # zero weight
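# Hedged sketch (standalone): the idea behind a computed word-linout — the output weight
# matrix is not a stored parameter but is produced by a "computer" from per-symbol data,
# and scores are an (optionally cosine-normalized) dot product with it. The tiny computer
# below (embedding + mean pooling) is illustrative, not the library's implementation.
def _computed_linout_example(cosnorm=False):
    import torch
    data = torch.randint(0, 7, (5, 3))        # (vocsize, seqlen): one symbol's data per row
    emb = torch.nn.Embedding(7, 4)
    weight = emb(data).mean(1)                # computed weight matrix, (vocsize, dim)
    x = torch.randn(2, 4)                     # batch of input vectors
    if cosnorm:
        x = torch.nn.functional.normalize(x, dim=-1)
        weight = torch.nn.functional.normalize(weight, dim=-1)
    scores = x @ weight.t()                   # (batsize, vocsize) unnormalized scores
    return scores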