def setUp(self):
    """Create one decoder with attention, one without, and random fixtures.

    Both decoders share the same layer layout (one-hot embedding followed by
    a GRU whose input width is ``vocsize + encdim``); only ``self.decwatt``
    receives the attention block.
    """
    vocab_size, state_dim, ctx_dim = 10, 50, 30
    steps, batch = 5, 77
    gru_in = vocab_size + ctx_dim

    attgen = LinearSumAttentionGenerator(indim=state_dim + ctx_dim)
    self.att = Attention(attgen, WeightedSumAttCon())

    layers_with_att = [
        IdxToOneHot(vocab_size),
        GRU(dim=gru_in, innerdim=state_dim),
    ]
    self.decwatt = SeqDecoder(
        layers_with_att,
        inconcat=True,
        attention=self.att,
        innerdim=state_dim)

    layers_without_att = [
        IdxToOneHot(vocab_size),
        GRU(dim=gru_in, innerdim=state_dim),
    ]
    self.decwoatt = SeqDecoder(
        layers_without_att,
        inconcat=True,
        innerdim=state_dim)

    # Random inputs: a per-timestep context, a single context vector and
    # integer symbol sequences drawn from [0, vocab_size).
    self.attdata = np.random.random(
        (batch, steps, ctx_dim)).astype("float32")
    self.data = np.random.random((batch, ctx_dim)).astype("float32")
    self.seqdata = np.random.randint(0, vocab_size, (batch, steps))
    self.predshape = (batch, steps, vocab_size)
def __init__(self, innerdim=50, input_vocsize=100, output_vocsize=100, **kw):
    """Wire a one-hot GRU encoder to a one-hot GRU decoder.

    :param innerdim: hidden size used by both the encoder and decoder GRU.
    :param input_vocsize: number of input symbols (one-hot width).
    :param output_vocsize: number of output symbols (one-hot width).
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(SimpleEncoderDecoder, self).__init__(**kw)

    src_onehot = IdxToOneHot(input_vocsize)
    tgt_onehot = IdxToOneHot(output_vocsize)
    enc_gru = GRU(dim=input_vocsize, innerdim=innerdim)
    dec_gru = GRU(dim=output_vocsize, innerdim=innerdim)

    self.enc = SeqEncoder(src_onehot, enc_gru)
    # The decoder is told its inner dimension is twice the GRU size and is
    # configured with outconcat=True / inconcat=False.
    self.dec = SeqDecoder(
        [tgt_onehot, dec_gru],
        outconcat=True,
        inconcat=False,
        innerdim=innerdim + innerdim)
def __init__(self, vocsize=25, outvocsize=25, encdim=200, innerdim=200, attdim=50, **kw):
    """Build a sequence encoder and an attention-equipped decoder.

    :param vocsize: input vocabulary size (one-hot width for the encoder).
    :param outvocsize: output vocabulary size (one-hot width for the decoder).
    :param encdim: hidden size of the encoder GRU.
    :param innerdim: hidden size of the decoder GRU.
    :param attdim: inner size of the gate attention generator.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(RewAttSumDecoder, self).__init__(**kw)

    encoder = SeqEncoder(
        IdxToOneHot(vocsize),
        GRU(dim=vocsize, innerdim=encdim))
    self.rnn = encoder.all_outputs

    attgen = LinearGateAttentionGenerator(
        indim=innerdim + encdim, innerdim=attdim)
    attcon = WeightedSumAttCon()

    # Decoder layers are created before the Attention wrapper, as in the
    # original single-expression call.
    declayers = [
        IdxToOneHot(outvocsize),
        GRU(dim=outvocsize + encdim, innerdim=innerdim),
    ]
    self.dec = SeqDecoder(
        declayers,
        inconcat=True,
        attention=Attention(attgen, attcon),
        innerdim=innerdim)
def __init__(self, vocsize=25, outvocsize=20, encdim=200, innerdim=200, attdim=50, **kw):
    """Build a decoder whose attention consumer is itself a GRU encoder.

    :param vocsize: input vocabulary size; also the width fed to the
        attention consumer's GRU.
    :param outvocsize: output vocabulary size (one-hot width for the decoder).
    :param encdim: hidden size of the attention consumer's GRU.
    :param innerdim: hidden size of the decoder GRU.
    :param attdim: inner size of the gate attention generator.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(FwdAttRNNEncDecoder, self).__init__(**kw)
    self.emb = IdxToOneHot(vocsize)

    attgen = LinearGateAttentionGenerator(
        indim=innerdim + vocsize, innerdim=attdim)
    # No embedder is given to this encoder (first argument is None).
    attcon = SeqEncoder(None, GRU(dim=vocsize, innerdim=encdim))

    declayers = [
        IdxToOneHot(outvocsize),
        GRU(dim=outvocsize, innerdim=innerdim),
    ]
    self.dec = SeqDecoder(
        declayers,
        outconcat=True,
        inconcat=False,
        attention=Attention(attgen, attcon),
        innerdim=innerdim + encdim)
def __init__(self, vocsize=25, encdim=200, innerdim=200, seqlen=50, **kw):
    """Set up the encoder/decoder pair of the RNN auto-encoder.

    :param vocsize: vocabulary size, shared by encoder and decoder.
    :param encdim: hidden size of the encoder GRU.
    :param innerdim: hidden size of the decoder GRU.
    :param seqlen: stored on the instance as ``self.seqlen``.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(RNNAutoEncoder, self).__init__(**kw)
    self.seqlen = seqlen

    enc_embed = IdxToOneHot(vocsize=vocsize)
    enc_gru = GRU(dim=vocsize, innerdim=encdim)
    self.encoder = SeqEncoder(enc_embed, enc_gru)

    # Decoder GRU input width is vocsize + encdim, matching inconcat=True.
    declayers = [
        IdxToOneHot(vocsize),
        GRU(dim=vocsize + encdim, innerdim=innerdim),
    ]
    self.decoder = SeqDecoder(declayers, innerdim=innerdim, inconcat=True)
def __init__(self, indim=400, inpembdim=50, inpemb=None, mode="concat",
             innerdim=100, numouts=1, maskid=0, bidir=False,
             maskmode=MaskMode.NONE, **kw):
    """Build the embedding and recurrent stack for a multi-vector encoder.

    :param indim: vocabulary size used when an embedder has to be created.
    :param inpembdim: embedding size; ``None`` selects a one-hot embedder
        of width ``indim``. Ignored when ``inpemb`` is a block, in which
        case ``inpemb.outdim`` is used instead.
    :param inpemb: ``None`` to create an embedder here, ``False`` to use no
        embedder at all (``None`` is handed to ``SeqEncoder``), or an
        existing embedding block exposing ``outdim``.
    :param mode: stored as ``self.mode``; not otherwise used here.
    :param innerdim: a single size or a sequence of layer sizes. The last
        entry is increased by ``numouts`` before the layers are built.
    :param numouts: number of outputs; ``self.outdim`` is set to
        ``lastdim * numouts``.
    :param maskid: passed to ``SeqEncoder.maskoptions`` and stored.
    :param bidir: forwarded to ``self.makernu``.
    :param maskmode: passed to ``SeqEncoder.maskoptions``.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(SimpleSeq2MultiVec, self).__init__(**kw)

    if inpemb is None:
        if inpembdim is None:
            inpemb = IdxToOneHot(indim)
            inpembdim = indim
        else:
            inpemb = VectorEmbed(indim=indim, dim=inpembdim)
    elif inpemb is False:
        inpemb = None
    else:
        inpembdim = inpemb.outdim

    # Work on a private list: the last entry is modified below, and doing
    # that on the caller's own list would silently change it for the caller
    # (and would fail outright on an immutable sequence such as a tuple).
    if issequence(innerdim):
        innerdim = list(innerdim)
    else:
        innerdim = [innerdim]
    innerdim[-1] += numouts

    rnn, lastdim = self.makernu(inpembdim, innerdim, bidir=bidir)

    self.outdim = lastdim * numouts
    self.maskid = maskid
    self.inpemb = inpemb
    self.numouts = numouts
    self.mode = mode

    if not issequence(rnn):
        rnn = [rnn]
    self.enc = SeqEncoder(inpemb, *rnn).maskoptions(maskid, maskmode)
    # Return value intentionally discarded, as in the original code.
    self.enc.all_outputs()
def __init__(self, indim=400, inpembdim=50, inpemb=None, innerdim=100, maskid=None, bidir=False, **kw):
    """Create the embedder and a chain of encoders, then initialise the parent.

    :param indim: vocabulary size used when an embedder has to be created.
    :param inpembdim: embedding size; ``None`` selects a one-hot embedder.
    :param inpemb: existing embedding block (its ``outdim`` is used), or
        ``None`` to create one here.
    :param innerdim: one size, or a sequence with one entry per encoder;
        each entry may itself be a single size or a sequence of layer sizes.
    :param maskid: forwarded to the parent constructor.
    :param bidir: forwarded to ``MakeRNU.make``.
    :param kw: forwarded unchanged to the parent constructor.
    """
    if inpemb is not None:
        inpembdim = inpemb.outdim
    elif inpembdim is None:
        inpemb = IdxToOneHot(indim)
        inpembdim = indim
    else:
        inpemb = VectorEmbed(indim=indim, dim=inpembdim)

    # A bare number means a single encoder.
    encoder_specs = innerdim if issequence(innerdim) else [innerdim]

    rnns = []
    lastdim = inpembdim
    for spec in encoder_specs:
        # A bare number means a one-layer encoder.
        layer_dims = spec if issequence(spec) else [spec]
        rnn, lastdim = MakeRNU.make(lastdim, layer_dims, bidir=bidir)
        rnns.append(rnn)

    self.outdim = lastdim
    super(SimpleSeqStar2Vec, self).__init__(inpemb, *rnns, maskid=maskid, **kw)
def __init__(self, indim=400, inpembdim=50, inpemb=None, innerdim=100, maskid=0, bidir=False, pool=False, **kw):
    """Create the embedder, recurrent stack and optional pooling block.

    :param indim: vocabulary size used when an embedder has to be created.
    :param inpembdim: embedding size; ``None`` selects a one-hot embedder.
    :param inpemb: ``False`` for no embedder, ``None`` to create one here,
        or an existing block exposing ``outdim``.
    :param innerdim: forwarded to ``self.makernu``.
    :param maskid: forwarded to the parent constructor.
    :param bidir: forwarded to ``self.makernu``.
    :param pool: only the exact value ``False`` disables pooling; any other
        value creates a max ``Pool`` over axis 1.
    :param kw: forwarded unchanged to the parent constructor.
    """
    if inpemb is False:
        inpemb = None
    elif inpemb is not None:
        inpembdim = inpemb.outdim
    elif inpembdim is None:
        inpemb = IdxToOneHot(indim)
        inpembdim = indim
    else:
        inpemb = VectorEmbed(indim=indim, dim=inpembdim)

    rnn, lastdim = self.makernu(inpembdim, innerdim, bidir=bidir)
    self.outdim = lastdim

    if pool is False:
        poolblock = None
    else:
        poolblock = Pool((None, ), axis=(1, ), mode="max")

    super(SimpleSeq2Vec, self).__init__(
        inpemb, rnn, maskid=maskid, pool=poolblock, **kw)
def test_memory_block_with_seq_encoder_dynamic_fail(self):
    """A memory block built without data must reject predict(idxs) alone."""
    vocab = 5
    outdim = 13
    gru = GRU(dim=vocab, innerdim=outdim)
    encoder = SeqEncoder(IdxToOneHot(vocsize=vocab), gru)
    dynmemb = MemoryBlock(encoder, outdim=outdim)
    idxs = [0, 2, 5]
    with self.assertRaises(AssertionError):
        dynmemb.predict(idxs)
def __init__(self, indim=220, outdim=200, maskid=0, **kw):
    """Build a one-hot + GRU sequence encoder with automatic masking.

    :param indim: number of characters (one-hot width and GRU input size).
    :param outdim: hidden size of the GRU.
    :param maskid: id passed to ``maskoptions`` together with
        ``MaskMode.AUTO``.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(WordEncoder, self).__init__(**kw)
    char_encoder = SeqEncoder(
        IdxToOneHot(indim),
        GRU(dim=indim, innerdim=outdim))
    self.enc = char_encoder.maskoptions(maskid, MaskMode.AUTO)
def test_output_shape_LSTM(self):
    """An LSTM-based SeqEncoder yields one vector of size ``dim`` per row."""
    batsize, seqlen = 100, 5
    dim, indim = 50, 13
    encoder = SeqEncoder(
        IdxToOneHot(indim),
        LSTM(dim=indim, innerdim=dim))
    data = np.random.randint(0, indim, (batsize, seqlen)).astype("int32")
    prediction = encoder.predict(data)
    self.assertEqual(prediction.shape, (batsize, dim))
def test_memory_block_with_seq_encoder_static_fail(self):
    """A memory block built with data must reject predict(idxs, data)."""
    vocab, rows, steps, outdim = 5, 10, 3, 13
    data = np.random.randint(0, vocab, (rows, steps))
    gru = GRU(dim=vocab, innerdim=outdim)
    encoder = SeqEncoder(IdxToOneHot(vocsize=vocab), gru)
    memb = MemoryBlock(encoder, data, outdim=outdim)
    idxs = [0, 2, 5]
    with self.assertRaises(AssertionError):
        memb.predict(idxs, data)
def test_output_shape_w_mask(self):
    """With weights and a mask supplied, all-outputs shape is (batch, seq, dim)."""
    batsize, seqlen = 2, 5
    dim, indim = 3, 7
    encoder = SeqEncoder(
        IdxToOneHot(indim),
        GRU(dim=indim, innerdim=dim))
    m = encoder.all_outputs

    data = np.random.randint(0, indim, (batsize, seqlen)).astype("int32")
    # Only the first two timesteps are switched on in the mask.
    mask = np.zeros_like(data).astype("float32")
    mask[:, 0:2] = 1
    weights = np.ones_like(data).astype("float32")

    prediction = m.predict(data, weights, mask)
    self.assertEqual(prediction.shape, (batsize, seqlen, dim))
def getemb(emb=None, embdim=None, vocsize=None, maskid=-1):
    """Resolve an embedding specification to ``(block, dimension)``.

    :param emb: ``False`` for "no embedder" (``embdim`` must then be given),
        an existing block exposing ``outdim``, or ``None`` to create one.
    :param embdim: embedding size; when ``None`` and a block has to be
        created, a one-hot embedder of width ``vocsize`` is used.
    :param vocsize: vocabulary size for a newly created embedder.
    :param maskid: forwarded to ``VectorEmbed`` when one is created.
    :return: tuple of the embedding block (or ``None``) and its output size.
    """
    # Explicitly no embedder: the caller has to say how wide the input is.
    if emb is False:
        assert (embdim is not None)
        return None, embdim

    # Ready-made embedder: its own outdim wins over embdim.
    if emb is not None:
        return emb, emb.outdim

    # Nothing supplied: build one-hot or dense embedding.
    if embdim is None:
        return IdxToOneHot(vocsize), vocsize
    return VectorEmbed(indim=vocsize, dim=embdim, maskid=maskid), embdim
def test_memory_block_with_seq_encoder(self):
    """A static memory block returns one encoding per requested index.

    Also checks that every parameter reachable from the block's output is
    one of the GRU's parameters.
    """
    vocab, rows, steps, outdim = 5, 10, 3, 13
    data = np.random.randint(0, vocab, (rows, steps))
    gru = GRU(dim=vocab, innerdim=outdim)
    encoder = SeqEncoder(IdxToOneHot(vocsize=vocab), gru)
    memb = MemoryBlock(encoder, data, indim=vocab, outdim=outdim)

    idxs = [0, 2, 5]
    memory_element = memb.predict(idxs)
    self.assertEqual(memory_element.shape, (len(idxs), outdim))

    gruparams = set(getattr(gru, name) for name in gru.paramnames)
    allparams = set(memb.output.allparams)
    self.assertEqual(gruparams & allparams, allparams)
def test_mask_dynamic_pad(self):
    """Smoke test: run a masked encoder on rows padded with -1.

    Each row is filled with -1 from a random position (between 2 and
    seqlen - 1) onwards; the test only prints and makes no assertions.
    """
    batsize, seqlen = 10, 5
    dim, indim = 6, 5
    m = SeqEncoder(
        IdxToOneHot(indim),
        GRU(dim=indim, innerdim=dim)).maskoption(-1).all_outputs()

    data = np.random.randint(0, indim, (batsize, seqlen)).astype("int32")
    rmasker = np.random.randint(2, seqlen, (batsize, )).astype("int32")
    # Single-argument print(...) produces the same output as the print
    # statement on Python 2.
    print(rmasker)
    for row, cut in enumerate(rmasker):
        data[row, cut:] = -1
    print(data)

    pred = m.predict(data)
    print(pred)
def test_mask_no_state_updates(self):
    """Outputs change over real timesteps and stay constant over masked ones.

    The input is ``seqlen`` real symbols followed by ``seqlen`` entries of
    -1, which is the mask id given to ``maskoption``.
    """
    batsize, seqlen = 10, 3
    dim, indim = 7, 5
    m = SeqEncoder(
        IdxToOneHot(indim),
        GRU(dim=indim, innerdim=dim)).maskoption(-1).all_outputs

    data = np.random.randint(0, indim, (batsize, seqlen)).astype("int32")
    data[:, 1] = 0
    padding = np.ones_like(data) * -1
    data = np.concatenate([data, padding], axis=1)

    pred = m.predict(data)
    for step in range(1, pred.shape[1]):
        previous = pred[:, step - 1, :]
        current = pred[:, step, :]
        print(np.linalg.norm(previous - current))
        unchanged = np.allclose(previous, current)
        if step < seqlen:
            # Still inside the real data: the state must have moved.
            self.assertTrue(not unchanged)
        else:
            # Inside the padding: the state must be carried over as is.
            self.assertTrue(unchanged)
def setUp(self):
    """Create random fixtures and a two-layer (GRU then LSTM) decoder."""
    self.batsize = 70
    self.vocsize = 27
    self.encdim = 44
    self.hdim = 33
    self.statedim = 50
    self.seqlen = 30

    encodings_shape = (self.batsize, self.encdim)
    self.encodings_data = np.random.random(encodings_shape).astype("float32")
    sequence_shape = (self.batsize, self.seqlen)
    self.sequence_data = np.random.randint(0, self.vocsize, sequence_shape)

    # First recurrent layer takes the one-hot symbol concatenated with the
    # encoding (vocsize + encdim); the LSTM consumes the GRU's output.
    layers = [
        IdxToOneHot(self.vocsize),
        GRU(dim=self.vocsize + self.encdim, innerdim=self.hdim),
        LSTM(dim=self.hdim, innerdim=self.statedim),
    ]
    self.dec = SeqDecoder(layers, inconcat=True, innerdim=self.statedim)
def test_mask_zero_mask_with_custom_maskid(self):
    """With MaskSetMode.ZERO, outputs at masked positions are exactly zero.

    The input is ``seqlen`` real symbols followed by ``seqlen`` entries of
    -1 (the configured mask id). Every row must have a non-zero output at
    each real timestep and a zero output at each masked one.
    """
    batsize, seqlen = 10, 3
    dim, indim = 7, 5
    m = SeqEncoder(
        IdxToOneHot(indim),
        GRU(dim=indim, innerdim=dim)).maskoptions(-1, MaskSetMode.ZERO).all_outputs

    data = np.random.randint(0, indim, (batsize, seqlen)).astype("int32")
    data[:, 1] = 0
    padding = np.ones_like(data) * -1
    data = np.concatenate([data, padding], axis=1)

    pred = m.predict(data)
    for step in range(pred.shape[1]):
        # Debug output only; at step 0 the index step - 1 wraps to the
        # last timestep, exactly as in the original.
        print(np.linalg.norm(pred[:, step - 1, :] - pred[:, step, :]))
        inside_data = step < seqlen
        for row in range(pred.shape[0]):
            magnitude = np.linalg.norm(pred[row, step, :])
            if inside_data:
                self.assertTrue(magnitude > 0.0)
            else:
                self.assertTrue(magnitude == 0.0)
def getdeclayers(self, outembdim, outvocsize, lastencinnerdim, decinnerdim, rnu, inconcat):
    """Assemble the decoder layer list: output embedder followed by RNUs.

    :param outembdim: ``None`` for a one-hot embedder, an existing ``Block``
        to use as the embedder, or a size for a new ``VectorEmbed``.
    :param outvocsize: output vocabulary size.
    :param lastencinnerdim: added to the first decoder input size when
        ``inconcat`` is true.
    :param decinnerdim: list of decoder layer sizes.
    :param rnu: callable invoked as ``rnu(dim=..., innerdim=...)`` per layer.
    :param inconcat: whether the first layer's input size includes
        ``lastencinnerdim``.
    :return: list starting with the embedder, followed by the created RNUs.
    """
    if outembdim is None:
        outemb, outembdim = IdxToOneHot(outvocsize), outvocsize
    elif isinstance(outembdim, Block):
        outemb, outembdim = outembdim, outembdim.outdim
    else:
        outemb = VectorEmbed(indim=outvocsize, dim=outembdim)

    if inconcat:
        firstdecdim = outembdim + lastencinnerdim
    else:
        firstdecdim = outembdim

    # Consecutive pairs of sizes give each layer's (input, output) widths.
    dims = [firstdecdim] + decinnerdim
    decrnus = [
        rnu(dim=size_in, innerdim=size_out)
        for size_in, size_out in zip(dims[:-1], dims[1:])
    ]
    return [outemb] + decrnus
def getenclayers(self, inpembdim, inpvocsize, encinnerdim, bidir, rnu):
    """Assemble the encoder layer list: input embedder followed by RNUs.

    :param inpembdim: ``None`` for a one-hot embedder, an existing ``Block``
        to use as the embedder, or a size for a new ``VectorEmbed``.
    :param inpvocsize: input vocabulary size.
    :param encinnerdim: list of encoder layer sizes.
    :param bidir: when true, each layer is built with ``BiRNU.fromrnu`` and
        the reported last dimension is doubled.
    :param rnu: recurrent unit class/callable used for every layer.
    :return: ``(layers, lastencinnerdim)``.
    """
    if inpembdim is None:
        inpemb, inpembdim = IdxToOneHot(inpvocsize), inpvocsize
    elif isinstance(inpembdim, Block):
        inpemb, inpembdim = inpembdim, inpembdim.outdim
    else:
        inpemb = VectorEmbed(indim=inpvocsize, dim=inpembdim)

    dims = [inpembdim] + encinnerdim
    if bidir:
        lastencinnerdim = dims[-1] * 2
    else:
        lastencinnerdim = dims[-1]

    encrnus = []
    for size_in, size_out in zip(dims[:-1], dims[1:]):
        if bidir:
            layer = BiRNU.fromrnu(rnu, dim=size_in, innerdim=size_out)
        else:
            layer = rnu(dim=size_in, innerdim=size_out)
        encrnus.append(layer)

    return [inpemb] + encrnus, lastencinnerdim
def test_memory_block_with_seq_encoder_dynamic(self):
    """A dynamic memory block (data given at predict time) matches a static one.

    Checks the output shape, that every parameter reachable from the
    prediction's first output is one of the listed GRU parameters, and that
    a block constructed with the same data gives the same result.
    """
    vocab, rows, steps, outdim = 5, 10, 3, 13
    data = np.random.randint(0, vocab, (rows, steps))
    gru = GRU(dim=vocab, innerdim=outdim)
    encoder = SeqEncoder(IdxToOneHot(vocsize=vocab), gru)
    dynmemb = MemoryBlock(encoder, outdim=outdim)
    idxs = [0, 2, 5]

    # Keep a single handle: its .outs is inspected after the call.
    predictor = dynmemb.predict
    memory_element = predictor(idxs, data)
    self.assertEqual(memory_element.shape, (len(idxs), outdim))

    param_names = "u w b uhf whf bhf um wm bm".split()
    gruparams = set(getattr(gru, name) for name in param_names)
    allparams = set(predictor.outs[0].allparams)
    self.assertEqual(gruparams & allparams, allparams)

    statmemb = MemoryBlock(encoder, data, outdim=outdim)
    statpred = statmemb.predict(idxs)
    self.assertTrue(np.allclose(statpred, memory_element))
def test_vector_out(self):
    """Decoder output width depends on whether the softmax block is enabled.

    With default settings the last axis has ``outvocsize`` entries; with
    ``softmaxoutblock=False`` it has ``decdim * 2`` entries.
    """
    decdim = 50
    outvocsize = 17
    outemb = IdxToOneHot(outvocsize)
    outembdim = outvocsize
    layers = [outemb, GRU(dim=outembdim, innerdim=decdim)]

    ctxdata = np.random.random((2, decdim)).astype("float32")
    seqdata = np.asarray([[2, 3, 4], [2, 3, 4]])

    dec = SeqDecoder(
        layers, innerdim=decdim * 2, outconcat=True, inconcat=False)
    pred = dec.predict(ctxdata, seqdata)
    self.assertEqual(pred.shape, (2, 3, outvocsize))

    # Same layers, but without the softmax output block.
    dec = SeqDecoder(
        layers, innerdim=decdim * 2, softmaxoutblock=False,
        outconcat=True, inconcat=False)
    pred = dec.predict(ctxdata, seqdata)
    self.assertEqual(pred.shape, (2, 3, decdim * 2))
def __init__(self, encdim=44, invocsize=500, outvocsize=27, innerdim=300, seqlen=20, **kw):
    """Store the sizes and build the index embedder and sequence decoder.

    :param encdim: size of the vector produced by the input embedding.
    :param invocsize: number of input indices.
    :param outvocsize: output vocabulary size (one-hot width).
    :param innerdim: hidden size of the decoder GRU.
    :param seqlen: stored on the instance as ``self.seqlen``.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(idx2seq, self).__init__(**kw)
    self.invocsize, self.outvocsize = invocsize, outvocsize
    self.innerdim, self.seqlen, self.encdim = innerdim, seqlen, encdim

    self.emb = VectorEmbed(
        indim=self.invocsize, dim=self.encdim, normalize=False)

    # Decoder GRU input width is outvocsize + encdim, matching inconcat=True.
    decoder_gru = GRU(
        dim=self.outvocsize + self.encdim,
        innerdim=self.innerdim,
        nobias=True)
    layers = [IdxToOneHot(self.outvocsize), decoder_gru]
    self.dec = SeqDecoder(layers, inconcat=True, innerdim=self.innerdim)
def test_idx_to_one_hot(self):
    """Indices 0..24 fed through IdxToOneHot(25) give the 25x25 identity."""
    onehot = IdxToOneHot(25)
    indices = np.arange(0, 25).astype("int32")
    expected = np.eye(25, 25)
    actual = onehot.predict(indices)
    difference = np.linalg.norm(expected - actual)
    self.assertEqual(difference, 0)
def test_idxtoonehot(self):
    """The weight matrix of IdxToOneHot has dtype float32."""
    onehot = IdxToOneHot(100)
    weight_dtype = onehot.W.dtype
    self.assertEqual(weight_dtype, "float32")