def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):
    # Content addressing: cosine similarity.
    # Take the inner product along the memory axis, k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm)
    # Compute the denominator |k| * |M|
    denom_bhn = cgt.broadcast("*",
        cgt.norm(k_bhm, axis=2, keepdims=True),                       # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0, 2, 1]),  # -> bn1 -> b1n
        "xx1,x1x")
    csim_bhn = numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2 * opt.h, opt.n)
    # Scale by beta
    tmp_bhn = cgt.broadcast("*", beta_bh[:, :, None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp(tmp_bhn))
    # Interpolation
    g_bh1 = g_bh[:, :, None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
           + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")
    # Shift
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)
    # Sharpening
    wfin_bhn = sum_normalize2(
        cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b, 2 * opt.h, 1]), "xxx,xx1"))
    b, h, n = opt.b, 2 * opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b, h, n)
    assert infer_shape(gamma_bh) == (b, h)
    assert infer_shape(gamma_bh[:, :, None]) == (b, h, 1)
    return wfin_bhn

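# The addressing code above uses two helpers from the surrounding NTM example,
# sum_normalize2 and circ_conv_1d, which are not shown here. Below is a minimal
# NumPy sketch of their assumed semantics (an illustration, not the example's
# actual cgt implementations); the shift convention for s_bh3 is also an assumption.
import numpy as np

def sum_normalize2_ref(x_bhn):
    # Assumed semantics: normalize so each (b, h) slice sums to 1 along the last axis.
    return x_bhn / x_bhn.sum(axis=2, keepdims=True)

def circ_conv_1d_ref(w_bhn, s_bh3):
    # Assumed semantics: circular convolution along the last axis with a 3-element
    # shift distribution, where index k of s_bh3 stands for a shift of k - 1
    # (i.e. shifts of -1, 0, +1).
    return sum(s_bh3[:, :, k:k + 1] * np.roll(w_bhn, k - 1, axis=2) for k in range(3))
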
def test_einsum():
    x = cgt.tensor3()
    y = cgt.tensor3()
    sizes = {'i': 2, 'j': 3, 'k': 5, 'l': 7}
    xaxes = 'ijk'
    yaxes = 'ikl'
    zaxes = 'ijl'
    for i in xrange(10):
        xperm = xaxes
        (yperm, zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)]
                                     for chars in [yaxes, zaxes]]
        desc = "%s,%s->%s" % tuple("".join(chars) for chars in [xperm] + permaxes)
        z = cgt.einsum(desc, x, y)
        xval = nr.randn(*(sizes[c] for c in xperm))
        yval = nr.randn(*(sizes[c] for c in yperm))
        np.testing.assert_allclose(
            cgt.numeric_eval(z, {x: xval, y: yval}),
            np.einsum(desc, xval, yval),
            atol={"single": 1e-3, "double": 1e-6}[cgt.get_precision()])

def conv2d_fft(x_BKRC, f_LKrc, subsample, pad):
    # TODO add shape assertion
    f_LKrc = cgt.flip(f_LKrc, [2, 3])
    padnrows = size(x_BKRC, 2) + size(f_LKrc, 2) - 1
    padncols = size(x_BKRC, 3) + size(f_LKrc, 3) - 1
    tx = cgt.rfft(x_BKRC, (padnrows, padncols), (2, 3))
    tf = cgt.rfft(f_LKrc, (padnrows, padncols), (2, 3))
    out = cgt.irfft(cgt.einsum("BKrc,LKrc->BLrc", tx, tf), (2, 3))
    out = out[:, :, pad[0]:(padnrows - pad[0]):subsample[0],
                    pad[1]:(padncols - pad[1]):subsample[1]]  # pylint: disable=E1127
    return out

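# A hedged usage sketch for conv2d_fft. The symbolic inputs and the concrete shapes
# in the comments are illustrative assumptions (including the use of cgt.tensor4 for
# 4-D symbolic variables), not part of the original example.
import cgt

x_BKRC = cgt.tensor4()   # e.g. (batch=8, channels=3, rows=32, cols=32)
f_LKrc = cgt.tensor4()   # e.g. (filters=16, channels=3, rows=5, cols=5)
# With a 5x5 filter, pad=(2, 2) keeps the 32x32 spatial size; subsample=(1, 1) means no striding.
y_BLRC = conv2d_fft(x_BKRC, f_LKrc, subsample=(1, 1), pad=(2, 2))
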
def ntm_write(M_bnm, w_bhn, e_bhm, a_bhm):
    if False:
        # Here's the version that's faithful to the paper.
        # Weighted erases, from operands of shape bhn1 and bh1m.
        # Ideally we wouldn't create this big 4-tensor, but this operation
        # requires a more general kind of contraction than einsum provides.
        we_bhnm = cgt.broadcast("*", w_bhn[:, :, :, None], e_bhm[:, :, None, :], "xxx1,xx1x")
        # Take the product of the erasing factors over heads
        mult_bnm = (1 - we_bhnm).prod(axis=1)
        M_bnm = M_bnm * mult_bnm  # Equation 3, http://arxiv.org/pdf/1410.5401v2.pdf
    else:
        # This version just does a regular contraction
        erase_bnm = cgt.einsum("bhn,bhm->bnm", w_bhn, e_bhm)
        M_bnm = M_bnm * (1 - erase_bnm)
    # Now do the same thing with the adds. Here a regular contraction suffices,
    # since we are adding rather than taking a product.
    add_bnm = cgt.einsum("bhn,bhm->bnm", w_bhn, a_bhm)
    M_bnm = M_bnm + add_bnm
    return M_bnm

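# For reference, the erase-then-add update that ntm_write implements
# (Equations 3 and 4 in http://arxiv.org/pdf/1410.5401v2.pdf):
#     Mtilde_t(i) = M_{t-1}(i) * (1 - w_t(i) * e_t)   elementwise, with the product taken over heads
#     M_t(i)      = Mtilde_t(i) + w_t(i) * a_t
# The else-branch replaces the product over heads with a single einsum contraction,
# M * (1 - sum_h w_h e_h), which agrees with the paper's update whenever at most one
# head writes to a given memory location.
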
def test_einsum():
    cgt.reset_config()
    cgt.set_precision("double")
    x = cgt.tensor3()
    y = cgt.tensor3()
    sizes = {'i': 2, 'j': 3, 'k': 5, 'l': 7}
    xaxes = 'ijk'
    yaxes = 'ikl'
    zaxes = 'ijl'
    for i in xrange(10):
        xperm = xaxes
        (yperm, zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)]
                                     for chars in [yaxes, zaxes]]
        desc = "%s,%s->%s" % tuple("".join(chars) for chars in [xperm] + permaxes)
        z = cgt.einsum(desc, x, y)
        xval = nr.randn(*(sizes[c] for c in xperm))
        yval = nr.randn(*(sizes[c] for c in yperm))
        np.testing.assert_allclose(
            cgt.numeric_eval(z, {x: xval, y: yval}),
            np.einsum(desc, xval, yval))

def ntm_read(M_bnm, w_bhn):
    # Read vector per head: weighted sum of memory rows, r_t = sum_n w_t(n) M_t(n)
    r_bhm = cgt.einsum('bhn,bnm->bhm', w_bhn, M_bnm)
    return r_bhm

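# Illustrative sketch of how the three pieces compose into one memory update.
# This is an assumption for illustration: the example above keeps 2*opt.h heads,
# so the caller presumably splits the weightings into read and write groups,
# whereas this sketch applies the same weightings to both reading and writing.
def ntm_memory_step(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh, e_bhm, a_bhm):
    w_bhn = ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh)
    r_bhm = ntm_read(M_bnm, w_bhn)                     # read vectors, one per head
    Mnew_bnm = ntm_write(M_bnm, w_bhn, e_bhm, a_bhm)   # erase, then add
    return w_bhn, r_bhm, Mnew_bnm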