예제 #1
0
def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):

    # Content addressing

    # Cosine similarity
    # take inner product along memory axis k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm)
    # compute denominator |k| * |m|
    denom_bhn = cgt.broadcast(
        "*",
        cgt.norm(k_bhm, axis=2, keepdims=True),  # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0, 2,
                                                          1]),  # -> bn1 -> b1n
        "xx1,x1x")
    csim_bhn = numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2 * opt.h, opt.n)
    # scale by beta
    tmp_bhn = cgt.broadcast("*", beta_bh[:, :, None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp(tmp_bhn))
    # Interpolation
    g_bh1 = g_bh[:, :, None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
            + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")
    # Shift
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)
    # Sharpening
    wfin_bhn = sum_normalize2(
        cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b, 2 * opt.h, 1]),
                      "xxx,xx1"))

    b, h, n = opt.b, 2 * opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b, h, n)
    assert infer_shape(gamma_bh) == (b, h)
    assert infer_shape(gamma_bh[:, :, None]) == (b, h, 1)
    return wfin_bhn
예제 #2
0
def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):

    # Content addressing

    # Cosine similarity
    # take inner product along memory axis k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm) 
    # compute denominator |k| * |m|
    denom_bhn = cgt.broadcast("*",
        cgt.norm(k_bhm, axis=2, keepdims=True), # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0,2,1]), # -> bn1 -> b1n
        "xx1,x1x"
    )
    csim_bhn =  numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2*opt.h, opt.n)
    # scale by beta
    tmp_bhn = cgt.broadcast("*", beta_bh[:,:,None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp( tmp_bhn ))
    # Interpolation
    g_bh1 = g_bh[:,:,None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
            + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")
    # Shift
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)
    # Sharpening
    wfin_bhn = sum_normalize2(cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b,2*opt.h,1]), "xxx,xx1"))

    b,h,n = opt.b, 2*opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b,h,n)
    assert infer_shape(gamma_bh) == (b,h)
    assert infer_shape(gamma_bh[:,:,None]) == (b,h,1)
    return wfin_bhn
예제 #3
0
def make_ff_controller(opt):

    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k

    H = 2*h
    in_size = k + h*m
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p

    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape = (b,h,m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape = (b,k))
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)

    hid_sizes = opt.ff_hid_sizes
    activation = cgt.tanh

    layer_out_sizes = [in_size] + hid_sizes + [out_size]
    last_out = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i in xrange(len(layer_out_sizes)-1):
        indim = layer_out_sizes[i]
        outdim = layer_out_sizes[i+1]        
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        last_out = cgt.broadcast("+",last_out.dot(W),bias,"xx,1x")
        # Don't apply nonlinearity at the last layer
        if i != len(layer_out_sizes)-2: last_out = activation(last_out)

    idx = 0
    k_bHm = last_out[:,idx:idx+H*m];      idx += H*m;         k_bHm = k_bHm.reshape([b,H,m])
    beta_bH = last_out[:,idx:idx+H];      idx += H
    g_bH = last_out[:,idx:idx+H];         idx += H
    s_bH3 = last_out[:,idx:idx+3*H];      idx += 3*H;         s_bH3 = s_bH3.reshape([b,H,3])
    gamma_bH = last_out[:,idx:idx+H];     idx += H
    e_bhm = last_out[:,idx:idx+h*m];      idx += h*m;         e_bhm = e_bhm.reshape([b,h,m])
    a_bhm = last_out[:,idx:idx+h*m];      idx += h*m;         a_bhm = a_bhm.reshape([b,h,m])
    y_bp = last_out[:,idx:idx+p];         idx += p

    k_bHm = cgt.tanh(k_bHm)
    beta_bH = nn.softplus(beta_bH)
    g_bH = cgt.sigmoid(g_bH)
    s_bH3 = sum_normalize2(cgt.exp(s_bH3))
    gamma_bH = cgt.sigmoid(gamma_bH)+1
    e_bhm = cgt.sigmoid(e_bhm)
    a_bhm = cgt.tanh(a_bhm)
    # y_bp = y_bp

    assert infer_shape(k_bHm) == (b,H,m)
    assert infer_shape(beta_bH) == (b,H)
    assert infer_shape(g_bH) == (b,H)
    assert infer_shape(s_bH3) == (b,H,3)
    assert infer_shape(gamma_bH) == (b,H)
    assert infer_shape(e_bhm) == (b,h,m)
    assert infer_shape(a_bhm) == (b,h,m)
    assert infer_shape(y_bp) == (b,p)

    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
예제 #4
0
def make_ff_controller(opt):

    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k

    H = 2*h
    in_size = k + h*m
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p

    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape = (b,h,m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape = (b,k))
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)

    hid_sizes = opt.ff_hid_sizes
    activation = cgt.tanh

    layer_out_sizes = [in_size] + hid_sizes + [out_size]
    last_out = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i in xrange(len(layer_out_sizes)-1):
        indim = layer_out_sizes[i]
        outdim = layer_out_sizes[i+1]        
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        last_out = cgt.broadcast("+",last_out.dot(W),bias,"xx,1x")
        # Don't apply nonlinearity at the last layer
        if i != len(layer_out_sizes)-2: last_out = activation(last_out)

    idx = 0
    k_bHm = last_out[:,idx:idx+H*m];      idx += H*m;         k_bHm = k_bHm.reshape([b,H,m])
    beta_bH = last_out[:,idx:idx+H];      idx += H
    g_bH = last_out[:,idx:idx+H];         idx += H
    s_bH3 = last_out[:,idx:idx+3*H];      idx += 3*H;         s_bH3 = s_bH3.reshape([b,H,3])
    gamma_bH = last_out[:,idx:idx+H];     idx += H
    e_bhm = last_out[:,idx:idx+h*m];      idx += h*m;         e_bhm = e_bhm.reshape([b,h,m])
    a_bhm = last_out[:,idx:idx+h*m];      idx += h*m;         a_bhm = a_bhm.reshape([b,h,m])
    y_bp = last_out[:,idx:idx+p];         idx += p

    k_bHm = cgt.tanh(k_bHm)
    beta_bH = nn.softplus(beta_bH)
    g_bH = cgt.sigmoid(g_bH)
    s_bH3 = sum_normalize2(cgt.exp(s_bH3))
    gamma_bH = cgt.sigmoid(gamma_bH)+1
    e_bhm = cgt.sigmoid(e_bhm)
    a_bhm = cgt.tanh(a_bhm)
    # y_bp = y_bp

    assert infer_shape(k_bHm) == (b,H,m)
    assert infer_shape(beta_bH) == (b,H)
    assert infer_shape(g_bH) == (b,H)
    assert infer_shape(s_bH3) == (b,H,3)
    assert infer_shape(gamma_bH) == (b,H)
    assert infer_shape(e_bhm) == (b,h,m)
    assert infer_shape(a_bhm) == (b,h,m)
    assert infer_shape(y_bp) == (b,p)

    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
예제 #5
0
파일: caffe2cgt.py 프로젝트: zxie/cgt
         cgt.tensor(dtype=cgt.floatX,
                    ndim=4,
                    name=layer.name,
                    fixed_shape=(batch_size, chans, crop_size,
                                 crop_size)),
         cgt.tensor(dtype='i8',
                    ndim=2,
                    name=layer.name,
                    fixed_shape=(batch_size, 1))
     ]
 elif layer.type == "Convolution":
     X = inputs[0]
     param = layer.convolution_param
     kh,kw = (param.kernel_size, param.kernel_size) if param.HasField("kernel_size")\
         else (param.kernel_h, param.kernel_w)
     nchanin = infer_shape(X)[0]
     Wshape = (param.num_output, nchanin, kh, kw)
     Wname = layer.param[0].name or layer.name + ":W"
     Wval = np.empty(Wshape, dtype=cgt.floatX)
     W = name2node[Wname] = cgt.shared(Wval,
                                       name=Wname,
                                       fixed_shape_mask="all")
     bshape = (1, param.num_output, 1, 1)
     bname = layer.param[1].name or layer.name + ":b"
     bval = np.empty(bshape, dtype=cgt.floatX)
     b = name2node[bname] = cgt.shared(bval,
                                       name=bname,
                                       fixed_shape_mask="all")
     sh,sw = (param.stride, param.stride) if param.HasField("stride")\
         else (param.stride_h, param.stride_w)
     output = [
예제 #6
0
파일: caffe2cgt.py 프로젝트: EdsterG/cgt
 output = None
 inputs = [name2node[name] for name in layer.bottom]
 if layer.type == "Data":
     tp = layer.transform_param
     crop_size = tp.crop_size
     chans = len(tp.mean_value)
     dp = layer.data_param
     batch_size = dp.batch_size
     output = [cgt.tensor(dtype=cgt.floatX,ndim=4,name=layer.name, fixed_shape=(batch_size,chans,crop_size,crop_size)),
               cgt.tensor(dtype='i8',ndim=2,name=layer.name, fixed_shape=(batch_size, 1))]
 elif layer.type == "Convolution":
     X = inputs[0]
     param = layer.convolution_param
     kh,kw = (param.kernel_size, param.kernel_size) if param.HasField("kernel_size")\
         else (param.kernel_h, param.kernel_w)
     nchanin = infer_shape(X)[0]
     Wshape = (param.num_output, nchanin, kh, kw)
     Wname = layer.param[0].name or layer.name+":W"
     Wval = np.empty(Wshape, dtype=cgt.floatX)
     W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all")
     bshape = (1, param.num_output, 1, 1)
     bname = layer.param[1].name or layer.name+":b"
     bval = np.empty(bshape, dtype=cgt.floatX)
     b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
     sh,sw = (param.stride, param.stride) if param.HasField("stride")\
         else (param.stride_h, param.stride_w)
     output = [cgt.broadcast("+",nn.conv2d(X, W, subsample=(sh,sw)), b, "xxxx,1x11")]
 elif layer.type == "Pooling":
     param = layer.pooling_param
     X = inputs[0]
     pool_type = {param.MAX : "max", param.AVE : "mean"}[param.pool]