Code Example #1
File: ar.py Project: SynthAI/SynthAI
def msconv2d(name,
             n_scales,
             n_in,
             n_out,
             size_kernel=(3, 3),
             zerodiagonal=True,
             flipmask=False,
             pad_channel=True,
             border_mode='valid',
             w={}):
    convs = [
        conv2d(name + "_s" + str(i), n_in, n_out, size_kernel, zerodiagonal,
               flipmask, pad_channel, border_mode, w) for i in range(n_scales)
    ]

    def f(h, w):
        results = []
        for i in range(n_scales - 1):
            results.append(convs[i](h, w))
            h = N.conv.downsample2d_nearest_neighbour(h, scale=2)
        result = convs[-1](h, w)
        for i in range(n_scales - 1):
            result = N.conv.upsample2d_nearest_neighbour(result)
            result += results[-1 - i]
        return result

    def postup(updates, w):
        for conv in convs:
            updates = conv.postup(updates, w)
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
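The multi-scale pattern above is easier to see in plain NumPy: apply a per-scale transform while building a nearest-neighbour pyramid, then walk back up, upsampling and accumulating. This is a minimal sketch with identity stand-ins for the convolutions; downsample_nn, upsample_nn and msapply are illustrative helpers, not part of the SynthAI API.

import numpy as np

def downsample_nn(h, scale=2):
    # nearest-neighbour downsampling: keep every `scale`-th pixel
    return h[:, :, ::scale, ::scale]

def upsample_nn(h, scale=2):
    # nearest-neighbour upsampling: repeat pixels along both spatial axes
    return h.repeat(scale, axis=2).repeat(scale, axis=3)

def msapply(h, convs):
    results = []
    for conv in convs[:-1]:          # finest to second-coarsest scale
        results.append(conv(h))
        h = downsample_nn(h)
    result = convs[-1](h)            # coarsest scale
    for r in reversed(results):      # walk back up, coarsest to finest
        result = upsample_nn(result) + r
    return result

h = np.random.randn(1, 3, 8, 8)
print(msapply(h, [lambda x: x] * 3).shape)  # (1, 3, 8, 8)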
Code Example #2
File: rand.py Project: SynthAI/SynthAI
def gsm(name, k, w={}, logvar_minmax=16):
    w[name + '_weight'] = G.sharedf(np.zeros((k, )))
    w[name + '_logvar'] = G.sharedf(np.random.randn(k) * .1)

    def logp(v, w):
        mixtureweights = T.exp(w[name + '_weight'])
        mixtureweights /= mixtureweights.sum()
        logvar = logvar_minmax * w[name + '_logvar']
        var = T.exp(logvar)
        if k == 0:
            return 0.
        if k == 1:
            # single-component case: plain zero-mean Gaussian log-density,
            # consistent with the k > 1 mixture branch below
            return -.5 * (v**2).sum() / var[0] - v.size.astype(
                G.floatX) * .5 * (T.log(2. * math.pi) + logvar[0])
        p = 0.
        for i in range(k):
            p += mixtureweights[i] * T.exp(-.5 * v**2 / var[i]) / T.sqrt(
                2. * math.pi * var[i])
        logp = T.log(p).sum()
        return logp

    def postup(updates, w):
        updates[w[name + '_logvar']] = T.clip(updates[w[name + '_logvar']],
                                              -1., 1.)
        return updates

    return G.Struct(logp=logp, postup=postup, w=w)
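The logp above is a zero-mean Gaussian scale mixture evaluated elementwise and summed. A NumPy sketch of the same density (gsm_logp is an illustrative name), checking that the general mixture reduces to the plain Gaussian formula when k = 1:

import numpy as np

def gsm_logp(v, weights, logvar):
    # log p(v) under a zero-mean Gaussian scale mixture, summed over elements
    p = np.exp(weights)
    p /= p.sum()                                   # softmax mixture weights
    var = np.exp(logvar)                           # per-component variances
    dens = (p[:, None] * np.exp(-.5 * v[None, :]**2 / var[:, None])
            / np.sqrt(2. * np.pi * var[:, None]))  # one row per component
    return np.log(dens.sum(axis=0)).sum()

v = np.random.randn(100)
single = -.5 * (v**2).sum() / np.exp(.3) - v.size * .5 * (np.log(2. * np.pi) + .3)
print(np.allclose(gsm_logp(v, np.zeros(1), np.array([.3])), single))  # True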
Code Example #3
File: conv.py Project: SynthAI/SynthAI
def resnetv3_layer_b(name, n_feats, factor=4, nl='softplus', alpha=.1, w={}):

    f_nl1 = N.nonlinearity(name+"_nl1", nl)
    f_nl2 = N.nonlinearity(name+"_nl2", nl)
    f_nl3 = N.nonlinearity(name+"_nl3", nl)
    
    # either no change in shape, or subsampling
    conv1 = conv2d(name+'_conv1', n_feats, n_feats//factor, (1,1), w=w)
    conv2 = conv2d(name+'_conv2', n_feats//factor, n_feats//factor, (3,3), w=w)
    conv3 = conv2d(name+'_conv3', n_feats//factor, n_feats, (1,1), w=w)
    
    def f(_input, w):
        h = f_nl1(_input)
        h = f_nl2(conv1(h, w))
        h = f_nl3(conv2(h, w))
        h = conv3(h, w)
        return _input + alpha * h
        
    def postup(updates, w):
        updates = conv1.postup(updates, w)
        updates = conv2.postup(updates, w)
        updates = conv3.postup(updates, w)
        return updates
    
    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #4
File: ar.py Project: SynthAI/SynthAI
def resnet_layer_a(name, n_feats, nl='elu', w={}):

    f_nl1 = N.nonlinearity(name + "_nl1", nl)
    f_nl2 = N.nonlinearity(name + "_nl2", nl)

    # either no change in shape, or subsampling
    conv1 = conv2d(name + '_conv1',
                   n_feats,
                   n_feats, (3, 3),
                   zerodiagonal=False,
                   w=w)
    conv2 = conv2d(name + '_conv2',
                   n_feats,
                   n_feats, (3, 3),
                   zerodiagonal=False,
                   w=w)

    def f(_input, w):
        h = f_nl1(_input)
        h = f_nl2(conv1(h, w))
        h = conv2(h, w)
        return _input + .1 * h

    def postup(updates, w):
        updates = conv1.postup(updates, w)
        updates = conv2.postup(updates, w)
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #5
File: conv.py Project: SynthAI/SynthAI
def resnetv1(name, n_layers, n_in, n_out, size_kernel=(3,3), downsample=1, upsample=1, nl='relu', w={}):
    layers = []
    for i in range(n_layers):
        _n_in = n_in
        _n_out = n_out
        _downsample = downsample
        _upsample = upsample
        if _downsample > 1 and i > 0:
            _downsample = 1
            _n_in = n_out
        if _upsample > 1 and i < n_layers-1:
            _upsample = 1
            _n_out = n_in
        
        layer = resnetv1_layer(name+'_'+str(i), _n_in, _n_out, size_kernel, _downsample, _upsample, nl, w)
        layers.append(layer)
    
    def f(h, w):
        for i in range(n_layers):
            h = layers[i](h, w)
        return h
    
    def postup(updates, w):
        for i in range(n_layers):
            updates = layers[i].postup(updates, w)
        return updates
    
    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #6
File: rand.py Project: SynthAI/SynthAI
def categorical(p, sample=None):
    if sample is None:
        sample = G.rng.multinomial(pvals=p, dtype='int32').argmax(axis=1)
    logp = -T.nnet.categorical_crossentropy(p, sample.flatten())
    entr = -(p * T.log(p)).sum(axis=1)
    return G.Struct(**{'sample': sample, 'logp': logp, 'entr': entr, 'p': p})
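categorical_crossentropy(p, targets) with integer targets returns -log p[target] per row, so logp above is the per-example log-likelihood. A minimal NumPy rendering of the three quantities, with a greedy stand-in for the multinomial draw:

import numpy as np

p = np.array([[.7, .2, .1],
              [.1, .1, .8]])                   # (batch, classes), rows sum to 1
sample = p.argmax(axis=1)                      # stand-in for a multinomial draw
logp = np.log(p[np.arange(len(p)), sample])    # log-likelihood of each sample
entr = -(p * np.log(p)).sum(axis=1)            # per-row entropy
print(logp, entr)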
Code Example #7
def batchnorm_meanonly(name, n_h, w={}):
    w[name+'_b'] = G.sharedf(np.zeros((n_h,)))
    def f(h, w):
        h -= h.mean(axis=(0,2,3), keepdims=True)
        h += w[name+'_b'].dimshuffle('x',0,'x','x')
        return h
    return G.Struct(__call__=f, w=w)
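Mean-only batch normalization subtracts the per-channel mean over the batch and spatial axes and re-adds a learned per-channel bias, without rescaling the variance. A self-contained NumPy sketch of the same computation on NCHW data (meanonly_bn is an illustrative name):

import numpy as np

def meanonly_bn(h, b):
    h = h - h.mean(axis=(0, 2, 3), keepdims=True)  # center each channel
    return h + b.reshape(1, -1, 1, 1)              # learned per-channel bias b

h = np.random.randn(4, 3, 8, 8)
out = meanonly_bn(h, np.zeros(3))
print(np.allclose(out.mean(axis=(0, 2, 3)), 0.))   # True: channels are centered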
Code Example #8
def nonlinearity(name, which, shape=None, w={}):
    
    if which in ('prelu', 'pelu', 'softplus2'):
        w[name] = G.sharedf(np.zeros(shape))
    if which == 'softplus_shiftscale':
        w[name+'_in_s'] = G.sharedf(np.zeros(shape))
        w[name+'_in_b'] = G.sharedf(np.zeros(shape))
    if which == 'linearsigmoid':
        w[name+'_a'] = G.sharedf(.5*np.ones(shape))
        w[name+'_b'] = G.sharedf(.5*np.ones(shape))
    if which == 'meanonlybatchnorm_softplus':
        assert type(shape) == int
        w[name+'_b'] = G.sharedf(np.zeros(shape))
    if which == 'meanonlybatchnorm_relu':
        assert type(shape) == int
        w[name+'_b'] = G.sharedf(np.zeros(shape))
    
    def f(h, w=None):
        if which is None or which == 'None':
            return h
        elif which == 'tanh':
            return T.tanh(h)
        elif which == 'softmax':
            return T.nnet.softmax(h)
        elif which == 'prelu':
            return w[name]*h*(h<0.) + h*(h>=0.)
        elif which == 'relu':
            return h*(h>=0.)
        elif which == 'shiftedrelu':
            return T.switch(h < -1., -1., h)
        elif which == 'leakyrelu':
            return 0.01 * h*(h<0.) + h*(h>=0.)
        elif which == 'elu':
            return T.switch(h < 0., T.exp(h)-1, h)
        elif which == 'softplus':
            return T.nnet.softplus(h)
        elif which == 'softplus_shiftscale':
            return T.nnet.softplus(T.exp(w[name+'_in_s']) * h + w[name+'_in_b'])
        elif which == 'softplus2':
            return T.nnet.softplus(h) - w[name] * T.nnet.softplus(-h)
        elif which == 'linearsigmoid':
            return w[name+'_a'] * h + w[name+'_b'] * T.nnet.sigmoid(h)
        elif which == 'meanonlybatchnorm_softplus':
            h -= h.mean(axis=(0,2,3), keepdims=True)
            h += w[name+'_b'].dimshuffle('x',0,'x','x')
            return T.nnet.softplus(h)
        elif which == 'meanonlybatchnorm_relu':
            h -= h.mean(axis=(0,2,3), keepdims=True)
            h += w[name+'_b'].dimshuffle('x',0,'x','x')
            return T.nnet.relu(h)
        else:
            raise Exception("Unrecognized nonlinearity: "+which)
        
        
    return G.Struct(__call__=f, w=w)
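Most of the parametric variants above are small modifications of standard activations. A NumPy sketch of two of them, using a numerically stable softplus; note that softplus2 reduces to plain softplus when its parameter is zero:

import numpy as np

def softplus(x):
    # stable softplus: log(1 + exp(x)) = max(x, 0) + log1p(exp(-|x|))
    return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))

def prelu(h, a):
    return a * h * (h < 0.) + h * (h >= 0.)

def softplus2(h, a):
    return softplus(h) - a * softplus(-h)

h = np.linspace(-3., 3., 7)
print(prelu(h, .25))
print(np.allclose(softplus2(h, 0.), softplus(h)))  # True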
Code Example #9
def mlp_l2(name, n_in, n_h, n_out, nl_h, nl_out=None, nl_in=None, w={}):

    if isinstance(n_out, int):
        n_out = [n_out]

    # parameters for input perturbation
    if nl_in is not None:
        f_nl_in = N.nonlinearity(name + '_in_nl', nl_in, (n_in, ), w)

    # parameters for hidden units
    nh = [n_in] + n_h
    linear_h = []
    f_nl_h = []
    for i in range(len(n_h)):
        s = name + '_' + str(i)
        linear_h.append(N.linear_l2(s, nh[i], nh[i + 1], w))
        f_nl_h.append(N.nonlinearity(s + '_nl', nl_h, (nh[i + 1], ), w))

    # parameters for output
    f_nl_out = []
    linear_out = []
    for i in range(len(n_out)):
        s = name + '_out_' + str(i)
        linear_out.append(N.linear_l2(s, n_h[-1], n_out[i], w))
        f_nl_out.append(N.nonlinearity(s + 'nl', nl_out, (n_out[i], ), w))

    def f(h, w, return_hiddens=False):

        if nl_in is not None:
            h = f_nl_in(h, w)

        hiddens = []
        for i in range(len(n_h)):
            h = linear_h[i](h, w)
            h = f_nl_h[i](h, w)
            hiddens.append(h)

        out = []
        for i in range(len(n_out)):
            _out = linear_out[i](h, w)
            _out = f_nl_out[i](_out, w)
            out.append(_out)

        if len(n_out) == 1: out = out[0]

        if return_hiddens:
            return hiddens, out

        return out

    def postup(updates, w):
        for l in linear_h:
            updates = l.postup(updates, w)
        for l in linear_out:
            updates = l.postup(updates, w)
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #10
File: ar.py Project: SynthAI/SynthAI
def mlp(name, n_in, n_context, n_h, n_out, nl, w={}):

    if isinstance(n_out, int):
        n_out = [n_out]

    if n_context > 0:
        # parameters for context input
        linear_context = N.linear_l2(name + '_context', n_context, n_h[0], w)

    # parameters for hidden units
    nh = [n_in] + n_h
    linear_h = []
    f_nl_h = []
    for i in range(len(n_h)):
        s = name + '_' + str(i)
        linear_h.append(linear(s, nh[i], nh[i + 1], False, True, w))
        f_nl_h.append(N.nonlinearity(s + '_nl', nl, (nh[i + 1], ), w))

    # parameters for output
    linear_out = []
    for i in range(len(n_out)):
        s = name + '_out_' + str(i)
        linear_out.append(linear(s, n_h[-1], n_out[i], True, True, w))

    def f(h, h_context, w, return_hiddens=False):
        # h_context can be None if n_context == 0

        hiddens = []
        for i in range(len(n_h)):
            h = linear_h[i](h, w)
            if i == 0 and n_context > 0:
                h += linear_context(h_context, w)
            h = f_nl_h[i](h, w)
            hiddens.append(h)

        out = []
        for i in range(len(n_out)):
            _out = linear_out[i](h, w)
            out.append(_out)

        if len(n_out) == 1: out = out[0]

        if return_hiddens:
            return hiddens, out

        return out

    def postup(updates, w):
        if n_context > 0:
            updates = linear_context.postup(updates, w)
        for l in linear_h:
            updates = l.postup(updates, w)
        for l in linear_out:
            updates = l.postup(updates, w)
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #11
File: rand.py Project: SynthAI/SynthAI
def zero_centered_laplace(name, w={}):
    w[name + '_logscale'] = G.sharedf(0.)

    def logp(v, w):
        return -abs(v).sum() / T.exp(w[name + '_logscale']) - v.size.astype(
            G.floatX) * (T.log(2.) + w[name + '_logscale'])

    postup = lambda updates, w: updates
    return G.Struct(logp=logp, postup=postup, w=w)
Code Example #12
File: rand.py Project: SynthAI/SynthAI
def zero_centered_gaussian(name, w={}):
    w[name + '_logvar'] = G.sharedf(0.)

    def logp(v, w):
        logvar = w[name + '_logvar'] * 10
        return v.size.astype(G.floatX) * -.5 * (
            T.log(2. * math.pi) + logvar) - .5 * (v**2).sum() / T.exp(logvar)

    postup = lambda updates, w: updates
    return G.Struct(logp=logp, postup=postup, w=w)
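Both priors above are elementwise log-densities summed over the parameter vector v: the Laplace density with scale s = exp(logscale), and the Gaussian with variance exp(logvar), where the stored variable is scaled by 10. A NumPy check of the two formulas (helper names are illustrative):

import numpy as np

v = np.random.randn(50)

def laplace_logp(v, logscale):
    s = np.exp(logscale)
    return -np.abs(v).sum() / s - v.size * (np.log(2.) + logscale)

def gaussian_logp(v, logvar):
    return v.size * -.5 * (np.log(2. * np.pi) + logvar) \
           - .5 * (v**2).sum() / np.exp(logvar)

# At logscale = logvar = 0 these are the standard Laplace(0, 1) and N(0, 1)
# log-likelihoods of v:
print(laplace_logp(v, 0.), gaussian_logp(v, 0.))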
Code Example #13
File: conv.py Project: SynthAI/SynthAI
def resnetv2_layer_a(name, n_feats, nl='relu', w={}):

    f_nl = N.nonlinearity(name+"_nl", nl)
    
    # either no change in shape, or subsampling
    conv1 = conv2d(name+'_conv1', n_feats, n_feats, (3,3), w=w)
    conv2 = conv2d(name+'_conv2', n_feats, n_feats, (3,3), w=w)
    
    def f(_input, w):
        h = _input
        h = f_nl(conv1(h, w))
        h = conv2(h, w)
        return T.nnet.relu(_input + .1 * h)
    
    def postup(updates, w):
        updates = conv1.postup(updates, w)
        updates = conv2.postup(updates, w)
        return updates
    
    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #14
File: conv.py Project: SynthAI/SynthAI
def resnetv3(name, n_layers, n_feats, nl='softplus', layertype='a', factor=4, w={}):
    
    layers = []
    for i in range(n_layers):
        if layertype == 'a':
            layers.append(resnetv3_layer_a(name+'_'+str(i), n_feats, nl, .1/n_layers, w))
        elif layertype == 'b':
            layers.append(resnetv3_layer_b(name+'_'+str(i), n_feats, factor, nl, .1/n_layers, w))
    
    def f(h, w):
        for i in range(n_layers):
            h = layers[i](h, w)
        return h
    
    def postup(updates, w):
        for i in range(n_layers):
            updates = layers[i].postup(updates, w)
        return updates
    
    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #15
File: conv.py Project: SynthAI/SynthAI
def resnetv1_layer(name, n_in, n_out, size_kernel=(3,3), downsample=1, upsample=1, nl='relu', w={}):
    #print 'resnet_layer', name, shape_in, shape_out, size_kernel, downsample, upsample
    
    f_nl = N.nonlinearity(name+"_nl", nl)
    
    border_mode = 'valid'
    
    if upsample == 1:
        # either no change in shape, or subsampling
        conv1 = conv2d(name+'_conv1', n_in, n_out, size_kernel, True, border_mode, downsample, upsample, w=w)
        conv2 = conv2d(name+'_conv2', n_out, n_out, size_kernel, True, border_mode, downsample=1, upsample=1, w=w)
        conv3 = None
        if downsample>1 or upsample>1 or n_out != n_in:
            conv3 = conv2d(name+'_conv3', n_in, n_out, (downsample, downsample), None, 'valid', downsample, upsample, w=w)
    else:
        # upsampling
        assert downsample == 1
        conv1 = conv2d(name+'_conv1', n_in, n_in, size_kernel, True, border_mode, downsample=1, upsample=1, w=w)
        conv2 = conv2d(name+'_conv2', n_in, n_out, size_kernel, True, border_mode, downsample, upsample, w=w)
        conv3 = None
        if downsample>1 or upsample>1 or n_out != n_in:
            conv3 = conv2d(name+'_conv3', n_in, n_out, (downsample, downsample), None, 'valid', downsample, upsample, w=w)
    
    def f(_input, w):
        hidden = f_nl(conv1(_input, w))
        _output = .1 * conv2(hidden, w)
        if conv3 is not None:
            return T.nnet.relu(conv3(_input, w) + _output)
        return T.nnet.relu(_input + _output)
    
    def postup(updates, w):
        updates = conv1.postup(updates, w)
        updates = conv2.postup(updates, w)
        if conv3 is not None:
            updates = conv3.postup(updates, w)
        return updates
    
    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #16
File: models.py Project: SynthAI/SynthAI
def cvae1(shape_x,
          depths,
          depth_ar,
          n_h1,
          n_h2,
          n_z,
          prior='diag',
          posterior='down_diag',
          px='logistic',
          nl='softplus',
          kernel_x=(5, 5),
          kernel_h=(3, 3),
          kl_min=0,
          optim='adamax',
          alpha=0.002,
          beta1=0.1,
          beta2=0.001,
          weightsharing=None,
          pad_x=0,
          data_init=None,
          downsample_type='nn'):
    _locals = locals()
    _locals.pop('data_init')
    print 'CVAE1 with ', _locals
    #assert posterior in ['diag1','diag2','iaf_linear','iaf_nonlinear']
    assert px in ['logistic', 'bernoulli']
    w = {}  # model params
    if pad_x > 0:
        shape_x[1] += 2 * pad_x
        shape_x[2] += 2 * pad_x

    # Input whitening
    if px == 'logistic':
        w['logsd_x'] = G.sharedf(0.)

    # encoder
    x_enc = N.conv.conv2d('x_enc',
                          shape_x[0],
                          n_h1,
                          kernel_x,
                          downsample=2,
                          w=w)
    x_dec = N.conv.conv2d('x_dec', n_h1, shape_x[0], kernel_x, upsample=2, w=w)
    x_dec_nl = N.nonlinearity('x_dec_nl', nl, n_h1, w)

    layers = []
    for i in range(len(depths)):
        layers.append([])
        for j in range(depths[i]):
            downsample = (i > 0 and j == 0)
            if weightsharing is None or not weightsharing:
                name = str(i) + '_' + str(j)
            elif weightsharing == 'all':
                name = '[sharedw]' + str(i) + '_' + str(j) + '[/sharedw]'
            elif weightsharing == 'acrosslevels':
                name = '[sharedw]' + str(i) + '[/sharedw]' + '_' + str(j)
            elif weightsharing == 'withinlevel':
                name = '[sharedw]' + str(i) + '[/sharedw]' + '_' + str(j)
            else:
                raise Exception()
            layers[i].append(
                cvae_layer(name, prior, posterior, n_h1, n_h2, n_z, depth_ar,
                           downsample, nl, kernel_h, False, downsample_type,
                           w))

    # top-level value
    w['h_top'] = G.sharedf(np.zeros((n_h1, )))

    # Initialize variables
    x = T.tensor4('x', dtype='uint8')
    x.tag.test_value = data_init['x']
    n_batch_test = data_init['x'].shape[0]
    _x = T.clip((x + .5) / 256., 0, 1)
    #_x = T.clip(x / 255., 0, 1)

    if pad_x > 0:
        _x = N.conv.pad2d(_x, pad_x)

    # Objective function
    def f_encode_decode(w, train=True):

        results = {}

        h = x_enc(_x - .5, w)

        obj_kl = G.sharedf(0.)

        # bottom-up encoders
        for i in range(len(depths)):
            for j in range(depths[i]):
                h = layers[i][j].up(h, w)

        # top-level activations
        h = T.tile(w['h_top'].dimshuffle('x', 0, 'x', 'x'),
                   (_x.shape[0], 1, shape_x[1] // 2**len(depths),
                    shape_x[2] // 2**len(depths)))

        # top-down priors, posteriors and decoders
        for i in list(reversed(range(len(depths)))):
            for j in list(reversed(range(depths[i]))):
                h, kl = layers[i][j].down_q(h, train, w)
                kl_sum = kl.sum(axis=(1, 2, 3))
                results['cost_z' + str(i).zfill(3) + '_' +
                        str(j).zfill(3)] = kl_sum
                # Constraint: Minimum number of bits per featuremap, averaged across minibatch
                if kl_min > 0:
                    if True:
                        kl = kl.sum(axis=(2, 3)).mean(axis=0, dtype=G.floatX)
                        obj_kl += T.maximum(np.asarray(kl_min, G.floatX),
                                            kl).sum(dtype=G.floatX)
                    else:
                        kl = T.maximum(np.asarray(kl_min, G.floatX),
                                       kl.sum(axis=(2, 3))).sum(axis=1,
                                                                dtype=G.floatX)
                        obj_kl += kl
                else:
                    obj_kl += kl_sum

        output = .1 * x_dec(x_dec_nl(h, w), w)

        # empirical distribution
        if px == 'logistic':
            mean_x = T.clip(output + .5, 0 + 1 / 512., 1 - 1 / 512.)
            logsd_x = 0 * mean_x + w['logsd_x']
            obj_logpx = N.rand.discretized_logistic(mean_x, logsd_x, 1 / 256.,
                                                    _x).logp
            #obj_z = T.printing.Print('obj_z')(obj_z)
            obj = obj_logpx - obj_kl
            # Compute the bits per pixel
            obj *= (1. / np.prod(shape_x) * 1. / np.log(2.)).astype('float32')

            #if not '__init' in w:
            #    raise Exception()

        elif px == 'bernoulli':
            prob_x = T.nnet.sigmoid(output)
            prob_x = T.maximum(T.minimum(prob_x, 1 - 1e-7), 1e-7)
            #prob_x = T.printing.Print('prob_x')(prob_x)
            obj_logpx = N.rand.bernoulli(prob_x, _x).logp

            #obj_logqz = T.printing.Print('obj_logqz')(obj_logqz)
            #obj_logpz = T.printing.Print('obj_logpz')(obj_logpz)
            #obj_logpx = T.printing.Print('obj_logpx')(obj_logpx)
            obj = obj_logpx - obj_kl
            #obj = T.printing.Print('obj')(obj)

        results['cost_x'] = -obj_logpx
        results['cost'] = -obj
        return results

    # Turns Gaussian noise 'eps' into a sample
    def f_decoder(eps, w):

        # top-level activations
        h = T.tile(w['h_top'].dimshuffle('x', 0, 'x', 'x'),
                   (eps['eps_0_0'].shape[0], 1, shape_x[1] // 2**len(depths),
                    shape_x[2] // 2**len(depths)))

        # top-down priors, posteriors and decoders
        for i in list(reversed(range(len(depths)))):
            for j in list(reversed(range(depths[i]))):
                h = layers[i][j].down_p(h, eps['eps_' + str(i) + '_' + str(j)],
                                        w)

        output = .1 * x_dec(x_dec_nl(h, w), w)

        if px == 'logistic':
            mean_x = T.clip(output + .5, 0 + 1 / 512., 1 - 1 / 512.)
        elif px == 'bernoulli':
            mean_x = T.nnet.sigmoid(output)

        image = (256. * mean_x).astype('uint8')
        if pad_x > 0:
            image = image[:, :, pad_x:-pad_x, pad_x:-pad_x]

        return image

    def f_eps(n_batch, w):
        eps = {}
        for i in range(len(depths)):
            for j in range(depths[i]):
                eps['eps_' + str(i) + '_' + str(j)] = G.rng_curand.normal(
                    (n_batch, n_z, shape_x[1] // 2**(i + 1),
                     shape_x[2] // 2**(i + 1)),
                    dtype=G.floatX)
        return eps

    def postup(updates, w):
        nodes = [x_enc, x_dec]
        for n in nodes:
            updates = n.postup(updates, w)
        for i in range(len(depths)):
            for j in range(depths[i]):
                updates = layers[i][j].postup(updates, w)

        return updates

    # Compile init function
    if data_init is not None:
        w['__init'] = OrderedDict()
        f_encode_decode(w)
        w.pop('__init')
        #for i in w: print i, abs(w[i].get_value()).min(), abs(w[i].get_value()).max(), abs(w[i].get_value()).mean()

    # Compile training function

    #todo: replace postup with below
    #w['_updates'] = updates
    #f_cost(w)
    #updates = w.pop('_updates')

    w_avg = {i: G.sharedf(w[i].get_value()) for i in w}

    def lazy(f):
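        # build and compile the wrapped function once, on its first call;
        # reuse the cached compiled function on subsequent calls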
        def newf(*args, **kws):
            if not hasattr(f, 'cache'):
                f.cache = f()
            return f.cache(*args, **kws)

        return newf

    @lazy
    def f_train():
        if optim == 'adamax':
            train_cost = f_encode_decode(w)['cost']
            updates = G.misc.optim.AdaMaxAvg([w], [w_avg],
                                             train_cost,
                                             alpha=-alpha,
                                             beta1=beta1,
                                             beta2=beta2,
                                             disconnected_inputs='ignore')
        elif optim == 'eve':
            f = lambda w: f_encode_decode(w)['cost']
            train_cost, updates = G.misc.optim.Eve(
                w,
                w_avg,
                f,
                alpha=-alpha,
                beta1=beta1,
                beta2=beta2,
                disconnected_inputs='ignore')
        updates = postup(updates, w)
        return G.function({'x': x}, train_cost, updates=updates, lazy=lazy)

    @lazy
    def f_train_q():
        keys_q = []
        for i in w:
            if '_q_' in i: keys_q.append(i)
        train_cost = f_encode_decode(w)['cost']
        updates = G.misc.optim.AdaMaxAvg([w],
                                         None,
                                         train_cost,
                                         alpha=-alpha,
                                         beta1=beta1,
                                         beta2=beta2,
                                         update_keys=keys_q,
                                         disconnected_inputs='ignore')
        updates = postup(updates, w)
        return G.function({'x': x}, train_cost, updates=updates, lazy=lazy)

    # Compile evaluation function
    @lazy
    def f_eval():
        results = f_encode_decode(w_avg, False)
        return G.function({'x': x}, results)

    # Compile epsilon generating function
    @lazy
    def f_eps_():
        n_batch = T.lscalar()
        n_batch.tag.test_value = 16
        eps = f_eps(n_batch, w)
        return G.function({'n_batch': n_batch}, eps, lazy=lazy)

    # Compile sampling function
    @lazy
    def f_decode():
        eps = {}
        for i in range(len(depths)):
            for j in range(depths[i]):
                eps['eps_' + str(i) + '_' + str(j)] = T.tensor4('eps' + str(i))
                eps['eps_' + str(i) + '_' +
                    str(j)].tag.test_value = np.random.randn(
                        n_batch_test, n_z, shape_x[1] // 2**(i + 1),
                        shape_x[2] // 2**(i + 1)).astype(G.floatX)
        image = f_decoder(eps, w_avg)
        return G.function(eps, image, lazy=lazy)

    return G.Struct(train=f_train,
                    eval=f_eval,
                    decode=f_decode,
                    eps=f_eps_,
                    w=w,
                    w_avg=w_avg)
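The factor 1. / (np.prod(shape_x) * np.log(2.)) in f_encode_decode converts the objective from nats per image to bits per dimension, the unit usually reported for density models. A one-line NumPy illustration (nats_to_bits_per_dim is an illustrative name):

import numpy as np

def nats_to_bits_per_dim(logp_nats, shape_x):
    # logp_nats: log-likelihood of one image of shape (C, H, W), in nats
    return logp_nats / (np.prod(shape_x) * np.log(2.))

# e.g. -8000 nats on a 3x32x32 image is about 3.76 bits/dim:
print(-nats_to_bits_per_dim(-8000., (3, 32, 32)))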
Code Example #17
File: models.py Project: SynthAI/SynthAI
def cvae_layer(name, prior, posterior, n_h1, n_h2, n_z, depth_ar, downsample,
               nl, kernel, weightsharing, downsample_type, w):

    if False:
        # New such that we can recognize variational params later
        name_q = name + '_q_'
        name_p = name + '_p_'
    else:
        name_q = name
        name_p = name

    n_conv_up1 = n_h2 + 2 * n_z
    n_conv_up2 = n_h2 + n_z

    n_conv_down_posterior = 0
    n_conv_down_prior = n_h2 + 2 * n_z

    # Prior
    prior_conv1 = None

    if prior in ['diag', 'diag2']:
        n_conv_down_prior = n_h2 + 2 * n_z
    elif prior == 'made':
        prior_conv1 = N.ar.multiconv2d(name_p + '_prior_conv1',
                                       n_z,
                                       depth_ar * [n_h2], [n_z, n_z],
                                       kernel,
                                       False,
                                       nl=nl,
                                       w=w)
        n_conv_down_prior = n_h2 + n_h2
    elif prior == 'bernoulli':
        n_conv_down_prior = n_h2 + n_z
        prior_conv1 = N.conv.conv2d(name_p + '_prior_conv1',
                                    n_z,
                                    n_z,
                                    kernel,
                                    w=w)
    else:
        raise Exception("Unknown prior")

    # Posterior
    posterior_conv1 = None
    posterior_conv2 = None
    posterior_conv3 = None
    posterior_conv4 = None

    if posterior == 'up_diag':
        pass
    elif posterior == 'up_iaf1':
        posterior_conv1 = N.ar.conv2d(name_q + '_posterior_conv1',
                                      n_z,
                                      n_z,
                                      kernel,
                                      w=w)
    elif posterior == 'up_iaf2':
        posterior_conv1 = N.ar.conv2d(name_q + '_posterior_conv1',
                                      n_z,
                                      2 * n_z,
                                      kernel,
                                      w=w)

    elif posterior == 'up_iaf1_nl':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        posterior_conv1 = N.ar.multiconv2d(name_q + '_posterior_conv1',
                                           n_z,
                                           depth_ar * [n_h2],
                                           n_z,
                                           kernel,
                                           False,
                                           nl=nl,
                                           w=w)
    elif posterior == 'up_iaf2_nl':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        posterior_conv1 = N.ar.multiconv2d(name_q + '_posterior_conv1',
                                           n_z,
                                           depth_ar * [n_h2], [n_z, n_z],
                                           kernel,
                                           False,
                                           nl=nl,
                                           w=w)

#    elif posterior == 'down_diag':
#        n_conv_down1 = n_h2+4*n_z
    elif posterior == 'down_diag':
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z
    elif posterior == 'down_bernoulli':
        n_conv_up2 = n_h2
        n_conv_down_posterior = n_z
    elif posterior == 'down_tim':
        pass
    elif posterior == 'down_iaf1':
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z
        posterior_conv1 = N.ar.conv2d(name_q + '_posterior_conv1',
                                      n_z,
                                      n_z,
                                      kernel,
                                      w=w)
    elif posterior == 'down_iaf2':
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z
        posterior_conv1 = N.ar.conv2d(name_q + '_posterior_conv1',
                                      n_z,
                                      2 * n_z,
                                      kernel,
                                      w=w)
    elif posterior == 'down_iaf1_nl':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z + n_h2
        posterior_conv1 = N.ar.multiconv2d(name_q + '_posterior_conv1',
                                           n_z,
                                           depth_ar * [n_h2],
                                           n_z,
                                           kernel,
                                           False,
                                           nl=nl,
                                           w=w)
    elif posterior == 'down_iaf2_nl':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z + n_h2
        posterior_conv1 = N.ar.multiconv2d(name_q + '_posterior_conv1',
                                           n_z,
                                           depth_ar * [n_h2], [n_z, n_z],
                                           kernel,
                                           False,
                                           nl=nl,
                                           w=w)
    elif posterior == 'down_iaf2_nl2':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z + n_h2
        posterior_conv1 = N.ar.multiconv2d(name_q + '_posterior_conv1',
                                           n_z,
                                           depth_ar * [n_h2], [n_z, n_z],
                                           kernel,
                                           False,
                                           nl=nl,
                                           w=w)
        posterior_conv2 = N.ar.multiconv2d(name_q + '_posterior_conv2',
                                           n_z,
                                           depth_ar * [n_h2], [n_z, n_z],
                                           kernel,
                                           True,
                                           nl=nl,
                                           w=w)
    elif posterior == 'down_iaf1_deep':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z + n_h2
        posterior_conv1 = N.ar.resnet(name_q + '_deepiaf',
                                      depth_ar,
                                      n_z,
                                      n_h2,
                                      n_z,
                                      kernel,
                                      False,
                                      nl=nl,
                                      weightsharing=weightsharing,
                                      w=w)
    elif posterior == 'down_iaf2_deep':
        n_conv_up1 = n_h2 + 2 * n_z + n_h2
        n_conv_up2 = n_h2
        n_conv_down_posterior = 2 * n_z + n_h2
        posterior_conv1 = N.ar.resnet(name_q + '_deepiaf',
                                      depth_ar,
                                      n_z,
                                      n_h2, [n_z, n_z],
                                      kernel,
                                      False,
                                      nl=nl,
                                      weightsharing=weightsharing,
                                      w=w)

    #elif posterior == 'iaf_deep1':
    #    extra1 = N.ar.resnet(name+'_posterior_2', depth_iaf, n_z, 2*n_h, n_h, n_z, (3,3), False, nl=nl, w=w)
    #elif posterior == 'iaf_deep2':
    #    extra1 = N.ar.resnet(name+'_posterior_2', depth_iaf, n_z, 2*n_h, n_h, [n_z,n_z], (3,3), False, nl=nl, w=w)
    else:
        raise Exception("Unknown posterior " + posterior)

    ds = 1
    if downsample:
        ds = 2
        if downsample_type == 'conv':
            up_conv3 = N.conv.conv2d(name_q + '_up_conv3',
                                     n_h1,
                                     n_h1,
                                     kernel,
                                     downsample=ds,
                                     w=w)
            down_conv3 = N.conv.conv2d(name_q + '_down_conv3',
                                       n_h1,
                                       n_h1,
                                       kernel,
                                       upsample=ds,
                                       w=w)

    up_nl1 = N.nonlinearity(name_q + "_up_nl1", nl)
    up_conv1 = N.conv.conv2d(name_q + '_up_conv1_' + str(ds),
                             n_h1,
                             n_conv_up1,
                             kernel,
                             downsample=ds,
                             w=w)
    up_nl2 = N.nonlinearity(name_q + "_nl_up2", nl)
    up_conv2 = N.conv.conv2d(name_q + '_up_conv2',
                             n_conv_up2,
                             n_h1,
                             kernel,
                             w=w)

    down_nl1 = N.nonlinearity(name_p + "_down_nl1", nl)
    down_conv1 = N.conv.conv2d(name_p + '_down_conv1',
                               n_h1,
                               n_conv_down_prior + n_conv_down_posterior,
                               kernel,
                               w=w)
    down_nl2 = N.nonlinearity(name_p + "_down_nl2", nl)
    down_conv2 = N.conv.conv2d(name_p + '_down_conv2_' + str(ds),
                               n_h2 + n_z,
                               n_h1,
                               kernel,
                               upsample=ds,
                               w=w)

    up_output = [None]
    qz = [None]
    up_context = [None]

    def up(input, w):

        h = up_conv1(up_nl1(input, w), w)
        h_det = h[:, :n_h2, :, :]
        qz_mean = h[:, n_h2:n_h2 + n_z, :, :]
        qz_logsd = h[:, n_h2 + n_z:n_h2 + 2 * n_z, :, :]
        qz[0] = N.rand.gaussian_diag(qz_mean, 2 * qz_logsd)
        if posterior == 'up_diag':
            h = T.concatenate([h_det, qz[0].sample], axis=1)
        elif posterior == 'up_iaf1':
            arw_mean = posterior_conv1(qz[0].sample, w)
            arw_mean *= .1
            qz[0].sample = (qz[0].sample - arw_mean)
            h = T.concatenate([h_det, qz[0].sample], axis=1)
        elif posterior == 'up_iaf2':
            arw_mean_logsd = posterior_conv1(qz[0].sample, w)
            arw_mean = arw_mean_logsd[:, ::2, :, :]
            arw_logsd = arw_mean_logsd[:, 1::2, :, :]
            arw_mean *= .1
            arw_logsd *= .1
            qz[0].sample = (qz[0].sample - arw_mean) / T.exp(arw_logsd)
            qz[0].logps += arw_logsd
            qz[0].logp += arw_logsd.flatten(2).sum(axis=1)
            h = T.concatenate([h_det, qz[0].sample], axis=1)
        elif posterior == 'up_iaf1_nl':
            context = h[:, n_h2 + 2 * n_z:n_h2 + 2 * n_z + n_h2]
            arw_mean = posterior_conv1(qz[0].sample, context, w)
            arw_mean *= .1
            qz[0].sample = (qz[0].sample - arw_mean)
            h = T.concatenate([h_det, qz[0].sample], axis=1)
        elif posterior == 'up_iaf2_nl':
            context = h[:, n_h2 + 2 * n_z:n_h2 + 2 * n_z + n_h2]
            arw_mean, arw_logsd = posterior_conv1(qz[0].sample, context, w)
            arw_mean *= .1
            arw_logsd *= .1
            qz[0].sample = (qz[0].sample - arw_mean) / T.exp(arw_logsd)
            qz[0].logps += arw_logsd
            qz[0].logp += arw_logsd.flatten(2).sum(axis=1)
            h = T.concatenate([h_det, qz[0].sample], axis=1)
        elif posterior == 'down_tim':
            h = T.concatenate([h_det, qz[0].mean], axis=1)
        elif posterior in [
                'down_iaf1_nl', 'down_iaf2_nl', 'down_iaf2_nl2',
                'down_iaf1_deep', 'down_iaf2_deep'
        ]:
            up_context[0] = h[:, n_h2 + 2 * n_z:n_h2 + 2 * n_z + n_h2]
            h = h_det
        elif posterior in [
                'down_diag', 'down_iaf1', 'down_iaf2', 'down_bernoulli'
        ]:
            h = h_det
        else:
            raise Exception()
        if downsample:
            if downsample_type == 'nn':
                input = N.conv.downsample2d_nearest_neighbour(input, 2)
            elif downsample_type == 'conv':
                input = up_conv3(input, w)
        output = input + .1 * up_conv2(up_nl2(h, w), w)
        up_output[0] = output

        return output

    def bernoulli_p(h):
        #p = T.clip(.5+.5*h, 1e-7, 1. - 1e-7)
        p = 1e-7 + (1 - 2e-7) * T.nnet.sigmoid(h)
        return p

    def down_q(input, train, w):

        #if name == '1':
        #    print input.tag.test_value

        # prior
        h = down_nl1(input, w)
        #h = T.printing.Print('h1'+name)(h)
        h = down_conv1(h, w)
        #h = T.printing.Print('h2'+name)(h)

        logqs = 0

        # posterior
        if posterior in [
                'up_diag', 'up_iaf1', 'up_iaf2', 'up_iaf1_nl', 'up_iaf2_nl'
        ]:
            z = qz[0].sample
            logqs = qz[0].logps
        elif posterior == 'down_diag':
            rz_mean = h[:, n_conv_down_prior:n_conv_down_prior + n_z, :, :]
            rz_logsd = h[:, n_conv_down_prior + n_z:n_conv_down_prior +
                         2 * n_z, :, :]
            _qz = N.rand.gaussian_diag(qz[0].mean + rz_mean,
                                       qz[0].logvar + 2 * rz_logsd)
            z = _qz.sample
            logqs = _qz.logps
        elif posterior == 'down_tim':
            assert prior == 'diag'
            pz_mean = h[:, n_h2:n_h2 + n_z, :, :]
            pz_logsd = h[:, n_h2 + n_z:n_h2 + 2 * n_z, :, :]

            qz_prec = 1. / T.exp(qz[0].logvar)
            pz_prec = 1. / T.exp(2 * pz_logsd)
            rz_prec = qz_prec + pz_prec
            rz_mean = (pz_prec / rz_prec) * pz_mean + (qz_prec /
                                                       rz_prec) * qz[0].mean
            _qz = N.rand.gaussian_diag(rz_mean, -T.log(rz_prec))
            z = _qz.sample
            logqs = _qz.logps
        elif posterior == 'down_iaf1':
            rz_mean = h[:, n_conv_down_prior:n_conv_down_prior + n_z, :, :]
            rz_logsd = h[:, n_conv_down_prior + n_z:n_conv_down_prior +
                         2 * n_z, :, :]
            _qz = N.rand.gaussian_diag(qz[0].mean + rz_mean,
                                       qz[0].logvar + 2 * rz_logsd)
            z = _qz.sample
            logqs = _qz.logps
            # ARW transform
            arw_mean = posterior_conv1(z, w)
            arw_mean *= .1
            z = (z - arw_mean)
        elif posterior == 'down_iaf2':
            rz_mean = h[:, n_conv_down_prior:n_conv_down_prior + n_z, :, :]
            rz_logsd = h[:, n_conv_down_prior + n_z:n_conv_down_prior +
                         2 * n_z, :, :]
            _qz = N.rand.gaussian_diag(qz[0].mean + rz_mean,
                                       qz[0].logvar + 2 * rz_logsd)
            z = _qz.sample
            logqs = _qz.logps
            # ARW transform
            arw_mean_logsd = posterior_conv1(z, w)
            arw_mean = arw_mean_logsd[:, ::2, :, :]
            arw_logsd = arw_mean_logsd[:, 1::2, :, :]
            arw_mean *= .1
            arw_logsd *= .1
            z = (z - arw_mean) / T.exp(arw_logsd)
            logqs += arw_logsd
        elif posterior in ['down_iaf1_nl', 'down_iaf1_deep']:
            rz_mean = h[:, n_conv_down_prior:n_conv_down_prior + n_z, :, :]
            rz_logsd = h[:, n_conv_down_prior + n_z:n_conv_down_prior +
                         2 * n_z, :, :]
            _qz = N.rand.gaussian_diag(qz[0].mean + rz_mean,
                                       qz[0].logvar + 2 * rz_logsd)
            z = _qz.sample
            logqs = _qz.logps
            # ARW transform
            down_context = h[:, n_conv_down_prior + 2 * n_z:n_conv_down_prior +
                             2 * n_z + n_h2, :, :]
            context = up_context[0] + down_context
            arw_mean = posterior_conv1(z, context, w)
            arw_mean *= .1
            z = (z - arw_mean)
        elif posterior in ['down_iaf2_nl', 'down_iaf2_nl2', 'down_iaf2_deep']:
            rz_mean = h[:, n_conv_down_prior:n_conv_down_prior + n_z, :, :]
            rz_logsd = h[:, n_conv_down_prior + n_z:n_conv_down_prior +
                         2 * n_z, :, :]
            _qz = N.rand.gaussian_diag(qz[0].mean + rz_mean,
                                       qz[0].logvar + 2 * rz_logsd)
            z = _qz.sample
            logqs = _qz.logps
            # ARW transform
            down_context = h[:, n_conv_down_prior + 2 * n_z:n_conv_down_prior +
                             2 * n_z + n_h2, :, :]
            context = up_context[0] + down_context
            arw_mean, arw_logsd = posterior_conv1(z, context, w)
            arw_mean *= .1
            arw_logsd *= .1
            z = (z - arw_mean) / T.exp(arw_logsd)
            logqs += arw_logsd
            if posterior == 'down_iaf2_nl2':
                arw_mean, arw_logsd = posterior_conv2(z, context, w)
                arw_mean *= .1
                arw_logsd *= .1
                z = (z - arw_mean) / T.exp(arw_logsd)
                logqs += arw_logsd

        # Prior
        if prior == 'diag':
            pz_mean = h[:, n_h2:n_h2 + n_z, :, :]
            pz_logsd = h[:, n_h2 + n_z:n_h2 + 2 * n_z, :, :]
            logps = N.rand.gaussian_diag(pz_mean, 2 * pz_logsd, z).logps
        elif prior == 'diag2':
            logps = N.rand.gaussian_diag(0 * z, 0 * z, z).logps
            pz_mean = h[:, n_h2:n_h2 + n_z, :, :]
            pz_logsd = h[:, n_h2 + n_z:n_h2 + 2 * n_z, :, :]
            z = pz_mean + z * T.exp(pz_logsd)
        elif prior == 'made':
            made_context = h[:, n_h2:2 * n_h2, :, :]
            made_mean, made_logsd = prior_conv1(z, made_context, w)
            made_mean *= .1
            made_logsd *= .1
            logps = N.rand.gaussian_diag(made_mean, 2 * made_logsd, z).logps
        elif prior == 'bernoulli':
            assert posterior == 'down_bernoulli'
            pz_p = bernoulli_p(h[:, n_h2:n_h2 + n_z, :, :])
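            # z01 is assumed to be the {0,1}-valued posterior sample; no
            # branch in this excerpt defines it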
            logps = z01 * T.log(pz_p) + (1. - z01) * T.log(1. - pz_p)
        else:
            raise Exception()

        h_det = h[:, :n_h2, :, :]
        h = T.concatenate([h_det, z], axis=1)
        if downsample:
            if downsample_type == 'nn':
                input = N.conv.upsample2d_nearest_neighbour(input)
            elif downsample_type == 'conv':
                input = down_conv3(input, w)

        output = input + .1 * down_conv2(down_nl2(h, w), w)

        return output, logqs - logps

    def down_p(input, eps, w):
        # prior
        h = down_conv1(down_nl1(input, w), w)
        h_det = h[:, :n_h2, :, :]
        if prior in ['diag', 'diag2']:
            mean_prior = h[:, n_h2:n_h2 + n_z, :, :]
            logsd_prior = h[:, n_h2 + n_z:n_h2 + 2 * n_z, :, :]
            z = mean_prior + eps * T.exp(logsd_prior)
        elif prior == 'made':
            print "TODO: SAMPLES FROM MADE PRIOR"
            z = eps
        elif prior == 'bernoulli':
            assert posterior == 'down_bernoulli'
            pz_p = bernoulli_p(h[:, n_h2:n_h2 + n_z, :, :])
            if False:
                z = N.rand.bernoulli(pz_p).sample
            else:
                print "Alert: Sampling using Gaussian approximation"
                z = pz_p + T.sqrt(pz_p * (1 - pz_p)) * eps
            z = prior_conv1(2 * z - 1, w)

        h = T.concatenate([h_det, z], axis=1)
        if downsample:
            if downsample_type == 'nn':
                input = N.conv.upsample2d_nearest_neighbour(input)
            elif downsample_type == 'conv':
                input = down_conv3(input, w)

        output = input + .1 * down_conv2(down_nl2(h, w), w)
        return output

    def postup(updates, w):
        modules = [up_conv1, up_conv2, down_conv1, down_conv2]
        if downsample and downsample_type == 'conv':
            modules += [up_conv3, down_conv3]
        if prior_conv1 is not None:
            modules.append(prior_conv1)
        if posterior_conv1 is not None:
            modules.append(posterior_conv1)
        if posterior_conv2 is not None:
            modules.append(posterior_conv2)
        if posterior_conv3 is not None:
            modules.append(posterior_conv3)
        if posterior_conv4 is not None:
            modules.append(posterior_conv4)
        for m in modules:
            updates = m.postup(updates, w)
        return updates

    return G.Struct(up=up, down_q=down_q, down_p=down_p, postup=postup, w=w)
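The ARW/IAF updates above all follow the same change-of-variables bookkeeping: after z' = (z - m) / exp(s), the log-density of the transformed sample gains +s per element (the code's logqs += arw_logsd). A NumPy check with constant m and s and a standard-normal base sample, where the transformed density is known in closed form:

import numpy as np

def gaussian_logp(z, mean, logvar):
    return -.5 * (np.log(2. * np.pi) + logvar + (z - mean)**2 / np.exp(logvar))

z = np.random.randn(5)
logq = gaussian_logp(z, 0., 0.)        # log q(z) under the base N(0, 1)

m, s = .3, -.2                         # one flow step: z' = (z - m) / exp(s)
z_new = (z - m) / np.exp(s)
logq_new = logq + s                    # the density update used in the code

# exact density of z': N(-m / exp(s), exp(-2s)) when z ~ N(0, 1)
print(np.allclose(logq_new, gaussian_logp(z_new, -m / np.exp(s), -2. * s)))  # True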
Code Example #18
File: ar.py Project: SynthAI/SynthAI
def resnet(name,
           depth,
           n_in,
           n_h,
           n_out,
           size_kernel=(3, 3),
           flipmask=False,
           nl='elu',
           layertype='a',
           factor=4,
           weightsharing=False,
           w={}):

    if isinstance(n_out, int):
        n_out = [n_out]

    conv_input = conv2d(name + '_input',
                        n_in,
                        n_h,
                        size_kernel,
                        False,
                        flipmask,
                        w=w)

    # parameters for hidden units
    resnet = []
    for i in range(depth):
        _name = name + '_' + str(i)
        if weightsharing:
            _name = name + '[sharedw]_' + str(i) + '[/sharedw]'
        if layertype == 'a':
            resnet.append(resnet_layer_a(_name, n_h, nl, w))
        elif layertype == 'b':
            resnet.append(resnet_layer_b(_name, n_h, factor, nl, w))
        else:
            raise Exception()

    # parameters for output
    conv_out = [
        conv2d(name + '_out_' + str(i),
               n_h,
               n_out[i],
               size_kernel,
               True,
               flipmask,
               w=w) for i in range(len(n_out))
    ]

    def f(h, h_context, w, return_hiddens=False):

        h = conv_input(h, w)
        if h_context is not None:
            h += h_context

        hiddens = []
        for i in range(len(resnet)):
            h = resnet[i](h, w)
            hiddens.append(h)

        out = []
        for i in range(len(n_out)):
            _out = conv_out[i](h, w)
            out.append(_out)

        if len(n_out) == 1: out = out[0]

        if return_hiddens:
            return hiddens, out

        return out

    def postup(updates, w):
        for l in resnet:
            updates = l.postup(updates, w)
        for l in conv_out:
            updates = l.postup(updates, w)
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #19
File: ar.py Project: SynthAI/SynthAI
def multiconv2d(name,
                n_in,
                n_h,
                n_out,
                size_kernel,
                flipmask,
                nl='relu',
                w={}):

    if isinstance(n_out, int):
        n_out = [n_out]

    # parameters for hidden units
    sizes = [n_in] + n_h
    conv_h = []
    f_nl_h = []
    for i in range(len(n_h)):
        conv_h.append(
            conv2d(name + '_' + str(i),
                   sizes[i],
                   sizes[i + 1],
                   size_kernel,
                   False,
                   flipmask,
                   w=w))
        f_nl_h.append(
            N.nonlinearity(name + '_' + str(i) + '_nl', nl, sizes[i + 1], w=w))

    # parameters for output
    conv_out = []
    for i in range(len(n_out)):
        conv_out.append(
            conv2d(name + '_out_' + str(i),
                   sizes[-1],
                   n_out[i],
                   size_kernel,
                   True,
                   flipmask,
                   w=w))

    def f(h, context, w, return_hiddens=False):
        # context can be None when there is no context input

        hiddens = []
        for i in range(len(n_h)):
            h = conv_h[i](h, w)  # + context
            if i == 0: h += context
            h = f_nl_h[i](h, w)
            hiddens.append(h)

        out = []
        for i in range(len(n_out)):
            _out = conv_out[i](h, w)
            out.append(_out)

        if len(n_out) == 1: out = out[0]

        if return_hiddens:
            return hiddens, out

        return out

    def postup(updates, w):
        for l in conv_h:
            updates = l.postup(updates, w)
        for l in conv_out:
            updates = l.postup(updates, w)
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
Code Example #20
File: ar.py Project: SynthAI/SynthAI
def conv2d(name,
           n_in,
           n_out,
           size_kernel=(3, 3),
           zerodiagonal=True,
           flipmask=False,
           pad_channel=True,
           border_mode='valid',
           zeroinit=False,
           l2norm=True,
           w={}):

    do_scale = False
    if zeroinit:
        l2norm = False
        do_scale = True

    if not pad_channel:
        border_mode = 'same'
        print 'No pad_channel, changing border_mode to same'

    #if 'whitener' not in name:
    #    pad_channel = False
    #    border_mode = 'same'

    if '[sharedw]' in name and '[/sharedw]' in name:
        name_w = name
        pre, b = name.split("[sharedw]")
        c, post = b.split("[/sharedw]")
        name_w = pre + "[s]" + post
        name = pre + c + post  # Don't share the bias and scales
        #name = name_w # Also share the bias and scales
    else:
        name_w = name

    assert border_mode in ['valid', 'full', 'same']

    _n_in = n_in

    if pad_channel:
        if size_kernel[0] > 1 or size_kernel[1] > 1:
            assert size_kernel[0] == size_kernel[1]
            assert border_mode == 'valid'
            _n_in += 1
        else:
            pad_channel = False

    if border_mode == 'same':
        assert size_kernel[0] % 2 == 1
        border_mode = ((size_kernel[0] - 1) // 2, (size_kernel[1] - 1) // 2)

    # Build the autoregressive mask
    l = (size_kernel[0] - 1) // 2
    m = (size_kernel[1] - 1) // 2
    mask = np.ones((n_out, _n_in, size_kernel[0], size_kernel[1]),
                   dtype=G.floatX)
    mask[:, :, :l, :] = 0
    mask[:, :, l, :m] = 0

    if n_out >= n_in:
        assert n_out % n_in == 0
        k = n_out // n_in
        for i in range(n_in):
            mask[i * k:(i + 1) * k, i + 1:, l, m] = 0
            if zerodiagonal:
                mask[i * k:(i + 1) * k, i:i + 1, l, m] = 0
    else:
        assert n_in % n_out == 0
        k = n_in // n_out
        for i in range(n_out):
            mask[i:i + 1, (i + 1) * k:, l, m] = 0
            if zerodiagonal:
                mask[i:i + 1, i * k:(i + 1) * k, l, m] = 0
    if flipmask:
        mask = mask[::-1, ::-1, ::-1, ::-1]

    def l2normalize(kerns):
        if zerodiagonal:
            # to prevent NaN gradients
            # TODO: smarter solution (also see below)
            l = (size_kernel[0] - 1) // 2
            m = (size_kernel[1] - 1) // 2
            if n_out >= n_in:
                kerns = T.set_subtensor(kerns[:n_out // n_in, :, l, m], 0.)
            else:
                kerns = T.set_subtensor(kerns[:1, :, l, m], 0.)

        targetnorm = 1.
        norm = T.sqrt((kerns**2).sum(axis=(1, 2, 3), keepdims=True))
        norm += 1e-8
        return kerns * (targetnorm / norm)

    def maxconstraint(kerns):
        return kerns * (maxweight / T.maximum(
            maxweight,
            abs(kerns).max(axis=(1, 2, 3), keepdims=True)))
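    # maxweight, logscale, logscale_scale, bn and do_constant_rescale are
    # presumably module-level settings in ar.py, outside this excerpt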

    if zeroinit:
        w[name_w + '_w'] = G.sharedf(
            np.zeros((n_out, _n_in, size_kernel[0], size_kernel[1])))
    else:
        w[name_w + '_w'] = G.sharedf(
            mask * 0.05 *
            np.random.randn(n_out, _n_in, size_kernel[0], size_kernel[1]))
        if maxweight > 0:
            w[name_w + '_w'].set_value(
                maxconstraint(w[name_w + '_w']).tag.test_value)

    w[name + '_b'] = G.sharedf(np.zeros((n_out, )))

    if l2norm or do_scale:
        if logscale:
            w[name + '_s'] = G.sharedf(np.zeros((n_out, )))
        else:
            w[name + '_s'] = G.sharedf(np.ones((n_out, )))
    elif do_constant_rescale:
        print "WARNING: constant rescale, these weights aren't saved"
        constant_rescale = G.sharedf(np.ones((n_out, )))

    def f(h, w):
        input_shape = h.tag.test_value.shape[1:]

        _input = h

        if pad_channel:
            h = N.conv.pad2dwithchannel(h, size_kernel)

        kerns = mask * w[name_w + '_w']
        if l2norm:
            kerns = l2normalize(kerns)
        if l2norm or do_scale:
            if logscale:
                kerns *= T.exp(logscale_scale * w[name + '_s']).dimshuffle(
                    0, 'x', 'x', 'x')
            else:
                kerns *= w[name + '_s'].dimshuffle(0, 'x', 'x', 'x')
        elif do_constant_rescale:
            kerns *= constant_rescale.dimshuffle(0, 'x', 'x', 'x')

        h = N.conv.dnn_conv(h, kerns, border_mode=border_mode)

        # Center
        if bn:  # mean-only batch norm
            h -= h.mean(axis=(0, 2, 3), keepdims=True)

        h += w[name + '_b'].dimshuffle('x', 0, 'x', 'x')

        if '__init' in w and not zeroinit:

            # Std
            data_std = h.std(axis=(0, 2, 3))
            num_zeros = (data_std.tag.test_value == 0).sum()
            if num_zeros > 0:
                print "Warning: Stdev=0 for " + str(
                    num_zeros
                ) + " features in " + name + ". Skipping data-dependent init."
            else:
                if name + '_s' in w:
                    if logscale:
                        w[name + '_s'].set_value(
                            -T.log(data_std).tag.test_value / logscale_scale)
                    else:
                        w[name + '_s'].set_value(
                            (1. / data_std).tag.test_value)
                elif do_constant_rescale:
                    constant_rescale.set_value((1. / data_std).tag.test_value)
                    #w[name+'_w'].set_value((kerns / std.dimshuffle(0,'x','x','x')).tag.test_value)

                h /= data_std.dimshuffle('x', 0, 'x', 'x')

                # Mean
                mean = h.mean(axis=(0, 2, 3))
                w[name + '_b'].set_value(-mean.tag.test_value)
                h -= mean.dimshuffle('x', 0, 'x', 'x')

            #print name, w[name+'_w'].get_value().mean(), w[name+'_w'].get_value().std(), w[name+'_w'].get_value().max()

        if '__init' not in w:
            output_shape = h.tag.test_value.shape[1:]
            print 'ar.conv2d', name, input_shape, output_shape, size_kernel, zerodiagonal, flipmask, pad_channel, border_mode, zeroinit, l2norm

        #print name, abs(h).max().tag.test_value, abs(h).min().tag.test_value
        #h = T.printing.Print(name)(h)

        return h

    # Normalize weights to _norm L2 norm
    # TODO: check whether only_upper_bounds here really helps
    # (the effect is a higher learning rate in the beginning of training)
    def postup(updates, w):
        updates[w[name_w + '_w']] = mask * updates[w[name_w + '_w']]
        if l2norm and maxweight > 0.:
            updates[w[name_w + '_w']] = maxconstraint(updates[w[name_w +
                                                                '_w']])
        return updates

    return G.Struct(__call__=f, w=w, postup=postup)
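
For intuition: the autoregressive masking this conv layer relies on can be sketched in a few lines of standalone NumPy. The helper below (the name make_ar_mask is made up for illustration, not this codebase's API) builds a raster-scan kernel mask; zerodiagonal=True corresponds to excluding the center tap so an output position never sees its own input.

import numpy as np

def make_ar_mask(kh, kw, zerodiagonal=True):
    # Raster-scan ordering: allow taps strictly above the center row,
    # plus taps left of the center within the center row.
    mask = np.zeros((kh, kw), dtype='float32')
    mask[:kh // 2, :] = 1.
    mask[kh // 2, :kw // 2] = 1.
    if not zerodiagonal:
        mask[kh // 2, kw // 2] = 1.  # include the center tap itself
    return mask

print(make_ar_mask(3, 3, zerodiagonal=True))
# [[ 1.  1.  1.]
#  [ 1.  0.  0.]
#  [ 0.  0.  0.]]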
Code example #21
File: ar.py  Project: SynthAI/SynthAI
def linear(name, n_in, n_out, diagonalzeros, l2norm=True, w={}):
    assert n_in % n_out == 0 or n_out % n_in == 0

    mask = np.ones((n_in, n_out), dtype=G.floatX)
    if n_out >= n_in:
        k = n_out / n_in
        for i in range(n_in):
            mask[i + 1:, i * k:(i + 1) * k] = 0
            if diagonalzeros:
                mask[i:i + 1, i * k:(i + 1) * k] = 0
    else:
        k = n_in / n_out
        for i in range(n_out):
            mask[(i + 1) * k:, i:i + 1] = 0
            if diagonalzeros:
                mask[i * k:(i + 1) * k, i:i + 1] = 0

    # L2 normalization of weights
    def l2normalize(_w, axis=0):
        if diagonalzeros:
            # to prevent NaN gradients
            # TODO: smarter solution (also see below)
            if n_out >= n_in:
                _w = T.set_subtensor(_w[:, :n_out / n_in], 0.)
            else:
                _w = T.set_subtensor(_w[:, :1], 0.)
        targetnorm = 1.
        norm = T.sqrt((_w**2).sum(axis=axis, keepdims=True))
        norm += 1e-8
        new_w = _w * (targetnorm / norm)
        return new_w

    def maxconstraint(_w):
        return _w * (maxweight / T.maximum(maxweight,
                                           abs(_w).max(axis=0, keepdims=True)))

    w[name + '_w'] = G.sharedf(mask * 0.05 * np.random.randn(n_in, n_out))
    if maxweight > 0:
        w[name + '_w'].set_value(maxconstraint(w[name + '_w']).tag.test_value)

    w[name + '_b'] = G.sharedf(np.zeros((n_out, )))
    if l2norm:
        if logscale:
            w[name + '_s'] = G.sharedf(np.zeros((n_out, )))
        else:
            w[name + '_s'] = G.sharedf(np.ones((n_out, )))
    elif do_constant_rescale:
        print "WARNING: constant rescale, these weights aren't saved"
        constant_rescale = G.sharedf(np.zeros((n_out, )))

    def f(h, w):
        _input = h
        _w = mask * w[name + '_w']
        if l2norm:
            _w = l2normalize(_w)
        h = T.dot(h, _w)
        if l2norm:
            if logscale:
                h *= T.exp(logscale_scale * w[name + '_s'])
            else:
                h *= abs(w[name + '_s'])
        elif do_constant_rescale:
            h *= T.exp(constant_rescale)

        h += w[name + '_b']

        if '__init' in w:
            # Std
            std = (1. / init_stdev) * h.std(axis=0)
            std += (std <= 0)
            std += 1e-8
            if name + '_s' in w:
                if logscale:
                    w[name + '_s'].set_value(-T.log(std).tag.test_value /
                                             logscale_scale)
                else:
                    w[name + '_s'].set_value((1. / std).tag.test_value)
            elif do_constant_rescale:
                constant_rescale.set_value(-T.log(std).tag.test_value)
                #w[name+'_w'].set_value((_w / std.dimshuffle('x',0)).tag.test_value)

            h /= std.dimshuffle('x', 0)

            # Mean
            mean = h.mean(axis=0)
            w[name + '_b'].set_value(-mean.tag.test_value)
            h -= mean.dimshuffle('x', 0)

            #print name, w[name+'_w'].get_value().mean(), w[name+'_w'].get_value().std(), w[name+'_w'].get_value().max()

        #print name, abs(h).max().tag.test_value, abs(h).min().tag.test_value
        #h = T.printing.Print(name)(h)

        return h

    # Post updates: normalize weights to unit L2 norm
    def postup(updates, w):
        updates[w[name + '_w']] = mask * updates[w[name + '_w']]
        if l2norm and maxweight > 0.:
            updates[w[name + '_w']] = maxconstraint(updates[w[name + '_w']])
        return updates

    return G.Struct(__call__=f, postup=postup, w=w)
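
As a standalone sanity check on the block mask built at the top of linear(), the NumPy sketch below (the helper name build_mask is illustrative only) reconstructs the n_out >= n_in case: with diagonalzeros set, output block j connects only to input dimensions strictly below j.

import numpy as np

def build_mask(n_in, n_out, diagonalzeros):
    assert n_out % n_in == 0
    k = n_out // n_in
    mask = np.ones((n_in, n_out), dtype='float32')
    for i in range(n_in):
        mask[i + 1:, i * k:(i + 1) * k] = 0
        if diagonalzeros:
            mask[i:i + 1, i * k:(i + 1) * k] = 0
    return mask

print(build_mask(n_in=3, n_out=6, diagonalzeros=True))
# [[ 0.  0.  1.  1.  1.  1.]
#  [ 0.  0.  0.  0.  1.  1.]
#  [ 0.  0.  0.  0.  0.  0.]]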
Code example #22
def linear_l2(name, n_in, n_out, w):
    
    # L2 normalization of weights
    def l2normalize(_w):
        targetnorm = 1.
        norm = T.sqrt((_w**2).sum(axis=0, keepdims=True))
        norm += 1e-8  # guard against zero-norm columns (cf. linear() above)
        return _w * (targetnorm / norm)
    def maxconstraint(_w):
        return _w * (maxweight / T.maximum(maxweight, abs(_w).max(axis=0, keepdims=True)))
    
    w[name+'_w'] = G.sharedf(0.05*np.random.randn(n_in,n_out))
    
    if maxweight > 0:
        w[name+'_w'].set_value(maxconstraint(w[name+'_w']).tag.test_value)
    w[name+'_b'] = G.sharedf(np.zeros((n_out,)))
    if l2norm:
        if logscale:
            w[name+'_s'] = G.sharedf(np.zeros((n_out,)))
        else:
            w[name+'_s'] = G.sharedf(np.ones((n_out,)))
    else:
        print "WARNING: constant rescale, these weights aren't saved"
        constant_rescale = G.sharedf(np.zeros((n_out,)))
    
    
    def f(h, w):
        _w = w[name+'_w']
        if l2norm:
            _w = l2normalize(_w)
        h = T.dot(h, _w)
        if l2norm:
            if logscale:
                h *= T.exp(logscale_scale*w[name+'_s'])
            else:
                h *= abs(w[name+'_s'])
        else:
            h *= T.exp(constant_rescale)
        h += w[name+'_b']
        
        if '__init' in w:
            # Std
            std = (1./init_stdev) * h.std(axis=0) + 1e-8
            if name+'_s' in w:
                if logscale:
                    w[name+'_s'].set_value(-T.log(std).tag.test_value/logscale_scale)
                else:
                    w[name+'_s'].set_value((1./std).tag.test_value)
            else:
                constant_rescale.set_value(-T.log(std).tag.test_value)
                #w[name+'_w'].set_value((_w / std.dimshuffle('x',0)).tag.test_value)
            
            h /= std.dimshuffle('x',0)
            
            # Mean
            mean = h.mean(axis=0)
            w[name+'_b'].set_value(-mean.tag.test_value)
            h -= mean.dimshuffle('x',0)
            
            #print name, abs(w[name+'_w']).get_value().mean(), w[name+'_w'].get_value().std(), w[name+'_w'].get_value().max()

        #print name, abs(h).max().tag.test_value, abs(h).min().tag.test_value
        #h = T.printing.Print(name)(h)
        
        return h
    
    # Post updates: normalize weights to unit L2 norm
    def postup(updates, w):
        if l2norm and maxweight>0:
            updates[w[name+'_w']] = maxconstraint(updates[w[name+'_w']])
        return updates
    
    return G.Struct(__call__=f, postup=postup, w=w)
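
Both linear layers share the same weight-normalization recipe: constrain weight columns to unit L2 norm, then use one batch of data to set the scale to 1/std and the bias to -mean of the resulting pre-activations. A minimal NumPy sketch of that data-dependent init follows; all names are illustrative, not the codebase's API.

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(256, 50).astype('float32')            # an init batch
w = (0.05 * rng.randn(50, 10)).astype('float32')
w /= np.sqrt((w ** 2).sum(axis=0, keepdims=True))   # unit-norm columns

h = x.dot(w)
s = 1. / h.std(axis=0)          # scale <- 1/std (stored as -log(std) if logscale)
b = -(h * s).mean(axis=0)       # bias  <- -mean of the rescaled activations
h = h * s + b
print(h.mean(axis=0).round(5))  # ~ all zeros
print(h.std(axis=0).round(5))   # ~ all ones: activations start whitened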
Code example #23
File: models.py  Project: SynthAI/SynthAI
def fcvae(shape_x,
          depth_model,
          depth_ar,
          n_h1,
          n_h2,
          n_z,
          posterior,
          px='logistic',
          nl='softplus',
          alpha=0.002,
          beta1=0.1,
          beta2=0.001,
          share_w=False,
          data_init=None):
    _locals = locals()
    _locals.pop('data_init')
    print 'CVAE9 with ', _locals
    #assert posterior in ['diag1','diag2','iaf_linear','iaf_nonlinear']
    assert px in ['logistic', 'bernoulli']
    w = {}  # model params

    kernel_h = (1, 1)
    n_x = shape_x[0] * shape_x[1] * shape_x[2]

    # Input whitening
    if px == 'logistic':
        w['logsd_x'] = G.sharedf(0.)

    # encoder
    x_enc = N.conv.conv2d('x_enc', n_x, n_h1, (1, 1), w=w)
    x_dec = N.conv.conv2d('x_dec', n_h1, n_x, (1, 1), w=w)
    x_dec_nl = N.nonlinearity('x_dec_nl', nl, n_h1, w)

    layers = []
    for i in range(depth_model):
        name = str(i)
        if share_w:
            name = '[sharedw]' + str(i) + '[/sharedw]'
        layers.append(
            cvae_layer(name, posterior, n_h1, n_h2, n_z, depth_ar, False, nl,
                       kernel_h, share_w, w))

    # top-level value
    #w['h_top'] = G.sharedf(np.zeros((n_h1,)))
    w['h_top'] = G.sharedf(np.random.normal(0, 0.01, size=(n_h1, )))

    # Initialize variables
    x = T.tensor4('x')
    x.tag.test_value = data_init['x']
    n_batch_test = data_init['x'].shape[0]
    _x = T.clip(x / 255., 0, 1)

    # Objective function
    def f_cost(w, train=True):

        results = {}

        h = x_enc(_x.reshape((-1, n_x, 1, 1)) - .5, w)

        obj_logpz = 0
        obj_logqz = 0

        # bottom-up encoders
        for i in range(depth_model):
            h = layers[i].up(h, w)

        # top-level activations
        h = T.tile(w['h_top'].dimshuffle('x', 0, 'x', 'x'),
                   (_x.shape[0], 1, 1, 1))

        # top-down priors, posteriors and decoders
        for i in list(reversed(range(depth_model))):
            h, _obj_logqz, _obj_logpz = layers[i].down_q(h, train, w)
            obj_logqz += _obj_logqz
            obj_logpz += _obj_logpz
            results['cost_z' + str(i).zfill(3)] = _obj_logqz - _obj_logpz

        output = .1 * x_dec(x_dec_nl(h, w), w).reshape(
            (-1, shape_x[0], shape_x[1], shape_x[2]))

        # empirical distribution
        if px == 'logistic':
            mean_x = T.clip(output, -.5, .5)
            logsd_x = 0 * mean_x + w['logsd_x']
            obj_logpx = N.rand.discretized_logistic(mean_x, logsd_x, 1 / 255.,
                                                    _x - .5).logp

            obj = obj_logpz - obj_logqz + obj_logpx
            # Compute the bits per pixel
            obj *= (1. / np.prod(shape_x) * 1. / np.log(2.)).astype('float32')

        elif px == 'bernoulli':
            prob_x = T.nnet.sigmoid(output)
            prob_x = T.minimum(prob_x, 1 - 1e-7)
            prob_x = T.maximum(prob_x, 1e-7)
            #prob_x = T.printing.Print('prob_x')(prob_x)
            obj_logpx = N.rand.bernoulli(prob_x, _x).logp

            #obj_logqz = T.printing.Print('obj_logqz')(obj_logqz)
            #obj_logpz = T.printing.Print('obj_logpz')(obj_logpz)
            #obj_logpx = T.printing.Print('obj_logpx')(obj_logpx)
            obj = obj_logpz - obj_logqz + obj_logpx
            #obj = T.printing.Print('obj')(obj)

        results['cost_x'] = -obj_logpx
        results['cost'] = -obj
        return results

        #print 'obj_logpz', obj_logpz.tag.test_value
        #print 'obj_logqz', obj_logqz.tag.test_value
        #print 'obj_logpx', obj_x.tag.test_value
        #obj_logpz = T.printing.Print('obj_logpz')(obj_logpz)
        #obj_logqz = T.printing.Print('obj_logqz')(obj_logqz)
        #obj_x = T.printing.Print('obj_logpx')(obj_x)

    # Turns Gaussian noise 'eps' into a sample
    def f_decoder(eps, w):

        # top-level activations
        h = T.tile(w['h_top'].dimshuffle('x', 0, 'x', 'x'),
                   (eps['eps_0'].shape[0], 1, 1, 1))

        # top-down priors, posteriors and decoders
        for i in list(reversed(range(depth_model))):
            h = layers[i].down_p(h, eps['eps_' + str(i)], w)

        output = .1 * x_dec(x_dec_nl(h, w), w).reshape(
            (-1, shape_x[0], shape_x[1], shape_x[2]))
        if px == 'logistic':
            mean_x = T.clip(output[:, :, :, :] + .5, 0, 1)
        elif px == 'bernoulli':
            mean_x = T.nnet.sigmoid(output)
        image = (255. * T.clip(mean_x, 0, 1)).astype('uint8')
        return image

    def f_eps(n_batch, w):
        eps = {}
        for i in range(depth_model):
            eps['eps_' + str(i)] = G.rng_curand.normal((n_batch, n_z, 1, 1),
                                                       dtype=floatX)
        return eps

    def postup(updates, w):
        nodes = [x_enc, x_dec]
        for n in nodes:
            updates = n.postup(updates, w)
        for i in range(depth_model):
            updates = layers[i].postup(updates, w)

        return updates

    # Compile init function
    if data_init is not None:
        w['__init'] = OrderedDict()
        f_cost(w)
        w.pop('__init')
        #for i in w: print i, abs(w[i].get_value()).min(), abs(w[i].get_value()).max(), abs(w[i].get_value()).mean()

    # Compile training function
    results = f_cost(w)
    updates, (w_avg, ) = G.misc.optim.AdaMaxAvg([w],
                                                results['cost'],
                                                alpha=-alpha,
                                                beta1=beta1,
                                                beta2=beta2,
                                                disconnected_inputs='ignore')
    #todo: replace postup with below
    #w['_updates'] = updates
    #f_cost(w)
    #updates = w.pop('_updates')

    updates = postup(updates, w)
    f_train = G.function({'x': x}, results['cost'], updates=updates)

    # Compile evaluation function
    results = f_cost(w_avg, False)
    f_eval = G.function({'x': x}, results)

    # Compile epsilon generating function
    n_batch = T.lscalar()
    n_batch.tag.test_value = 16
    eps = f_eps(n_batch, w)
    f_eps = G.function({'n_batch': n_batch}, eps)

    # Compile sampling function
    eps = {}
    for i in range(depth_model):
        eps['eps_' + str(i)] = T.tensor4('eps' + str(i))
        eps['eps_' + str(i)].tag.test_value = np.random.randn(
            n_batch_test, n_z, 1, 1).astype(floatX)
    image = f_decoder(eps, w_avg)
    f_decode = G.function(eps, image)

    return G.Struct(train=f_train,
                    eval=f_eval,
                    decode=f_decode,
                    eps=f_eps,
                    w=w,
                    w_avg=w_avg)
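
Note the unit conversion in f_cost: multiplying the ELBO (in nats) by 1/(prod(shape_x) * log 2) yields bits per pixel/dimension, the standard unit for density models on images. A standalone sketch of that conversion, with a made-up ELBO value for illustration:

import numpy as np

shape_x = (3, 32, 32)             # e.g. CIFAR-10
elbo_nats = -8000.                # hypothetical per-image ELBO, in nats
bits_per_dim = -elbo_nats / (np.prod(shape_x) * np.log(2.))
print(bits_per_dim)               # ~3.76 bits/dim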
Code example #24
File: rand.py  Project: SynthAI/SynthAI
def RandomVariable(sample, logp, entr, **params):
    return G.Struct(sample=sample, logp=logp, entr=entr, **params)
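
A minimal usage sketch for this constructor, wrapping a unit Gaussian. The Struct stand-in and the exact calling conventions of sample/logp/entr are assumptions made so the sketch runs standalone; the real code uses G.Struct.

import math
import numpy as np

class Struct(dict):
    __getattr__ = dict.__getitem__  # standalone stand-in for G.Struct

def RandomVariable(sample, logp, entr, **params):
    return Struct(sample=sample, logp=logp, entr=entr, **params)

rv = RandomVariable(
    sample=lambda shape: np.random.randn(*shape),
    logp=lambda v: (-.5 * v ** 2 - .5 * math.log(2 * math.pi)).sum(),
    entr=lambda n_dims: n_dims * .5 * math.log(2 * math.pi * math.e))
print(rv.logp(rv.sample((5,))))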
Code example #25
def function(x, y, lazy=False, _debug=False, checknan='raise', **kwargs):
    # Default keyword arguments
    if 'on_unused_input' not in kwargs:
        kwargs['on_unused_input'] = 'warn'
    if 'mode' not in kwargs:
        kwargs['mode'] = default_function_mode
    # Order the input dict
    x = ndict.ordered(ndict.flatten(x))
    # Check the output dict
    return_single_y = False
    if not isinstance(y, dict):
        return_single_y = True
        y = {str(y): y}
    y = ndict.ordered(y)
    # Lazily compiled function (saves a lot of time)
    f = [None]

    def _compile(verbose=True):
        t0 = time.time()
        print 'Compiling... ',
        #print '[graphy] Compiling function '+str(x.keys())+' => '+str(y.keys())+' ...'
        sys.stdout.flush()
        f[0] = theano.function(x.values(), y.values(), **kwargs)
        print "%.2f" % (time.time() - t0), 's'

    if not lazy:
        _compile()
    # The function to be called
    def func(data, n_batch=0, randomorder=True, data_global={}):
        data = ndict.ordered(ndict.flatten(data))
        data_global = ndict.ordered(ndict.flatten(data_global))
        # Check if keys of 'x' and 'inputs' match
        allkeys = (data.keys() + data_global.keys())
        for i in range(len(data)):
            if x.keys()[i] not in allkeys:
                raise Exception('Non-matching keys:' + str(allkeys) + ' vs. ' +
                                str(x.keys()))
        # Compile function if not already done
        if f[0] is None:
            _compile()
        if n_batch <= 0:
            # Get results
            _data = data.copy()
            _data.update(data_global)
            inputs_ordered = ndict.orderedvals((_data, ))
            _result = f[0](*inputs_ordered)
            # Put it in a dictionary with the corresponding keys
            result = {y.keys()[i]: _result[i] for i in range(len(y))}
        else:
            # Minibatch-based evaluation.
            # This assumes that input and output are tensors, and the first dimension iterates over datapoints
            n_tot = data.itervalues().next().shape[0]
            n_minibatches = int(math.ceil(1. * n_tot / n_batch))

            n_tile = 1
            if n_batch > n_tot:
                assert n_batch % n_tot == 0
                n_tile = n_batch / n_tot

            indices = np.tile(np.arange(n_tot), n_tile)
            if randomorder:
                np.random.shuffle(indices)
                adict = dict(zip(np.tile(np.arange(n_tot), n_tile), indices))
                indices_inverse = sorted(adict, key=adict.get)

            results = []
            for i in range(n_minibatches):
                data_minibatch = ndict.getRowsFromIndices(
                    data, indices[i * n_batch:(i + 1) * n_batch])
                data_minibatch.update(data_global)
                inputs_ordered = ndict.orderedvals((data_minibatch, ))
                results.append(f[0](*inputs_ordered))
                if _debug:
                    print 'Function debug', i, results[-1]
                if checknan == 'raise':
                    if np.isnan(np.sum(results[-1])):
                        print results[-1]
                        raise Exception("NaN detected")
            result = {
                y.keys()[i]:
                np.concatenate([results[j][i] for j in range(n_minibatches)])
                for i in range(len(y))
            }
            if randomorder:
                result = ndict.getRowsFromIndices(result, indices_inverse)

        result = OrderedDict(sorted(result.items()))

        # Return result
        #raise Exception()
        if return_single_y:
            return result[result.keys()[0]]
        return result

    # Return the func
    return G.Struct(__call__=func, f=f)
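
Hypothetical usage of this wrapper (assuming Theano and the surrounding graphy module are importable, and that the returned G.Struct is callable like the conv layers above). It shows the dict-in/dict-out convention plus transparent minibatched evaluation:

import numpy as np
import theano.tensor as T

x = T.matrix('x')
f = function({'x': x}, {'y': T.tanh(x).sum(axis=1)}, lazy=True)

data = {'x': np.random.randn(1000, 20).astype('float32')}
out = f(data, n_batch=100)   # compiled on first call, run in 10 minibatches
print(out['y'].shape)        # (1000,) -- rows restored to original order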
Code example #26
File: conv.py  Project: SynthAI/SynthAI
def conv2d(name, n_in, n_out, size_kernel=(3,3), pad_channel=True, border_mode='valid', downsample=1, upsample=1, datainit=True, zeroinit=False, l2norm=True, w={}):
    
    # TODO FIX: blows up parameters if all inputs are 0
    
    if not pad_channel:
        border_mode = 'same'
        print 'No pad_channel, changing border_mode to same'

    if '[sharedw]' in name and '[/sharedw]' in name:
        name_w = name
        pre, b = name.split("[sharedw]")
        number, post = b.split("[/sharedw]")
        name_w = pre+"[s]"+post
        name = pre+number+post # Don't share the bias and scales
        #name = name_w # Also share the bias and scales
    else:
        name_w = name
    
    if type(downsample) == int:
        downsample = (downsample,downsample)
    assert type(downsample) == tuple
    assert border_mode in ['valid','full','same']
    
    _n_in = n_in
    _n_out = n_out
    if upsample > 1:
        _n_out = n_out * upsample**2
    
    if pad_channel:
        if size_kernel[0] > 1 or size_kernel[1] > 1:
            assert size_kernel[0] == size_kernel[1]
            assert border_mode == 'valid'
            _n_in += 1
        else:
            pad_channel = False
    
    if border_mode == 'same':
        assert size_kernel[0]%2 == 1
        border_mode = ((size_kernel[0]-1)/2,(size_kernel[1]-1)/2)
    
    def l2normalize(kerns):
        norm = T.sqrt((kerns**2).sum(axis=(1,2,3), keepdims=True))
        norm += 1e-8  # guard against all-zero kernels (see zeroinit above)
        return kerns / norm
    def maxconstraint(kerns):
        return kerns * (maxweight / T.maximum(maxweight, abs(kerns).max(axis=(1,2,3), keepdims=True)))

    if zeroinit:
        w[name_w+'_w'] = G.sharedf(np.zeros((_n_out, _n_in, size_kernel[0], size_kernel[1])))
        datainit = False
    else: 
        w[name_w+'_w'] = G.sharedf(0.05*np.random.randn(_n_out, _n_in, size_kernel[0], size_kernel[1]))
        if maxweight > 0:
            w[name_w+'_w'].set_value(maxconstraint(w[name_w+'_w']).tag.test_value)
    
    w[name+'_b'] = G.sharedf(np.zeros((_n_out,)))
    if bias_logscale:
        w[name+'_bs'] = G.sharedf(0.)
    
    if l2norm:
        if logscale:
            w[name+'_s'] = G.sharedf(np.zeros((_n_out,)))
        else:
            w[name+'_s'] = G.sharedf(np.ones((_n_out,)))
    elif do_constant_rescale:
        print "WARNING: constant rescale, these weights aren't saved"
        constant_rescale = G.sharedf(np.ones((_n_out,)))
    
    
    def f(h, w):
        
        input_shape = h.tag.test_value.shape[1:]

        _input = h
        
        if pad_channel:
            h = pad2dwithchannel(h, size_kernel)

        kerns = w[name_w+'_w']
        #if name == '1_down_conv1':
        #    kerns = T.printing.Print('kerns 1')(kerns)
        if l2norm:
            kerns = l2normalize(kerns)
            if logscale:
                kerns *= T.exp(logscale_scale*w[name+'_s']).dimshuffle(0,'x','x','x')
            else:
                kerns *= w[name+'_s'].dimshuffle(0,'x','x','x')
        elif do_constant_rescale:
            kerns *= constant_rescale.dimshuffle(0,'x','x','x')
        
        #if name == '1_down_conv1':
        #    kerns = T.printing.Print('kerns 2')(kerns)
        
        h = dnn_conv(h, kerns, border_mode=border_mode, subsample=downsample)

        # Mean-only batch norm
        if bn: 
            h -= h.mean(axis=(0,2,3), keepdims=True)
        
        _b = w[name+'_b'].dimshuffle('x',0,'x','x')
        if bias_logscale:
            _b *= T.exp(logscale_scale * w[name+'_bs'])
        h += _b
        
        if '__init' in w and datainit:
            
            # Std
            data_std = h.std(axis=(0,2,3))
            num_zeros = (data_std.tag.test_value == 0).sum()
            if num_zeros > 0:
                print "Warning: Stdev=0 for "+str(num_zeros)+" features in "+name+". Skipping data-dependent init."
            else:
                
                std = (1./init_stdev) * data_std
                std += 1e-7
                
                if name+'_s' in w:
                    if logscale:
                        w[name+'_s'].set_value(-T.log(std).tag.test_value/logscale_scale)
                    else:
                        w[name+'_s'].set_value((1./std).tag.test_value)
                elif do_constant_rescale:
                    constant_rescale.set_value((1./std).tag.test_value)
                
                h /= std.dimshuffle('x',0,'x','x')
                
                # Mean
                mean = h.mean(axis=(0,2,3))
                w[name+'_b'].set_value(-mean.tag.test_value)
                h -= mean.dimshuffle('x',0,'x','x')
            
                #print name, w[name+'_w'].get_value().mean(), w[name+'_w'].get_value().std(), w[name+'_w'].get_value().max()
        
        if upsample>1:
            h = depool2d_split(h, factor=upsample)
        
        if '__init' not in w:
            output_shape = h.tag.test_value.shape[1:]
            print 'conv2d', name, input_shape, output_shape, size_kernel, pad_channel, border_mode, downsample, upsample
        
        #print name, abs(h).max().tag.test_value, abs(h).min().tag.test_value
        #h = T.printing.Print(name)(h)
        
        return h
    
    # Normalize weights to unit L2 norm
    # TODO: check whether only_upper_bounds here really helps
    # (the effect is a higher learning rate in the beginning of training)
    def postup(updates, w):
        if l2norm and maxweight>0.:
            updates[w[name_w+'_w']] = maxconstraint(updates[w[name_w+'_w']])
        return updates
    
    return G.Struct(__call__=f, w=w, postup=postup)
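
About the pad_channel trick (note the _n_in += 1 above): instead of plain zero padding, the input is padded and an extra indicator channel is appended, so the convolution can tell padding apart from genuine zero activations. A NumPy sketch of the idea, not the actual pad2dwithchannel implementation:

import numpy as np

def pad_with_channel(h, size_kernel):
    # h: (batch, channels, H, W); pad so a 'valid' conv keeps H x W
    assert size_kernel[0] == size_kernel[1] and size_kernel[0] % 2 == 1
    p = (size_kernel[0] - 1) // 2
    n, c, H, W = h.shape
    out = np.zeros((n, c + 1, H + 2 * p, W + 2 * p), dtype=h.dtype)
    out[:, :c, p:H + p, p:W + p] = h
    out[:, c, :, :] = 1.              # indicator channel: 1 in the border...
    out[:, c, p:H + p, p:W + p] = 0.  # ...0 over the true image region
    return out

x = np.random.randn(2, 3, 8, 8).astype('float32')
print(pad_with_channel(x, (3, 3)).shape)  # (2, 4, 10, 10)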