Example #1
def test_nested():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = tt.scalar("x1")
    x2 = tt.scalar("x2")
    c1 = tt.scalar("c1")
    c2 = tt.scalar("c2")
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = "t1"
    t2 = t1 * 10
    t2.name = "t2"
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = "t3"
    t4 = ifelseifelseif(tt.eq(x1, x2), x1, tt.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = "t4"

    linker = theano.link.vm.VMLinker(lazy=False)
    f = function([c1, c2, x1, x2], t4, mode=Mode(linker=linker, optimizer="fast_run"))
    with pytest.raises(NotImplementedOpException):
        f(1, 0, np.array(10, dtype=x1.dtype), 0)

    linker = theano.link.vm.VMLinker(lazy=True)
    f = function([c1, c2, x1, x2], t4, mode=Mode(linker=linker, optimizer="fast_run"))
    assert f(1, 0, np.array(10, dtype=x1.dtype), 0) == 20.5
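These tests come from Theano's test suite, where NotImplementedOp and IfElseIfElseIf are test helpers rather than public API, so the snippets do not run as-is. A minimal sketch of such a helper (a hypothetical reconstruction; the Op import path and the make_node/perform details vary across Theano versions):

import theano
from theano.gof import Op, Apply  # theano.graph.{op,basic} in later releases


class NotImplementedOpException(Exception):
    pass


class NotImplementedOp(Op):
    # An op whose perform() always raises: if a branch containing it never
    # raises at run time, that branch was never evaluated (i.e. it was lazy).
    E = NotImplementedOpException  # older tests reference NotImplementedOp.E

    def make_node(self, x):
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        raise self.E('this op is meant to fail if its branch is evaluated')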
Example #2
def more_complex_test():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = T.scalar('c1')
    c2 = T.scalar('c2')
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = 't1'
    t2 = t1 * 10
    t2.name = 't2'
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = 't3'
    t4 = ifelseifelseif(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = 't4'

    f = function([c1, c2, x1, x2],
                 t4,
                 mode=Mode(linker='vm', optimizer='fast_run'))
    if theano.config.vm.lazy is False:
        try:
            f(1, 0, numpy.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(f(1, 0, numpy.array(10, dtype=x1.dtype), 0))
        assert f(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
    print('... passed')
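Which path this test takes depends on the vm.lazy configuration flag. A small sketch of pinning it down (assumptions: the flag is spelled vm.lazy in THEANO_FLAGS, and the Python attribute is config.vm.lazy in older releases but config.vm__lazy in newer ones):

import os

# Must be set before theano is first imported.
os.environ['THEANO_FLAGS'] = 'vm.lazy=True'

import theano
print(theano.config.vm.lazy)  # True, False, or None (decide per function)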
Example #3
def test_ifelse():
    a = tt.scalar()
    b = generic()
    c = generic()

    notimpl = NotImplementedOp()

    lazys = [True]
    # We need lazy to end up being True for this test.
    if theano.config.vm__lazy in [True, None]:
        lazys = [True, None]

    cloops = [True, False]

    if theano.config.cxx == "":
        cloops = [False]

    for cloop in cloops:
        for lazy in lazys:
            linker = theano.link.vm.VMLinker(use_cloop=cloop, lazy=lazy)
            f = function(
                [a, b, c],
                ifelse(a, notimpl(b), c),
                mode=Mode(linker=linker, optimizer="fast_run"),
            )

            with pytest.raises(NotImplementedOpException):
                f(1, "a", "b")

            assert f(0, "a", "b") == "b"
Example #4
def test_ifelse():
    a = T.scalar()
    b = generic()
    c = generic()

    notimpl = NotImplementedOp()
    lazys = [True]
    # We need lazy to end up being True for this test.
    if theano.config.vm.lazy in [True, None]:
        lazys = [True, None]
    cloops = [True, False]
    if theano.config.cxx == "":
        cloops = [False]
    for cloop in cloops:
        for lazy in lazys:
            linker = theano.gof.vm.VM_Linker(use_cloop=cloop, lazy=lazy)
            f = function([a, b, c],
                         ifelse(a, notimpl(b), c),
                         mode=Mode(linker=linker, optimizer='fast_run'))

            try:
                # print "case 1"
                f(1, 'a', 'b')
                assert False
            except NotImplementedOp.E:
                pass
            # print "... passed"

            # print "case 2"
            # print f(0, 'a', 'b')
            assert f(0, 'a', 'b') == 'b'
Example #5
def more_complex_test():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = T.scalar("x1")
    x2 = T.scalar("x2")
    c1 = T.scalar("c1")
    c2 = T.scalar("c2")
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = "t1"
    t2 = t1 * 10
    t2.name = "t2"
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = "t3"
    t4 = ifelseifelseif(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = "t4"

    f = function([c1, c2, x1, x2], t4, mode=Mode(linker="vm", optimizer="fast_run"))
    if theano.config.vm.lazy is False:
        try:
            f(1, 0, np.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(f(1, 0, np.array(10, dtype=x1.dtype), 0))
        assert f(1, 0, np.array(10, dtype=x1.dtype), 0) == 20.5
    print("... passed")
Example #6
def test_ifelse():
    a = T.scalar()
    b = generic()
    c = generic()

    notimpl = NotImplementedOp()

    f = function([a, b, c], ifelse(a, notimpl(b), c),
                 mode=Mode(linker='vm', optimizer='fast_run'))

    try:
        print('case 1')
        f(1, 'a', 'b')
        assert False
    except NotImplementedOp.E:
        pass
    print('... passed')

    print('case 2')
    print(f(0, 'a', 'b'))
    assert f(0, 'a', 'b') == 'b'
    print('... passed')
Example #7
def exec_multilayer_conv_nnet_old(conv_mode,
                                  ss,
                                  bsize,
                                  imshp,
                                  kshps,
                                  nkerns,
                                  unroll_batch=0,
                                  unroll_kern=0,
                                  img=T.dmatrix(),
                                  validate=True,
                                  conv_op_py=False,
                                  do_print=True,
                                  repeat=1,
                                  unroll_patch=False,
                                  unroll_patch_size=False,
                                  verbose=0):

    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a = T.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()

    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0

    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
                                          range(len(nkerns))):
        if do_print:
            print('************* layer %i ***************' % n_layer)
            print(conv_mode, ss, n_layer, kshp, nkern)

        # actual values
        w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)

        ## manual implementation
        # check first stage
        padimg = imgval
        if conv_mode == 'full':
            padimg_shp = N.array(
                imshp[1:]) + 2 * (N.array(kshp) - N.array([1, 1]))
            padimg = N.zeros(N.r_[bsize, imshp[0], padimg_shp])
            padimg[:, :, kshp[0] - 1:-kshp[0] + 1,
                   kshp[1] - 1:-kshp[1] + 1] = imgval

        outshp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))

        time1 = time.time()
        outval = N.zeros(N.r_[bsize, outshp])
        if validate:
            # causes an atexit problem
            from scipy.signal.sigtools import _convolve2d
            from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary('fill')
            for b in range(bsize):  # loop over batches
                for n in range(nkern):  # loop over filters
                    for i in range(imshp[0]):  # loop over input feature maps
                        outval[b, n, ...] += _convolve2d(
                            imgval[b, i, ...], w_flip[n, i, ...],
                            1, val, bval, 0)[0::ss[0], 0::ss[1]]
            ntot += time.time() - time1

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            conv_op = ConvOp(dx=ss[0],
                             dy=ss[1],
                             output_mode=conv_mode,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        else:
            conv_op = ConvOp(imshp,
                             kshp,
                             nkern,
                             bsize,
                             ss[0],
                             ss[1],
                             conv_mode,
                             unroll_batch=unroll_batch,
                             unroll_kern=unroll_kern,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        l1shp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
        hidval2 = hidval2_  #[:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        if conv_op_py:
            time1 = time.time()
            for i in range(repeat):
                hidval3_ = propup3(imgval, w_flip)
            hidval3 = hidval3_  #[:,:,0::ss[0],0::ss[1]]
            tpytot += time.time() - time1
            assert (N.abs(hidval2 - hidval3) < 1e-5).all()
        else:
            tpytot += 0

        if validate:
            temp = N.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = N.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])

    return tctot, tpytot, ntot
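Example #7 validates ConvOp against scipy's private _convolve2d helper, which has since moved. The same check can be written against public APIs; a minimal sketch (assuming theano.tensor.nnet.conv2d with its default filter_flip=True, which computes a true convolution and therefore matches scipy.signal.convolve2d):

import numpy as np
from scipy import signal
import theano
from theano import tensor as T

x = T.dtensor4('x')  # (batch, channels, rows, cols)
w = T.dtensor4('w')  # (filters, channels, rows, cols)
f = theano.function([x, w], T.nnet.conv2d(x, w, border_mode='valid'))

xv = np.random.rand(1, 1, 5, 5)
wv = np.random.rand(1, 1, 3, 3)
ref = signal.convolve2d(xv[0, 0], wv[0, 0], mode='valid')
assert np.allclose(f(xv, wv)[0, 0], ref)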
Example #8
import numpy as np
from theano import function, Mode
from theano import tensor as T
from theano import config

a = T.vector()
b = T.log(a)
c = T.nnet.sigmoid(b)
d = T.sqrt(c)
e = T.concatenate((d, c), axis=0)
f = b * c * d
# This is the first bad line
g = e + f
h = g / c
fn = function([a], h, mode=Mode(optimizer='None'))
fn(np.ones((3, )).astype(a.dtype))
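Compiling with optimizer='None' keeps the compiled graph close to the user's code, so the traceback of a failing node points back at the offending line (here g = e + f). When the problem is a bad value rather than an exception, the same localization can be done with MonitorMode, sketched here after the Theano debugging FAQ (an assumption: in some versions post_func takes (i, node, fn) without the leading fgraph argument):

import theano


def detect_nan(fgraph, i, node, fn):
    # Runs after every apply node; inspect its freshly computed outputs.
    for output in fn.outputs:
        if np.isnan(output[0]).any():
            print('*** NaN detected ***')
            theano.printing.debugprint(node)
            print('Inputs :', [inp[0] for inp in fn.inputs])
            print('Outputs:', [out[0] for out in fn.outputs])
            break


nan_mode = theano.compile.MonitorMode(post_func=detect_nan).excluding(
    'local_elemwise_fusion', 'inplace')  # keep nodes close to the user's code
fn2 = theano.function([a], h, mode=nan_mode)  # reuses a and h from above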
Example #9
tag_lp = lp_table[tag_idx]
tf_get_tag_lp1 = function([tag_idx], tag_lp)
print(tf_get_tag_lp1(0))
tf_get_tag_lp2 = function([tag_idx], lp_table[tag_idx])
print(tf_get_tag_lp2(0))

"is ifelse lazy evaluated ?"
a = T.scalar('a')

def g(a):
    return T.sum(T.arange(a)) # compare with sum(range(a))

def f(a):
    return T.sum(T.arange(a)) # compare with sum(range(a))

f1 = function([a], T.switch(T.gt(1, 1), f(a), g(a + 1)),
              mode=Mode(linker='cvm'), on_unused_input='ignore')
g1 = function([a], ifelse(T.gt(1, 1), f(a), g(a + 1)),
              mode=Mode(linker='cvm'), on_unused_input='ignore')
timeit.timeit('f1(100000)', "from __main__ import f1", number=10000)
timeit.timeit('g1(100000)', "from __main__ import g1", number=10000)
# Make sure only Theano ops end up in the graph; anything that forces eager evaluation beforehand destroys the laziness.
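The point of the comparison: T.switch is elementwise and always evaluates both branches, while ifelse under a lazy linker evaluates only the branch taken. A sketch along the lines of the Theano tutorial (illustrative; the exact speedup depends on the linker):

import numpy as np
import theano
from theano import tensor as T
from theano.ifelse import ifelse

a, b = T.scalars('a', 'b')
x, y = T.matrices('x', 'y')

# switch: both T.mean(x) and T.mean(y) are computed on every call
f_switch = theano.function([a, b, x, y],
                           T.switch(T.lt(a, b), T.mean(x), T.mean(y)),
                           mode=theano.Mode(linker='vm'))
# ifelse: only the taken branch is computed under the lazy linker
f_ifelse = theano.function([a, b, x, y],
                           ifelse(T.lt(a, b), T.mean(x), T.mean(y)),
                           mode=theano.Mode(linker='vm'))

big = np.ones((4000, 4000), dtype=theano.config.floatX)
f_switch(1, 2, big, big)  # pays for both means
f_ifelse(1, 2, big, big)  # pays for one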
"Problem with gradient computation in Theano with ifelse"
import theano
from theano import function, tensor
from theano.ifelse import ifelse


i = tensor.iscalar('i')
# Vector of i elements, each equal to i
a = tensor.alloc(i.astype('float64'), i)
m = a.max()
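The snippet stops here; the question it comes from is about differentiating through ifelse. A hypothetical continuation (the guard condition and the names e, g, f are my assumptions, not part of the original) that exercises the reported problem:

# Hypothetical continuation: guard the max of a possibly-empty vector
# with ifelse, then ask for the gradient of the result.
e = ifelse(tensor.gt(i, 0), m, tensor.constant(0.0, dtype='float64'))
g = tensor.grad(e, a)  # gradient of the guarded max w.r.t. the vector a
f = function([i], [e, g])
print(f(5))  # e == 5.0; g has the shape of a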
Example #10
def jobman(_options, channel=None):

    ################### PARSE INPUT ARGUMENTS #######################
    o = parse_input_arguments(_options,
                              'RNN_theano/rnn_stream001/RNN_stream.ini')
    ####################### DEFINE THE TASK #########################

    mode = Mode(linker='cvm', optimizer='fast_run')
    rng = numpy.random.RandomState(o['seed'])
    train_set = spike_numbers(n_outs=o['n_outs'],
                              T=o['task_T'],
                              inrange=o['task_inrange'],
                              max_val=o['task_max_val'],
                              min_val=o['task_min_val'],
                              batches=o['task_train_batches'],
                              batch_size=o['task_train_batchsize'],
                              noise=o['task_noise'],
                              rng=rng)

    valid_set = spike_numbers(n_outs=o['n_outs'],
                              T=o['task_T'],
                              inrange=o['task_inrange'],
                              max_val=o['task_max_val'],
                              min_val=o['task_min_val'],
                              batches=o['task_valid_batches'],
                              batch_size=o['task_valid_batchsize'],
                              rng=rng)

    test_set = spike_numbers(n_outs=o['n_outs'],
                             T=o['task_T'],
                             inrange=o['task_inrange'],
                             max_val=o['task_max_val'],
                             min_val=o['task_min_val'],
                             batches=o['task_test_batches'],
                             batch_size=o['task_test_batchsize'],
                             rng=rng)
    if o['wout_pinv']:
        wout_set = spike_numbers(n_outs=o['n_outs'],
                                 T=o['task_T'],
                                 inrange=o['task_inrange'],
                                 max_val=o['task_max_val'],
                                 min_val=o['task_min_val'],
                                 batches=o['task_wout_batches'],
                                 batch_size=o['task_wout_batchsize'],
                                 noise=o['task_wout_noise'],
                                 rng=rng)

    ###################### DEFINE THE MODEL #########################

    def recurrent_fn(u_t, h_tm1, W_hh, W_ux, W_hy, b):
        x_t = TT.dot(W_ux, u_t)
        h_t = TT.tanh(TT.dot(W_hh, h_tm1) + x_t + b)
        y_t = TT.dot(W_hy, h_t)
        return h_t, y_t

    u = TT.tensor3('u')
    if o['error_over_all']:
        t = TT.tensor3('t')
    else:
        t = TT.matrix('t')
    h0 = TT.matrix('h0')
    b = shared_shape(
        floatX(
            numpy.random.uniform(size=(o['nhid'], ),
                                 low=-o['Wux_properties']['scale'],
                                 high=o['Wux_properties']['scale'])))

    alpha = TT.scalar('alpha')
    lr = TT.scalar('lr')

    W_hh = init(o['nhid'], o['nhid'], 'W_hh', o['Whh_style'],
                o['Whh_properties'], rng)

    W_ux = init(o['nhid'], train_set.n_ins, 'W_ux', o['Wux_style'],
                o['Wux_properties'], rng)

    W_hy = init(o['n_outs'], o['nhid'], 'W_hy', o['Why_style'],
                o['Why_properties'], rng)
    [h, y], _ = theano.scan(recurrent_fn,
                            sequences=u,
                            outputs_info=[h0, None],
                            non_sequences=[W_hh, W_ux, W_hy,
                                           TT.shape_padright(b)],
                            name='recurrent_fn',
                            mode=mode)

    init_h = h.owner.inputs[0].owner.inputs[2]

    #h = theano.printing.Print('h',attrs=('shape',))(h)
    if o['error_over_all']:
        out_err = TT.mean(TT.mean((y - t)**2, axis=0), axis=1)
        err = out_err.mean()
    else:
        out_err = ((y[-1] - t)**2).mean(axis=1)
        err = out_err.mean()
    # Regularization term
    if o['reg_projection'] == 'h[-1]':
        cost = h[-1].sum()
    elif o['reg_projection'] == 'err':
        cost = err
    elif o['reg_projection'] == 'random':
        trng = TT.shared_randomstreams.RandomStreams(rng.randint(1e6))
        proj = trng.uniform(size=h[-1].shape)
        if o['sum_h2'] > 0:
            proj = TT.join(0, proj[:o['sum_h2']],
                           TT.zeros_like(proj[o['sum_h2']:]))
        cost = TT.sum(proj * h[-1])

    z, gh = TT.grad(cost, [init_h, h])
    z.name = '__z__'
    z = z[:-1] - gh
    if o['sum_h'] > 0:
        z2 = TT.sum(z[:, :o['sum_h']]**2, axis=1)
    else:
        z2 = TT.sum(z**2, axis=1)
    v1 = z2[:-1]
    v2 = z2[1:]
    # v2 = theano.printing.Print('v2')(v2)
    # floatX(1e-14)
    ratios = TT.switch(TT.ge(v2, 1e-12), TT.sqrt(v1 / v2), floatX(1))
    norm_0 = TT.ones_like(ratios[0])
    norm_t, _ = theano.scan(lambda x, y: x * y,
                            sequences=ratios,
                            outputs_info=norm_0,
                            name='jacobian_products',
                            mode=mode)
    norm_term = TT.sum(TT.mean(norm_t, axis=1))
    if o['reg_cost'] == 'product':
        r = TT.mean(abs(TT.log(norm_t)), axis=1).sum()
    elif o['reg_cost'] == 'each':
        r = TT.mean(abs(TT.log(ratios)), axis=1).sum()
    elif o['reg_cost'] == 'product2':
        ratios2 = TT.switch(TT.ge(z2[-1], 1e-12), TT.sqrt(z2 / z2[-1]),
                            floatX(1))
        r = TT.mean(abs(TT.log(ratios2)), axis=1).sum()

    ratios = TT.switch(TT.ge(v2, 1e-12), TT.sqrt(v1 / v2), floatX(1e-12))[::-1]
    norm_0 = TT.ones_like(ratios[0])
    norm_t, _ = theano.scan(lambda x, y: x * y,
                            sequences=ratios,
                            outputs_info=norm_0,
                            name='jacobian_products',
                            mode=mode)
    norm_term = floatX(0.1) + TT.sum(TT.mean(norm_t, axis=1))
    gu = TT.grad(y[-1].sum(), u)

    if o['opt_alg'] == 'sgd':
        get_updates = lambda p, e, up: (
            sgd(p, e, lr=lr, scale=my1 / norm_term, updates=up)[0],
            [[], [], [TT.constant(0) for x in p]])
    elif o['opt_alg'] == 'sgd_qn':
        get_updates = lambda p, e, up: sgd_qn(p,
                                              e,
                                              mylambda=floatX(o['mylambda']),
                                              t0=floatX(o['t0']),
                                              skip=floatX(o['skip']),
                                              scale=my1 / norm_term,
                                              lazy=o['lazy'],
                                              updates=up)

    if o['win_reg']:
        updates, why_extra = get_updates([W_hy], err, {})
        cost = err + alpha * r
        updates, extras = get_updates([W_ux, W_hh, b], cost, updates)
        b_Why = why_extra[2][0]
        b_Wux = extras[2][0]
        b_Whh = extras[2][1]
        b_b = extras[2][2]
    else:
        updates, extras1 = get_updates([W_hy, W_ux], err, {})
        cost = err + alpha * r
        updates, extras2 = get_updates([W_hh, b], cost, updates)
        b_Why = extras1[2][0]
        b_Wux = extras1[2][1]
        b_Whh = extras2[2][0]
        b_b = extras2[2][1]

    nhid = o['nhid']
    train_batchsize = o['task_train_batchsize']
    valid_batchsize = o['task_valid_batchsize']
    test_batchsize = o['task_test_batchsize']
    wout_batchsize = o['task_wout_batchsize']

    train_h0 = shared_shape(floatX(numpy.zeros((nhid, train_batchsize))))
    valid_h0 = shared_shape(floatX(numpy.zeros((nhid, valid_batchsize))))
    test_h0 = shared_shape(floatX(numpy.zeros((nhid, test_batchsize))))
    wout_h0 = shared_shape(floatX(numpy.zeros((nhid, wout_batchsize))))
    idx = TT.iscalar('idx')
    train_u, train_t = train_set(idx)
    u.tag.shape = copy.copy(train_u.tag.shape)
    t.tag.shape = copy.copy(train_t.tag.shape)
    train = theano.function([u, t, lr, alpha], [out_err, r, norm_term],
                            updates=updates,
                            mode=mode,
                            givens={h0: train_h0})

    valid_u, valid_t = valid_set(idx)
    u.tag.shape = copy.copy(valid_u.tag.shape)
    t.tag.shape = copy.copy(valid_t.tag.shape)
    valid = theano.function([u, t], [out_err, r, norm_term],
                            mode=mode,
                            givens={h0: valid_h0})

    test_u, test_t = test_set(idx)
    u.tag.shape = copy.copy(test_u.tag.shape)
    t.tag.shape = copy.copy(test_t.tag.shape)
    test = theano.function([u, t], [
        out_err, r, norm_term, W_hh, W_ux, W_hy, b, z, y, h, u, gu, t, b_Whh,
        b_Wux, b_Why, b_b
    ],
                           mode=mode,
                           givens={h0: test_h0})
    if o['wout_pinv']:
        wout_u, wout_t = wout_set.get_whole_tensors()

        def wiener_hopf_fn(u_t, t_t, H_tm1, Y_tm1, W_hh, W_ux, b, h0):
            def recurrent_fn(u_t, h_tm1, W_hh, W_ux, b):
                x_t = TT.dot(W_ux, u_t)
                h_t = TT.tanh(TT.dot(W_hh, h_tm1) + x_t + b)
                return h_t

            h_t, _ = theano.scan(recurrent_fn,
                                 sequences=u_t,
                                 outputs_info=h0,
                                 non_sequences=[W_hh, W_ux, b],
                                 name='recurrent_fn',
                                 mode=mode)
            H_t = H_tm1 + TT.dot(h_t[-1], h_t[-1].T)
            Y_t = Y_tm1 + TT.dot(h_t[-1], t_t.T)
            return H_t, Y_t

        H_0 = shared_shape(numpy.zeros((o['nhid'], o['nhid']),
                                       dtype=theano.config.floatX),
                           name='H0')
        Y_0 = shared_shape(numpy.zeros((o['nhid'], o['n_outs']),
                                       dtype=theano.config.floatX),
                           name='Y0')
        all_u = TT.tensor4('whole_u')
        all_t = TT.tensor3('whole_t')
        [H, Y], _ = theano.scan(
            wiener_hopf_fn,
            sequences=[all_u, all_t],
            outputs_info=[H_0, Y_0],
            non_sequences=[W_hh, W_ux, TT.shape_padright(b), h0],
            name='wiener_hopf_fn',
            mode=mode)
        length = TT.cast(all_u.shape[0] * all_u.shape[3],
                         dtype=theano.config.floatX)
        H = H[-1] / length
        Y = Y[-1] / length
        H = H + floatX(o['wiener_lambda']) * TT.eye(o['nhid'])
        W_hy_solve = theano_linalg.solve(H, Y).T
        wout = theano.function([idx], [],
                               mode=mode,
                               updates={W_hy: W_hy_solve},
                               givens={
                                   all_u: wout_u,
                                   all_t: wout_t,
                                   h0: wout_h0
                               })
    '''
    theano.printing.pydotprint(train, 'train.png', high_contrast=True,
                               with_ids= True)
    for idx,node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn,
                                       ('train%d_'%idx)+node.op.name,
                                       high_contrast = True,
                                       with_ids = True)

    theano.printing.pydotprint(train, 'valid.png', high_contrast=True,
                              with_ids = True)
    for idx,node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn,
                                       ('valid%d_'%idx)+node.op.name,
                                       high_contrast = True,
                                      with_ids = True)
    theano.printing.pydotprint(train, 'test.png', high_contrast=True,
                              with_ids = True)
    for idx,node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn,
                                       ('test%d_'%idx)+node.op.name,
                                       high_contrast = True,
                                      with_ids = True)
    if o['wout_pinv']:
        theano.printing.pydotprint(train, 'wout.png', high_contrast=True,
                                  with_ids = True)
        for idx,node in enumerate(train.maker.env.toposort()):
            if node.op.__class__.__name__ == 'Scan':
                theano.printing.pydotprint(node.op.fn,
                                       ('wout%d_'%idx)+node.op.name,
                                       high_contrast = True,
                                          with_ids= True)

    '''
    valid_set.refresh()

    #import GPUscan.ipdb; GPUscan.ipdb.set_trace()
    #rval = valid(valid_set.data_u[0],valid_set.data_t[0])

    #################### DEFINE THE MAIN LOOP #######################

    data = {}
    fix_len = o['max_storage_numpy']  #int(o['NN']/o['small_step'])
    avg_train_err = numpy.zeros((o['small_step'], o['n_outs']))
    avg_train_reg = numpy.zeros((o['small_step'], ))
    avg_train_norm = numpy.zeros((o['small_step'], ))
    avg_valid_err = numpy.zeros((o['small_step'], o['n_outs']))
    avg_valid_reg = numpy.zeros((o['small_step'], ))
    avg_valid_norm = numpy.zeros((o['small_step'], ))

    data['options'] = o
    data['train_err'] = -1 * numpy.ones((fix_len, o['n_outs']))
    data['valid_err'] = -1 * numpy.ones((fix_len, o['n_outs']))
    data['train_reg'] = -1 * numpy.ones((fix_len, ))
    data['valid_reg'] = -1 * numpy.ones((fix_len, ))
    data['train_norm'] = numpy.zeros((fix_len, ))
    data['valid_norm'] = numpy.zeros((fix_len, ))

    data['test_err'] = [None] * o['max_storage']
    data['test_idx'] = [None] * o['max_storage']
    data['test_reg'] = [None] * o['max_storage']
    data['test_norm'] = [None] * o['max_storage']
    data['y'] = [None] * o['max_storage']
    data['z'] = [None] * o['max_storage']
    data['t'] = [None] * o['max_storage']
    data['h'] = [None] * o['max_storage']
    data['u'] = [None] * o['max_storage']
    data['gu'] = [None] * o['max_storage']
    data['W_hh'] = [None] * o['max_storage']
    data['W_ux'] = [None] * o['max_storage']
    data['W_hy'] = [None] * o['max_storage']
    data['b'] = [None] * o['max_storage']
    data['b_ux'] = [None] * o['max_storage']
    data['b_hy'] = [None] * o['max_storage']
    data['b_hh'] = [None] * o['max_storage']
    data['b_b'] = [None] * o['max_storage']
    storage_exceeded = False
    stop = False

    old_rval = numpy.inf
    patience = o['patience']
    n_train = o['task_train_batches']
    n_valid = o['task_valid_batches']
    n_test = o['task_test_batches']
    n_test_runs = 0
    test_pos = 0

    valid_set.refresh()
    test_set.refresh()
    kdx = 0
    lr_v = floatX(o['lr'])
    alpha_v = floatX(o['alpha'])
    lr_f = 1
    if o['lr_scheme']:
        lr_f = o['lr_scheme'][1] / (o['NN'] - o['lr_scheme'][0])
    alpha_r = 1
    if o['alpha_scheme']:
        alpha_r = float(o['alpha_scheme'][1] - o['alpha_scheme'][0])

    st = time.time()
    if channel:
        try:
            channel.save()
        except:
            pass
    for idx in range(int(o['NN'])):
        if o['lr_scheme'] and idx > o['lr_scheme'][0]:
            lr_v = floatX(o['lr'] * 1. / (1. +
                                          (idx - o['lr_scheme'][0]) * lr_f))
        if o['alpha_scheme']:
            if idx < o['alpha_scheme'][0]:
                alpha_v = floatX(0)
            elif idx < o['alpha_scheme'][1]:
                pos = 2. * (idx - o['alpha_scheme'][0]) / alpha_r - 1.
                alpha_v = floatX(numpy.exp(-pos**2 / 0.2) * o['alpha'])
            else:
                alpha_v = floatX(0)

        jdx = idx % o['small_step']
        avg_train_err[jdx, :] = 0
        avg_train_reg[jdx] = 0
        avg_train_norm[jdx] = 0

        avg_valid_err[jdx, :] = 0
        avg_valid_reg[jdx] = 0
        avg_valid_norm[jdx] = 0

        if o['wout_pinv'] and (idx % o['test_step'] == 0):
            wout_set.refresh()
            print('* Re-computing W_hy using the closed-form '
                  'regularized Wiener-Hopf formula')
            st_wout = time.time()
            wout(0)
            ed_wout = time.time()
            print('** It took', ed_wout - st_wout, 'secs')
            print('** Average weight', abs(W_hy.get_value(borrow=True)).mean())

        print('* Re-generating training set')
        st_gen = time.time()
        train_set.refresh()
        print('** Generation took', time.time() - st_gen, 'secs')
        for k in range(o['task_train_batches']):
            rval = train(train_set.data_u[k], train_set.data_t[k], lr_v,
                         alpha_v)
            print('[', idx, '/', patience, '][', k, '/', n_train, '][train]',
                  rval[0].mean(), rval[1], rval[2], (1. / rval[2]) * lr_v,
                  alpha_v)
            avg_train_err[jdx, :] += rval[0]
            avg_train_reg[jdx] += rval[1]
            avg_train_norm[jdx] += rval[2]
        train_set.clean()
        print('** Epoch took', time.time() - st, 'secs')
        avg_train_err[jdx] /= n_train
        avg_train_reg[jdx] /= n_train
        avg_train_norm[jdx] /= n_train
        st = time.time()

        for k in range(n_valid):
            rval = valid(valid_set.data_u[k], valid_set.data_t[k])
            print('[', idx, '/', patience, '][', k, '/', n_valid, '][valid]',
                  rval[0].mean(), rval[1], rval[2])
            avg_valid_err[jdx] += rval[0]
            avg_valid_reg[jdx] += rval[1]
            avg_valid_norm[jdx] += rval[2]

        avg_valid_err[jdx] /= n_valid
        avg_valid_reg[jdx] /= n_valid
        avg_valid_norm[jdx] /= n_valid
        if idx >= o['small_step'] and idx % o['small_step'] == 0:
            kdx += 1
            if kdx >= o['max_storage_numpy']:
                kdx = o['max_storage_numpy'] // 3
                storage_exceeded = True

            data['steps'] = idx
            data['kdx'] = kdx
            data['storage_exceeded'] = storage_exceeded
            data['train_err'][kdx] = avg_train_err.mean()
            data['valid_err'][kdx] = avg_valid_err.mean()
            data['train_reg'][kdx] = avg_train_reg.mean()
            data['valid_reg'][kdx] = avg_valid_reg.mean()
            data['train_norm'][kdx] = avg_train_norm.mean()
            data['valid_norm'][kdx] = avg_valid_norm.mean()
            if channel:
                try:
                    _options['trainerr'] = data['train_err'][kdx].mean()
                    _options['maxtrainerr'] = data['train_err'][kdx].max()
                    _options['trainreg'] = data['train_reg'][kdx]
                    _options['trainnorm'] = data['train_norm'][kdx]
                    _options['validerr'] = data['valid_err'][kdx].mean()
                    _options['maxvaliderr'] = data['valid_err'][kdx].max()
                    _options['validreg'] = data['valid_reg'][kdx]
                    _options['validnorm'] = data['valid_norm'][kdx]
                    _options['steps'] = idx
                    _options['patience'] = patience
                    channel.save()
                except:
                    pass

                test_err = []
                test_reg = []
                test_norm = []

                for k in range(n_test):
                    rval = test(test_set.data_u[k], test_set.data_t[k])
                    print('[', idx, '][', k, '/', n_test, '][test]',
                          rval[0].mean(), rval[1], rval[2])
                    test_err += [rval[0]]
                    test_reg += [rval[1]]
                    test_norm += [rval[2]]
                    test_z = rval[7][:, :, :10]
                    test_y = rval[8][:, :, :10]
                    test_h = rval[9][:, :, :10]
                    test_u = rval[10][:, :, :10]
                    test_gu = rval[11][:, :, :10]
                    test_t = rval[12][:, :10]
                data['test_idx'][test_pos] = idx
                data['test_pos'] = test_pos
                data['y'][test_pos] = test_y
                data['z'][test_pos] = test_z
                data['t'][test_pos] = test_t
                data['h'][test_pos] = test_h
                data['u'][test_pos] = test_u
                data['gu'][test_pos] = test_gu
                data['test_err'][test_pos] = test_err
                data['test_reg'][test_pos] = test_reg
                data['test_norm'][test_pos] = test_norm
                data['W_hh'][test_pos] = rval[3]
                data['W_ux'][test_pos] = rval[4]
                data['W_hy'][test_pos] = rval[5]
                data['b'][test_pos] = rval[6]
                data['b_hh'][test_pos] = rval[13]
                data['b_ux'][test_pos] = rval[14]
                data['b_hy'][test_pos] = rval[15]
                data['b_b'][test_pos] = rval[16]
            # pickle replaces Python 2's cPickle
            with open(os.path.join(configs.results_folder(), o['path'],
                                   '%s_backup.pkl' % o['name']), 'wb') as fh:
                pickle.dump(data, fh)

        print('** ', avg_valid_err[jdx].mean(), ' < ', old_rval, ' ?')
        if avg_valid_err[jdx].mean() < old_rval:

            patience += o['patience_incr']
            if avg_valid_err[jdx].mean() < old_rval * 0.997:

                test_err = []
                test_reg = []
                test_norm = []

                for k in range(n_test):
                    rval = test(test_set.data_u[k], test_set.data_t[k])
                    print('[', idx, '][', k, '/', n_test, '][test]',
                          rval[0].mean(), rval[1], rval[2])
                    test_err += [rval[0]]
                    test_reg += [rval[1]]
                    test_norm += [rval[2]]
                    test_z = rval[7][:, :, :10]
                    test_y = rval[8][:, :, :10]
                    test_h = rval[9][:, :, :10]
                    test_u = rval[10][:, :, :10]
                    test_gu = rval[11][:, :, :10]
                    test_t = rval[12][:, :10]
                data['test_idx'][test_pos] = idx
                data['test_pos'] = test_pos
                data['y'][test_pos] = test_y
                data['z'][test_pos] = test_z
                data['t'][test_pos] = test_t
                data['h'][test_pos] = test_h
                data['u'][test_pos] = test_u
                data['gu'][test_pos] = test_gu
                data['test_err'][test_pos] = test_err
                data['test_reg'][test_pos] = test_reg
                data['test_norm'][test_pos] = test_norm
                data['W_hh'][test_pos] = rval[3]
                data['W_ux'][test_pos] = rval[4]
                data['W_hy'][test_pos] = rval[5]
                data['b'][test_pos] = rval[6]
                data['b_hh'][test_pos] = rval[13]
                data['b_ux'][test_pos] = rval[14]
                data['b_hy'][test_pos] = rval[15]
                data['b_b'][test_pos] = rval[16]

                with open(os.path.join(configs.results_folder(), o['path'],
                                       '%s.pkl' % o['name']), 'wb') as fh:
                    pickle.dump(data, fh)
                n_test_runs += 1
                test_pos += 1
                if test_pos >= o['max_storage']:
                    test_pos = test_pos - o['go_back']
                if numpy.mean(test_err) < 5e-5:
                    patience = idx - 5
                    break

            old_rval = avg_valid_err[jdx].mean()
        if idx > patience:
            break
Example #11
def test_WEIRD_STUFF():
    n_vis = 3

    rng = N.random.RandomState(unittest_tools.fetch_seed(7722342))
    x = rng.randn(10, n_vis)
    y = rng.randn(10, n_vis)

    #set y to be like x with a lag of LAG
    LAG = 4
    y[LAG:] = x[:-LAG, 0:n_vis]

    minimizer_fn1 = sgd_minimizer(stepsize=0.001, WEIRD_STUFF=False)
    minimizer_fn2 = sgd_minimizer(stepsize=0.001, WEIRD_STUFF=True)
    rnn_module1 = ExampleRNN(n_vis, minimizer_fn1)
    rnn_module2 = ExampleRNN(n_vis, minimizer_fn2)
    rnn1 = rnn_module1.make(mode='FAST_RUN')
    #    rnn2 = rnn_module1.make(mode='FAST_COMPILE')#work
    #    rnn2 = rnn_module1.make(mode='FAST_RUN')#fail
    rnn2 = rnn_module2.make(mode=Mode('c|py', 'fast_run'))  #fail
    #    rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace"))#work
    #    rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_compile'))#work
    #    rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run_stable'))#work
    #    rnn2 = rnn_module1.make(mode=Mode('py', 'merge'))#work
    #    rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace_opt"))#work
    #    rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run'))#fail
    m = Mode('py', 'fast_run')
    for n in m.optimizer:
        print(n.name)

    if 0:
        topo1 = rnn1.minimizer.step_cost.maker.env.toposort()
        topo2 = rnn2.minimizer.step_cost.maker.env.toposort()
        for i in range(len(topo1)):
            print('1', i, topo1[i])
            print('2', i, topo2[i])
    if 1:
        topo1 = rnn1.minimizer.step.maker.env.toposort()
        topo2 = rnn2.minimizer.step.maker.env.toposort()
        for i in range(len(topo1)):
            print('1', i, topo1[i])
            print('2', i, topo2[i])
    import theano.printing

    print(len(rnn1.minimizer.step.maker.inputs))
    print(len(rnn2.minimizer.step.maker.inputs))
    print(rnn1.minimizer.step.maker.inputs)
    print(rnn2.minimizer.step.maker.inputs)

    #    for i in range(1,len(rnn1.minimizer.step.maker.inputs)):
    #        print "valid update:",theano.printing.pp(rnn1.minimizer.step.maker.inputs[i].update),
    #        print rnn1.minimizer.step.maker.inputs[i].update.name
    #        print "other update",theano.printing.pp(rnn2.minimizer.step.maker.inputs[i].update),
    #        print rnn2.minimizer.step.maker.inputs[i].update.name
    #    print dir(rnn1.minimizer.step.maker.inputs[5].update)
    #    print dir(rnn2.minimizer.step.maker.inputs[5].update)

    niter = 3
    for i in range(niter):
        print(rnn1.minimizer.step_cost(x, y))
        print(rnn2.minimizer.step_cost(x, y))

        # assert rnn1.n_vis != rnn2.n_vis or rnn1.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out
        assert (N.abs(rnn1.z0 - rnn2.z0) < 1e-8).all()
        print((N.abs(rnn1.w - rnn2.w) < 1e-8).all())
        print(N.abs(rnn1.w - rnn2.w))
        print(rnn1.w)
        print(rnn2.w)
        assert (N.abs(rnn1.w - rnn2.w) < 1e-8).all()