Example #1
def learnAndPredict(Ti, C, TOList):
 
    rng = np.random.RandomState(SEED)
    learning_rate = learning_rate0
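    # Rows 0..999 of Ti and row 1000 are normalised separately below: each block is
    # min-max scaled and then standardised, and the same statistics are reused for
    # every array in TOList.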
    print np.mean(Ti[1000,:])
    aminW = np.amin(Ti[:1000,:])
    amaxW = np.amax(Ti[:1000,:]) 
    Ti[:1000,:] = (Ti[:1000,:] - aminW) / (amaxW - aminW)
    astdW = np.std(Ti[:1000,:])
    ameanW = np.mean(Ti[:1000,:])
    Ti[:1000,:] = (Ti[:1000,:] - ameanW) / astdW
    aminacW = np.amin(Ti[1000,:])
    amaxacW = np.amax(Ti[1000,:])
    print aminW, amaxW, aminacW, amaxacW
    Ti[1000,:] =  (Ti[1000,:] - aminacW) / (amaxacW - aminacW)
    astdacW = np.std(Ti[1000,:])
    ameanacW = np.mean(Ti[1000,:])
    Ti[1000,:] =  (Ti[1000,:] - ameanacW) / astdacW
    
    ile__ = len(TOList)
    ileList = np.zeros(ile__)
    for titer in range(len(TOList)):
        print np.mean(TOList[titer][1000,:])
        TOList[titer][:1000,:] = (TOList[titer][:1000,:] - aminW)/(amaxW - aminW)
        TOList[titer][:1000,:] = (TOList[titer][:1000,:] - ameanW)/astdW
        TOList[titer][1000,:] =  (TOList[titer][1000,:] - aminacW)/(amaxacW - aminacW)
        TOList[titer][1000,:] =  (TOList[titer][1000,:] - ameanacW)/astdacW
        _, ileList[titer] = TOList[titer].shape
        
    _, ile = Ti.shape
    N = NN
  
    data = []; yyy = []; need = 1; BYL = {}; j= 0; dwa = 0; ONES = []; ZEROS = []
    for i in range(NN):
        for j in range(NN):
            if i!= j:
                if C[i][j]==1:
                    ONES.append((i,j))
                else:
                    ZEROS.append((i,j))
    Nones = len(ONES)
    rng.shuffle(ONES)
    Nzeros = len(ZEROS)
    print Nones
    print Nzeros
    Needed = NUM_TRAIN/2
    onesPerPair = Needed / Nones + 1
    onesIter = 0
    jj = 0
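    # Build a balanced training set: alternate between connected pairs drawn cyclically
    # from ONES and unconnected pairs drawn without replacement from ZEROS, each with a
    # randomly chosen time window of length L.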
    while jj < NUM_TRAIN:
        if jj%300000 == 0:
            print jj/300000,
        need = 1 - need
        if need == 1:
            pairNo = onesIter % Nones
            ppp = onesIter / Nones
            s,t = ONES[pairNo]
            shift = rng.randint(0, ile - L)
            onesIter += 1
        if need == 0:
            zer = rng.randint(Nzeros)
            s,t = ZEROS[zer]
            del ZEROS[zer]
            Nzeros -= 1
            shift = rng.randint(0, ile - L)
        x = np.hstack(( Ti[s][shift:shift+L], Ti[t][shift:shift+L], Ti[1000][shift:shift+L]))
        y = C[s][t]
        data.append(x); yyy.append(y)
        jj+=1

    data = np.array(data, dtype=theano.config.floatX)  
    is_train = np.array(  ([0]*96 + [1,1,2,2]) * (NUM_TRAIN / 100))
    yyy = np.array(yyy)
    
    train_set_x0, train_set_y0 = np.array(data[is_train==0]), yyy[is_train==0]
    test_set_x,   test_set_y = np.array(data[is_train==1]), yyy[is_train==1]
    valid_set_x, valid_set_y = np.array(data[is_train==2]), yyy[is_train==2]
    n_train_batches = len(train_set_y0) / batch_size
    n_valid_batches = len(valid_set_y)  / batch_size
    n_test_batches  = len(test_set_y)  / batch_size  
    epoch = T.scalar() 
    index = T.lscalar() 
    x = T.matrix('x')   
    inone2 = T.matrix('inone2') 
    y = T.ivector('y') 
    print '... building the model'
#-------- my layers -------------------
    
    #---------------------
    layer0_input = x.reshape((batch_size, 1, 3, L))
    Cx = 5
    layer0 = ConvolutionalLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 3, L),
            filter_shape=(nkerns[0], 1, 2, Cx), poolsize=(1, 1), fac = 0)
    ONE = (3 - 2 + 1) / 1
    L2 = (L - Cx + 1) / 1
    #---------------------
    Cx2 = 5
    layer1 = ConvolutionalLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], ONE, L2),
            filter_shape=(nkerns[1], nkerns[0], 2, Cx2), poolsize=(1, 1), activation=ReLU, fac = 0)
    ONE = (ONE - 2 + 1) /1
    L3 = (L2 - Cx2 + 1) /1
    #---------------------
    Cx3 = 1
    layer1b = ConvolutionalLayer(rng, input=layer1.output,
            image_shape=(batch_size, nkerns[1], ONE, L3),
            filter_shape=(nkerns[2], nkerns[1], 1, Cx3), poolsize=(1, POOL), activation=ReLU, fac = 0)
    ONE = (ONE - 1 + 1) /1
    L4 = (L3 - Cx3 + 1) /POOL
    
    REGx = 100
    #---------------------    
    layer2_input = layer1b.output.flatten(2) 
    print layer2_input.shape
    use_b = False
    layer2 =         HiddenLayer(rng, input=layer2_input, n_in=nkerns[2]*L4 , n_out=REGx, activation=T.tanh,
                                 use_bias = use_b)
    layer3 =  LogisticRegression(input=layer2.output, n_in=REGx, n_out=2)
 
    
    cost = layer3.negative_log_likelihood(y)
    out_x2 = theano.shared(np.asarray(np.zeros((N,L)), dtype=theano.config.floatX))
    inone2 = theano.shared(np.asarray(np.zeros((1,L)), dtype=theano.config.floatX))
    inone3 = theano.shared(np.asarray(np.zeros((1,L)), dtype=theano.config.floatX))
    inone4 = theano.shared(np.asarray(np.zeros((1,L)), dtype=theano.config.floatX))
    test_set_x = theano.shared(np.asarray(test_set_x, dtype=theano.config.floatX))
    train_set_x = theano.shared(np.asarray(train_set_x0, dtype=theano.config.floatX))
    train_set_y = T.cast(theano.shared(np.asarray(train_set_y0, dtype=theano.config.floatX)), 'int32')
    test_set_y = T.cast(theano.shared(np.asarray(test_set_y, dtype=theano.config.floatX)), 'int32')
    valid_set_y =  T.cast(theano.shared(np.asarray(valid_set_y, dtype=theano.config.floatX)), 'int32')
    valid_set_x = theano.shared(np.asarray(valid_set_x, dtype=theano.config.floatX))   
    
    test_model = theano.function([index], layer3.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

       
    mom_start = 0.5; mom_end = 0.98;  mom_epoch_interval = n_epochs * 1.0
    #### @@@@@@@@@@@
    class_params0  =  [layer3, layer2, layer1, layer1b, layer0]  
    class_params = [ param for layer in class_params0 for param in layer.params ]

    gparams = []
    for param in class_params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    gparams_mom = []
    for param in class_params:
        gparam_mom = theano.shared(np.zeros(param.get_value(borrow=True).shape,
            dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)
    mom = ifelse(epoch < mom_epoch_interval,
            mom_start*(1.0 - epoch/mom_epoch_interval) + mom_end*(epoch/mom_epoch_interval),
            mom_end)
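    # The updates below combine classical momentum (using the schedule computed above)
    # with a max-norm constraint: columns of each 2-D weight matrix are rescaled so
    # their L2 norm never exceeds sqrt(squared_filter_length_limit).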
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam
    for param, gparam_mom in zip(class_params, gparams_mom):
        stepped_param = param + updates[gparam_mom]
        squared_filter_length_limit = 15.0
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    output = cost
    train_model = theano.function(inputs=[epoch, index], outputs=output,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    
    keep = theano.function([index], layer3.errorsFull(y),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]}, on_unused_input='warn')

    timer = time.clock()
    print "finished reading", (timer - start_time0) /60. , "minutes "
             
    # TRAIN MODEL # 
    print '... training'
    validation_frequency = n_train_batches; best_params = None; best_validation_loss = np.inf
    best_iter = 0; test_score = 0.;  epochc = 0;
    
    while (epochc < n_epochs):
        epochc = epochc + 1            
        learning_rate = learning_rate0 * (1.2 - ((1.0 * epochc)/n_epochs))
        for minibatch_index in xrange(n_train_batches):      
            iter = (epochc - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(epochc, minibatch_index)  
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print(' %i) err %.2f ' %  (epochc, this_validation_loss/10)), L, nkerns, REGx, "|", Cx, Cx2, Cx3, batch_size
                if this_validation_loss < best_validation_loss or epochc % 30 == 0:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') % (epochc, minibatch_index + 1, n_train_batches, test_score/10))
    ############        
    timel = time.clock()
    print "finished learning", (timel - timer) /60. , "minutes "
    ppm = theano.function([index], layer3.pred_proba_mine(),
        givens={
            x: T.horizontal_stack(T.tile(inone2, (batch_size ,1)), 
               out_x2[index * batch_size: (index + 1) * batch_size], T.tile(inone3, (batch_size ,1))),
            y: train_set_y[0 * (batch_size): (0 + 1) * (batch_size)]
            }, on_unused_input='warn')

    NONZERO = (N*N-N)
    gc.collect()
    RESList = [np.zeros((N,N)) for it in range(ile__)]
    for __net in range(ile__):
        TO = TOList[__net]
        ileO = ileList[__net]
        RES  = RESList[__net]
        shift = 0.1 
        DELTAshift = (ileO-L) / (Q-1)
        print "DELTAshift:", DELTAshift
        for q in range (Q):
            dataO = [];  print (q+1),"/", Q , "  ",
            out_x2.set_value(np.asarray(np.array(TO[:,shift:shift+L]), dtype=theano.config.floatX)) 
            PARTIAL = np.zeros((N,N))
            inone3.set_value(np.asarray(np.array(TO[1000][shift:shift+L]).reshape(1,L), dtype=theano.config.floatX))
            for i in range(N):
                inone2.set_value(np.asarray(np.array(TO[i][shift:shift+L]).reshape(1,L), dtype=theano.config.floatX))
                p = [ppm(ii) for ii in xrange( N / batch_size)]
                for pos in range(N):
                    if pos != i:
                        PARTIAL[i][pos] += p[pos / batch_size][pos % batch_size][1]
            for i in range(N):
                for j in range(N):
                    RES[i][j] += PARTIAL[i][j]
            shift += DELTAshift
        print "Finished", __net
        RESList[__net] = RES/np.max(RES)            
        gc.collect()
        
    end_time = time.clock()
    print "finished predicting", (end_time - timel) /60. , "minutes ", str(nkerns), "using SEED = ", SEED
    print('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time0) / 60.))
    return RESList
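
The function above assumes a number of module-level settings (SEED, learning_rate0, NN, NUM_TRAIN, L, nkerns, POOL, Q, batch_size, n_epochs, start_time0) as well as the project's ConvolutionalLayer, HiddenLayer and LogisticRegression classes. A minimal calling sketch, with hypothetical file names and assuming those globals are already defined:

Ti = np.load('train_tensor.npy')                 # hypothetical file; needs at least 1001 rows
C = np.load('connectivity.npy')                  # hypothetical NN x NN 0/1 adjacency matrix
TOList = [np.load('test_tensor_%d.npy' % k) for k in range(3)]
RESList = learnAndPredict(Ti, C, TOList)         # one normalised NN x NN score matrix per test array
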
Example #2
def learnAndPredict(Ti, C, TOList):

    rng = np.random.RandomState(SEED)
    learning_rate = learning_rate0
    print np.mean(Ti[1000, :])
    aminW = np.amin(Ti[:1000, :])
    amaxW = np.amax(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - aminW) / (amaxW - aminW)
    astdW = np.std(Ti[:1000, :])
    ameanW = np.mean(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - ameanW) / astdW
    aminacW = np.amin(Ti[1000, :])
    amaxacW = np.amax(Ti[1000, :])
    print aminW, amaxW, aminacW, amaxacW
    Ti[1000, :] = (Ti[1000, :] - aminacW) / (amaxacW - aminacW)
    astdacW = np.std(Ti[1000, :])
    ameanacW = np.mean(Ti[1000, :])
    Ti[1000, :] = (Ti[1000, :] - ameanacW) / astdacW

    ile__ = len(TOList)
    ileList = np.zeros(ile__)
    for titer in range(len(TOList)):
        print np.mean(TOList[titer][1000, :])
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - aminW) / (amaxW -
                                                                       aminW)
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - ameanW) / astdW
        TOList[titer][1000, :] = (TOList[titer][1000, :] -
                                  aminacW) / (amaxacW - aminacW)
        TOList[titer][1000, :] = (TOList[titer][1000, :] - ameanacW) / astdacW
        _, ileList[titer] = TOList[titer].shape

    _, ile = Ti.shape
    N = NN

    data = []
    yyy = []
    need = 1
    BYL = {}
    j = 0
    dwa = 0
    ONES = []
    ZEROS = []
    for i in range(NN):
        for j in range(NN):
            if i != j:
                if C[i][j] == 1:
                    ONES.append((i, j))
                else:
                    ZEROS.append((i, j))
    Nones = len(ONES)
    rng.shuffle(ONES)
    Nzeros = len(ZEROS)
    print Nones
    print Nzeros
    Needed = NUM_TRAIN / 2
    onesPerPair = Needed / Nones + 1
    onesIter = 0
    jj = 0
    while jj < NUM_TRAIN:
        if jj % 300000 == 0:
            print jj / 300000,
        need = 1 - need
        if need == 1:
            pairNo = onesIter % Nones
            ppp = onesIter / Nones
            s, t = ONES[pairNo]
            shift = rng.randint(0, ile - L)
            onesIter += 1
        if need == 0:
            zer = rng.randint(Nzeros)
            s, t = ZEROS[zer]
            del ZEROS[zer]
            Nzeros -= 1
            shift = rng.randint(0, ile - L)
        x = np.hstack((Ti[s][shift:shift + L], Ti[t][shift:shift + L],
                       Ti[1000][shift:shift + L]))
        y = C[s][t]
        data.append(x)
        yyy.append(y)
        jj += 1

    data = np.array(data, dtype=theano.config.floatX)
    is_train = np.array(([0] * 96 + [1, 1, 2, 2]) * (NUM_TRAIN / 100))
    yyy = np.array(yyy)

    train_set_x0, train_set_y0 = np.array(
        data[is_train == 0]), yyy[is_train == 0]
    test_set_x, test_set_y = np.array(data[is_train == 1]), yyy[is_train == 1]
    valid_set_x, valid_set_y = np.array(
        data[is_train == 2]), yyy[is_train == 2]
    n_train_batches = len(train_set_y0) / batch_size
    n_valid_batches = len(valid_set_y) / batch_size
    n_test_batches = len(test_set_y) / batch_size
    epoch = T.scalar()
    index = T.lscalar()
    x = T.matrix('x')
    inone2 = T.matrix('inone2')
    y = T.ivector('y')
    print '... building the model'
    #-------- my layers -------------------

    #---------------------
    layer0_input = x.reshape((batch_size, 1, 3, L))
    Cx = 5
    layer0 = ConvolutionalLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 3, L),
                                filter_shape=(nkerns[0], 1, 2, Cx),
                                poolsize=(1, 1),
                                fac=0)
    ONE = (3 - 2 + 1) / 1
    L2 = (L - Cx + 1) / 1
    #---------------------
    Cx2 = 5
    layer1 = ConvolutionalLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], ONE, L2),
                                filter_shape=(nkerns[1], nkerns[0], 2, Cx2),
                                poolsize=(1, 1),
                                activation=ReLU,
                                fac=0)
    ONE = (ONE - 2 + 1) / 1
    L3 = (L2 - Cx2 + 1) / 1
    #---------------------
    Cx3 = 1
    layer1b = ConvolutionalLayer(rng,
                                 input=layer1.output,
                                 image_shape=(batch_size, nkerns[1], ONE, L3),
                                 filter_shape=(nkerns[2], nkerns[1], 1, Cx3),
                                 poolsize=(1, POOL),
                                 activation=ReLU,
                                 fac=0)
    ONE = (ONE - 1 + 1) / 1
    L4 = (L3 - Cx3 + 1) / POOL

    REGx = 100
    #---------------------
    layer2_input = layer1b.output.flatten(2)
    print layer2_input.shape
    use_b = False
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[2] * L4,
                         n_out=REGx,
                         activation=T.tanh,
                         use_bias=use_b)
    layer3 = LogisticRegression(input=layer2.output, n_in=REGx, n_out=2)

    cost = layer3.negative_log_likelihood(y)
    out_x2 = theano.shared(
        np.asarray(np.zeros((N, L)), dtype=theano.config.floatX))
    inone2 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone3 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone4 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    test_set_x = theano.shared(
        np.asarray(test_set_x, dtype=theano.config.floatX))
    train_set_x = theano.shared(
        np.asarray(train_set_x0, dtype=theano.config.floatX))
    train_set_y = T.cast(
        theano.shared(np.asarray(train_set_y0, dtype=theano.config.floatX)),
        'int32')
    test_set_y = T.cast(
        theano.shared(np.asarray(test_set_y, dtype=theano.config.floatX)),
        'int32')
    valid_set_y = T.cast(
        theano.shared(np.asarray(valid_set_y, dtype=theano.config.floatX)),
        'int32')
    valid_set_x = theano.shared(
        np.asarray(valid_set_x, dtype=theano.config.floatX))

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    mom_start = 0.5
    mom_end = 0.98
    mom_epoch_interval = n_epochs * 1.0
    #### @@@@@@@@@@@
    class_params0 = [layer3, layer2, layer1, layer1b, layer0]
    class_params = [param for layer in class_params0 for param in layer.params]

    gparams = []
    for param in class_params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    gparams_mom = []
    for param in class_params:
        gparam_mom = theano.shared(
            np.zeros(param.get_value(borrow=True).shape,
                     dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)
    mom = ifelse(
        epoch < mom_epoch_interval,
        mom_start * (1.0 - epoch / mom_epoch_interval) + mom_end *
        (epoch / mom_epoch_interval), mom_end)
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. -
                                                  mom) * learning_rate * gparam
    for param, gparam_mom in zip(class_params, gparams_mom):
        stepped_param = param + updates[gparam_mom]
        squared_filter_length_limit = 15.0
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    output = cost
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=output,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    keep = theano.function(
        [index],
        layer3.errorsFull(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='warn')

    timer = time.clock()
    print "finished reading", (timer - start_time0) / 60., "minutes "

    # TRAIN MODEL #
    print '... training'
    validation_frequency = n_train_batches
    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    epochc = 0

    while (epochc < n_epochs):
        epochc = epochc + 1
        learning_rate = learning_rate0 * (1.2 - ((1.0 * epochc) / n_epochs))
        for minibatch_index in xrange(n_train_batches):
            iter = (epochc - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(epochc, minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print(' %i) err %.2f ' % (epochc, this_validation_loss / 10)
                      ), L, nkerns, REGx, "|", Cx, Cx2, Cx3, batch_size
                if this_validation_loss < best_validation_loss or epochc % 30 == 0:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of best '
                         'model %f %%') % (epochc, minibatch_index + 1,
                                           n_train_batches, test_score / 10))
    ############
    timel = time.clock()
    print "finished learning", (timel - timer) / 60., "minutes "
    ppm = theano.function(
        [index],
        layer3.pred_proba_mine(),
        givens={
            x:
            T.horizontal_stack(
                T.tile(inone2, (batch_size, 1)),
                out_x2[index * batch_size:(index + 1) * batch_size],
                T.tile(inone3, (batch_size, 1))),
            y:
            train_set_y[0 * (batch_size):(0 + 1) * (batch_size)]
        },
        on_unused_input='warn')

    NONZERO = (N * N - N)
    gc.collect()
    RESList = [np.zeros((N, N)) for it in range(ile__)]
    for __net in range(ile__):
        TO = TOList[__net]
        ileO = ileList[__net]
        RES = RESList[__net]
        shift = 0.1
        DELTAshift = (ileO - L) / (Q - 1)
        print "DELTAshift:", DELTAshift
        for q in range(Q):
            dataO = []
            print(q + 1), "/", Q, "  ",
            out_x2.set_value(
                np.asarray(np.array(TO[:, shift:shift + L]),
                           dtype=theano.config.floatX))
            PARTIAL = np.zeros((N, N))
            inone3.set_value(
                np.asarray(np.array(TO[1000][shift:shift + L]).reshape(1, L),
                           dtype=theano.config.floatX))
            for i in range(N):
                inone2.set_value(
                    np.asarray(np.array(TO[i][shift:shift + L]).reshape(1, L),
                               dtype=theano.config.floatX))
                p = [ppm(ii) for ii in xrange(N / batch_size)]
                for pos in range(N):
                    if pos != i:
                        PARTIAL[i][pos] += p[pos / batch_size][pos %
                                                               batch_size][1]
            for i in range(N):
                for j in range(N):
                    RES[i][j] += PARTIAL[i][j]
            shift += DELTAshift
        print "Finished", __net
        RESList[__net] = RES / np.max(RES)
        gc.collect()

    end_time = time.clock()
    print "finished predicting", (end_time - timel) / 60., "minutes ", str(
        nkerns), "using SEED = ", SEED
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time0) / 60.))
    return RESList
Example #3
def train_logisticRegression(learning_rate=0.13,
                             n_epochs=1000,
                             dataset="mnist.pkl.gz",
                             batch_size=600):

    ###############################################################
    # Get Data
    ###############################################################

    # Load datasets
    datasets = shared_dataset(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Visualize some data samples
    plot_image(train_set_x.get_value(borrow=True)[10], 28, 28)
    plot_image(valid_set_x.get_value(borrow=True)[15], 28, 28)
    plot_image(test_set_x.get_value(borrow=True)[5], 28, 28)

    # Split sets into batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ###############################################################
    # Build model
    ###############################################################

    # Allocate symbolic variables
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Build classifier
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # Define gradient descent
    cost = classifier.negative_log_likelihood(y)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [(classifier.W, classifier.W - g_W * learning_rate),
               (classifier.b, classifier.b - g_b * learning_rate)]

    # Test function
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errorRate(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Validation function
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errorRate(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Training function
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############################################################
    # Train Model
    ###############################################################

    print("Training the model...")
    patience = 5000  # look at this many batches regardless
    patience_increase = 2  # wait this much longer when a new best is found

    improvement_threshold = 0.995  # a relative improvement of this much is considered significant

    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for batch_index in range(n_train_batches):
            batch_avg_cost = train_model(batch_index)

            iter = (epoch - 1) * n_train_batches + batch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, batch %i/%i, validation error rate %f %%' %
                      (epoch, batch_index + 1, n_train_batches,
                       this_validation_loss * 100))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print('    epoch %i, batch %i/%i, test error rate %f %%' %
                          (epoch, batch_index + 1, n_train_batches,
                           test_score * 100))

                    with open('best_model.pkl', 'wb') as f:
                        _pickle.dump(classifier, f)

                if (patience <= iter):
                    done_looping = True
                    break

    end_time = timeit.default_timer()

    print(('Optimization completed with best validation loss of %f %%, '
           'with test score of %f %%.') %
          (best_validation_loss * 100., test_score * 100.))

    print('The code ran for %d epochs, within %f seconds.' %
          (epoch, end_time - start_time))
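
A minimal sketch of running the function above and then reloading the classifier it pickles to best_model.pkl; the input and y_pred attributes used for prediction are assumptions borrowed from the standard Theano logistic-regression tutorial and may differ in this project's LogisticRegression class:

train_logisticRegression(learning_rate=0.13, n_epochs=1000,
                         dataset="mnist.pkl.gz", batch_size=600)

with open('best_model.pkl', 'rb') as f:
    classifier = _pickle.load(f)
# assumed attributes: classifier.input (symbolic input) and classifier.y_pred
predict_model = theano.function(inputs=[classifier.input],
                                outputs=classifier.y_pred)
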
Example #4
class SdA(object):
    def __init__(
        self,
        numpy_rng,
        n_ins,
        n_outs,
        hidden_layers_sizes,
        corruption_levels=[0.1, 0.1],
        theano_rng=None
    ):
        """ This class is made to support a variable number of layers.
        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights
        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`
        :type n_ins: int
        :param n_ins: dimension of the input to the sdA
        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value
        :type n_outs: int
        :param n_outs: dimension of the output of the network
        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.n_ins=n_ins
        self.n_outs=n_outs
        
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(
                rng=numpy_rng,
                input=layer_input,
                n_in=input_size,
                n_out=hidden_layers_sizes[i],
                activation=T.nnet.sigmoid
            )
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.append(sigmoid_layer.theta)

            # Construct a denoising autoencoder that shares weights with this layer
            dA_layer = dA(
                numpy_rng=numpy_rng,
                theano_rng=theano_rng,
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layers_sizes[i],
                theta=sigmoid_layer.theta
            )
            
            self.dA_layers.append(dA_layer)

        sda_input = T.matrix('sda_input')
        self.da_layers_output_size = hidden_layers_sizes[-1]
        self.get_da_output = theano.function(
            inputs=[sda_input],
            outputs=self.sigmoid_layers[-1].output.reshape((-1, self.da_layers_output_size)),
            givens={
                self.x: sda_input
            }
        )
        
        self.logLayer = LogisticRegression(
            rng = numpy.random.RandomState(),
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )
        #self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
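
A minimal construction sketch for the class above; the seed, layer sizes and the input batch are illustrative assumptions, not values from the original project:

numpy_rng = numpy.random.RandomState(89677)
sda = SdA(numpy_rng=numpy_rng,
          n_ins=28 * 28,
          n_outs=10,
          hidden_layers_sizes=[1000, 500],
          corruption_levels=[0.1, 0.2])
# some_batch is a hypothetical 2-D numpy array (dtype theano.config.floatX) whose rows
# have length n_ins; get_da_output maps it through the stacked sigmoid encoders.
features = sda.get_da_output(some_batch)
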
Example #5
def evaluate_lenet5(datasets,
                    learning_rate=0.1,
                    n_epochs=10,
                    nkerns=[20, 50],
                    batch_size=2):
    """ Demonstrates lenet on MNIST dataset

    :param datasets:
    :param batch_size:
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized inputs of shape (batch_size, 47)
    # to a 4D tensor compatible with our LeNetConvPoolLayer:
    # each example is treated as a single-channel 1 x 47 "image".
    layer0_input = x.reshape((batch_size, 1, 1, 47))

    # Construct the first convolutional pooling layer:
    # filtering with a 1x6 kernel reduces the width to (47-6+1) = 42,
    # (1, 2) maxpooling reduces this further to 42/2 = 21,
    # so the 4D output tensor is of shape (batch_size, nkerns[0], 1, 21)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 1, 47),
                                filter_shape=(nkerns[0], 1, 1, 6),
                                poolsize=(1, 2))

    # Construct the second convolutional pooling layer:
    # filtering with a 1x6 kernel reduces the width to (21-6+1) = 16,
    # (1, 2) maxpooling reduces this further to 16/2 = 8,
    # so the 4D output tensor is of shape (batch_size, nkerns[1], 1, 8)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 1, 21),
                                filter_shape=(nkerns[1], nkerns[0], 1, 6),
                                poolsize=(1, 2))

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized inputs).
    # Flattening generates a matrix of shape (batch_size, nkerns[1] * 1 * 8),
    # matching n_in=nkerns[1] * 8 below.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 8,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
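
Unlike Example #6 below, this variant takes the datasets as an argument instead of loading them itself. A minimal calling sketch, assuming some helper (for instance the shared_dataset() used in Example #3) returns three (x, y) pairs of Theano shared variables whose feature rows have length 47:

datasets = shared_dataset('my_signals.pkl.gz')   # hypothetical helper call and file name
evaluate_lenet5(datasets,
                learning_rate=0.1,
                n_epochs=10,
                nkerns=[20, 50],
                batch_size=2)
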
Example #6
def evaluate_lenet5(
    learning_rate = 0.1,
    n_epochs = 200,
    dataset = 'mnist.pkl.gz',
    nkerns = [20, 50],
    batch_size = 500):

    """
    learning_rate (type: float;
                content: learning rate used (factor for the stochastic gradient)

    n_epochs (type: int;
             content: maximal number of epochs to run the optimizer)

    dataset (type: string;
            content: path to the dataset used for training /testing (MNIST here))

    nkerns (type: list of ints;
            content: number of kernels on each layer
    """

    # Initialise random number (used to initialise weights)
    rng = numpy.random.RandomState(23455)

    ## --------------------------------------------------------------------------------------
    ##  Load MNIST data (using load_data() [defined above], and the dataset path)
    ## --------------------------------------------------------------------------------------
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0] # divided into training set...
    valid_set_x, valid_set_y = datasets[1] # validation set
    test_set_x, test_set_y = datasets[2] # and test set

    # Compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size


    #########################################################################################
    #                                    BUILD THE MODEL                                    #
    #########################################################################################
    print('... building the model')

    # Allocate (initialise) symbolic variables and generate symbolic variables for input (x and y represent a minibatch)
    index = T.lscalar()  # index to a [mini]batch (lscalar() returns a zero-dimension value)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels
    
    ## --------------------------------------------------------------------------------------
    ##  Define the FIRST layer
    ## --------------------------------------------------------------------------------------
    
    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) to a 4D tensor,
    # compatible with our LeNetConvPoolLayer. (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input = layer0_input,
        image_shape = (batch_size, 1, 28, 28),
        filter_shape = (nkerns[0], 1, 5, 5),
        poolsize = (2, 2)
    )

    ## --------------------------------------------------------------------------------------
    ##  Define the SECOND layer
    ## --------------------------------------------------------------------------------------

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    ## --------------------------------------------------------------------------------------
    ##  Define the THIRD layer
    ## --------------------------------------------------------------------------------------

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    ## --------------------------------------------------------------------------------------
    ##  Define the FOURTH layer
    ## --------------------------------------------------------------------------------------

    # Classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)


    ## --------------------------------------------------------------------------------------
    ##  Define cost and test functions
    ## --------------------------------------------------------------------------------------
    cost = layer3.negative_log_likelihood(y) # Calculate the cost (negative log-likelihood)

    # Compile a Theano function that computes the mistakes that are made by the model on a minibatch
    # Both for the test model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # And for the validation model
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # Create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    ##  Specify how to update the parameters of the model
    """ train_model is a function that updates the model parameters by SGD.
    Since this model has many parameters, it would be tedious to manually
    create an update rule for each model parameter. We thus create the
    updates list by automatically looping over all (params[i], grads[i]) pairs.
    """
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Compile a Theano function `train_model` that returns the cost, but at the same time updates
    # the parameter of the model based on the rules defined in `updates`.
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    #########################################################################################
    #                                       TRAIN MODEL                                     #
    #########################################################################################
    print('... training the model')

    ## --------------------------------------------------------------------------------------
    ##  Define early-stopping parameters
    ## --------------------------------------------------------------------------------------
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many minibatches before checking the network
                                  # on the validation set; in this case we check every epoch
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    ## --------------------------------------------------------------------------------------
    ##  Start iterating loop (i.e. through multibatches for repeated SGD)
    ## --------------------------------------------------------------------------------------
    epoch = 0
    done_looping = False
    # Loop through epochs
    while (epoch < n_epochs) and (not done_looping): # n_epochs is an argument of this function
        epoch = epoch + 1 # Increment epoch on each loop

        # Loop through minibatches
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index # iteration number

            ## Report training progress every 100 iterations
            if iter % 100 == 0:
                print('training @ iter = ', iter)

            # Take one SGD step on the current minibatch
            cost_ij = train_model(minibatch_index)

            # When the iteration is fully divisible by the validation frequency
            if (iter + 1) % validation_frequency == 0:

                # Check for performance (zero-one loss) on validation data set
                validation_losses = [
                    validate_model(i)
                    for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                # Print current validation test results
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (
                          epoch,
                          minibatch_index + 1,
                          n_train_batches,
                          this_validation_loss * 100.
                      )
                )

                # If we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # ...and if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # Save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # Test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    # Print test results
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    ## -----------------------------------------------------------------
                    ##  Save model parameters using cPickle
                    ## -----------------------------------------------------------------
                    fname = 'bestCNNModel.pkl'
                    saveFile = open(fname, 'wb')

                    # model weights
                    cPickle.dump(layer0.W, saveFile)
                    cPickle.dump(layer0.b, saveFile)
                    cPickle.dump(layer1.W, saveFile)
                    cPickle.dump(layer1.b, saveFile)
                    cPickle.dump(layer2.W, saveFile)
                    cPickle.dump(layer2.b, saveFile)

                    """
                    # hyperparameters and performance
                    cPickle.dump(learning_rate, saveFile)
                    cPickle.dump(best_validation_loss, saveFile)
                    cPickle.dump(test_score, saveFile)
                    cPickle.dump(test_losses, saveFile)
                    cPickle.dump(nkerns, saveFile)
                    cPickle.dump(n_epochs, saveFile)
                    cPickle.dump(batch_size, saveFile)
                    """
                    saveFile.close()

            # Else, if patience is expired
            if patience <= iter:
                done_looping = True # Break the loop
                break

    # Now that the loop has ended...
    end_time = timeit.default_timer() # note the time of loop ending

    # Print the ending results
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
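
The weights pickled to bestCNNModel.pkl above can be restored by loading them back in the order they were written; a minimal sketch, assuming the layer objects have been rebuilt with the same shapes:

import cPickle

with open('bestCNNModel.pkl', 'rb') as loadFile:
    for param in (layer0.W, layer0.b, layer1.W, layer1.b, layer2.W, layer2.b):
        saved = cPickle.load(loadFile)           # each dump above was a Theano shared variable
        param.set_value(saved.get_value())
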
Example #7
class SdA(object):
    def __init__(self,
                 numpy_rng,
                 n_ins,
                 n_outs,
                 hidden_layers_sizes,
                 corruption_levels=[0.1, 0.1],
                 theano_rng=None):
        """ This class is made to support a variable number of layers.
        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights
        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`
        :type n_ins: int
        :param n_ins: dimension of the input to the sdA
        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value
        :type n_outs: int
        :param n_outs: dimension of the output of the network
        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.n_ins = n_ins
        self.n_outs = n_outs

        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.append(sigmoid_layer.theta)

            # Construct a denoising autoencoder that shares weights with this layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          theta=sigmoid_layer.theta)

            self.dA_layers.append(dA_layer)

        sda_input = T.matrix('sda_input')
        self.da_layers_output_size = hidden_layers_sizes[-1]
        self.get_da_output = theano.function(
            inputs=[sda_input],
            outputs=self.sigmoid_layers[-1].output.reshape(
                (-1, self.da_layers_output_size)),
            givens={self.x: sda_input})

        self.logLayer = LogisticRegression(
            rng=numpy.random.RandomState(),
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        #self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)