Ejemplo n.º 1
0
def train(learning_rate = 0.1, n_epochs = 300,
          nkerns = [20,50],batch_size = 1000):
    """Train the CNN digit classifier with minibatch SGD and save its state.

    The labelled data returned by ``read_csv()`` is split by row position:
    rows ``[0, 4/5)`` are used for training, ``[4/5, 5/6)`` for validation
    and ``[5/6, end)`` for testing. Training stops after ``n_epochs`` epochs
    or when the patience budget is exhausted, whichever comes first. The
    network state (``digit_Net.__getstate__()``) is then pickled to the file
    ``params`` in the current working directory.

    Args:
        learning_rate: step size applied to every parameter gradient.
        n_epochs: maximum number of passes over the training split.
        nkerns: forwarded unchanged to ``CNN``.
        batch_size: rows per minibatch. Rows of a split that do not fill a
            whole batch are never visited.
    """
    rng = np.random.RandomState()

    # Step 1: prepare the data.
    from csvread import read_csv
    train_x, train_y, _ = read_csv()

    # Split boundaries. `//` keeps them integers so they stay valid slice
    # indices. Assumes train_x and train_y have the same number of rows.
    n_rows = len(train_x)
    train_end = n_rows * 4 // 5
    valid_end = n_rows * 5 // 6

    def shared_x(rows):
        # Feature rows as a floatX shared variable.
        return theano.shared(
            np.asarray(rows, dtype=theano.config.floatX), borrow=True)

    def shared_y(labels):
        # Labels are stored as floatX and cast to int32 for symbolic use.
        return T.cast(
            theano.shared(
                np.asarray(labels, dtype=theano.config.floatX), borrow=True),
            'int32')

    train_set_x = shared_x(train_x[:train_end, :])
    train_set_y = shared_y(train_y[:train_end])
    valid_set_x = shared_x(train_x[train_end:valid_end, :])
    valid_set_y = shared_y(train_y[train_end:valid_end])
    test_set_x = shared_x(train_x[valid_end:, :])
    test_set_y = shared_y(train_y[valid_end:])

    # Number of whole minibatches available in each split.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Step 2: build the network and the three compiled functions
    # (test, validation, training).
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    digit_Net = CNN(
        rng=rng,
        input=layer0_input,
        nkerns=nkerns,
        batch_size=batch_size
    )

    cost = digit_Net.layer4.negative_log_likelihood(y)

    def batch_of(data):
        # Symbolic slice selecting minibatch number `index` from `data`.
        return data[index * batch_size:(index + 1) * batch_size]

    test_model = theano.function(
        [index],
        digit_Net.layer4.errors(y),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y)
        }
    )

    valid_model = theano.function(
        [index],
        digit_Net.layer4.errors(y),
        givens={
            x: batch_of(valid_set_x),
            y: batch_of(valid_set_y)
        }
    )

    grads = T.grad(cost, digit_Net.params)

    # Plain SGD: every parameter moves against its own gradient.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(digit_Net.params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: batch_of(train_set_x),
            y: batch_of(train_set_y)
        }
    )

    # Step 3: the training loop.
    print('---training')

    # Early stopping: the loop ends once `patience` iterations have run.
    # A validation loss below improvement_threshold * best raises patience
    # to at least patience_increase * current iteration.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995

    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while epoch < n_epochs and not done_looping:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 1:
                # Two spaces after '=' reproduce the original output exactly.
                print('training @ iter =  %i' % iteration)

            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = np.mean(
                    [valid_model(i) for i in range(n_valid_batches)])

                print('epoch %i , minibatch %i/%i,validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Score the test split only when validation improves.
                    test_score = np.mean(
                        [test_model(i) for i in range(n_test_batches)])

                    # The arguments follow the placeholders: epoch,
                    # minibatch position, minibatch count, test error.
                    print(' epoch %i , minibatch %i/%i, test error of ,'
                          'with the best performance %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete')
    print('best validation score of %f %% obtained at iteration %i,'
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    sys.stderr.write('the code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')

    # The context manager closes the file even if pickling raises.
    with open('params', 'wb') as f:
        cPickle.dump(digit_Net.__getstate__(), f,
                     protocol=cPickle.HIGHEST_PROTOCOL)
Ejemplo n.º 2
0
def predict():
    """Label the test set with the saved network and write ``submission.csv``.

    Rebuilds the ``CNN`` with the hard-coded configuration (batch size 1000,
    ``nkerns=[20, 50]``), restores its state from the pickle file ``params``
    and runs it over the third array returned by ``read_csv()``. Each
    prediction is reduced to a class index with ``argmax`` over axis 1. The
    result is written as a two-column CSV with the header
    ``ImageId,Label`` and ids starting at 1.

    The network is compiled for a fixed batch size, so the data is
    zero-padded up to a whole number of batches. Predictions for the padding
    rows are discarded, which gives every real sample exactly one label.
    """
    batch_size = 1000
    index = T.lscalar()
    x = T.matrix('x')
    rng = np.random.RandomState()
    digit_Net = CNN(
        rng=rng,
        # Same 4-D layout that train() feeds to the network.
        input=x.reshape((batch_size, 1, 28, 28)),
        nkerns=[20, 50],
        batch_size=batch_size
    )
    print('predict =====')

    # NOTE: unpickling can execute arbitrary code; only load a 'params'
    # file that you produced yourself.
    with open('params', 'rb') as f:
        digit_Net.__setstate__(cPickle.load(f))

    from csvread import read_csv
    _, _, test_x = read_csv()
    print('test_X: %s %s' % (len(test_x), test_x.shape))

    test_rows = np.asarray(test_x, dtype=theano.config.floatX)
    n_samples = test_rows.shape[0]

    # Round the batch count up and pad with zero rows so that the final,
    # partially filled batch is predicted instead of being dropped.
    n_test_batches = (n_samples + batch_size - 1) // batch_size
    n_padding = n_test_batches * batch_size - n_samples
    if n_padding:
        filler = np.zeros((n_padding,) + test_rows.shape[1:],
                          dtype=test_rows.dtype)
        test_rows = np.concatenate([test_rows, filler])

    test_data = theano.shared(test_rows, borrow=True)

    predict_model = theano.function(
        [index],
        outputs=digit_Net.layer4.predict(),
        givens={
            x: test_data[index * batch_size:(index + 1) * batch_size]
        }
    )

    RET = []
    for batch_index in range(n_test_batches):
        batch_output = predict_model(batch_index)
        RET.extend(np.argmax(batch_output, axis=1).astype(int))

    # Drop the labels that belong to the padding rows.
    RET = RET[:n_samples]

    print(RET)
    subm = np.empty((len(RET), 2))
    subm[:, 0] = np.arange(1, len(RET) + 1)
    subm[:, 1] = RET

    np.savetxt('submission.csv', subm, fmt='%d', delimiter=',',
               header='ImageId,Label', comments='')