def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO
        
    if state.has_key('decrease_lr'):
        decrease_lr = state['decrease_lr']
    else :
        decrease_lr = 0
        
    if state.has_key('decrease_lr_pretrain'):
        dec=state['decrease_lr_pretrain']
    else :
        dec=0
 
    n_ins = 32*32

    if state.has_key('subdataset'):
        subdataset_name=state['subdataset']
    else:
        subdataset_name=SUBDATASET_NIST

    #n_outs = 62 # 10 digits, 26*2 (lower, capitals)
    if subdataset_name == "upper":
	n_outs = 26
	subdataset = datasets.nist_upper()
	examples_per_epoch = NIST_UPPER_TRAIN_SIZE
    elif subdataset_name == "lower":
	n_outs = 26
	subdataset = datasets.nist_lower()
	examples_per_epoch = NIST_LOWER_TRAIN_SIZE
    elif subdataset_name == "digits":
	n_outs = 10
	subdataset = datasets.nist_digits()
	examples_per_epoch = NIST_DIGITS_TRAIN_SIZE
    else:
	n_outs = 62
	subdataset = datasets.nist_all()
	examples_per_epoch = NIST_ALL_TRAIN_SIZE
    
    print 'Using subdataset ', subdataset_name

    # Define these outside the if statement so they always exist
    PATH = ''
    nom_reptrain = ''
    nom_serie = ""
    if state['pretrain_choice'] == 0:
        nom_serie="series_NIST.h5"
    elif state['pretrain_choice'] == 1:
        nom_serie="series_P07.h5"

    series = create_series(state.num_hidden_layers,nom_serie)


    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset_name=subdataset_name,\
				    dataset=subdataset,\
                                    hyperparameters=state, \
                                    n_ins=n_ins, n_outs=n_outs,\
                                    examples_per_epoch=examples_per_epoch, \
                                    series=series,
                                    max_minibatches=rtt)

    parameters=[]
    #Number of files of P07 used for pretraining
    nb_file=0

    print('\n\tpretraining with NIST\n')

    optimizer.pretrain(subdataset, decrease = dec) 

    channel.save()
    
    #Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice=state['finetune_set']
    else:
        finetune_choice=FINETUNE_SET
    
    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST=state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
    
    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07=max_finetune_epoch_NIST
    
    #Decide how the finetuning is done
    
    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(subdataset,subdataset,max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with P07 followed by NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr)
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
        All hidden unit outputs are inputs of the logistic regression\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
        
        
    if finetune_choice==-1:
        print('\nSERIES OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_finetune_P07.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
        All hidden unit outputs are inputs of the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
        channel.save()
    
    channel.save()

    return channel.COMPLETE
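
# A minimal sketch (not from this script) of how the entrypoint could be invoked
# by hand: 'state' is a dict-like hyperparameter container with attribute access
# (jobman's DD) and 'channel' only needs a save() method and a COMPLETE attribute.
# The DummyChannel class and the hyperparameter values below are illustrative
# assumptions, not part of the original experiment configuration.
#
#   from jobman import DD
#   class DummyChannel(object):
#       COMPLETE = 'complete'
#       def save(self): pass
#   state = DD({'num_hidden_layers': 3, 'pretrain_choice': 0,
#               'subdataset': 'digits', 'finetune_set': 0})
#   jobman_entrypoint(state, DummyChannel())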
args = sys.argv[1:]

if len(args) > 0 and args[0] == 'sigmoid':
    type = 0
elif len(args) > 0 and args[0] == 'tanh':
    type = 1

part = 2    # 0=train, 1=valid, 2=test

PATH = ''   # Can be changed if the model is not in the current directory

if os.path.exists(PATH+'params_finetune_NIST.txt'):
    start_time = time.clock()
    print ('\n finetune = NIST ')
    print "NIST DIGITS"
    test_data(PATH+'params_finetune_NIST.txt',datasets.nist_digits(),part=part,type=type)
    print "NIST LOWER CASE"
    test_data(PATH+'params_finetune_NIST.txt',datasets.nist_lower(),part=part,type=type)
    print "NIST UPPER CASE"
    test_data(PATH+'params_finetune_NIST.txt',datasets.nist_upper(),part=part,type=type)
    end_time = time.clock()
    print ('It took %f minutes' %((end_time-start_time)/60.))


if os.path.exists(PATH+'params_finetune_P07.txt'):
    start_time = time.clock()
    print ('\n finetune = P07 ')
    print "NIST DIGITS"
    test_data(PATH+'params_finetune_P07.txt',datasets.nist_digits(),part=part,type=type)
    print "NIST LOWER CASE"
    test_data(PATH+'params_finetune_P07.txt',datasets.nist_lower(),part=part,type=type)
def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \
                    dataset=datasets.nist_digits(), image_size = 32 * 32, nb_class = 10,  \
                    patience = 5000, patience_increase = 2, improvement_threshold = 0.995):
    
    # input images are 32 * 32 = 1024 pixels
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model.

    This is demonstrated on NIST digits (by default).
    
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic 
                          gradient)

    :type nb_max_examples: int
    :param nb_max_examples: maximal number of training examples to process
                            (bounds the number of minibatch updates)
    
    :type batch_size: int  
    :param batch_size:  size of the minibatch

    :type dataset: dataset
    :param dataset: a dataset instance from ift6266.datasets
                        
    :type image_size: int
    :param image_size: size of the input image in pixels (width * height)
    
    :type nb_class: int
    :param nb_class: number of classes
    
    :type patience: int
    :param patience: look at this many examples regardless
    
    :type patience_increase: int
    :param patience_increase: wait this much longer when a new best is found
    
    :type improvement_threshold: float
    :param improvement_threshold: a relative improvement of this much is considered significant


    """
    #--------------------------------------------------------------------------------------------------------------------
    # Build actual model
    #--------------------------------------------------------------------------------------------------------------------
    
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar( )    # index to a [mini]batch 
    x        = T.matrix('x')  # the data is presented as rasterized images
    y        = T.ivector('y') # the labels are presented as 1D vector of 
                           # [int] labels

    # construct the logistic regression class
    
    classifier = LogisticRegression( input = x, n_in = image_size, n_out = nb_class )

    # the cost we minimize during training is the negative log likelihood of 
    # the model in symbolic format
    cost = classifier.negative_log_likelihood( y ) 

    # compiling a Theano function that computes the mistakes that are made by 
    # the model on a minibatch
    test_model = theano.function( inputs = [ x, y ], 
            outputs = classifier.errors( y ))

    validate_model = theano.function( inputs = [ x, y ], 
            outputs = classifier.errors( y ))

    # compute the gradient of cost with respect to theta = ( W, b ) 
    g_W = T.grad( cost = cost, wrt = classifier.W )
    g_b  = T.grad( cost = cost, wrt = classifier.b )

    # specify how to update the parameters of the model as a dictionary
    updates = { classifier.W: classifier.W - learning_rate * g_W,\
                         classifier.b: classifier.b  - learning_rate * g_b}

    # compiling a Theano function `train_model` that returns the cost, but in 
    # the same time updates the parameter of the model based on the rules 
    # defined in `updates`
    train_model = theano.function( inputs = [ x, y ], 
            outputs = cost, 
            updates = updates)

    #--------------------------------------------------------------------------------------------------------------------
    # Train model
    #--------------------------------------------------------------------------------------------------------------------
   
    print '... training the model'
    # early-stopping parameters (patience, patience_increase and
    # improvement_threshold are taken from the function arguments)
    validation_frequency  = patience / 2
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set
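    # Worked example (illustrative numbers): with patience = 5000 and
    # patience_increase = 2, a sufficiently large improvement found at
    # iter = 4000 extends patience to max(5000, 4000 * 2) = 8000 minibatches.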

    best_params          = None
    best_validation_loss = float('inf')
    test_score           = 0.
    start_time           = time.clock()

    done_looping = False 
    n_iters      = nb_max_examples / batch_size
    epoch        = 0
    iter        = 0
    
    while ( iter < n_iters ) and ( not done_looping ):
        
      epoch = epoch + 1
      for x, y in dataset.train(batch_size):

        minibatch_avg_cost = train_model( x, y )
        # iteration number
        iter += 1

        if iter % validation_frequency == 0: 
            # compute zero-one loss on validation set 
            validation_losses     = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ]
            this_validation_loss = numpy.mean( validation_losses )

            print('epoch %i, iter %i, validation error %f %%' % \
                 ( epoch, iter, this_validation_loss*100. ) )


            # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:
                #improve patience if loss improvement is good enough
                if this_validation_loss < best_validation_loss *  \
                       improvement_threshold :
                    patience = max( patience, iter * patience_increase )

                best_validation_loss = this_validation_loss
                # test it on the test set

                test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
                test_score  = numpy.mean(test_losses)

                print(('     epoch %i, iter %i, test error of best ' 
                       'model %f %%') % \
                  (epoch, iter, test_score*100.))

        if patience <= iter :
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
                 ( best_validation_loss * 100., test_score * 100.))
    print ('The code ran for %f minutes' % ((end_time-start_time) / 60.))
    
    return best_validation_loss, test_score, iter*batch_size, (end_time-start_time) / 60.
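
# Example call (using the function's own default values; for illustration only):
# train the logistic regression baseline on NIST digits and report the best
# validation error, test error, number of examples seen and running time.
#
#   best_valid, test_err, n_seen, minutes = log_reg(learning_rate=0.13,
#                                                   nb_max_examples=1000000,
#                                                   batch_size=50,
#                                                   dataset=datasets.nist_digits(),
#                                                   image_size=32*32, nb_class=10)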
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO
 
    n_ins = 32*32
    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
     
    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    PATH = ''
    NIST_BY_CLASS=0



    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
                                    hyperparameters=state, \
                                    n_ins=n_ins, n_outs=n_outs,\
                                    examples_per_epoch=examples_per_epoch, \
                                    max_minibatches=rtt)	


    
    

    if os.path.exists(PATH+'params_finetune_NIST.txt'):
        print ('\n finetune = NIST ')
        optimizer.reload_parameters(PATH+'params_finetune_NIST.txt')
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(),part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(),part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(),part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(),part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(),part=1)
        
    
    if os.path.exists(PATH+'params_finetune_P07.txt'):
        print ('\n finetune = P07 ')
        optimizer.reload_parameters(PATH+'params_finetune_P07.txt')
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(),part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(),part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(),part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(),part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(),part=1)

    
    if os.path.exists(PATH+'params_finetune_NIST_then_P07.txt'):
        print ('\n finetune = NIST then P07')
        optimizer.reload_parameters(PATH+'params_finetune_NIST_then_P07.txt')
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(),part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(),part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(),part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(),part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(),part=1)
    
    if os.path.exists(PATH+'params_finetune_P07_then_NIST.txt'):
        print ('\n finetune = P07 then NIST')
        optimizer.reload_parameters(PATH+'params_finetune_P07_then_NIST.txt')
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(),part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(),part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(),part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(),part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(),part=1)
    
    if os.path.exists(PATH+'params_finetune_PNIST07.txt'):
        print ('\n finetune = PNIST07')
        optimizer.reload_parameters(PATH+'params_finetune_PNIST07.txt')
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(),part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(),part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(),part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(),part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(),part=1)
        
    if os.path.exists(PATH+'params_finetune_PNIST07_then_NIST.txt'):
        print ('\n finetune = PNIST07 then NIST')
        optimizer.reload_parameters(PATH+'params_finetune_PNIST07_then_NIST.txt')
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(),part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(),part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(),part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(),part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(),part=1)
    
    channel.save()

    return channel.COMPLETE
def test_error(model_file):
    
    print((' test error on all NIST'))
    # load the model
    a=numpy.load(model_file)
    W1=a['W1']
    W2=a['W2']
    b1=a['b1']
    b2=a['b2']
    configuration=a['config']
    #configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
    learning_rate = configuration[0]
    nb_max_exemples = configuration[1]
    nb_hidden = configuration[2]
    adaptive_lr =  configuration[3]
	
    if(len(configuration) == 6):
        detection_mode = configuration[4]
        reduce_label = configuration[5]
    else:
        detection_mode = 0
        reduce_label = 0

    # define the batch size
    batch_size=20
    #define the nb of target
    nb_targets = 62
    
    # create the mlp
    ishape     = (32,32) # this is the size of NIST images

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images
    y = T.lvector()  # the labels are presented as 1D vector of 
                          # [long int] labels

    
    # construct the MLP classifier
    classifier = MLP( input=x,\
                        n_in=32*32,\
                        n_hidden=nb_hidden,\
                        n_out=nb_targets,
                        learning_rate=learning_rate,\
                        detection_mode=detection_mode)
		
    		
    # set the weight into the model
    classifier.W1.value = W1
    classifier.b1.value = b1
    classifier.W2.value = W2
    classifier.b2.value = b2

						
    # compiling a theano function that computes the mistakes that are made by 
    # the model on a minibatch
    test_model = theano.function([x,y], classifier.errors(y))
	
    # test it on the test set
    
    # evaluate on the NIST all / digits / lower / upper test sets
    for set_name, dataset in [('NIST ALL',    datasets.nist_all()),
                              ('NIST digits', datasets.nist_digits()),
                              ('NIST lower',  datasets.nist_lower()),
                              ('NIST upper',  datasets.nist_upper())]:
        test_score = 0.
        temp = 0
        for xt, yt in dataset.test(batch_size):
            if reduce_label:
                yt[yt > 35] = yt[yt > 35] - 26
            test_score += test_model(xt, yt)
            temp = temp + 1
        test_score /= temp

        print((' test error %s : %f %%') % (set_name, test_score*100.0))
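
# Example call (hypothetical file name): evaluate a saved parameter archive
# containing W1, b1, W2, b2 and a 'config' entry, as loaded above by numpy.load,
# on the NIST test sets.
#
#   test_error('mlp_all_params.npz')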
def jobman_entrypoint(state, channel):
    global TEST_RUN
    minibatch_size = state.minibatch_size

    print_every = 100000
    COMPUTE_ERROR_EVERY = 10**7 / minibatch_size # compute error every 10 million examples
    if TEST_RUN:
        print_every = 100
        COMPUTE_ERROR_EVERY = 1000 / minibatch_size

    print "entrypoint, state is"
    print state

    ######################
    # select dataset and dataset subset, plus adjust epoch num to make number
    # of examples seen independent of dataset
    # example: for the DIGITS_ONLY case the number of epochs must be adjusted,
    # and for pure NIST (no transformations) it must be multiplied by 100
    # up front since we do not have the transformed variations

    # compute this in terms of the P07 dataset size (=80M)
    MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size
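    # e.g. with n_epochs = 10 and minibatch_size = 20 this gives
    # 10 * 8 * 10**6 / 20 = 4,000,000 minibatches (illustrative numbers)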

    if state.train_on == 'NIST' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_all()
    elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.nist_digits()
    elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
        dataset_obj = datasets.PNIST07()
    elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = PNIST07_digits
    elif state.train_on == 'P07' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_P07()
    elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.P07_digits

    dataset = dataset_obj
    
    if state.train_subset == 'ALL':
        n_classes = 62
    elif state.train_subset == 'DIGITS_ONLY':
        n_classes = 10
    else:
        raise NotImplementedError()

    ###############################
    # construct model

    print "constructing model..."
    x     = T.matrix('x')
    y     = T.ivector('y')

    rng = numpy.random.RandomState(state.rng_seed)

    # construct the MLP class
    model = MLP(rng = rng, input=x, n_in=N_INPUTS,
                        n_hidden_layers = state.n_hidden_layers,
                        n_hidden = state.n_hidden, n_out=n_classes)


    # cost and training fn
    cost = T.mean(model.negative_log_likelihood(y)) \
                 + state.L1_reg * model.L1 \
                 + state.L2_reg * model.L2_sqr 

    print "L1, L2: ", state.L1_reg, state.L2_reg

    gradient_nll_wrt_params = []
    for param in model.params:
        gparam = T.grad(cost, param)
        gradient_nll_wrt_params.append(gparam)

    learning_rate = 10**float(state.learning_rate_log10)
    print "Learning rate", learning_rate

    train_updates = {}
    for param, gparam in zip(model.params, gradient_nll_wrt_params):
        train_updates[param] = param - learning_rate * gparam

    train_fn = theano.function([x,y], cost, updates=train_updates)

    #######################
    # create series
    basedir = os.getcwd()

    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")

    series = {}
    add_error_series(series, "training_error", h5f,
                    index_names=('minibatch_idx',), use_accumulator=True,
                    reduce_every=REDUCE_EVERY)

    ##########################
    # training loop

    start_time = time.clock()

    print "begin training..."
    print "will train for", MINIBATCHES_TO_SEE, "examples"

    mb_idx = 0

    while(mb_idx*minibatch_size<nb_max_exemples):

        last_costs = []

        for mb_x, mb_y in dataset.train(minibatch_size):
            if TEST_RUN and mb_idx > 1000:
                break
                
            last_cost = train_fn(mb_x, mb_y)
            series["training_error"].append((mb_idx,), last_cost)
            mb_idx += 1  # count minibatches seen

            last_costs.append(last_cost)
            if (len(last_costs)+1) % print_every == 0:
                print "Mean over last", print_every, "minibatches: ", numpy.mean(last_costs)
                last_costs = []

            if (mb_idx+1) % COMPUTE_ERROR_EVERY == 0:
                # compute errors
                print "computing errors on all datasets..."
                print "Time since training began: ", (time.clock()-start_time)/60., "minutes"
                compute_and_save_errors(state, model, series, h5f, mb_idx)

        channel.save()

        sys.stdout.flush()

    end_time = time.clock()

    print "-"*80
    print "Finished. Training took", (end_time-start_time)/60., "minutes"
    print state
                                       index_names=('iter',),
                                       title='Test error (class)')
    
    return series

class PrintSeries(object):
    def append(self, idx, v):
        print idx, v

if __name__ == '__main__':
    from ift6266 import datasets
    from sgd_opt import sgd_opt
    import sys, time
    
    batch_size = 100
    dset = datasets.nist_digits(1000)

    pretrain_funcs, trainf, evalf, net = build_funcs(
        img_size = (32, 32),
        batch_size=batch_size, filter_sizes=[(5,5), (3,3)],
        num_filters=[20, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2],
        mlp_sizes=[500], out_size=10, dtype=numpy.float32,
        pretrain_lr=0.001, train_lr=0.1)
    
    t_it = repeat_itf(dset.train, batch_size)
    pretrain_fs, train, valid, test = massage_funcs(
        t_it, t_it, dset, batch_size,
        pretrain_funcs, trainf, evalf)

    print "pretraining ...",
    sys.stdout.flush()