import gc
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T

# NOTE: LogisticRegression, zero_in_array and visualize_logistic are assumed
# to come from this project's own modules; import them from wherever they are
# defined in the code base.


def test_params(learning_rate,
                n_epochs,
                window_size,
                datasets,
                output_folder,
                base_folder):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on ICHI.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    """
    
    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # compute the number of window examples in each dataset
    n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_test_samples = test_set_x.get_value(borrow=True).shape[0] - window_size + 1
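    # every example is a window of window_size consecutive rows, so a set
    # with N rows yields N - window_size + 1 windows (e.g. 1000 rows and
    # window_size == 90 give 911 windows); the label of a window is the
    # label of its last row (see the `givens` clauses below)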
    
    
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index of the first row of a window

    # generate symbolic variables for the input (x holds one window of
    # samples, y its label)
    x = T.matrix('x')  # data, presented as a window with x, y, z for each sample
    y = T.iscalar('y')  # label, presented as an int

    # construct the logistic regression class
    # Each ICHI input has size window_size*3
    classifier = LogisticRegression(input=x, n_in=window_size*3, n_out=7)
    classifier.print_log_reg_types()

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)
    predict = classifier.predict()
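    # for a single window the cost is the negative log-likelihood
    # -log P(Y = y | x, W, b) of the correct label; the SGD loop below
    # minimizes its value one window at a time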

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a single window
    test_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), predict, y],
        givens={
            x: test_set_x[index: index + window_size],
            y: test_set_y[index + window_size - 1]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), predict, y],
        givens={
            x: valid_set_x[index: index + window_size],
            y: valid_set_y[index + window_size - 1]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]
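    # i.e. the plain SGD rule applied after every window:
    #   W <- W - learning_rate * d(cost)/dW
    #   b <- b - learning_rate * d(cost)/db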

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y), predict, y],
        updates=updates,
        givens={
            x: train_set_x[index: index + window_size],
            y: train_set_y[index + window_size - 1]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = n_train_samples * 2  # look at this many examples regardless
    patience_increase = 25  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = patience / 4
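    # early-stopping mechanics: validate every validation_frequency training
    # iterations; when the validation loss improves by more than the relative
    # improvement_threshold, extend patience so training continues, and stop
    # the training loop once iter reaches patience * 4 (see the check below)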

    best_validation_loss = numpy.inf
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    iter = 0
    train_cost_array = []
    train_error_array = []
    valid_error_array = []
    test_error_array = []
    cur_train_cost = []
    cur_train_error = []
    train_confusion_matrix = numpy.zeros((7, 7))
    valid_confusion_matrix = numpy.zeros((7, 7))
    print(n_train_samples, 'train_samples')
    
    while (epoch < n_epochs) and (not done_looping):
        train_confusion_matrix = zero_in_array(train_confusion_matrix)
        for index in xrange(n_train_samples):            
            sample_cost, sample_error, cur_pred, cur_actual = train_model(index)
            # iteration number
            iter = epoch * n_train_samples + index
                
            cur_train_cost.append(sample_cost)
            cur_train_error.append(sample_error)
            train_confusion_matrix[cur_actual][cur_pred] += 1
        
            if (iter + 1) % validation_frequency == 0:
                valid_confusion_matrix = zero_in_array(valid_confusion_matrix)
                # compute zero-one loss on validation set
                validation_losses = []
                for i in xrange(n_valid_samples):
                    validation_loss, cur_pred, cur_actual = validate_model(i)
                    validation_losses.append(validation_loss)
                    valid_confusion_matrix[cur_actual][cur_pred] += 1
    
                this_validation_loss = float(numpy.mean(validation_losses)) * 100
                valid_error_array.append([])
                valid_error_array[-1].append(float(iter) / n_train_samples)
                valid_error_array[-1].append(this_validation_loss)
                        
                print(
                    'epoch %i, iter %i/%i, validation error %f %%' %
                    (
                        epoch,
                        index + 1,
                        n_train_samples,
                        this_validation_loss
                    )
                )
       
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                        improvement_threshold:
                        patience = max(patience, iter * patience_increase)
        
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                         
                    test_result = [test_model(i)
                                   for i in xrange(n_test_samples)]
                    test_result = numpy.asarray(test_result)
                    test_losses = test_result[:, 0]
                    test_score = float(numpy.mean(test_losses)) * 100

                    test_error_array.append([])
                    test_error_array[-1].append(float(iter) / n_train_samples)
                    test_error_array[-1].append(test_score)
        
                    print(
                        (
                            '     epoch %i, iter %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            index + 1,
                            n_train_samples,
                            test_score
                        )
                    )
            if patience * 4 <= iter:
                done_looping = True
                print('Done looping')
                break
                           
        train_cost_array.append([])
        train_cost_array[-1].append(float(iter) / n_train_samples)
        train_cost_array[-1].append(float(numpy.mean(cur_train_cost)))
        cur_train_cost = []

        train_error_array.append([])
        train_error_array[-1].append(float(iter) / n_train_samples)
        train_error_array[-1].append(float(numpy.mean(cur_train_error)) * 100)
        cur_train_error = []
                
        epoch = epoch + 1
        gc.collect()
            
    test_confusion_matrix = numpy.zeros((7, 7))
    test_losses = []
    for i in xrange(n_test_samples):
        test_loss, cur_pred, cur_actual = test_model(i)
        test_losses.append(test_loss)
        test_confusion_matrix[cur_actual][cur_pred] += 1

    test_score = numpy.mean(test_losses) * 100
    test_error_array.append([])
    test_error_array[-1].append(float(iter) / n_train_samples)
    test_error_array[-1].append(test_score)
    
    visualize_logistic(train_cost=train_cost_array,
                       train_error=train_error_array,
                       valid_error=valid_error_array,
                       test_error=test_error_array,
                       window_size=window_size,
                       learning_rate=learning_rate,
                       output_folder=output_folder,
                       base_folder=base_folder)
    end_time = timeit.default_timer()
    print(
        (
           'Optimization complete with best validation score of %f %%, '
           'with test performance %f %%'
        )
        % (best_validation_loss, test_score)
    )
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))
    print(train_confusion_matrix, 'train_confusion_matrix')
    print(valid_confusion_matrix, 'valid_confusion_matrix')
    print(test_confusion_matrix, 'test_confusion_matrix')


def test_params_cg(datasets, output_folder, base_folder,
                   window_size, n_epochs=50):
    """Demonstrate conjugate gradient optimization of a log-linear model

    This is demonstrated on ICHI.

    :type n_epochs: int
    :param n_epochs: number of epochs to run the optimizer

    """
    #############
    # LOAD DATA #
    #############
    
    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # compute the number of window examples in each dataset
    n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_test_samples = test_set_x.get_value(borrow=True).shape[0] - window_size + 1
    
    n_in = window_size*3  # number of input units
    n_out = 7  # number of output units

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index of the first row of a window

    # generate symbolic variables for the input (x holds one window of
    # samples, y its label)
    x = T.matrix('x')  # data, presented as a window with x, y, z for each sample
    y = T.iscalar('y')  # label, presented as an int

    # construct the logistic regression class
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compile a theano function that computes the mistakes that are made by
    # the model on a single window
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: test_set_x[index:index + window_size],
            y: test_set_y[index + window_size - 1]
        },
        name="test"
    )

    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index: index + window_size],
            y: valid_set_y[index + window_size - 1]
        },
        name="validate"
    )

    # compile a theano function that returns the cost, the error, the
    # prediction and the actual label for one training window
    conj_cost = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y), classifier.predict(), y],
        givens={
            x: train_set_x[index: index + window_size],
            y: train_set_y[index + window_size - 1]
        },
        name="conj_cost"
    )

    # compile a theano function that returns the gradient with respect to theta
    conj_grad = theano.function(
        [index],
        T.grad(cost, classifier.theta),
        givens={
            x: train_set_x[index: index + window_size],
            y: train_set_y[index + window_size - 1]
        },
        name="conj_grad"
    )
    
    classifier.train_cost_array = []
    classifier.train_error_array = []
    train_confusion_matrix = numpy.zeros((7, 7))
    classifier.epoch = 0

    # train_fn computes the average cost over the training set for a given
    # flat parameter vector theta_value; fmin_cg calls it as the objective f
    def train_fn(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        cur_train_cost = []
        cur_train_error = []
        for i in xrange(n_train_samples):
            sample_cost, sample_error, cur_pred, cur_actual = conj_cost(i)
            cur_train_cost.append(sample_cost)
            cur_train_error.append(sample_error)
            train_confusion_matrix[cur_actual][cur_pred] += 1
        
        this_train_loss = float(numpy.mean(cur_train_cost))  
        classifier.train_cost_array.append([])
        classifier.train_cost_array[-1].append(classifier.epoch)
        classifier.train_cost_array[-1].append(this_train_loss)
       
        classifier.train_error_array.append([])
        classifier.train_error_array[-1].append(classifier.epoch)
        classifier.train_error_array[-1].append(float(numpy.mean(cur_train_error)*100))
                
        classifier.epoch += 1
        
        return this_train_loss

    # train_fn_grad computes the average gradient of the cost with respect to
    # theta over the training set; fmin_cg calls it as fprime
    def train_fn_grad(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        grad = conj_grad(0)
        for i in xrange(1, n_train_samples):
            grad += conj_grad(i)
        return grad / n_train_samples
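    # unlike the SGD version above, fmin_cg optimizes a deterministic
    # full-batch objective: train_fn returns the mean cost over all training
    # windows and train_fn_grad the matching mean gradient, both as functions
    # of the flat parameter vector theta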

    classifier.validation_scores = [numpy.inf, 0]
    classifier.valid_error_array = []
    classifier.test_error_array = []

    # callback invoked by fmin_cg after each iteration: track the validation
    # error and remember the test score of the best model so far
    def callback(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        # compute the validation loss
        validation_losses = [validate_model(i)
                             for i in xrange(n_valid_samples)]
        this_validation_loss = float(numpy.mean(validation_losses)) * 100.
        print('validation error %f %%' % this_validation_loss)
        classifier.valid_error_array.append([])
        classifier.valid_error_array[-1].append(classifier.epoch)
        classifier.valid_error_array[-1].append(this_validation_loss)

        # check whether this beats the best validation score seen so far
        if this_validation_loss < classifier.validation_scores[0]:
            # if so, replace the old one, and compute the score on the
            # testing dataset
            classifier.validation_scores[0] = this_validation_loss
            test_losses = [test_model(i)
                           for i in xrange(n_test_samples)]
            classifier.validation_scores[1] = float(numpy.mean(test_losses))
            classifier.test_error_array.append([])
            classifier.test_error_array[-1].append(classifier.epoch)
            classifier.test_error_array[-1].append(classifier.validation_scores[1])

    ###############
    # TRAIN MODEL #
    ###############

    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print ("Optimizing using scipy.optimize.fmin_cg...")
    start_time = timeit.default_timer()
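    # theta is optimized as one flat vector of length (n_in + 1) * n_out,
    # which presumably packs W (n_in x n_out) together with b (n_out);
    # fmin_cg only ever sees this flat vector, and train_fn / train_fn_grad
    # push it back into the model via classifier.theta.set_value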
    best_theta = scipy.optimize.fmin_cg(
        f=train_fn,
        x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
        fprime=train_fn_grad,
        callback=callback,
        disp=0,
        maxiter=n_epochs
    )
    visualize_logistic(train_cost=classifier.train_cost_array,
                       train_error=classifier.train_error_array,
                       valid_error=classifier.valid_error_array,
                       test_error=classifier.test_error_array,
                       window_size=window_size,
                       learning_rate=0,
                       output_folder=output_folder,
                       base_folder=base_folder)
    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, with '
            'test performance %f %%'
        )
        % (classifier.validation_scores[0] * 100., classifier.validation_scores[1] * 100.)
    )

    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
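

# A minimal usage sketch, not part of the original module: load_ichi_datasets
# is a hypothetical helper standing in for whatever this project actually uses
# to build the three (shared x, shared y) pairs both entry points expect; the
# parameter values below are illustrative only.
#
# if __name__ == '__main__':
#     datasets = load_ichi_datasets()  # hypothetical loader
#     test_params(learning_rate=0.13,
#                 n_epochs=100,
#                 window_size=30,
#                 datasets=datasets,
#                 output_folder='sgd_plots',
#                 base_folder='.')
#     test_params_cg(datasets=datasets,
#                    output_folder='cg_plots',
#                    base_folder='.',
#                    window_size=30,
#                    n_epochs=50)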