def main():
    # load the training and validation data sets
    train_X, test_X, train_y, test_y = load_data_cv('data/train.csv')

    X = T.ftensor4()
    Y = T.fmatrix()

    # set up theano functions to generate output by feeding data through network
    output_layer = lasagne_model()
    output_train = lasagne.layers.get_output(output_layer, X)
    output_valid = lasagne.layers.get_output(output_layer, X, deterministic=True)

    # set up the loss that we aim to minimize
    loss_train = T.mean(T.nnet.categorical_crossentropy(output_train, Y))
    loss_valid = T.mean(T.nnet.categorical_crossentropy(output_valid, Y))

    # prediction functions for classifications
    pred = T.argmax(output_train, axis=1)
    pred_valid = T.argmax(output_valid, axis=1)

    # get parameters from network and set up sgd with nesterov momentum to update parameters
    params = lasagne.layers.get_all_params(output_layer)
    updates = nesterov_momentum(loss_train, params, learning_rate=0.003, momentum=0.9)

    # set up training and prediction functions
    train = theano.function(inputs=[X, Y], outputs=loss_train, updates=updates, allow_input_downcast=True)
    valid = theano.function(inputs=[X, Y], outputs=loss_valid, allow_input_downcast=True)
    predict_valid = theano.function(inputs=[X], outputs=pred_valid, allow_input_downcast=True)

    # loop over training functions for however many iterations, print information while training
    train_eval = []
    valid_eval = []
    valid_acc = []
    try:
        for i in range(45):
            train_loss = batch_iterator(train_X, train_y, BATCHSIZE, train)
            train_eval.append(train_loss)
            valid_loss = valid(test_X, test_y)
            valid_eval.append(valid_loss)
            acc = np.mean(np.argmax(test_y, axis=1) == predict_valid(test_X))
            valid_acc.append(acc)
            print 'iter:', i, '| Tloss:', train_loss, '| Vloss:', valid_loss, '| valid acc:', acc

    except KeyboardInterrupt:
        pass

    # save weights
    all_params = helper.get_all_param_values(output_layer)
    f = gzip.open('data/weights.pklz', 'wb')
    pickle.dump(all_params, f)
    f.close()

    # plot loss and accuracy
    train_eval = np.array(train_eval)
    valid_eval = np.array(valid_eval)
    valid_acc = np.array(valid_acc)
    sns.set_style("whitegrid")
    pyplot.plot(train_eval, linewidth = 3, label = 'train loss')
    pyplot.plot(valid_eval, linewidth = 3, label = 'valid loss')
    pyplot.legend(loc = 2)
    pyplot.twinx()
    pyplot.plot(valid_acc, linewidth = 3, label = 'valid accuracy', color = 'r')
    pyplot.grid()
    pyplot.ylim([.9,1])
    pyplot.legend(loc = 1)
    pyplot.savefig('data/training_plot.png')
def main():
    # load model and parameters
    output_layer = lasagne_model()
    f = gzip.open('data/weights.pklz', 'rb')
    all_params = pickle.load(f)
    f.close()

    X = T.ftensor4()
    Y = T.fmatrix()

    # set up theano functions to generate output by feeding data through network
    output_layer = lasagne_model()
    output_train = lasagne.layers.get_output(output_layer, X)
    output_valid = lasagne.layers.get_output(output_layer, X, deterministic=True)

    # set up the loss that we aim to minimize
    loss_train = T.mean(T.nnet.categorical_crossentropy(output_train, Y))
    loss_valid = T.mean(T.nnet.categorical_crossentropy(output_valid, Y))

    # prediction functions for classifications
    pred = T.argmax(output_train, axis=1)
    pred_valid = T.argmax(output_valid, axis=1)

    # get parameters from network and set up sgd with nesterov momentum to update parameters
    helper.set_all_param_values(output_layer, all_params)
    params = lasagne.layers.get_all_params(output_layer)
    updates = nesterov_momentum(loss_train, params, learning_rate=0.0001, momentum=0.9)

    # set up training and prediction functions
    train = theano.function(inputs=[X, Y], outputs=loss_train, updates=updates, allow_input_downcast=True)
    valid = theano.function(inputs=[X, Y], outputs=loss_valid, allow_input_downcast=True)
    predict_valid = theano.function(inputs=[X], outputs=pred_valid, allow_input_downcast=True)

    # fine tune network
    train_X, test_X, train_y, test_y = load_data_cv('data/train.csv')
    train_eval = []
    valid_eval = []
    valid_acc = []
    try:
        for i in range(5):
            train_loss = batch_iterator_no_aug(train_X, train_y, BATCHSIZE, train)
            train_eval.append(train_loss)
            valid_loss = valid(test_X, test_y)
            valid_eval.append(valid_loss)
            acc = np.mean(np.argmax(test_y, axis=1) == predict_valid(test_X))
            valid_acc.append(acc)
            print 'iter:', i, '| Tloss:', train_loss, '| Vloss:', valid_loss, '| valid acc:', acc

    except KeyboardInterrupt:
        pass

    # after training create output for kaggle
    testing_inputs = load_test_data('data/test.csv')
    predictions = []
    for j in range((testing_inputs.shape[0] + BATCHSIZE -1) // BATCHSIZE):
        sl = slice(j * BATCHSIZE, (j + 1) * BATCHSIZE)
        X_batch = testing_inputs[sl]
        predictions.extend(predict_valid(X_batch))
    out = pd.read_csv('data/convnet_preds.csv')
    out['Label'] = predictions
    out.to_csv('preds/convnet_preds.csv', index = False)