Code Example #1
def errorAnalysisRundown():
    # Dump the misclassified and the correctly classified test instances.
    # process_qc.label_structure, findErrors and findCorrects are project-local helpers.
    clbl_vec, flbl_vec = process_qc.label_structure('../exp/label_struct_bi')
    predsFile = '../exp/predictions'
    goldFile = '../exp/goldrs'
    textFile = '../data/QC/Chinese_qc/finaltest'
    findErrors(predsFile, goldFile, textFile, clbl_vec)
    findCorrects(predsFile, goldFile, textFile, clbl_vec)
Code Example #2
def lbl2index():
    # Build label-string -> integer-index maps for the coarse and fine label sets.
    c_vec, f_vec = label_structure('./label_structure_new')
    c2idx = {}
    f2idx = {}
    for i in xrange(len(c_vec)):
        c2idx[c_vec[i]] = i
    for i in xrange(len(f_vec)):
        f2idx[f_vec[i]] = i
    return c2idx, f2idx
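
For illustration, a minimal sketch of how the maps returned by lbl2index are typically consumed; the concrete label strings below are hypothetical and depend on the contents of ./label_structure_new:

c2idx, f2idx = lbl2index()
# If c_vec were e.g. ['DESC', 'ENTY', 'LOC'], then c2idx['DESC'] == 0.
# Fine labels take the 'COARSE:fine' form (cf. the split(':') in confusionMatrix),
# so the coarse part of a hypothetical fine label is recovered with:
coarse_lbl = 'DESC:manner'.split(':')[0]  # -> 'DESC'
print c2idx[coarse_lbl]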
Code Example #3
def outputErrorInstances():
    # Write each misclassified test sentence together with the 3-grams selected
    # by max-pooling, so the errors can be inspected by hand.
    # _pathBase_ and label_structure are assumed to be defined at module level.
    bestfFile = _pathBase_ + 'exp/bestjointcnnfrs'
    goldfrsFile = _pathBase_ + 'exp/goldfrs'
    rcnstrct_sents_file = _pathBase_ + 'exp/reconstructed_sentences'
    maxplrsFile = _pathBase_ + 'exp/feature_map_max'

    error_output_file = _pathBase_ + 'exp/error_max_pool_info'

    with open(bestfFile, 'r') as reader:
        predlblseq = [int(line) for line in reader.readlines()]
    with open(goldfrsFile, 'r') as reader:
        goldlblseq = [int(line) for line in reader.readlines()]
    with open(rcnstrct_sents_file, 'r') as reader:
        sentseq = [line.rstrip() for line in reader.readlines()]
    with open(maxplrsFile, 'r') as reader:
        maxplseq = [[int(strint) for strint in line.split()]
                    for line in reader.readlines()]
    assert len(goldlblseq) == len(predlblseq) == len(sentseq) == len(maxplseq)

    c_vec, f_vec = label_structure('label_structure_new')

    error_instances = []
    maxplinfolines = []
    count = 0
    for i in xrange(len(goldlblseq)):
        if goldlblseq[i] != predlblseq[i]:
            count += 1
            words = sentseq[i].split()
            error_instances.append(f_vec[predlblseq[i]] + '\t' +
                                   f_vec[goldlblseq[i]] + '\t' + sentseq[i])
            maxplinfo = ''
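            # Each max-pool argmax is the start index of the 3-gram window that
            # fired most strongly for one feature map; reconstruct those 3-grams.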
            for j in xrange(len(maxplseq[i])):
                argmax = maxplseq[i][j]
                threegram = str(j) + ':' + words[argmax] + '/' + words[
                    argmax + 1] + '/' + words[argmax + 2]
                maxplinfo += threegram + ' '
            maxplinfolines.append(maxplinfo.rstrip())

    assert len(error_instances) == len(maxplinfolines)
    print count  # number of misclassified instances

    with open(error_output_file, 'w') as writer:
        for i in xrange(len(error_instances)):
            writer.write(error_instances[i] + '\n')
            writer.write(maxplinfolines[i] + '\n')
Code Example #4
def train_joint_conv_net(w2vFile,
                         dataFile,
                         labelStructureFile,
                         cfswitch,
                         filter_hs,
                         n_epochs=1000,
                         batch_size=50,
                         feature_maps=100,
                         hasmlphidden=False,
                         usefscore=False):
    """
    function: learning and testing sentence level Question Classification Task
            in a joint fashion, ie. adding the loss function of coarse label prediction
            and fine label prediction together.
    :param w2vFile: the path of the word embedding file(pickle file with numpy
            array value, produced by word2vec.py module)
    :param dataFile: the dataset file produced by process_data.py module
    :param labelStructureFile: a file that describes label structure of coarse and fine
            grains. It is produced in produce_data.py in outputlabelstructure()
    "param filter_h: sliding window size.
            *** warning ***
            you cannot just change window size here, if you want to use a different window
            for the experiment. YOU NEED TO RE-PRODUCE A NEW DATASET IN process_data.py
            WITH THE CORRESPONDING WINDOW SIZE.
    :param n_epochs: the number of epochs the training needs to run
    :param batch_size: the size of the mini-batch
    :param feature_maps: how many dimensions you want the abstract sentence
            representation to be
    :param mlphiddensize: the size of the hidden layer in MLP
    :param logFile: the output file of the brief info of each epoch results, basically a
            save for the print out
    :param logTest: keep track of results on test set
    :return: a tuple of best fine grained prediction accuracy and its corresponding
            coarse grained prediction accuracy
    """
    """
    Loading and preparing data
    """
    datasets = load(dataFile)
    clbl_vec, flbl_vec = process_qc.label_structure(labelStructureFile)
    trainDataSetIndex = 0
    testDataSetIndex = 1
    validDataSetIndex = 2
    sentenceIndex = 0
    clblIndex = 1  # coarse label(clbl) index in the dataset structure
    flblIndex = 2  # fine label(flbl) index

    if cfswitch == 'c':
        lblIndex = clblIndex
        label_vec = clbl_vec
    elif cfswitch == 'f':
        lblIndex = flblIndex
        label_vec = flbl_vec
    else:
        print 'wrong arg value in: cfswitch!'
        sys.exit()

    label_size = len(label_vec)

    if hasmlphidden:
        layer_size = [feature_maps * len(filter_hs), 100, label_size]
    else:
        layer_size = [feature_maps * len(filter_hs), label_size]

    # train part
    train_y = shared_store(datasets[trainDataSetIndex][lblIndex])
    train_x = shared_store(datasets[trainDataSetIndex][sentenceIndex])

    # test part
    gold_test_y = datasets[testDataSetIndex][lblIndex]
    test_x = shared_store(datasets[testDataSetIndex][sentenceIndex])

    # valid part
    gold_valid_y = datasets[validDataSetIndex][lblIndex]
    valid_x = shared_store(datasets[validDataSetIndex][sentenceIndex])

    w2v = load(w2vFile)
    img_w = w2v.shape[1]  # the dimension of the word embedding
    img_h = len(datasets[trainDataSetIndex][sentenceIndex][0])  # length of each sentence
    filter_w = img_w  # word embedding dimension
    image_shapes = []
    filter_shapes = []
    for i in xrange(len(filter_hs)):
        image_shapes.append((batch_size, 1, img_h, img_w * filter_hs[i]))
        filter_shapes.append((feature_maps, 1, 1, filter_w * filter_hs[i]))

    pool_size = (img_h, 1)

    train_size = len(datasets[trainDataSetIndex][sentenceIndex])
    print 'number of sentences in training set: ' + str(train_size)
    print 'max sentence length: ' + str(
        len(datasets[trainDataSetIndex][sentenceIndex][0]))
    print 'train data shape: ' + str(
        datasets[trainDataSetIndex][sentenceIndex].shape)
    print 'word embedding dim: ' + str(w2v.shape[1])
    """
    Building model in theano language, less comments here.
    You can refer to Theano web site for more details
    """
    batch_index = T.lvector('hello_batch_index')
    x = T.itensor3('hello_x')
    y = T.ivector('hello_y')
    w2v_shared = theano.shared(value=w2v, name='w2v', borrow=True)
    rng = np.random.RandomState(3435)

    conv_layer_outputs = []
    conv_layers = []
    for i in xrange(len(filter_hs)):
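        # Look up the embedding of every word index in x (batch, sent_len,
        # window_len), reshape to (batch, 1, sent_len, window_len * img_w), and
        # keep only the first filter_hs[i] words of each pre-extracted window,
        # so each image row holds one candidate n-gram for the 1-row-high filter.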
        input = w2v_shared[x.flatten()].reshape(
            (x.shape[0], 1, x.shape[1],
             x.shape[2] * img_w))[:, :, :, 0:filter_hs[i] * img_w]

        conv_layer = LeNetConvPoolLayer(rng,
                                        input=input,
                                        filter_shape=filter_shapes[i],
                                        poolsize=pool_size,
                                        image_shape=image_shapes[i],
                                        non_linear="relu")

        conv_layers.append(conv_layer)
        conv_layer_outputs.append(conv_layer.output.flatten(2))

    mlp_input = T.concatenate(conv_layer_outputs, 1)

    classifier = MLPDropout(
        rng=rng,
        input=mlp_input,
        layer_sizes=layer_size,  # [feature_maps * len(filter_hs), label_size],
        dropout_rate=0.5,
        activation=Iden)

    params = []
    for conv_layer in conv_layers:
        params += conv_layer.params
    params += classifier.params

    cost = classifier.negative_log_likelihood(y)
    updates = sgd_updates_adadelta(params, cost)

    n_batches = train_x.shape.eval()[0] / batch_size  # integer division (Python 2 ints)

    train_model = theano.function(
        inputs=[batch_index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_x[batch_index],
            y: train_y[batch_index],
        },
    )
    """
    Building test model
    """
    test_conv_layer_outputs = []
    for i, conv_layer in enumerate(conv_layers):
        test_input = w2v_shared[x.flatten()].reshape(
            (x.shape[0], 1, x.shape[1],
             x.shape[2] * img_w))[:, :, :, 0:filter_hs[i] * img_w]
        test_conv_layer_outputs.append(
            conv_layer.conv_layer_output(test_input,
                                         (test_x.shape.eval()[0], 1, img_h,
                                          img_w * filter_hs[i])).flatten(2))
    test_prediction = classifier.predict(
        T.concatenate(test_conv_layer_outputs, 1))

    # test on test set
    test_model = theano.function(inputs=[],
                                 outputs=test_prediction,
                                 givens={
                                     x: test_x,
                                 })

    # test on valid set
    valid_model = theano.function(inputs=[],
                                  outputs=test_prediction,
                                  givens={
                                      x: valid_x,
                                  })
    """
    Training part
    """
    print 'training....'
    best_valid_ep = 0
    best_valid_acc = 0.
    best_test_ep = 0
    best_test_acc = 0.
    final_acc = 0.
    epoch = 0
    last_acc = 0.

    # create gold value sequences, required by the eval.py
    with open('../exp/goldrs', 'w') as writer:
        for lbl in gold_test_y:
            writer.write(str(lbl) + '\n')

    # training loop
    while (epoch < n_epochs):
        epoch += 1
        print '************* epoch ' + str(epoch)
        batch_indexes = range(train_size)
        rng.shuffle(batch_indexes)
        for bchidx in xrange(n_batches):
            random_indexes = batch_indexes[bchidx * batch_size:(bchidx + 1) *
                                           batch_size]
            train_cost = train_model(random_indexes)

        test_y_preds = test_model()
        valid_y_preds = valid_model()
        if usefscore:
            test_acc = eval.fscore(gold_test_y, test_y_preds)
            valid_acc = eval.fscore(gold_valid_y, valid_y_preds)
        else:
            test_acc = eval.accuracy(gold_test_y, test_y_preds)
            valid_acc = eval.accuracy(gold_valid_y, valid_y_preds)
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_valid_ep = epoch
            if final_acc < test_acc:
                final_acc = test_acc
                with open('../exp/predictions', 'w') as writer:
                    for lblidx in test_y_preds:
                        writer.write(str(lblidx) + '\n')
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_test_ep = epoch
            # output predictions

        print 'test accuracy is: ' + str(test_acc)
        print 'valid accuracy is: ' + str(valid_acc)
        print 'current best valid prediction accuracy is: ' + str(
            best_valid_acc) + ' at epoch ' + str(best_valid_ep)
        print 'current best final prediction accuracy is: ' + str(
            final_acc) + ' at epoch ' + str(best_valid_ep)
        print 'current best test prediction accuracy is: ' + str(
            best_test_acc) + ' at epoch ' + str(best_test_ep)
        last_acc = test_acc
    # final_acc = last_acc
    return final_acc
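
For reference, a minimal sketch of how train_joint_conv_net might be invoked; the pickle paths and the filter sizes below are hypothetical and must match whatever word2vec.py and process_data.py actually produced:

final_acc = train_joint_conv_net(w2vFile='../exp/w2v.pkl',        # hypothetical embedding pickle
                                 dataFile='../exp/dataset.pkl',   # hypothetical processed dataset
                                 labelStructureFile='../exp/label_struct_bi',
                                 cfswitch='f',                    # train on the fine-grained labels
                                 filter_hs=[1, 2, 3],             # must match the dataset's window sizes
                                 n_epochs=50,
                                 batch_size=50,
                                 feature_maps=100,
                                 hasmlphidden=False,
                                 usefscore=False)
print 'final test accuracy: ' + str(final_acc)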
Code Example #5
def confusionMatrix():
    # Print a confusion matrix and the misclassified instances for the best
    # CNN predictions; switch selects coarse ('c') or fine ('f') labels.
    switch = 'c'
    with open(_pathBase_ + 'exp/bestcnn' + switch + 'rs', 'r') as reader:
        rslines = reader.readlines()
    with open(_pathBase_ + 'exp/goldrs', 'r') as reader:
        truelines = reader.readlines()
    with open(_pathBase_ + 'data/boschtest_new', 'r') as reader:
        testlines = reader.readlines()
    assert len(truelines) == len(rslines) == len(testlines)
    c2idx, f2idx = lbl2index()
    predslines = [int(line) for line in rslines]

    testcseq = [c2idx[line.split()[0].split(':')[0]] for line in testlines]
    testfseq = [f2idx[line.split()[0]] for line in testlines]

    c_vec, f_vec = label_structure('./label_structure_new')
    if switch == 'c':
        lbl_vec = c_vec
        goldlines = testcseq
    else:
        lbl_vec = f_vec
        goldlines = testfseq

    cm = [[0 for i in xrange(len(lbl_vec))] for j in xrange(len(lbl_vec))]
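    # cm[p][g] counts instances predicted as label p whose gold label is g,
    # so rows of the printed matrix are predictions and columns are gold labels.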

    error_instances = []
    for i in xrange(len(goldlines)):
        glbl = goldlines[i]
        plbl = predslines[i]
        print plbl  # debug output: predicted label index
        print glbl  # debug output: gold label index
        cm[plbl][glbl] += 1
        if plbl != glbl:
            error_instances.append(lbl_vec[plbl] + '\t' + testlines[i])

    # print the label legend, five labels per line
    tmpline = ''
    for i in xrange(len(lbl_vec)):
        tmpline += str(i) + ': ' + lbl_vec[i] + '\t'
        if (i + 1) % 5 == 0:  # flush after every fifth label
            print tmpline
            tmpline = ''
    print tmpline

    # print the matrix column header (gold-label indices)
    tmpline = '    ['
    for i in xrange(len(cm)):
        if i < 10:
            tmpline += ' ' + str(i) + ', '
        else:
            tmpline += str(i) + ', '
    print tmpline.rstrip(', ') + ']'

    # print each matrix row: predicted-label index, then counts per gold label
    for i in xrange(len(cm)):
        row = cm[i]
        tmpline = '['
        for num in row:
            if num >= 10:
                tmpline += str(num) + ', '
            else:
                tmpline += ' ' + str(num) + ', '
        tmpline = tmpline.rstrip(', ') + ']'
        if i < 10:
            print '[ ' + str(i) + ']' + tmpline
        else:
            print '[' + str(i) + ']' + tmpline
    for line in error_instances:
        print line.strip()