Beispiel #1
0
def train(model, data, words, params):
    """Train ``model`` on ``data`` in shuffled minibatches.

    For every minibatch, both items of each pair get their embeddings
    populated from ``words``; ``getpairs`` then builds the ``g*``/``p*``
    inputs and masks, which are passed to ``model.train_function``.  The
    value it returns is kept as the current cost.  After each update the
    per-item state of the batch is cleared again to save RAM.

    When ``utils.checkIfQuarter`` fires, and once more at the end of every
    epoch, the model is saved (if ``params.save``) and ``evaluate_all`` is
    run (if ``params.evaluate``).  A ``KeyboardInterrupt`` stops training
    early; the total elapsed time is printed in either case.

    Args:
        model: object providing ``train_function``; also handed to
            ``getpairs``, ``utils.saveParams`` and ``evaluate_all``.
        data: indexable collection of pairs whose elements ``[0]`` and
            ``[1]`` provide ``populate_embeddings``,
            ``unpopulate_embeddings`` and a ``representation`` attribute.
        words: passed to ``populate_embeddings`` and ``evaluate_all``.
        params: settings object; ``epochs``, ``batchsize``, ``save``,
            ``evaluate`` and ``outfile`` are read here.

    Returns:
        None.
    """
    start_time = time.time()

    # Number of checkpoints written so far; used as the file-name suffix.
    counter = 0
    # Pre-bound so the per-epoch report cannot raise NameError when an epoch
    # yields no minibatches (e.g. empty ``data``).
    cost = None

    # NOTE: every print below is a single-argument call, so it emits the
    # same text whether ``print`` is the statement or the function.
    try:
        for eidx in range(params.epochs):
            # Materialise the batches so that len() works even when the
            # helper returns an iterator, and take the length only once.
            kf = list(utils.get_minibatches_idx(len(data), params.batchsize,
                                                shuffle=True))
            n_batches = len(kf)

            # uidx is the 1-based index of the update within this epoch.
            for uidx, (_, train_index) in enumerate(kf, 1):
                batch = [data[t] for t in train_index]
                for pair in batch:
                    pair[0].populate_embeddings(words)
                    pair[1].populate_embeddings(words)

                (g1x, g1mask, g2x, g2mask,
                 p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)

                cost = model.train_function(g1x, g2x, p1x, p2x,
                                            g1mask, g2mask, p1mask, p2mask)

                # Only reported; training deliberately continues.
                if np.isnan(cost) or np.isinf(cost):
                    print('NaN detected')

                if utils.checkIfQuarter(uidx, n_batches):
                    if params.save:
                        counter += 1
                        utils.saveParams(
                            model, params.outfile + str(counter) + '.pickle')
                    if params.evaluate:
                        evaluate_all(model, words)
                        sys.stdout.flush()

                # Undo the batch to save RAM.
                for pair in batch:
                    pair[0].representation = None
                    pair[1].representation = None
                    pair[0].unpopulate_embeddings()
                    pair[1].unpopulate_embeddings()

            # End-of-epoch checkpoint and evaluation.
            if params.save:
                counter += 1
                utils.saveParams(
                    model, params.outfile + str(counter) + '.pickle')

            if params.evaluate:
                evaluate_all(model, words)

            print(' '.join(('Epoch ', str(eidx + 1), 'Cost ', str(cost))))

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    print(' '.join(("total time:", str(end_time - start_time))))
Beispiel #2
0
def train(model, data, words, params):
    """Run the minibatch training loop for ``params.epochs`` epochs.

    Each epoch asks ``utils.get_minibatches_idx`` for a shuffled split of
    ``data``.  Per minibatch: embeddings of both items of every pair are
    populated from ``words``, ``getpairs`` produces the ``g*``/``p*`` inputs
    with their masks, and ``model.train_function`` is called on them; its
    return value is stored as the cost.  The batch is then reset (its
    ``representation`` cleared and embeddings unpopulated) to save RAM.

    Saving (``params.save``) and evaluation (``params.evaluate``) happen
    whenever ``utils.checkIfQuarter`` returns true and again after the last
    minibatch of every epoch.  Training can be aborted with Ctrl-C; the
    elapsed wall-clock time is printed regardless.

    Args:
        model: object providing ``train_function``; also passed on to
            ``getpairs``, ``utils.saveParams`` and ``evaluate_all``.
        data: indexable collection of pairs; items ``[0]`` and ``[1]`` of a
            pair provide ``populate_embeddings``, ``unpopulate_embeddings``
            and a ``representation`` attribute.
        words: forwarded to ``populate_embeddings`` and ``evaluate_all``.
        params: settings object; this function reads ``epochs``,
            ``batchsize``, ``save``, ``evaluate`` and ``outfile``.

    Returns:
        None.
    """
    start_time = time.time()

    def checkpoint(index):
        # Write the parameters to "<outfile><index>.pickle".
        utils.saveParams(model, params.outfile + str(index) + '.pickle')

    # Running count of checkpoints written; doubles as file-name suffix.
    counter = 0
    # Bound up front: without this the epoch report raises NameError when
    # an epoch has no minibatches at all.
    cost = None

    # NOTE: prints are single-argument calls so the output is identical
    # under the print statement and the print function.
    try:
        for eidx in range(params.epochs):
            # list() keeps len() valid if the helper returns an iterator;
            # the length is computed once instead of on every update.
            kf = list(utils.get_minibatches_idx(len(data), params.batchsize,
                                                shuffle=True))
            total_updates = len(kf)

            for uidx, (_, train_index) in enumerate(kf, 1):
                batch = [data[t] for t in train_index]
                for first, second in ((p[0], p[1]) for p in batch):
                    first.populate_embeddings(words)
                    second.populate_embeddings(words)

                (g1x, g1mask, g2x, g2mask,
                 p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)

                cost = model.train_function(g1x, g2x, p1x, p2x,
                                            g1mask, g2mask, p1mask, p2mask)

                # A non-finite cost is reported but does not stop training.
                if np.isnan(cost) or np.isinf(cost):
                    print('NaN detected')

                if utils.checkIfQuarter(uidx, total_updates):
                    if params.save:
                        counter += 1
                        checkpoint(counter)
                    if params.evaluate:
                        evaluate_all(model, words)
                        sys.stdout.flush()

                # Undo the batch to save RAM.
                for item in batch:
                    item[0].representation = None
                    item[1].representation = None
                    item[0].unpopulate_embeddings()
                    item[1].unpopulate_embeddings()

            # End-of-epoch checkpoint and evaluation.
            if params.save:
                counter += 1
                checkpoint(counter)

            if params.evaluate:
                evaluate_all(model, words)

            print(' '.join(('Epoch ', str(eidx + 1), 'Cost ', str(cost))))

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    print(' '.join(("total time:", str(end_time - start_time))))
def train(model, data, words, params):
    """Train ``model`` in shuffled minibatches, saving after every epoch.

    Per minibatch, the embeddings of both items of each pair are populated
    from ``words``, ``utils.getpairs`` builds the four inputs
    ``(g1x, g2x, p1x, p2x)`` and ``model.train_function`` is called on
    them; its return value is kept as the cost.  The batch is then reset
    (``representation`` cleared, embeddings unpopulated) to save RAM.

    At the end of every epoch the parameters are written with
    ``utils.saveParams`` to ``params.outfile + str(eidx) + '.pickle'``,
    where ``eidx`` is the 0-based epoch index.  ``params.save`` is not
    consulted.  A ``KeyboardInterrupt`` stops training early; the elapsed
    time is printed in either case.

    Args:
        model: object providing ``train_function``; also passed to
            ``utils.getpairs`` and ``utils.saveParams``.
        data: indexable collection of pairs; items ``[0]`` and ``[1]`` of a
            pair provide ``populate_embeddings``, ``unpopulate_embeddings``
            and a ``representation`` attribute.
        words: forwarded to ``populate_embeddings``.
        params: settings object; ``epochs``, ``batchsize`` and ``outfile``
            are read here.

    Returns:
        None.
    """
    start_time = time.time()

    # Bound up front so the epoch report cannot raise NameError when an
    # epoch has no minibatches (e.g. empty ``data``).
    cost = None

    # NOTE: prints are single-argument calls so the output is identical
    # under the print statement and the print function.
    try:
        for eidx in range(params.epochs):
            # NOTE: an earlier, disabled variant held out one tenth of the
            # data per epoch, scored the model with utils.evaluate and saved
            # only when the score improved.  Checkpoints are currently
            # written unconditionally, once per epoch.
            kf = utils.get_minibatches_idx(len(data),
                                           params.batchsize,
                                           shuffle=True)

            for _, train_index in kf:
                batch = [data[t] for t in train_index]
                for pair in batch:
                    pair[0].populate_embeddings(words)
                    pair[1].populate_embeddings(words)

                (g1x, g2x, p1x, p2x) = utils.getpairs(model, batch, params)

                cost = model.train_function(g1x, g2x, p1x, p2x)

                # A non-finite cost is reported but does not stop training.
                if np.isnan(cost) or np.isinf(cost):
                    print('NaN detected')

                # Undo the batch to save RAM.
                for pair in batch:
                    pair[0].representation = None
                    pair[1].representation = None
                    pair[0].unpopulate_embeddings()
                    pair[1].unpopulate_embeddings()

            utils.saveParams(model, params.outfile + str(eidx) + '.pickle')

            print(' '.join(('Epoch ', str(eidx + 1), 'Cost ', str(cost))))

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    print(' '.join(("total time:", str(end_time - start_time))))