def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM, w2v_model=None):
        if w2v_model is not None:
            processor = NN_process.lengthTweetPairProcessor('../data/tweets/pairs/sets/tweet-pairs-train.txt',
                                                            '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
                                      '../data/wiki/model/docfreq.npy', w2v_model, WORDS, EMBEDDING_DIM, BATCH_SIZE, cutoff)
        else:
            processor = NN_process.lengthTweetPairProcessor('../data/tweets/pairs/sets/tweet-pairs-train.txt',
                                                            '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
                                      '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE, cutoff)
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_i1 = theano.shared(value=processor.indices1, name='train_i1', borrow=False)
        train_i2 = theano.shared(value=processor.indices2, name='train_i2', borrow=False)
        train_l1 = theano.shared(value=processor.l1, name='train_l1', borrow=False)
        train_l2 = theano.shared(value=processor.l2, name='train_l2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_i1, train_i2, train_l1, train_l2, train_y, train_z)

        print 'Cost factor: ' + str(COST_FACTOR)
        c = []

        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        def signal_handler(signal, frame):
            import os
            os._exit(0)
        signal.signal(signal.SIGINT, signal_handler)

        best_cost = float('inf')
        best_weights = None
        previous_best_cost = float('inf')
        second_time = False
        global LEARNING_RATE

        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                # we = self.model1.W.get_value()
                # print we

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_i1.set_value(processor.indices1, borrow=False)
                train_i2.set_value(processor.indices2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES-2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES-1 and e == epochs-1:
                    processor.stop = True
                    processor.cont = True
                processor.lock.notifyAll()
                processor.lock.release()

            print 'Training, factor %d, lr %.5f, epoch %d, cost %.5f' % (int(COST_FACTOR), LEARNING_RATE, e, numpy.mean(c))
            we = self.model1.W.get_value()
            print repr(we)

            # UNCOMMENT WHEN THIS PIECE OF CODE IS CALLED EXTERNALLY
            # if numpy.mean(c) < best_cost - 0.0005:
            #     previous_best_cost = best_cost
            #     best_cost = numpy.mean(c)
            #     best_weights = we
            # elif second_time:
            #     processor.lock.acquire()
            #     processor.stop = True
            #     processor.cont = True
            #     processor.lock.notifyAll()
            #     processor.lock.release()
            #     break
            # else:
            #     best_cost = previous_best_cost
            #     LEARNING_RATE = 0.001
            #     second_time = True

        t.join()

        print "Best weights:"
        print repr(best_weights)
        return best_weights
    def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
        processor = NN_process.lengthLinTweetPairProcessor('../data/tweets/pairs/sets/tweet-pairs-train.txt',
                                                        '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
                                  '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE)
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_i1 = theano.shared(value=processor.indices1, name='train_i1', borrow=False)
        train_i2 = theano.shared(value=processor.indices2, name='train_i2', borrow=False)
        train_l1 = theano.shared(value=processor.l1, name='train_l1', borrow=False)
        train_l2 = theano.shared(value=processor.l2, name='train_l2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_i1, train_i2, train_l1, train_l2, train_y, train_z)

        c = []

        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        def signal_handler(signal, frame):
            import os
            os._exit(0)
        signal.signal(signal.SIGINT, signal_handler)

        best_cost = float('inf')
        best_weights = None

        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                # we = self.model1.W.get_value()
                # print we

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_i1.set_value(processor.indices1, borrow=False)
                train_i2.set_value(processor.indices2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES-2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES-1 and e == epochs-1:
                    processor.stop = True
                    processor.cont = True
                processor.lock.notifyAll()
                processor.lock.release()

            print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))
            we = self.model1.W.get_value()
            print repr(we)

            # if numpy.mean(c) < best_cost - 0.0001:
            #     best_cost = numpy.mean(c)
            #     best_weights = we
            # else:
            #     processor.lock.acquire()
            #     processor.stop = True
            #     processor.cont = True
            #     processor.lock.notifyAll()
            #     processor.lock.release()
            #     break

        t.join()
        return best_weights
# --- Example #3 (scraped-page separator, not code; kept as a comment) ---
# 0
    def run(self,
            epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            regularization=REGULARIZATION,
            momentum=MOMENTUM):
        processor = NN_process.lengthTweetPairProcessor(
            '../data/tweets/pairs/sets/tweet-pairs-train.txt',
            '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
            '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal',
            WORDS, EMBEDDING_DIM, BATCH_SIZE, cutoff)
        train_x1 = theano.shared(value=processor.x1,
                                 name='train_x1',
                                 borrow=False)
        train_x2 = theano.shared(value=processor.x2,
                                 name='train_x2',
                                 borrow=False)
        train_i1 = theano.shared(value=processor.indices1,
                                 name='train_i1',
                                 borrow=False)
        train_i2 = theano.shared(value=processor.indices2,
                                 name='train_i2',
                                 borrow=False)
        train_l1 = theano.shared(value=processor.l1,
                                 name='train_l1',
                                 borrow=False)
        train_l2 = theano.shared(value=processor.l2,
                                 name='train_l2',
                                 borrow=False)
        train_y = theano.shared(value=processor.y,
                                name='train_y',
                                borrow=False)
        train_z = theano.shared(value=processor.z,
                                name='train_z',
                                borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_i1,
                                             train_i2, train_l1, train_l2,
                                             train_y, train_z)

        lin = numpy.linspace(0, float(WORDS) - 1.0, float(WORDS))
        c = []

        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        def signal_handler(signal, frame):
            import os
            os._exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate,
                             reg=regularization,
                             mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                # we = self.model1.W.get_value()
                # s = numpy.log(1 + numpy.exp(we)) + 1.0
                # print we
                # print s
                # temp = numpy.log(1.0 - 1.0/s[0]) / (WORDS - 1.0)
                # f = s[0] * numpy.exp(temp * lin) + 1 - s[0]
                # print f

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_i1.set_value(processor.indices1, borrow=False)
                train_i2.set_value(processor.indices2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES - 2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES - 1 and e == epochs - 1:
                    processor.stop = True
                processor.lock.notifyAll()
                processor.lock.release()

            print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))
            we = self.model1.W.get_value()
            s = numpy.log(1 + numpy.exp(we)) + 1.0
            print we
            print s
            temp = numpy.log(1.0 - 1.0 / s[0]) / (WORDS - 1.0)
            f = s[0] * numpy.exp(temp * lin) + 1 - s[0]
            print f

        t.join()
        self.save_me('run7.npy')
    def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
        processor = NN_process.lengthPairProcessor('../data/tweets/pairs/sets/tweet-pairs-train.txt',
                                             '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
                                  '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE)
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_l1 = theano.shared(value=processor.l1, name='train_l1', borrow=False)
        train_l2 = theano.shared(value=processor.l2, name='train_l2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_l1, train_l2, train_y, train_z)

        lin = numpy.linspace(0, float(WORDS) - 1.0, float(WORDS))
        c = []

        t = Thread(target=processor.process)
        t.start()
        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES-2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES-1 and e == epochs-1:
                    processor.stop = True
                processor.lock.notifyAll()
                processor.lock.release()

            print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))
            we = self.model1.W.get_value()
            print we
            f = we[0] * (lin ** 4) + we[1] * (lin ** 3) + we[2] * (lin ** 2) + we[3] * lin + 1.0
            print f

        self.save_me('run5.npy')
    def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
        processor = NN_process.lengthTweetPairProcessor(
            "../data/tweets/pairs/sets/tweet-pairs-train.txt",
            "../data/tweets/pairs/sets/tweet-no-pairs-train.txt",
            "../data/wiki/model/docfreq.npy",
            "../data/wiki/model/minimal",
            WORDS,
            EMBEDDING_DIM,
            BATCH_SIZE,
            cutoff,
        )
        train_x1 = theano.shared(value=processor.x1, name="train_x1", borrow=False)
        train_x2 = theano.shared(value=processor.x2, name="train_x2", borrow=False)
        train_i1 = theano.shared(value=processor.indices1, name="train_i1", borrow=False)
        train_i2 = theano.shared(value=processor.indices2, name="train_i2", borrow=False)
        train_l1 = theano.shared(value=processor.l1, name="train_l1", borrow=False)
        train_l2 = theano.shared(value=processor.l2, name="train_l2", borrow=False)
        train_y = theano.shared(value=processor.y, name="train_y", borrow=False)
        train_z = theano.shared(value=processor.z, name="train_z", borrow=False)

        print "Initializing train function..."
        train = self.train_function_momentum(
            train_x1, train_x2, train_i1, train_i2, train_l1, train_l2, train_y, train_z
        )

        lin = numpy.linspace(0, float(WORDS) - 1.0, float(WORDS))
        c = []

        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        def signal_handler(signal, frame):
            import os

            os._exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                # we = self.model1.W.get_value()
                # s = numpy.log(1 + numpy.exp(we)) + 1.0
                # print we
                # print s
                # temp = numpy.log(1.0 - 1.0/s[0]) / (WORDS - 1.0)
                # f = s[0] * numpy.exp(temp * lin) + 1 - s[0]
                # print f

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_i1.set_value(processor.indices1, borrow=False)
                train_i2.set_value(processor.indices2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES - 2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES - 1 and e == epochs - 1:
                    processor.stop = True
                processor.lock.notifyAll()
                processor.lock.release()

            print "Training, epoch %d, cost %.5f" % (e, numpy.mean(c))
            we = self.model1.W.get_value()
            s = numpy.log(1 + numpy.exp(we)) + 1.0
            print we
            print s
            temp = numpy.log(1.0 - 1.0 / s[0]) / (WORDS - 1.0)
            f = s[0] * numpy.exp(temp * lin) + 1 - s[0]
            print f

        t.join()
        self.save_me("run7.npy")
    def run(self, epochs=1, learning_rate=1.5, regularization=0.0, momentum=0.1):
        """Train on unsorted wiki pairs, printing cost and weights every batch.

        A background thread (processor.process) prepares the next batch while
        the current one trains; the two sides hand off through processor.lock
        (a Condition) and the ready/cont/stop flags.  The acquire/wait/
        notifyAll ordering is load-bearing -- do not reorder it.
        NOTE(review): the worker is not marked daemon and never joined;
        shutdown relies on processor.stop being honoured -- confirm.

        :param epochs: number of passes over the training set (default 1)
        :param learning_rate: SGD step size handed to the compiled train fn
        :param regularization: regularization factor handed to the train fn
        :param momentum: momentum factor handed to the train fn
        """
        processor = NN_process.unsortedPairProcessor('../data/pairs/sets/enwiki_pairs_20-train.txt', '../data/pairs/sets/enwiki_no_pairs_20-train.txt',
                                  '../data/model/docfreq.npy', '../data/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE)
        # Shared variables are refreshed in place every batch (borrow=False
        # copies the processor's buffers, so the worker can reuse them).
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_y, train_z)

        t = Thread(target=processor.process)
        t.start()
        for e in xrange(epochs):
            processor.new_epoch()

            # Wait until the worker has the first batch of this epoch ready.
            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            # Tell the worker to start preparing the next batch.
            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                #c = []
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                #c.append(cost)

                print 'Training, batch %d, cost %.5f' % (b, cost)
                print numpy.transpose(self.model1.W.get_value())

                # Wait for the worker to finish the next batch.
                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                # Release the worker for the next batch; on the very last
                # batch of the last epoch also tell it to terminate.
                processor.lock.acquire()
                processor.cont = True
                processor.ready = False
                if b == BATCHES-1 and e == epochs-1:
                    processor.stop = True
                processor.lock.notifyAll()
                processor.lock.release()

            #print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))

        self.save_me('run2.npy')
import NN_process
from threading import Thread
import time
import copy

# --- Training configuration -------------------------------------------------
# NOTE(review): this top-level segment appears mid-file, between methods; it
# looks like a pasted-in driver script, and it is cut short below (the lock
# acquired before setting cont/ready is never notified or released in view).
NO_DATA = 4900000
BATCH_SIZE = 100
# Python 2 integer division: 4900000 / 100 -> 49000 batches per epoch.
BATCHES = NO_DATA / BATCH_SIZE
EMBEDDING_DIM = 400
WORDS = 20
INPUT_SHAPE = (400, 20)
OUTPUT_SHAPE = (400, 1)

# Background producer that reads the (pair, no-pair) files and prepares
# batches of training data on its own thread.
processor = NN_process.PairProcessor('../data/pairs/enwiki_pairs_20.txt',
                                     '../data/pairs/enwiki_no_pairs_20.txt',
                                     '../data/model/docfreq.npy',
                                     '../data/model/minimal', WORDS,
                                     EMBEDDING_DIM, BATCH_SIZE)
t = Thread(target=processor.process)
t.start()
print 'Start processor thread'

# Block until the producer signals that the first batch is ready.
# (processor.lock behaves like a threading.Condition: wait/notifyAll.)
processor.new_epoch()
processor.lock.acquire()
while not processor.ready:
    processor.lock.wait()
processor.lock.release()

# Hand the buffers back to the producer for the next batch.
processor.lock.acquire()
processor.cont = True
processor.ready = False
    def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
        """Train on wiki sentence pairs with a one-batch-ahead producer thread.

        A background ``NN_process.PairProcessor`` fills numpy buffers
        (x1, x2, y, z); this thread copies each ready batch into theano
        shared variables, runs one momentum-SGD step per batch, and saves
        the model to 'run1.npy' when all epochs are done.

        :param epochs: number of passes over the training pairs
        :param learning_rate: SGD step size forwarded to the train function
        :param regularization: weight penalty forwarded to the train function
        :param momentum: momentum coefficient forwarded to the train function
        """
        processor = NN_process.PairProcessor('../data/wiki/pairs/sets/enwiki_pairs_' + str(WORDS) + '-train.txt',
                                             '../data/wiki/pairs/sets/enwiki_no_pairs_' + str(WORDS) + '-train.txt',
                                  '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE)
        # borrow=False: theano takes a copy, so the producer can keep
        # refilling its own arrays while training uses the previous batch.
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_y, train_z)

        # Daemon thread: do not block interpreter exit on the producer.
        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        # Hard-exit on Ctrl-C: os._exit skips cleanup that could otherwise
        # hang on the producer thread or theano state.
        import signal
        def signal_handler(signal, frame):
            import os
            os._exit(0)
        signal.signal(signal.SIGINT, signal_handler)

        for e in xrange(epochs):
            processor.new_epoch()

            # Wait until the producer marks the first batch of the epoch
            # ready (processor.lock behaves like a threading.Condition).
            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            # Release the producer to start preparing the next batch.
            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                #c = []
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                #c.append(cost)

                print 'Training, batch %d, cost %.5f' % (b, cost)
                print repr(self.model1.W.get_value())

                # Wait for the next batch, then copy it into the shareds.
                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                # NOTE(review): at b == BATCHES-2 neither branch fires, so
                # `ready` stays set and no further batch is requested this
                # epoch -- presumably deliberate end-of-epoch throttling;
                # confirm against NN_process.new_epoch() before changing.
                if b < BATCHES-2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES-1 and e == epochs-1:
                    # Final batch of the final epoch: tell the producer to stop.
                    processor.stop = True
                    processor.cont = True
                processor.lock.notifyAll()
                processor.lock.release()

            #print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))

        t.join()
        self.save_me('run1.npy')
    def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM, w2v_model=None):
        """Train on wiki pairs with the length-aware tweet pair processor.

        A background ``NN_process.lengthTweetPairProcessor`` prepares batches
        (x1, x2, indices1, indices2, l1, l2, y, z) one step ahead; this
        thread copies each ready batch into theano shared variables and runs
        one momentum-SGD step per batch.  Does not save the model.

        :param w2v_model: accepted but never referenced in this body --
            presumably kept for signature compatibility with a variant that
            passes a preloaded word2vec model; confirm before removing.
        """
        # NOTE(review): the filenames embed a literal 'r'
        # (enwiki_pairs_r-train.txt) rather than str(WORDS) as the sibling
        # variant does -- confirm this is the intended data set.
        processor = NN_process.lengthTweetPairProcessor('../data/wiki/pairs/sets/enwiki_pairs_' + 'r' + '-train.txt',
                                             '../data/wiki/pairs/sets/enwiki_no_pairs_' + 'r' + '-train.txt',
                                      '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE, cutoff)
        # borrow=False: theano copies the buffers so the producer thread can
        # keep refilling its own arrays while training runs.
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_i1 = theano.shared(value=processor.indices1, name='train_i1', borrow=False)
        train_i2 = theano.shared(value=processor.indices2, name='train_i2', borrow=False)
        train_l1 = theano.shared(value=processor.l1, name='train_l1', borrow=False)
        train_l2 = theano.shared(value=processor.l2, name='train_l2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_i1, train_i2, train_l1, train_l2, train_y, train_z)

        print 'Cost factor: ' + str(COST_FACTOR)

        # Daemon thread: do not block interpreter exit on the producer.
        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        # Hard-exit on Ctrl-C; os._exit skips cleanup that could hang.
        # NOTE(review): unlike the sibling variant, `signal` is not imported
        # locally here -- assumes a module-level import; verify.
        def signal_handler(signal, frame):
            import os
            os._exit(0)
        signal.signal(signal.SIGINT, signal_handler)

        # Early-stopping state; only the commented-out block at the bottom
        # ever touches these (and LEARNING_RATE via the global below).
        best_cost = float('inf')
        best_weights = None
        previous_best_cost = float('inf')
        second_time = False
        global LEARNING_RATE

        for e in xrange(epochs):
            processor.new_epoch()

            # Wait until the producer marks the first batch of the epoch
            # ready (processor.lock behaves like a threading.Condition).
            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            # Release the producer to start preparing the next batch.
            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)

                # Wait for the next batch to be ready.
                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                we = self.model1.W.get_value()
                print repr(we)

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_i1.set_value(processor.indices1, borrow=False)
                train_i2.set_value(processor.indices2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                # NOTE(review): at b == BATCHES-2 neither branch fires, so no
                # further batch is requested this epoch -- presumably
                # deliberate; confirm against NN_process before changing.
                if b < BATCHES-2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES-1 and e == epochs-1:
                    # Final batch of the final epoch: stop the producer.
                    processor.stop = True
                    processor.cont = True
                processor.lock.notifyAll()
                processor.lock.release()

            #print 'Training, factor %d, lr %.5f, epoch %d, cost %.5f' % (int(COST_FACTOR), LEARNING_RATE, e, numpy.mean(c))
            #we = self.model1.W.get_value()
            #print repr(we)

            # UNCOMMENT WHEN THIS PIECE OF CODE IS CALLED EXTERNALLY
            # if numpy.mean(c) < best_cost - 0.0005:
            #     previous_best_cost = best_cost
            #     best_cost = numpy.mean(c)
            #     best_weights = we
            # elif second_time:
            #     processor.lock.acquire()
            #     processor.stop = True
            #     processor.cont = True
            #     processor.lock.notifyAll()
            #     processor.lock.release()
            #     break
            # else:
            #     best_cost = previous_best_cost
            #     LEARNING_RATE = 0.001
            #     second_time = True

        t.join()
    def run(self,
            epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            regularization=REGULARIZATION,
            momentum=MOMENTUM):
        processor = NN_process.lengthPairProcessor(
            '../data/tweets/pairs/sets/tweet-pairs-train.txt',
            '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
            '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal',
            WORDS, EMBEDDING_DIM, BATCH_SIZE)
        train_x1 = theano.shared(value=processor.x1,
                                 name='train_x1',
                                 borrow=False)
        train_x2 = theano.shared(value=processor.x2,
                                 name='train_x2',
                                 borrow=False)
        train_l1 = theano.shared(value=processor.l1,
                                 name='train_l1',
                                 borrow=False)
        train_l2 = theano.shared(value=processor.l2,
                                 name='train_l2',
                                 borrow=False)
        train_y = theano.shared(value=processor.y,
                                name='train_y',
                                borrow=False)
        train_z = theano.shared(value=processor.z,
                                name='train_z',
                                borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_l1,
                                             train_l2, train_y, train_z)

        lin = numpy.linspace(0, float(WORDS) - 1.0, float(WORDS))
        c = []

        t = Thread(target=processor.process)
        t.start()
        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate,
                             reg=regularization,
                             mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                # we = self.model1.W.get_value()
                # print we
                # temp = math.log(1 - math.exp(-we[0])) / (WORDS - 1.0)
                # f = numpy.exp(we[0] + lin * temp) + (1 - math.exp(we[0]))
                # print f

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES - 2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES - 1 and e == epochs - 1:
                    processor.stop = True
                processor.lock.notifyAll()
                processor.lock.release()

            print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))
            we = self.model1.W.get_value()
            s = 1.0 / (1.0 + numpy.exp(-we))
            print we
            print s
            temp = math.log(1 - math.exp(-s[0])) / (WORDS - 1.0)
            f = numpy.exp(s[0] + lin * temp) + (1 - math.exp(s[0]))
            print f

        t.join()
        self.save_me('run6.npy')
    def run(self,
            epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            regularization=REGULARIZATION,
            momentum=MOMENTUM):
        processor = NN_process.lengthLinTweetPairProcessor(
            '../data/tweets/pairs/sets/tweet-pairs-train.txt',
            '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
            '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal',
            WORDS, EMBEDDING_DIM, BATCH_SIZE)
        train_x1 = theano.shared(value=processor.x1,
                                 name='train_x1',
                                 borrow=False)
        train_x2 = theano.shared(value=processor.x2,
                                 name='train_x2',
                                 borrow=False)
        train_i1 = theano.shared(value=processor.indices1,
                                 name='train_i1',
                                 borrow=False)
        train_i2 = theano.shared(value=processor.indices2,
                                 name='train_i2',
                                 borrow=False)
        train_l1 = theano.shared(value=processor.l1,
                                 name='train_l1',
                                 borrow=False)
        train_l2 = theano.shared(value=processor.l2,
                                 name='train_l2',
                                 borrow=False)
        train_y = theano.shared(value=processor.y,
                                name='train_y',
                                borrow=False)
        train_z = theano.shared(value=processor.z,
                                name='train_z',
                                borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_i1,
                                             train_i2, train_l1, train_l2,
                                             train_y, train_z)

        c = []

        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        def signal_handler(signal, frame):
            import os
            os._exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        best_cost = float('inf')
        best_weights = None

        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                c = []
                cost = train(lr=learning_rate,
                             reg=regularization,
                             mom=momentum)
                c.append(cost)

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
                # we = self.model1.W.get_value()
                # print we

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_i1.set_value(processor.indices1, borrow=False)
                train_i2.set_value(processor.indices2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES - 2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES - 1 and e == epochs - 1:
                    processor.stop = True
                    processor.cont = True
                processor.lock.notifyAll()
                processor.lock.release()

            print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))
            we = self.model1.W.get_value()
            print repr(we)

            # if numpy.mean(c) < best_cost - 0.0001:
            #     best_cost = numpy.mean(c)
            #     best_weights = we
            # else:
            #     processor.lock.acquire()
            #     processor.stop = True
            #     processor.cont = True
            #     processor.lock.notifyAll()
            #     processor.lock.release()
            #     break

        t.join()
        return best_weights
    def run(self,
            epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            regularization=REGULARIZATION,
            momentum=MOMENTUM):
        processor = NN_process.lengthPairProcessor(
            '../data/pairs/sets/enwiki_pairs_' + WORDS_FILE + '-train.txt',
            '../data/pairs/sets/enwiki_no_pairs_' + WORDS_FILE + '-train.txt',
            '../data/model/docfreq.npy', '../data/model/minimal', WORDS,
            EMBEDDING_DIM, BATCH_SIZE)
        train_x1 = theano.shared(value=processor.x1,
                                 name='train_x1',
                                 borrow=False)
        train_x2 = theano.shared(value=processor.x2,
                                 name='train_x2',
                                 borrow=False)
        train_l1 = theano.shared(value=processor.l1,
                                 name='train_l1',
                                 borrow=False)
        train_l2 = theano.shared(value=processor.l2,
                                 name='train_l2',
                                 borrow=False)
        train_y = theano.shared(value=processor.y,
                                name='train_y',
                                borrow=False)
        train_z = theano.shared(value=processor.z,
                                name='train_z',
                                borrow=False)

        print 'Initializing train function...'
        train = self.train_function_momentum(train_x1, train_x2, train_l1,
                                             train_l2, train_y, train_z)

        lin = numpy.linspace(0, 29.0, 30.0)

        t = Thread(target=processor.process)
        t.start()
        for e in xrange(epochs):
            processor.new_epoch()

            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                #c = []
                cost = train(lr=learning_rate,
                             reg=regularization,
                             mom=momentum)
                #c.append(cost)

                print 'Training, batch %d, cost %.5f' % (b, cost)
                we = self.model1.W.get_value()
                print repr(we)
                f = we[0] * (lin**2) + we[1] * lin + we[2]
                print f

                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_l1.set_value(processor.l1, borrow=False)
                train_l2.set_value(processor.l2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                if b < BATCHES - 2:
                    processor.cont = True
                    processor.ready = False
                if b == BATCHES - 1 and e == epochs - 1:
                    processor.stop = True
                processor.lock.notifyAll()
                processor.lock.release()

            #print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))

        self.save_me('run4.npy')