def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
    """Train the tweet-pair model with momentum SGD.

    A background thread (``processor.process``) prepares batches; this
    method and the worker hand data off through ``processor.lock`` (a
    condition variable) using the ``ready``/``cont``/``stop`` flags.

    :param epochs:          number of passes over the training pairs
    :param learning_rate:   SGD step size passed to the train function
    :param regularization:  L2-style penalty weight passed to the train function
    :param momentum:        momentum coefficient passed to the train function
    :return: ``best_weights`` — NOTE(review): early stopping is commented
             out below, so this is never assigned and the method currently
             always returns ``None``.
    """
    processor = NN_process.lengthLinTweetPairProcessor(
        '../data/tweets/pairs/sets/tweet-pairs-train.txt',
        '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
        '../data/wiki/model/docfreq.npy',
        '../data/wiki/model/minimal',
        WORDS, EMBEDDING_DIM, BATCH_SIZE)

    # Shared variables double-buffer the data produced by the worker thread;
    # borrow=False so set_value/get_value always copy.
    train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
    train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
    train_i1 = theano.shared(value=processor.indices1, name='train_i1', borrow=False)
    train_i2 = theano.shared(value=processor.indices2, name='train_i2', borrow=False)
    train_l1 = theano.shared(value=processor.l1, name='train_l1', borrow=False)
    train_l2 = theano.shared(value=processor.l2, name='train_l2', borrow=False)
    train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
    train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

    print('Initializing train function...')
    train = self.train_function_momentum(train_x1, train_x2, train_i1, train_i2,
                                         train_l1, train_l2, train_y, train_z)

    t = Thread(target=processor.process)
    t.daemon = True
    t.start()

    # FIX: renamed the first parameter (was `signal`, shadowing the module).
    def signal_handler(sig, frame):
        # Hard exit: the daemon worker may be blocked on the condition
        # variable, so a normal interpreter shutdown could hang.
        import os
        os._exit(0)
    signal.signal(signal.SIGINT, signal_handler)

    best_cost = float('inf')
    best_weights = None
    for e in xrange(epochs):
        processor.new_epoch()

        # Wait until the worker has the first batch of this epoch ready.
        processor.lock.acquire()
        while not processor.ready:
            processor.lock.wait()
        processor.lock.release()

        train_x1.set_value(processor.x1, borrow=False)
        train_x2.set_value(processor.x2, borrow=False)
        train_i1.set_value(processor.indices1, borrow=False)
        train_i2.set_value(processor.indices2, borrow=False)
        train_l1.set_value(processor.l1, borrow=False)
        train_l2.set_value(processor.l2, borrow=False)
        train_y.set_value(processor.y, borrow=False)
        train_z.set_value(processor.z, borrow=False)

        # Tell the worker to start preparing the next batch.
        processor.lock.acquire()
        processor.cont = True
        processor.ready = False
        processor.lock.notifyAll()
        processor.lock.release()

        # FIX: collect costs over the whole epoch. Previously `c = []` was
        # re-initialized inside the batch loop, so the epoch-end mean only
        # reflected the final batch.
        c = []
        for b in xrange(BATCHES):
            cost = train(lr=learning_rate, reg=regularization, mom=momentum)
            c.append(cost)

            # Wait for the worker to finish the batch it was preparing
            # while we trained on the current one.
            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            processor.lock.acquire()
            if b < BATCHES - 2:
                # More batches remain: let the worker keep producing.
                processor.cont = True
                processor.ready = False
            if b == BATCHES - 1 and e == epochs - 1:
                # Very last batch of the run: tell the worker to terminate.
                processor.stop = True
                processor.cont = True
            processor.lock.notifyAll()
            processor.lock.release()

        print('Training, epoch %d, cost %.5f' % (e, numpy.mean(c)))
        we = self.model1.W.get_value()
        print(repr(we))
        # Early stopping (disabled): would track best_weights/best_cost and
        # signal the worker to stop when the epoch mean stops improving.
        # if numpy.mean(c) < best_cost - 0.0001:
        #     best_cost = numpy.mean(c)
        #     best_weights = we
        # else:
        #     processor.lock.acquire()
        #     processor.stop = True
        #     processor.cont = True
        #     processor.lock.notifyAll()
        #     processor.lock.release()
        #     break

    t.join()
    return best_weights
# NOTE(review): this is a byte-for-byte duplicate of the `run` definition
# earlier in this file; if both live in the same class, this later one
# silently shadows the earlier. Consider deleting one of the two.
def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
    """Train the tweet-pair model with momentum SGD.

    A background thread (``processor.process``) prepares batches; the two
    threads hand data off through ``processor.lock`` (a condition variable)
    together with the ``ready``/``cont``/``stop`` flags.

    :param epochs:          number of passes over the training pairs
    :param learning_rate:   SGD step size forwarded to the train function
    :param regularization:  penalty weight forwarded to the train function
    :param momentum:        momentum coefficient forwarded to the train function
    :return: ``best_weights`` — the early-stopping code below is commented
             out, so this is never assigned and the method always returns
             ``None`` as written.
    """
    processor = NN_process.lengthLinTweetPairProcessor(
        '../data/tweets/pairs/sets/tweet-pairs-train.txt',
        '../data/tweets/pairs/sets/tweet-no-pairs-train.txt',
        '../data/wiki/model/docfreq.npy',
        '../data/wiki/model/minimal',
        WORDS, EMBEDDING_DIM, BATCH_SIZE)
    # Shared variables buffer the worker-produced arrays; borrow=False so
    # set_value/get_value always copy.
    train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
    train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
    train_i1 = theano.shared(value=processor.indices1, name='train_i1', borrow=False)
    train_i2 = theano.shared(value=processor.indices2, name='train_i2', borrow=False)
    train_l1 = theano.shared(value=processor.l1, name='train_l1', borrow=False)
    train_l2 = theano.shared(value=processor.l2, name='train_l2', borrow=False)
    train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
    train_z = theano.shared(value=processor.z, name='train_z', borrow=False)
    print 'Initializing train function...'
    train = self.train_function_momentum(train_x1, train_x2, train_i1, train_i2, train_l1, train_l2, train_y, train_z)
    c = []
    t = Thread(target=processor.process)
    t.daemon = True
    t.start()

    def signal_handler(signal, frame):
        # Hard exit on Ctrl-C: the daemon worker may be blocked on the
        # condition variable, so a normal shutdown could hang.
        # (The parameter name shadows the `signal` module inside the handler.)
        import os
        os._exit(0)
    signal.signal(signal.SIGINT, signal_handler)
    best_cost = float('inf')
    best_weights = None
    for e in xrange(epochs):
        processor.new_epoch()
        # Wait until the worker has the first batch of this epoch ready.
        processor.lock.acquire()
        while not processor.ready:
            processor.lock.wait()
        processor.lock.release()
        train_x1.set_value(processor.x1, borrow=False)
        train_x2.set_value(processor.x2, borrow=False)
        train_i1.set_value(processor.indices1, borrow=False)
        train_i2.set_value(processor.indices2, borrow=False)
        train_l1.set_value(processor.l1, borrow=False)
        train_l2.set_value(processor.l2, borrow=False)
        train_y.set_value(processor.y, borrow=False)
        train_z.set_value(processor.z, borrow=False)
        # Release the worker to prepare the next batch.
        processor.lock.acquire()
        processor.cont = True
        processor.ready = False
        processor.lock.notifyAll()
        processor.lock.release()
        for b in xrange(BATCHES):
            # NOTE(review): `c` is reset on every batch iteration, so the
            # epoch-end numpy.mean(c) below reflects only the last batch.
            c = []
            cost = train(lr=learning_rate, reg=regularization, mom=momentum)
            c.append(cost)
            # Wait for the batch the worker prepared while we trained.
            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()
            # print 'Training, batch %d (from %d), cost %.5f' % (b, BATCHES, cost)
            # we = self.model1.W.get_value()
            # print we
            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_i1.set_value(processor.indices1, borrow=False)
            train_i2.set_value(processor.indices2, borrow=False)
            train_l1.set_value(processor.l1, borrow=False)
            train_l2.set_value(processor.l2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)
            processor.lock.acquire()
            if b < BATCHES - 2:
                # More batches remain: let the worker keep producing.
                processor.cont = True
                processor.ready = False
            if b == BATCHES - 1 and e == epochs - 1:
                # Final batch of the final epoch: tell the worker to stop.
                processor.stop = True
                processor.cont = True
            processor.lock.notifyAll()
            processor.lock.release()
        print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))
        we = self.model1.W.get_value()
        print repr(we)
        # Early stopping (disabled): would track best_weights/best_cost and
        # signal the worker to stop once the epoch mean stops improving.
        # if numpy.mean(c) < best_cost - 0.0001:
        #     best_cost = numpy.mean(c)
        #     best_weights = we
        # else:
        #     processor.lock.acquire()
        #     processor.stop = True
        #     processor.cont = True
        #     processor.lock.notifyAll()
        #     processor.lock.release()
        #     break
    t.join()
    return best_weights