def __init__(self, cropsize, batch_size, nkerns=[10, 10, 10], filters=[11, 6, 4]):
    self.X_batch, self.y_batch = T.tensor4('x'), T.matrix('y')
    self.layers, self.params = [], []
    rng = np.random.RandomState(23455)

    layer0 = LeNetConvPoolLayer(rng, input=self.X_batch,
                                image_shape=(batch_size, 1, cropsize, cropsize),
                                filter_shape=(nkerns[0], 1, filters[0], filters[0]),
                                poolsize=(2, 2))
    self.layers += [layer0]
    self.params += layer0.params
    # 400 - 11 + 1 = 390 / 2 = 195
    map_size = (cropsize - filters[0] + 1) / 2

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], map_size, map_size),
                                filter_shape=(nkerns[1], nkerns[0], filters[1], filters[1]),
                                poolsize=(2, 2))
    self.layers += [layer1]
    self.params += layer1.params
    # 195 - 6 + 1 = 190 / 2 = 95
    map_size = (map_size - filters[1] + 1) / 2

    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batch_size, nkerns[1], map_size, map_size),
                                filter_shape=(nkerns[2], nkerns[1], filters[2], filters[2]),
                                poolsize=(2, 2))
    self.layers += [layer2]
    self.params += layer2.params
    # 95 - 4 + 1 = 92 / 2 = 46
    map_size = (map_size - filters[2] + 1) / 2

    layer3_input = layer2.output.flatten(2)
    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng, input=layer3_input,
                         n_in=nkerns[2] * map_size * map_size,
                         n_out=1, activation=None)
    self.layers += [layer3]
    self.params += layer3.params

    nparams = np.sum([p.get_value().flatten().shape[0] for p in self.params])
    print "model contains %i parameters" % nparams

    self.output = self.layers[-1].output
def __init__(self, nkerns=[48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)
    nClasses = 2
    nMaxPool = 2
    nHidden = 200

    self.p = 65
    #self.x = T.tensor3('x')   # membrane data set
    self.x = T.tensor4('x')    # membrane mini-batch
    self.y = T.ivector('y')    # 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images
    # to a 4D tensor, compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 65 -> 62 -> 31
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 31)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # Fully connected sigmoidal layer, goes from
    # X -> 200
    #--------------------------------------------------
    layer1_input = layer0.output.flatten(2)
    layer1 = HiddenLayer(rng, input=layer1_input,
                         n_in=nkerns[0] * os0 * os0,
                         n_out=nHidden,
                         activation=T.tanh)

    #--------------------------------------------------
    # LAYER 2
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output,
                                n_in=nHidden,
                                n_out=nClasses)

    self.layers = (layer0, layer1, layer2)
def __init__(self, nkerns=[48, 48, 48, 48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)
    nClasses = 2
    nMaxPool = 2
    nHidden = 200

    self.p = 95
    #self.x = T.tensor3('x')   # membrane data set
    self.x = T.tensor4('x')    # membrane mini-batch
    self.y = T.ivector('y')    # labels := 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images
    # to a 4D tensor, compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 95 -> 92 -> 46
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 46)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # layer1 convolution+max pool reduces image dimensions by:
    # 46 -> 42 -> 21
    #--------------------------------------------------
    fs1 = 5                           # filter size, layer 1
    os1 = (os0 - fs1 + 1) / nMaxPool  # image out size 1
    assert (os1 == 21)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(self.miniBatchSize, nkerns[0], os0, os0),
                                filter_shape=(nkerns[1], nkerns[0], fs1, fs1),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 2
    # layer2 convolution+max pool reduces image dimensions by:
    # 21 -> 18 -> 9
    #--------------------------------------------------
    fs2 = 4
    os2 = (os1 - fs2 + 1) / nMaxPool
    assert (os2 == 9)
    # input feature maps = nkerns[1], the previous layer's output
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(self.miniBatchSize, nkerns[1], os1, os1),
                                filter_shape=(nkerns[2], nkerns[1], fs2, fs2),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 3
    # layer3 convolution+max pool reduces image dimensions by:
    # 9 -> 6 -> 3
    #--------------------------------------------------
    fs3 = 4
    os3 = (os2 - fs3 + 1) / nMaxPool
    assert (os3 == 3)
    layer3 = LeNetConvPoolLayer(rng, input=layer2.output,
                                image_shape=(self.miniBatchSize, nkerns[2], os2, os2),
                                filter_shape=(nkerns[3], nkerns[2], fs3, fs3),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 4
    # Fully connected sigmoidal layer, goes from
    # 3*3*48 ~ 450 -> 200
    #--------------------------------------------------
    layer4_input = layer3.output.flatten(2)
    layer4 = HiddenLayer(rng, input=layer4_input,
                         n_in=nkerns[3] * os3 * os3,
                         n_out=nHidden,
                         activation=T.tanh)

    #--------------------------------------------------
    # LAYER 5
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer5 = LogisticRegression(input=layer4.output,
                                n_in=nHidden,
                                n_out=nClasses)

    self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
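The constructors above all repeat the same output-size arithmetic, a valid convolution followed by non-overlapping max pooling. A minimal, self-contained sketch of that calculation (not part of the original code, shown only to make the asserted 95 -> 46 -> 21 -> 9 -> 3 progression explicit):

# Hypothetical helper, mirroring os = (p - fs + 1) / nMaxPool above
# (// is explicit integer division; the Python 2 code relies on plain /).
def conv_pool_out_size(in_size, filter_size, pool_size=2):
    return (in_size - filter_size + 1) // pool_size

size = 95
for f in [4, 5, 4, 4]:
    size = conv_pool_out_size(size, f)
    print(size)   # 46, 21, 9, 3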
def cifar_fast_net(batch_size=128, n_epochs=300, test_frequency=13, learning_rate=0.001):

    rng1 = numpy.random.RandomState(23455)
    rng2 = numpy.random.RandomState(12423)
    rng3 = numpy.random.RandomState(23245)
    rng4 = numpy.random.RandomState(12123)
    rng5 = numpy.random.RandomState(25365)
    rng6 = numpy.random.RandomState(15323)

    train_set_x, train_set_y = load_cifar_data(['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4'])
    valid_set_x, valid_set_y = load_cifar_data(['data_batch_5'], WHICHSET='valid')
    test_set_x, test_set_y = load_cifar_data(['test_batch'], WHICHSET='test')

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    img_input = x.reshape((batch_size, 3, 32, 32))
    img_input = img_input.dimshuffle(1, 2, 3, 0)

    #### define the layers:
    conv_pool1 = LeNetConvPoolLayer(rng=rng1, input=img_input,
                                    filter_shape=(3, 5, 5, 32),
                                    image_shape=(3, 32, 32, batch_size),
                                    activation='vshape',
                                    poolsize=(3, 3), poolstride=2, pad=2,
                                    convstride=1, initW=0.0001, initB=0, partial_sum=4,
                                    pooling='max',
                                    epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004)

    conv_pool2 = LeNetConvPoolLayer(rng=rng2, input=conv_pool1.output,
                                    filter_shape=(32, 5, 5, 32),
                                    image_shape=(32, 16, 16, batch_size),
                                    activation='vshape',
                                    poolsize=(3, 3), poolstride=2, pad=2,
                                    convstride=1, initW=0.01, initB=0, partial_sum=4,
                                    pooling='average',
                                    epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004)

    conv_pool3 = LeNetConvPoolLayer(rng=rng3, input=conv_pool2.output,
                                    filter_shape=(32, 5, 5, 64),
                                    image_shape=(32, 8, 8, batch_size),
                                    activation='vshape',
                                    poolsize=(3, 3), poolstride=2, pad=2,
                                    convstride=1, initW=0.01, initB=0, partial_sum=4,
                                    pooling='average',
                                    epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004)

    layer4_input = conv_pool3.output.dimshuffle(3, 0, 1, 2).flatten(2)

    #fc_64 = HiddenLayer(rng=rng4, input=layer4_input, n_in=64*4*4, n_out=64, initW=0.1, initB=0)
    fc_64 = HiddenLayer(rng=rng4, input=layer4_input, n_in=64*4*4, n_out=64, initW=0.1, initB=0,
                        epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.03)
    fc_10 = LogisticRegression(input=fc_64.output, rng=rng5, n_in=64, n_out=10, initW=0.1,
                               epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.03)

    #### build the models:
    cost = fc_10.negative_log_likelihood(y)

    test_model = theano.function([index], fc_10.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                     y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], fc_10.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    Ws = [conv_pool1.W, conv_pool2.W, conv_pool3.W, fc_64.W, fc_10.W]
    pgradWs = [conv_pool1.grad_W, conv_pool2.grad_W, conv_pool3.grad_W, fc_64.grad_W, fc_10.grad_W]
    bs = [conv_pool1.b, conv_pool2.b, conv_pool3.b, fc_64.b, fc_10.b]
    pgradbs = [conv_pool1.grad_b, conv_pool2.grad_b, conv_pool3.grad_b, fc_64.grad_b, fc_10.grad_b]

    momWs = [conv_pool1.momW, conv_pool2.momW, conv_pool3.momW, fc_64.momW, fc_10.momW]
    momBs = [conv_pool1.momB, conv_pool2.momB, conv_pool3.momB, fc_64.momB, fc_10.momB]
    wcs = [conv_pool1.wc, conv_pool2.wc, conv_pool3.wc, fc_64.wc, fc_10.wc]
    epsWs = [conv_pool1.epsW, conv_pool2.epsW, conv_pool3.epsW, fc_64.epsW, fc_10.epsW]
    epsBs = [conv_pool1.epsB, conv_pool2.epsB, conv_pool3.epsB, fc_64.epsB, fc_10.epsB]

    gradWs = T.grad(cost, Ws)
    gradbs = T.grad(cost, bs)

    updates = []
    for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws, gradWs, momWs, wcs, epsWs, pgradWs):
        grad_i = - epsW_i * gradW_i - wc_i * epsW_i * W_i + momW_i * pgW_i
        updates.append((W_i, W_i + grad_i))
        updates.append((pgW_i, grad_i))

    for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs, gradbs, momBs, epsBs, pgradbs):
        grad_i = - epsB_i * gradb_i + momB_i * pgB_i
        updates.append((b_i, b_i + grad_i))
        updates.append((pgB_i, grad_i))

    train_model = theano.function([index], cost, updates=updates,
                                  givens={
                                      x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                      y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000              # look at this many examples regardless
    patience_increase = 2         # wait this much longer when a new best is found
    improvement_threshold = 0.995 # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation set;
                                  # in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        # below is the code for reducing the learning rate
        ###########################################
        if epoch == 50:
            epsWs = [k / 10.0 for k in epsWs]
            epsBs = [k / 10.0 for k in epsBs]
            print 'reduce eps by a factor of 10'
            updates = []
            for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws, gradWs, momWs, wcs, epsWs, pgradWs):
                grad_i = - epsW_i * gradW_i - wc_i * epsW_i * W_i + momW_i * pgW_i
                updates.append((W_i, W_i + grad_i))
                updates.append((pgW_i, grad_i))
            for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs, gradbs, momBs, epsBs, pgradbs):
                grad_i = - epsB_i * gradb_i + momB_i * pgB_i
                updates.append((b_i, b_i + grad_i))
                updates.append((pgB_i, grad_i))
            train_model = theano.function([index], cost, updates=updates,
                                          givens={
                                              x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                              y: train_set_y[index * batch_size: (index + 1) * batch_size]})
        ##############################################

        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    conv_pool1.bestW = conv_pool1.W.get_value().copy()
                    conv_pool1.bestB = conv_pool1.b.get_value().copy()
                    conv_pool2.bestW = conv_pool2.W.get_value().copy()
                    conv_pool2.bestB = conv_pool2.b.get_value().copy()
                    conv_pool3.bestW = conv_pool3.W.get_value().copy()
                    conv_pool3.bestB = conv_pool3.b.get_value().copy()
                    fc_64.bestW = fc_64.W.get_value().copy()
                    fc_64.bestB = fc_64.b.get_value().copy()
                    fc_10.bestW = fc_10.W.get_value().copy()
                    fc_10.bestB = fc_10.b.get_value().copy()

                    ## saving current best
                    print 'saving current best params..'
                    current_params = (conv_pool1.bestW, conv_pool1.bestB, conv_pool2.bestW,
                                      conv_pool2.bestB, conv_pool3.bestW, conv_pool3.bestB,
                                      fc_64.bestW, fc_64.bestB, fc_10.bestW, fc_10.bestB,
                                      momWs, momBs, epsWs, epsBs, wcs)
                    outfile = file('current_best_params.pkl', 'wb')
                    cPickle.dump(current_params, outfile)
                    outfile.close()

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
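The parameter updates in cifar_fast_net combine a learning rate (epsW/epsB), weight decay (wc) and momentum applied to the previous step (pgradW/pgradB). A minimal NumPy sketch of one such weight step, using hypothetical names and assuming the gradient has already been computed (not part of the original code):

# step = -eps*grad - wc*eps*W + mom*prev_step;  W += step
import numpy

def momentum_wd_step(W, grad, prev_step, eps=0.001, wc=0.004, mom=0.9):
    step = -eps * grad - wc * eps * W + mom * prev_step
    return W + step, step

W = numpy.zeros((5, 10))
prev = numpy.zeros_like(W)
grad = numpy.random.randn(5, 10)
W, prev = momentum_wd_step(W, grad, prev)  # prev is reused on the next call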
embeddings = theano.shared(numpy.array(wordvectors, dtype=theano.config.floatX)).dimshuffle(1, 0)

batchsizeVar = numSamples.shape[0]
y_resh = y.reshape((batchsizeVar, ))  # rel: e1 -> e2
y1ET_resh = y1ET.reshape((batchsizeVar, ))
y2ET_resh = y2ET.reshape((batchsizeVar, ))
numSamples_resh = numSamples.reshape((batchsizeVar, ))

layers = []
cnnContext = LeNetConvPoolLayer(rng=rng,
                                filter_shape=(nkernsContext, 1, representationsize, filtersizeContext),
                                poolsize=(1, kmaxContext))
layers.append(cnnContext)

if "middleContext" in config:
    hidden_in = nkernsContext * kmaxContext
else:
    cnnEntities = LeNetConvPoolLayer(rng=rng,
                                     filter_shape=(nkernsEntities, 1, representationsize, filtersizeEntities),
                                     poolsize=(1, kmaxEntities))
    layers.append(cnnEntities)
    hidden_in = 2 * (2 * nkernsContext * kmaxContext + nkernsEntities * kmaxEntities)

hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
in_height = 50

# filter sizes
flt_channels = 40
flt_time = 20
flt_width = 5
flt_height = 5

signals_shape = (batchsize, in_channels, in_height, in_width)
filters_shape = (flt_channels, in_channels, flt_height, flt_width)

layer0_input = x.reshape(signals_shape)

layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=signals_shape,
    filter_shape=filters_shape,
    poolsize=(2, 2),
)

layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
    filter_shape=(60, flt_channels, 3, 3),
    poolsize=(2, 2),
)

layer2 = LeNetConvPoolLayer(
    rng,
    input=layer1.output,
def prepare_network():

    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    #   BUILD NETWORK    #
    ######################
    # image sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape,
                                filter_shape=filters_shape, poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w, layer2_h),
                                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h,
                         n_out=500, activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize]
        })

    print('Loading network weights...')
    weightFile = '../live_count/weights.save'
    f = open(weightFile, 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()

    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
def __init__(self, configfile, train=False):

    self.slotList = [
        "N", "per:age", "per:alternate_names", "per:children", "per:cause_of_death",
        "per:date_of_birth", "per:date_of_death", "per:employee_or_member_of",
        "per:location_of_birth", "per:location_of_death", "per:locations_of_residence",
        "per:origin", "per:schools_attended", "per:siblings", "per:spouse", "per:title",
        "org:alternate_names", "org:date_founded", "org:founded_by",
        "org:location_of_headquarters", "org:members", "org:parents",
        "org:top_members_employees"
    ]
    typeList = ["O", "PERSON", "LOCATION", "ORGANIZATION", "DATE", "NUMBER"]

    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + wordvectorfile)
    networkfile = self.config["net"]
    logger.info("networkfile " + networkfile)
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNer = hiddenunits
    if "hiddenunitsNER" in self.config:
        hiddenunitsNer = int(self.config["hiddenunitsNER"])
    representationsizeNER = 50
    if "representationsizeNER" in self.config:
        representationsizeNER = int(self.config["representationsizeNER"])
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
        self.batch_size = int(self.config["batchsize"])
    else:
        self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1, int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    if self.contextsize < self.filtersize[1]:
        logger.info("setting filtersize to " + str(self.contextsize))
        self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1

    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
        logger.info("setting poolsize to " + str(sizeAfterConv))
        pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)
    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)  # not relevant, parameters will be overwritten by stored model anyway
    if train:
        seed = rng.get_state()[1][0]
        logger.info("seed: " + str(seed))

    numSFclasses = 23
    numNERclasses = 6

    # allocate symbolic variables for the data
    self.index = T.lscalar()         # index to a [mini]batch
    self.xa = T.matrix('xa')         # left context
    self.xb = T.matrix('xb')         # middle context
    self.xc = T.matrix('xc')         # right context
    self.y = T.imatrix('y')          # label (only present in training)
    self.yNER1 = T.imatrix('yNER1')  # label for first entity (only present in training)
    self.yNER2 = T.imatrix('yNER2')  # label for second entity (only present in training)
    ishape = [self.representationsize, self.contextsize]  # this is the size of the context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))

    y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize = (pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)

    layer0aflattened = self.layer0a.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))

    layer0outputSF = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis=1)
    layer0outputSFsize = 3 * (nkerns[0] * sizeAfterPooling)

    layer0outputNER1 = T.concatenate([layer0aflattened, layer0bflattened], axis=1)
    layer0outputNER2 = T.concatenate([layer0bflattened, layer0cflattened], axis=1)
    layer0outputNERsize = 2 * (nkerns[0] * sizeAfterPooling)

    layer2ner1 = HiddenLayer(rng, input=layer0outputNER1, n_in=layer0outputNERsize,
                             n_out=hiddenunitsNer, activation=T.tanh)
    layer2ner2 = HiddenLayer(rng, input=layer0outputNER2, n_in=layer0outputNERsize,
                             n_out=hiddenunitsNer, activation=T.tanh,
                             W=layer2ner1.W, b=layer2ner1.b)

    # concatenate additional features to the sentence representation
    self.additionalFeatures = T.matrix('additionalFeatures')
    self.additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))

    layer2SFinput = T.concatenate([layer0outputSF, self.additionalFeatsShaped], axis=1)
    layer2SFinputSize = layer0outputSFsize + self.addInputSize

    layer2SF = HiddenLayer(rng, input=layer2SFinput, n_in=layer2SFinputSize,
                           n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3rel = LogisticRegression(input=layer2SF.output, n_in=hiddenunits, n_out=numSFclasses)
    layer3et = LogisticRegression(input=layer2ner1.output, n_in=hiddenunitsNer, n_out=numNERclasses)

    scoresForR1 = layer3rel.getScores(layer2SF.output)
    scoresForE1 = layer3et.getScores(layer2ner1.output)
    scoresForE2 = layer3et.getScores(layer2ner2.output)

    self.crfLayer = CRF(numClasses=numSFclasses + numNERclasses, rng=rng,
                        batchsizeVar=self.batch_size, sequenceLength=3)

    scores = T.zeros((self.batch_size, 3, numSFclasses + numNERclasses))
    scores = T.set_subtensor(scores[:, 0, numSFclasses:], scoresForE1)
    scores = T.set_subtensor(scores[:, 1, :numSFclasses], scoresForR1)
    scores = T.set_subtensor(scores[:, 2, numSFclasses:], scoresForE2)
    self.scores = scores

    self.y_conc = T.concatenate([yNER1reshaped + numSFclasses, y_reshaped,
                                 yNER2reshaped + numSFclasses], axis=1)

    # create a list of all model parameters
    self.paramList = [self.crfLayer.params, layer3rel.params, layer3et.params,
                      layer2SF.params, layer2ner1.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
        self.params += p
        logger.info(p)

    if not train:
        self.gotNetwork = 1
        # load parameters
        if not os.path.isfile(networkfile):
            logger.error("network file does not exist")
            self.gotNetwork = 0
        else:
            save_file = open(networkfile, 'rb')
            for p in self.params:
                p.set_value(cPickle.load(save_file), borrow=False)
            save_file.close()

    self.relation_scores_global = self.crfLayer.getProbForClass(self.scores, numSFclasses)
    self.predictions_global = self.crfLayer.getPrediction(self.scores)
def train_rep(learning_rate=0.002, L1_reg=0.0002, L2_reg=0.005, n_epochs=200,
              nkerns=[20, 50], batch_size=25):

    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"
    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    train_set_x, train_set_y, shared_train_set_y = datasets

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    valid_set_x, valid_set_y, shared_valid_set_y = datasets

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # compute number of minibatches for training, validation and testing
    n_all_train_batches = 30000
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_all_train_batches /= batch_size
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")    # the data is presented as rasterized images
    y = T.ivector("y")   # the labels are presented as a 1D vector of [int] labels

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) / 2
    layer1_h = (layer0_h - 4) / 2
    layer2_w = (layer1_w - 2) / 2
    layer2_h = (layer1_h - 2) / 2
    layer3_w = (layer2_w - 2) / 2
    layer3_h = (layer2_h - 2) / 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    # TODO: in case of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    )

    # symbolic Theano variable that represents the L1 regularization term
    L1 = (
        T.sum(abs(layer4.params[0]))
        + T.sum(abs(layer3.params[0]))
        + T.sum(abs(layer2.params[0]))
        + T.sum(abs(layer1.params[0]))
        + T.sum(abs(layer0.params[0]))
    )

    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = (
        T.sum(layer4.params[0] ** 2)
        + T.sum(layer3.params[0] ** 2)
        + T.sum(layer2.params[0] ** 2)
        + T.sum(layer1.params[0] ** 2)
        + T.sum(layer0.params[0] ** 2)
    )

    # the loss
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    start_time = time.clock()

    epoch = 0
    done_looping = False
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100

    startc = time.clock()
    while (epoch < n_epochs) and (not done_looping):
        endc = time.clock()
        print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0)))
        startc = time.clock()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load next train data
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet = ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_train_set_x, ns_train_set_y = datasets
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_train_batches = train_set_x.get_value(borrow=True).shape[0]
            n_train_batches /= batch_size

            # train
            for minibatch_index in range(n_train_batches):
                # training itself
                # --------------------------------------
                cost_ij = train_model(minibatch_index)
                # -------------------------

        # at the end of each epoch run validation
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_valid_set_x, ns_valid_set_y = datasets
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
            n_valid_batches /= batch_size

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        print((
            "epoch %i, minibatch %i/%i, validation error %f %%"
            % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
        ))

        # save snapshots
        print("saving weights state, epoch = ", epoch)
        f = file(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb")
        state_L0 = layer0.__getstate__()
        pickle.dump(state_L0, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L1 = layer1.__getstate__()
        pickle.dump(state_L1, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L2 = layer2.__getstate__()
        pickle.dump(state_L2, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L3 = layer3.__getstate__()
        pickle.dump(state_L3, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L4 = layer4.__getstate__()
        pickle.dump(state_L4, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()

    end_time = time.clock()
    print("Optimization complete.")
    print((
        "The code for file " + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    ), file=sys.stderr)
W_bound = numpy.sqrt(6. / (fan_in + fan_out))
# the convolution weight matrix
convW = theano.shared(numpy.asarray(
    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
    dtype=theano.config.floatX), name='conv_W', borrow=True)

# the bias is a 1D tensor -- one bias per output feature map
b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
convB = theano.shared(value=b_values, name='conv_b', borrow=True)

layer0 = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0_input,
                            filter_shape=filter_shape, poolsize=poolsize)
layer0flattened = layer0.output.flatten(2).reshape(
    (batch_size_var, nkerns[0] * sizeAfterPooling))
layer0outputsize = nkerns[0] * sizeAfterPooling

if "internalOnH" in attentionMethod:
    layer1 = AttentionLayer(rng, thisInput=layer0.conv_out_tanh, batchsize=batch_size_var,
                            dim1=nkerns[0], dim2=sizeAfterConv,
                            method=attentionMethod, k=kattention)
def main(args):
    # initial parameters
    embedding_size = args.embedding_size
    mention_context_size = args.mention_context_size
    type_context_size = args.type_context_size
    embedding_file = args.embedding_path
    hidden_units = args.hidden_units
    learning_rate = args.learning_rate
    margin = args.margin
    batch_size = args.batch_size
    n_epochs = args.num_epochs

    relation_size = 82
    nkerns = [500]
    filter_size = [1, 1]
    pool = [1, 1]
    l1 = 0.000001
    l2 = 0.000002
    newbob = False
    network_file = args.model_path
    test_file = args.test
    test_result_file = args.test_result
    label_file = args.ontology_path
    label_file_norm = args.norm_ontology_path
    relation_file = args.relation_path
    train_type_flag = args.seen_types

    tup_representation_size = embedding_size * 2

    # load word vectors
    word_vectors, vector_size = load_word_vec(embedding_file)

    # read train and dev file
    print("start loading train and dev file ... ")
    doc_id_list_test, type_list_test, trigger_list_test, left_word_list_test, relation_list_test, \
        right_word_list_test = load_training_data(test_file)

    print("start loading arg and relation files ... ")
    all_type_list, all_type_structures = load_types_1(label_file_norm)
    rel_index, index_rel = read_relation_index(relation_file)
    type_size = len(all_type_list)

    # using a matrix to represent each relation
    relation_matrix = random_init_rel_vec_factor(
        relation_file, tup_representation_size * tup_representation_size)

    train_types = get_types_for_train(train_type_flag, label_file)

    # prepare data structure
    print("start preparing data structures ... ")
    curSeed = 23455
    rng = numpy.random.RandomState(curSeed)
    seed = rng.get_state()[1][0]
    print("seed: ", seed)

    result_index_test_matrix, result_vector_test_matrix, input_context_test_matrix, input_trigger_test_matrix, \
        relation_binary_test_matrix, pos_neg_test_matrix = input_matrix_1_test(
            type_list_test, trigger_list_test, left_word_list_test, relation_list_test,
            right_word_list_test, embedding_size, mention_context_size, relation_size,
            label_file, word_vectors, rel_index, train_type_flag)

    input_type_matrix, input_type_structure_matrix = type_matrix(
        all_type_list, all_type_structures, embedding_file, type_context_size)

    time1 = time.time()
    dt = theano.config.floatX

    test_set_content = theano.shared(numpy.matrix(input_context_test_matrix, dtype=dt))
    test_set_trigger = theano.shared(numpy.matrix(input_trigger_test_matrix, dtype=dt))
    test_set_relation_binary = theano.shared(numpy.matrix(relation_binary_test_matrix, dtype=dt))
    test_set_posneg = theano.shared(numpy.matrix(pos_neg_test_matrix, dtype=dt))
    test_set_y = theano.shared(numpy.array(result_index_test_matrix, dtype=numpy.dtype(numpy.int32)))
    test_set_y_vector = theano.shared(numpy.matrix(result_vector_test_matrix, dtype=dt))

    train_set_type = theano.shared(numpy.matrix(input_type_matrix, dtype=dt))
    train_set_type_structure = theano.shared(numpy.matrix(input_type_structure_matrix, dtype=dt))
    train_types = theano.shared(numpy.matrix(train_types, dtype=dt))

    # compute number of minibatches for training, validation and testing
    n_test_batches = input_trigger_test_matrix.shape[0]
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()                # index to a [mini]batch
    x_content = T.matrix('x_content')  # the data is presented as rasterized images
    x_trigger = T.matrix('x_trigger')  # the data is presented as rasterized images
    x_relation_binary = T.matrix('x_relation_binary')
    x_pos_neg_flag = T.matrix('x_pos_neg_flag')
    x_type = T.matrix('x_type')
    x_type_structure = T.matrix('x_type_structure')
    y = T.ivector('y')                 # the labels are presented as a 1D vector of [int] labels
    y_vector = T.matrix('y_vector')
    x_train_types = T.matrix('x_train_types')

    i_shape = [tup_representation_size, mention_context_size]  # this is the size of the context matrices

    time2 = time.time()
    print("time for preparing data structures: ", time2 - time1)

    # build actual model
    print('start building the model ... ')
    time1 = time.time()

    rel_w = theano.shared(value=relation_matrix, borrow=True)  # 26*400

    # Construct the mention structure input layer
    layer0_input = x_content.reshape((batch_size, 1, i_shape[0], i_shape[1]))
    layer0_input_binary_relation = x_relation_binary.reshape(
        (batch_size, 1, relation_size, i_shape[1]))  # 100*1*26*5

    # compose amr relation matrix to each tuple
    compose_layer = ComposeLayerMatrix(input=layer0_input,
                                       input_binary_relation=layer0_input_binary_relation,
                                       rel_w=rel_w, rel_vec_size=tup_representation_size)
    layer1_input = compose_layer.output

    # initialize the convolution weight matrix
    filter_shape = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size = (pool[0], pool[1])

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(pool_size))
    w_bound = numpy.sqrt(6. / (fan_in + fan_out))

    conv_w = theano.shared(numpy.asarray(
        rng.uniform(low=-w_bound, high=w_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    conv_b = theano.shared(value=b_values, borrow=True)

    # conv with pool layer
    layer1_conv = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer1_input,
                                     image_shape=(batch_size, 1, i_shape[0], i_shape[1]),
                                     filter_shape=filter_shape, poolsize=pool_size)

    layer1_output = layer1_conv.output
    layer1_flattened = layer1_output.flatten(2)

    trigger_features_shaped = x_trigger.reshape((batch_size, embedding_size))

    layer2_input = T.concatenate([layer1_flattened, trigger_features_shaped], axis=1)

    # Construct the type structure input layer
    layer_type_input = x_type_structure.reshape(
        (type_size, 1, tup_representation_size, type_context_size))
    filter_shape_type = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size_type = (pool[0], pool[1])

    # initialize the implicit relation tensor
    type_tensor_shape = (tup_representation_size, tup_representation_size, tup_representation_size)
    type_tensor_w = theano.shared(numpy.asarray(
        rng.uniform(low=-w_bound, high=w_bound, size=type_tensor_shape),
        dtype=theano.config.floatX), borrow=True)

    # compose relation tensor to each tuple
    compose_type_layer = ComposeLayerTensor(input=layer_type_input, tensor=type_tensor_w)
    layer_type_input1 = compose_type_layer.output

    # conv with pool layer
    layer1_conv_type = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer_type_input1,
                                          image_shape=(type_size, 1, tup_representation_size,
                                                       type_context_size),
                                          filter_shape=filter_shape_type,
                                          poolsize=pool_size_type)

    layer1_type_output = layer1_conv_type.output
    layer1_type_flattened = layer1_type_output.flatten(2)

    types_shaped = x_type.reshape((type_size, embedding_size))

    layer2_type_input = T.concatenate([layer1_type_flattened, types_shaped], axis=1)
    layer2_type_input_size = nkerns[0]**pool[1] + embedding_size

    # ranking based max margin loss layer
    train_types_signal = x_train_types.reshape((type_size, 1))
    pos_neg_flag = x_pos_neg_flag.reshape((batch_size, 1))

    layer3 = MaxRankingMarginCosine1(rng=rng, input=layer2_input, input_label=layer2_type_input,
                                     true_label=y_vector, n_in=layer2_type_input_size,
                                     margin=margin, batch_size=batch_size, type_size=type_size,
                                     train_type_signal=train_types_signal,
                                     pos_neg_flag=pos_neg_flag)
    cost = layer3.loss

    # create a list of all model parameters to be fit by gradient descent
    param_list = [compose_layer.params, layer1_conv.params, compose_type_layer.params]
    params = []
    for p in param_list:
        params += p

    # the cost we minimize during training is the NLL of the model
    lambd1 = T.scalar('lambda1', dt)
    lambd2 = T.scalar('lambda2', dt)

    # L1 and L2 regularization possible
    reg2 = 0
    reg1 = 0
    for p in param_list:
        reg2 += T.sum(p[0]**2)
        reg1 += T.sum(abs(p[0]))

    cost += lambd2 * reg2
    cost += lambd1 * reg1

    lr = T.scalar('lr', dt)

    start = index * batch_size
    end = (index + 1) * batch_size

    testVariables = {}
    testVariables[x_content] = test_set_content[start:end]
    testVariables[x_trigger] = test_set_trigger[start:end]
    testVariables[x_relation_binary] = test_set_relation_binary[start:end]
    testVariables[x_type] = train_set_type
    testVariables[x_type_structure] = train_set_type_structure
    testVariables[y] = test_set_y[start:end]
    testVariables[y_vector] = test_set_y_vector[start:end]
    testVariables[x_train_types] = train_types
    testVariables[x_pos_neg_flag] = test_set_posneg[start:end]

    print("length of train variables ", len(testVariables))

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD. Since this model
    # has many parameters, it would be tedious to manually create an update rule for each
    # model parameter. We thus create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - lr * grad_i))

    test_model_confidence = theano.function([index], layer3.results(y),
                                            on_unused_input='ignore',
                                            givens=testVariables)

    time2 = time.time()
    print("time for building the model: ", time2 - time1)

    print("loading saved network")
    netfile = open(network_file)
    relW = cPickle.load(netfile)
    compose_layer.params[0].set_value(relW, borrow=True)

    convolW = cPickle.load(netfile)
    convolB = cPickle.load(netfile)
    layer1_conv.params[0].set_value(convolW, borrow=True)
    layer1_conv.params[1].set_value(convolB, borrow=True)
    layer1_conv_type.params[0].set_value(convolW, borrow=True)
    layer1_conv_type.params[1].set_value(convolB, borrow=True)

    typeW = cPickle.load(netfile)
    compose_type_layer.params[0].set_value(typeW, borrow=True)

    netfile.close()
    print("finish loading network")

    test_batch_size = 100
    all_batches = len(result_index_test_matrix) / test_batch_size

    confidence_prob = []
    confidence_value = []
    confidence_list = []
    confidence = [test_model_confidence(i) for i in xrange(all_batches)]
    for r in range(0, len(confidence)):
        for r1 in range(0, test_batch_size):
            hypo_result = confidence[r][0].item(r1)
            confidence_prob.append(confidence[r][2][r1])
            confidence_value.append(confidence[r][1][r1])
            confidence_list.append(hypo_result)

    y_pred = confidence_list

    f = open(test_result_file, "w")
    for i in range(0, len(y_pred)):
        f.write(str(y_pred[i]) + "\t" + str(confidence_value[i]) + "\t")
        for j in range(0, type_size):
            f.write(str(confidence_prob[i][j]) + " ")
        f.write("\n")
    f.close()
def build_model(self, flag_preserve_params=False):
    ###################
    # build the model #
    ###################
    logging.info('... building the model')

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.x = T.matrix('x')    # the data is presented as rasterized images
    # self.y = T.ivector('y') # the labels are presented as a 1D vector of
    #                         # [int] labels, used to represent labels given by data

    # the y as features, used for taking in intermediate layer "y" values
    self.y = T.matrix('y')

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    self.layer0_input = self.x.reshape((self.batch_size, self.img_dim,
                                        self.img_size, self.img_size))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    self.layer0 = LeNetConvPoolLayer(self.rng, input=self.layer0_input,
                                     image_shape=(self.batch_size, self.img_dim,
                                                  self.img_size, self.img_size),
                                     filter_shape=(self.nkerns[0], self.img_dim,
                                                   self.filtersize[0], self.filtersize[0]),
                                     poolsize=(self.poolsize[0], self.poolsize[0]),
                                     activation=self.conv_activation)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 4, 4)
    self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0]
    self.layer1 = LeNetConvPoolLayer(self.rng, input=self.layer0.output,
                                     image_shape=(self.batch_size, self.nkerns[0],
                                                  self.img_size1, self.img_size1),
                                     filter_shape=(self.nkerns[1], self.nkerns[0],
                                                   self.filtersize[1], self.filtersize[1]),
                                     poolsize=(self.poolsize[1], self.poolsize[1]),
                                     activation=self.conv_activation)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (20, 32*4*4) = (20, 512)
    self.layer2_input = self.layer1.output.flatten(2)

    self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1]

    # construct a fully-connected sigmoidal layer
    self.layer2 = HiddenLayer(self.rng, input=self.layer2_input,
                              n_in=self.nkerns[1] * self.img_size2 * self.img_size2,
                              n_out=self.num_hidden,
                              activation=self.hidden_activation)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output,
                                     n_in=self.num_hidden, n_out=self.num_class,
                                     activation=self.logreg_activation)

    # regularization terms
    self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \
        self.alpha_l2 * (self.layer2.W ** 2).sum()
    self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \
        self.alpha_l2 * (self.layer3.W ** 2).sum()

    # there are different choices of cost models
    if self.cost_type == 'nll_softmax':
        # the cost we minimize during training is the NLL of the model
        self.y = T.ivector('y')  # index involved, so it has to be an integer vector
        self.cost = self.layer3.negative_log_likelihood(self.y) + \
            self.decay_hidden + self.decay_softmax + \
            self.alpha_entropy * self.layer3.p_y_entropy
    elif self.cost_type == 'ssd_softmax':
        self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \
            self.decay_hidden + self.decay_softmax
    elif self.cost_type == 'ssd_hidden':
        self.cost = T.mean((self.layer2.output - self.y) ** 2) + \
            self.decay_hidden
    elif self.cost_type == 'ssd_conv':
        self.cost = T.mean((self.layer2_input - self.y) ** 2)

    # create a list of all model parameters to be fit by gradient descent.
    # Preserve the old parameters if they exist; this is used to keep parameter
    # values while rebuilding some of the theano functions. Note that parameters
    # should only be kept if the network structure doesn't change.
    if flag_preserve_params and hasattr(self, 'params'):
        params_temp = copy.deepcopy(self.params)
    else:
        params_temp = None

    self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params

    # if needed, assign the old parameters
    if flag_preserve_params and (params_temp is not None):
        for ind in range(len(params_temp)):
            self.params[ind].set_value(params_temp[ind].get_value(), borrow=True)

    # create a list of gradients for all model parameters
    self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn')

    # error function from the last layer logistic regression
    self.errors = self.layer3.errors
def build_lenet(config):
    rng = np.random.RandomState(23455)

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector

    image_width = config.image_width
    batch_size = config.batch_size
    image_size = image_width**2

    x_shared = T.cast(theano.shared(np.random.rand(batch_size, image_size),
                                    borrow=True), theano.config.floatX)
    y_shared = T.cast(theano.shared(np.random.randint(config.ydim, size=batch_size),
                                    borrow=True), 'int32')

    layer0_input = x.reshape((batch_size, 1, image_width, image_width))

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, image_width, image_width),
        filter_shape=(config.num_kerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, config.num_kerns[0], 12, 12),
        filter_shape=(config.num_kerns[1], config.num_kerns[0], 5, 5),
        poolsize=(2, 2)
    )

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=config.num_kerns[1] * 4 * 4,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=config.ydim)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    params_W = [layer3.W, layer2.W, layer1.W, layer0.W]
    params_b = [layer3.b, layer2.b, layer1.b, layer0.b]
    params = params_W + params_b

    shared_cost = theano.shared(np.float32(0.0))
    grads_temp = T.grad(cost, params)

    start_compilation = time.time()
    forward_step = theano.function([], [], updates=[(shared_cost, cost)],
                                   givens={x: x_shared, y: y_shared})
    forward_backward_step = theano.function([], grads_temp,
                                            givens={x: x_shared, y: y_shared})
    print 'compilation time: %.4f s' % (time.time() - start_compilation)

    return forward_step, forward_backward_step
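A hypothetical usage sketch for build_lenet (not from the original source). It assumes only that the config object exposes the four attributes the function reads (image_width, batch_size, ydim, num_kerns), and uses image_width = 28 so the hard-coded 12x12 and 4*4 shapes above hold:

# Minimal stand-in config; any object with these attributes would do.
class Config(object):
    image_width = 28
    batch_size = 64
    ydim = 10
    num_kerns = [20, 50]

fwd, fwd_bwd = build_lenet(Config())
fwd()              # one forward pass on the random shared data
grads = fwd_bwd()  # forward + backward pass, returns the gradient arrays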
# the convolution weight matrix
convW = theano.shared(numpy.asarray(
    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
    dtype=theano.config.floatX), name='conv_W', borrow=True)

# the bias is a 1D tensor -- one bias per output feature map
b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
convB = theano.shared(value=b_values, name='conv_b', borrow=True)

layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
                             image_shape=(xa.shape[0], 1, ishape[0], ishape[1]),
                             filter_shape=filter_shape, poolsize=poolsize)
layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
                             image_shape=(xb.shape[0], 1, ishape[0], ishape[1]),
                             filter_shape=filter_shape, poolsize=poolsize)
layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
def main(args): # initial parameters embedding_size = args.embedding_size arg_context_size = args.arg_context_size role_context_size = args.role_context_size embedding_file = args.embedding_path hidden_units = args.hidden_units learning_rate = args.learning_rate margin = args.margin batch_size = args.batch_size n_epochs = args.num_epochs relation_size = 82 nkerns = [500] filter_size = [1, 1] pool = [1, 1] l1 = 0.001 l2 = 0.002 newbob = False arg_network_file = args.model_path arg_train_file = args.train arg_dev_file = args.dev arg_test_file = args.test arg_label_file = args.ontology_path arg_label_file_norm = args.norm_ontology_path relation_file = args.relation_path train_role_flag = args.seen_args arg_path_file_merge = args.arg_path_file arg_path_file_universal = args.arg_path_file_universal trigger_role_matrix_file = args.trigger_role_matrix tup_representation_size = embedding_size * 2 # load word vectors word_vectors, vector_size = load_word_vec(embedding_file) # read train and dev file print("start loading train and dev file ... ") arg_trigger_list_train, arg_trigger_type_list_train, arg_list_train, arg_path_left_list_train, \ arg_path_rel_list_train, arg_path_right_list_train, arg_role_list_train = load_arg_data(arg_train_file) arg_trigger_list_dev, arg_trigger_type_list_dev, arg_list_dev, arg_path_left_list_dev, \ arg_path_rel_list_dev, arg_path_right_list_dev, arg_role_list_dev = load_arg_data(arg_dev_file) arg_trigger_list_test, arg_trigger_type_list_test, arg_list_test, arg_path_left_list_test, \ arg_path_rel_list_test, arg_path_right_list_test, arg_role_list_test = load_arg_data(arg_test_file) num_examples_per_epoch = len(arg_trigger_list_train) print("start loading arg and relation files ... ") all_type_list, all_type_structures = load_types(arg_label_file_norm) type_size = len(all_type_list) all_arg_role_list, all_type_role_structures, index_2_role, trigger_role_2_index, index_2_norm_role, \ trigger_norm_role_2_index = load_roles_1(arg_path_file_merge) role_size = len(all_arg_role_list) trigger_role_matrix = get_trigger_arg_matrix(trigger_role_matrix_file, type_size, role_size) train_roles = get_roles_for_train_1(train_role_flag, arg_path_file_merge) rel_2_index, index_2_rel = read_relation_index(relation_file) relation_matrix = random_init_rel_vec_factor( relation_file, tup_representation_size * tup_representation_size) print("start preparing data structures ... 
") curSeed = 23455 rng = numpy.random.RandomState(curSeed) seed = rng.get_state()[1][0] print("seed: ", seed) # arg data matrix role_index_train_matrix, role_vector_train_matrix, input_arg_context_train_matrix, input_arg_train_matrix, \ arg_relation_binary_train_matrix, pos_neg_role_train_matrix, limited_roles_train_matrix = \ input_arg_matrix(arg_trigger_list_train, arg_trigger_type_list_train, arg_list_train, arg_path_left_list_train, arg_path_rel_list_train, arg_path_right_list_train, arg_role_list_train, word_vectors, all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size, rel_2_index, train_roles, trigger_role_matrix, arg_label_file) role_index_dev_matrix, role_vector_dev_matrix, input_arg_context_dev_matrix, input_arg_dev_matrix, \ arg_relation_binary_dev_matrix, pos_neg_role_dev_matrix, limited_roles_dev_matrix = \ input_arg_matrix_test(arg_trigger_list_dev, arg_trigger_type_list_dev, arg_list_dev, arg_path_left_list_dev, arg_path_rel_list_dev,arg_path_right_list_dev, arg_role_list_dev, word_vectors, all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size, rel_2_index, train_roles, trigger_role_matrix, arg_label_file) role_index_test_matrix, role_vector_test_matrix, input_arg_context_test_matrix, input_arg_test_matrix, \ arg_relation_binary_test_matrix, pos_neg_role_test_matrix, limited_roles_test_matrix = \ input_arg_matrix_test(arg_trigger_list_test, arg_trigger_type_list_test, arg_list_test, arg_path_left_list_test, arg_path_rel_list_test, arg_path_right_list_test, arg_role_list_test, word_vectors, all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size, rel_2_index, train_roles, trigger_role_matrix, arg_label_file) input_role_matrix, input_role_structure_matrix = role_matrix_1( all_arg_role_list, all_type_role_structures, embedding_file, role_context_size) time1 = time.time() dt = theano.config.floatX ## arg data train_set_content_arg = theano.shared( numpy.matrix(input_arg_context_train_matrix, dtype=dt)) valid_set_content_arg = theano.shared( numpy.matrix(input_arg_context_dev_matrix, dtype=dt)) test_set_content_arg = theano.shared( numpy.matrix(input_arg_context_test_matrix, dtype=dt)) train_set_arg = theano.shared( numpy.matrix(input_arg_train_matrix, dtype=dt)) valid_set_arg = theano.shared(numpy.matrix(input_arg_dev_matrix, dtype=dt)) test_set_arg = theano.shared(numpy.matrix(input_arg_test_matrix, dtype=dt)) train_set_relation_binary_arg = theano.shared( numpy.matrix(arg_relation_binary_train_matrix, dtype=dt)) valid_set_relation_binary_arg = theano.shared( numpy.matrix(arg_relation_binary_dev_matrix, dtype=dt)) test_set_relation_binary_arg = theano.shared( numpy.matrix(arg_relation_binary_test_matrix, dtype=dt)) train_set_posneg_arg = theano.shared( numpy.matrix(pos_neg_role_train_matrix, dtype=dt)) valid_set_posneg_arg = theano.shared( numpy.matrix(pos_neg_role_dev_matrix, dtype=dt)) test_set_posneg_arg = theano.shared( numpy.matrix(pos_neg_role_test_matrix, dtype=dt)) train_set_arg_y = theano.shared( numpy.array(role_index_train_matrix, dtype=numpy.dtype(numpy.int32))) valid_set_arg_y = theano.shared( numpy.array(role_index_dev_matrix, dtype=numpy.dtype(numpy.int32))) test_set_arg_y = theano.shared( numpy.array(role_index_test_matrix, dtype=numpy.dtype(numpy.int32))) train_set_arg_y_vector = theano.shared( numpy.matrix(role_vector_train_matrix, dtype=dt)) valid_set_arg_y_vector = theano.shared( numpy.matrix(role_vector_dev_matrix, dtype=dt)) test_set_arg_y_vector = theano.shared( 
numpy.matrix(role_vector_test_matrix, dtype=dt)) train_set_arg_limited_role = theano.shared( numpy.matrix(limited_roles_train_matrix, dtype=dt)) valid_set_arg_limited_role = theano.shared( numpy.matrix(limited_roles_dev_matrix, dtype=dt)) test_set_arg_limited_role = theano.shared( numpy.matrix(limited_roles_test_matrix, dtype=dt)) train_set_role = theano.shared(numpy.matrix(input_role_matrix, dtype=dt)) train_set_role_structure = theano.shared( numpy.matrix(input_role_structure_matrix, dtype=dt)) train_roles = theano.shared(numpy.matrix(train_roles, dtype=dt)) # compute number of minibatches for training, validation and testing n_train_batches = input_arg_train_matrix.shape[0] n_valid_batches = input_arg_dev_matrix.shape[0] n_test_batches = input_arg_test_matrix.shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x_content_arg = T.matrix('x_content_arg') x_arg = T.matrix('x_arg') x_relation_binary_arg = T.matrix('x_relation_binary_arg') x_pos_neg_flag_arg = T.matrix('x_pos_neg_flag_arg') x_role = T.matrix('x_role') x_role_structure = T.matrix('x_role_structure') x_train_roles = T.matrix('x_train_roles') arg_y = T.ivector('arg_y') arg_y_vector = T.matrix('arg_y_vector') arg_limited_role = T.matrix('arg_limited_role') # [int] labels ishape = [tup_representation_size, arg_context_size] # this is the size of context matrizes time2 = time.time() print("time for preparing data structures: ", time2 - time1) # build the actual model print('start building the model ... ') time1 = time.time() # argument representation layer layer0_arg_input = x_content_arg.reshape( (batch_size, 1, ishape[0], ishape[1])) layer0_input_binary_relation = x_relation_binary_arg.reshape( (batch_size, 1, relation_size, ishape[1])) ## 100*1*26*5 # compose amr relation matrix to each tuple rel_w = theano.shared(value=relation_matrix, borrow=True) ## 26*400 compose_layer = ComposeLayerMatrix( input=layer0_arg_input, input_binary_relation=layer0_input_binary_relation, rel_w=rel_w, rel_vec_size=tup_representation_size) layer1_input = compose_layer.output filter_shape = (nkerns[0], 1, tup_representation_size, filter_size[1]) pool_size = (pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(pool_size)) w_bound = numpy.sqrt(6. 
/ (fan_in + fan_out)) conv_w = theano.shared(numpy.asarray(rng.uniform(low=-w_bound, high=w_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX) conv_b = theano.shared(value=b_values, borrow=True) layer1_arg_conv = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer1_input, image_shape=(batch_size, 1, ishape[0], arg_context_size), filter_shape=filter_shape, poolsize=pool_size) layer1_arg_output = layer1_arg_conv.output layer1_arg_flattened = layer1_arg_output.flatten(2) arg_features_shaped = x_arg.reshape((batch_size, embedding_size)) layer2_arg_input = T.concatenate( [layer1_arg_flattened, arg_features_shaped], axis=1) layer2_arg_input_size = nkerns[0] * pool[1] + embedding_size # arg role representation layer layer_role_input = x_role_structure.reshape( (role_size, 1, tup_representation_size, role_context_size)) filter_shape_role = (nkerns[0], 1, tup_representation_size, filter_size[1]) pool_size_role = (pool[0], pool[1]) # initialize the implicit relation tensor type_tensor_shape = (tup_representation_size, tup_representation_size, tup_representation_size) type_tensor_w = theano.shared(numpy.asarray(rng.uniform( low=-w_bound, high=w_bound, size=type_tensor_shape), dtype=theano.config.floatX), borrow=True) # compose relation tensor to each tuple compose_type_layer = ComposeLayerTensor(input=layer_role_input, tensor=type_tensor_w) layer_type_input1 = compose_type_layer.output layer1_conv_role = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer_type_input1, image_shape=(role_size, 1, tup_representation_size, role_context_size), filter_shape=filter_shape_role, poolsize=pool_size_role) layer1_role_output = layer1_conv_role.output layer1_role_flattened = layer1_role_output.flatten(2) role_shaped = x_role.reshape((role_size, embedding_size)) layer2_role_input = T.concatenate([layer1_role_flattened, role_shaped], axis=1) layer2_role_input_size = nkerns[0] * pool[1] + embedding_size # ranking based max margin loss layer train_roles_signal = x_train_roles.reshape((role_size, 1)) pos_neg_flag_arg = x_pos_neg_flag_arg.reshape((batch_size, 1)) limited_role = arg_limited_role.reshape((batch_size, role_size)) layer3 = MaxRankingMarginCosine1Arg1(rng=rng, input=layer2_arg_input, input_label=layer2_role_input, true_label=arg_y_vector, n_in=layer2_arg_input_size, n_in2=layer2_role_input_size, margin=margin, batch_size=batch_size, type_size=role_size, train_type_signal=train_roles_signal, pos_neg_flag=pos_neg_flag_arg, limited_role=limited_role) # cost and parameters update cost = layer3.loss # create a list of all model parameters to be fit by gradient descent param_list = [ layer1_arg_conv.params, compose_layer.params, compose_type_layer.params ] params = [] for p in param_list: params += p # the cost we minimize during training is the max-margin ranking loss of the model lambd1 = T.scalar('lambda1', dt) lambd2 = T.scalar('lambda2', dt) # L1 and L2 regularization possible reg2 = 0 reg1 = 0 for p in param_list: reg2 += T.sum(p[0]**2) reg1 += T.sum(abs(p[0])) print("reg1 ", reg1) print("reg2 ", reg2) cost += lambd2 * reg2 cost += lambd1 * reg1 lr = T.scalar('lr', dt) start = index * batch_size end = (index + 1) * batch_size validVariables = {} validVariables[x_content_arg] = valid_set_content_arg[start:end] validVariables[x_arg] = valid_set_arg[start:end] validVariables[x_role] = train_set_role validVariables[x_role_structure] = train_set_role_structure validVariables[x_relation_binary_arg] = valid_set_relation_binary_arg[
start:end] validVariables[arg_y] = valid_set_arg_y[start:end] validVariables[arg_y_vector] = valid_set_arg_y_vector[start:end] validVariables[x_train_roles] = train_roles validVariables[x_pos_neg_flag_arg] = valid_set_posneg_arg[start:end] validVariables[arg_limited_role] = valid_set_arg_limited_role[start:end] testVariables = {} testVariables[x_content_arg] = test_set_content_arg[start:end] testVariables[x_arg] = test_set_arg[start:end] testVariables[x_role] = train_set_role testVariables[x_role_structure] = train_set_role_structure testVariables[x_relation_binary_arg] = test_set_relation_binary_arg[ start:end] testVariables[arg_y] = test_set_arg_y[start:end] testVariables[arg_y_vector] = test_set_arg_y_vector[start:end] testVariables[x_train_roles] = train_roles testVariables[x_pos_neg_flag_arg] = test_set_posneg_arg[start:end] testVariables[arg_limited_role] = test_set_arg_limited_role[start:end] trainVariables = {} trainVariables[x_content_arg] = train_set_content_arg[start:end] trainVariables[x_arg] = train_set_arg[start:end] trainVariables[x_role] = train_set_role trainVariables[x_role_structure] = train_set_role_structure trainVariables[x_relation_binary_arg] = train_set_relation_binary_arg[ start:end] trainVariables[arg_y] = train_set_arg_y[start:end] trainVariables[arg_y_vector] = train_set_arg_y_vector[start:end] trainVariables[x_train_roles] = train_roles trainVariables[x_pos_neg_flag_arg] = train_set_posneg_arg[start:end] trainVariables[arg_limited_role] = train_set_arg_limited_role[start:end] print("number of train variables: ", len(trainVariables)) # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters (here via the RMSprop-style rule built below rather than plain SGD). Since this model has many parameters, # it would be tedious to manually create an update rule for each model parameter. We thus create the updates # list by automatically looping over all (params[i],grads[i]) pairs. updates = [] rho = 0.9 epsilon = 1e-6 # for param_i, grad_i in zip(params, grads): # updates.append((param_i, param_i - lr * grad_i)) for p, g in zip(params, grads): acc = theano.shared(p.get_value() * 0.) acc_new = rho * acc + (1 - rho) * g**2 gradient_scaling = T.sqrt(acc_new + epsilon) g = g / gradient_scaling updates.append((acc, acc_new)) updates.append((p, p - lr * g)) test_model_confidence = theano.function([index], layer3.results(arg_y), on_unused_input='ignore', givens=testVariables) eval_model_confidence = theano.function([index], layer3.results(arg_y), on_unused_input='ignore', givens=validVariables) train_model = theano.function([index, lr, lambd1, lambd2], [cost, layer3.loss], updates=updates, on_unused_input='ignore', givens=trainVariables) time2 = time.time() print("time for building the model: ", time2 - time1) # train the model print('start training ... 
') time1 = time.time() validation_frequency = num_examples_per_epoch / batch_size # validate after each epoch best_params = [] best_fscore = -1 last_fscore = -1 best_fscore_m1 = -1 best_iter = 0 best_fscoreEval = -1 best_fscore_m1Eval = -1 best_iterEval = 0 start_time = time.clock() epoch = 0 done_looping = False maxNoImprovement = 5 noImprovement = 0 while (epoch < n_epochs) and (not done_looping): print('epoch = ', epoch) epoch = epoch + 1 this_n_train_batches = num_examples_per_epoch / batch_size for minibatch_index in xrange(this_n_train_batches): iter = (epoch - 1) * this_n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij, loss = train_model(minibatch_index, learning_rate, l1, l2) print("cost: ", cost_ij) print("loss1: ", loss) if (iter + 1) % validation_frequency == 0: # test data confidence_eval = [ test_model_confidence(i) for i in xrange(n_test_batches) ] confidence_list_eval = [] for r in range(0, len(confidence_eval)): for r1 in range(0, batch_size): hypo_result_eval = confidence_eval[r][0].item(r1) confidence_list_eval.append(hypo_result_eval) y_pred_eval = confidence_list_eval y_true_eval = role_index_test_matrix[:n_test_batches * batch_size] y_true_eval_2 = [] for i in range(len(y_true_eval)): y_true_eval_2.append(int(y_true_eval[i])) labels1 = [13, 14, 15, 16, 17] this_fscore_eval = f1_score(y_true_eval_2, y_pred_eval, labels=labels1, average='micro') this_fscore_macro_eval = f1_score(y_true_eval_2, y_pred_eval, labels=labels1, average='macro') print( 'EVAL: *** epoch %i, best_validation %f, best_validation_m1 %f, learning_rate %f, ' 'minibatch %i/%i, validation fscore %f %%' % (epoch, best_fscoreEval * 100., best_fscore_m1Eval * 100, learning_rate, minibatch_index + 1, this_n_train_batches, this_fscore_eval * 100.)) if this_fscore_eval > best_fscoreEval: best_fscoreEval = this_fscore_eval best_fscore_m1Eval = this_fscore_macro_eval best_iterEval = iter # dev data confidence = [ eval_model_confidence(i) for i in xrange(n_valid_batches) ] confidence_list = [] for r in range(0, len(confidence)): for r1 in range(0, batch_size): hypo_result = confidence[r][0].item(r1) confidence_list.append(hypo_result) y_pred = confidence_list y_true = role_index_dev_matrix[:n_valid_batches * batch_size] y_true_2 = [] for i in range(len(y_true)): y_true_2.append(int(y_true[i])) labels = [] for i in range(1, role_size): labels.append(i) this_fscore = f1_score(y_true_2, y_pred, labels=labels, average='micro') this_fscore_macro = f1_score(y_true_2, y_pred, labels=labels, average='macro') print( 'epoch %i, best_validation %f, best_validation_m1 %f, learning_rate %f, minibatch %i/%i, ' 'validation fscore %f %%' % (epoch, best_fscore * 100., best_fscore_m1 * 100, learning_rate, minibatch_index + 1, this_n_train_batches, this_fscore * 100.)) # if we got the best validation score until now if this_fscore > best_fscore: best_fscore = this_fscore best_fscore_m1 = this_fscore_macro best_iter = iter best_params = [] for p in param_list: p_param = [] for part in p: p_param.append(part.get_value(borrow=False)) best_params.append(p_param) noImprovement = 0 else: if this_fscore > last_fscore: noImprovement -= 1 noImprovement = max(noImprovement, 0) else: noImprovement += 1 updatestep = minibatch_index + this_n_train_batches * ( epoch - 1) if newbob: # learning rate schedule depending on dev result learning_rate /= 1.2 print("reducing learning rate to ", learning_rate) last_fscore = this_fscore if newbob: # learning rate schedule depending on dev result if 
noImprovement > maxNoImprovement or learning_rate < 0.0000001: done_looping = True break if not newbob: if epoch + 1 > 10: learning_rate /= 1.2 print("reducing learning rate to ", learning_rate) if epoch + 1 > 50: done_looping = True break end_time = time.clock() print('Optimization complete.') print( 'Best validation score of %f %% obtained for c=%i, nk=%i, f=%i, h=%i at iteration %i,' % (best_fscore * 100., arg_context_size, nkerns[0], filter_size[1], hidden_units, best_iter + 1)) time2 = time.time() print("time for training: ", time2 - time1) print('Saving net.') save_file = open(arg_network_file, 'wb') for p in best_params: for p_part in p: cPickle.dump(p_part, save_file, -1) save_file.close()
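# ------------------------------------------------------------------
# Note on the parameter updates used in the training script above:
# the acc/rho/epsilon loop is an RMSprop-style rule (running average
# of squared gradients, each gradient rescaled by its RMS).  The
# following is a minimal, self-contained sketch of the same idea.
# It is a hypothetical helper for illustration only, not part of the
# script above; it assumes `cost` is a scalar Theano expression and
# `params` a list of Theano shared variables.
# ------------------------------------------------------------------
def rmsprop_updates(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    updates = []
    for p, g in zip(params, T.grad(cost, params)):
        acc = theano.shared(p.get_value() * 0.)   # running average of g**2
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g / T.sqrt(acc_new + epsilon)))
    return updates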
def __init__(self, nkerns=[48, 48, 48], miniBatchSize=200, nHidden=200, nClasses=2, nMaxPool=2, nChannels=1): """ nClasses : the number of target classes (e.g. 2 for binary classification) nMaxPool : number of pixels to max pool nChannels : number of input channels (e.g. 1 for single grayscale channel) """ rng = numpy.random.RandomState(23455) self.p = 65 self.miniBatchSize = miniBatchSize # Note: self.x and self.y will be re-bound to a subset of the # training/validation/test data dynamically by the update # stage of the appropriate function. self.x = T.tensor4('x') # membrane mini-batch self.y = T.ivector('y') # 1D vector of [int] labels # We now assume the input will already be reshaped to the # proper size (i.e. we don't need a theano resize op here). layer0_input = self.x #-------------------------------------------------- # LAYER 0 # layer0 convolution+max pool reduces image dimensions by: # 65 -> 62 -> 31 #-------------------------------------------------- fs0 = 4 # conv. filter size, layer 0 os0 = (self.p - fs0 + 1) / nMaxPool # image out size 0 assert (os0 == 31) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(self.miniBatchSize, nChannels, self.p, self.p), filter_shape=(nkerns[0], nChannels, fs0, fs0), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 1 # layer1 convolution+max pool reduces image dimensions by: # 31 -> 28 -> 14 #-------------------------------------------------- fs1 = 4 # filter size, layer 1 os1 = (os0 - fs1 + 1) / nMaxPool # image out size 1 assert (os1 == 14) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(self.miniBatchSize, nkerns[0], os0, os0), filter_shape=(nkerns[1], nkerns[0], fs1, fs1), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 2 # layer2 convolution+max pool reduces image dimensions by: # 14 -> 10 -> 5 #-------------------------------------------------- fs2 = 5 os2 = (os1 - fs2 + 1) / nMaxPool assert (os2 == 5) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(self.miniBatchSize, nkerns[1], os1, os1), filter_shape=(nkerns[2], nkerns[1], fs2, fs2), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 3 # Fully connected sigmoidal layer, goes from # 5*5*48 -> 200 #-------------------------------------------------- layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * os2 * os2, n_out=nHidden, activation=T.tanh) #-------------------------------------------------- # LAYER 4 # Classification via a logistic regression layer # 200 -> 2 #-------------------------------------------------- # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=nHidden, n_out=nClasses) self.layers = (layer0, layer1, layer2, layer3, layer4)
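# ------------------------------------------------------------------
# The asserts in the constructor above just restate the spatial
# arithmetic of a "valid" convolution followed by non-overlapping max
# pooling: out = (in - filter + 1) / pool.  A tiny helper
# (hypothetical, illustration only) that reproduces the
# 65 -> 31 -> 14 -> 5 chain for filter sizes (4, 4, 5) and pool 2:
# ------------------------------------------------------------------
def conv_pool_out_sizes(in_size, filter_sizes, pool=2):
    sizes = []
    for f in filter_sizes:
        in_size = (in_size - f + 1) // pool   # valid conv, then max pool
        sizes.append(in_size)
    return sizes

# conv_pool_out_sizes(65, [4, 4, 5]) == [31, 14, 5]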
def start(inputfile): global in_time, out_time, cooldown_in_time, cooldown_out_time, classify global global_counter, winner_stride, cur_state, in_frame_num, actions_counter global test_set_x, test_set_y, shared_test_set_y rng = numpy.random.RandomState(23455) # ####################### build start ######################## # create an empty shared variables to be filled later data_x = numpy.zeros([1, 20 * 50 * 50]) data_y = numpy.zeros(20) train_set = (data_x, data_y) (test_set_x, test_set_y, shared_test_set_y) = \ shared_dataset(train_set) print 'building ... ' batch_size = 1 # allocate symbolic variables for the data index = T.lscalar() x = T.matrix('x') y = T.ivector('y') # image size layer0_w = 50 layer0_h = 50 layer1_w = (layer0_w - 4) / 2 layer1_h = (layer0_h - 4) / 2 layer2_w = (layer1_w - 2) / 2 layer2_h = (layer1_h - 2) / 2 layer3_w = (layer2_w - 2) / 2 layer3_h = (layer2_h - 2) / 2 # ##################### # BUILD ACTUAL MODEL # # ##################### # image sizes batchsize = batch_size in_channels = 20 in_width = 50 in_height = 50 # filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2)) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h, n_out=500, activation=T.tanh) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8) cost = layer4.negative_log_likelihood(y) classify = theano.function( [index], outputs=layer4.get_output_labels(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # load weights print 'loading weights state' f = file('weights.save', 'rb') loaded_objects = [] for i in range(5): loaded_objects.append(cPickle.load(f)) f.close() layer0.__setstate__(loaded_objects[0]) layer1.__setstate__(loaded_objects[1]) layer2.__setstate__(loaded_objects[2]) layer3.__setstate__(loaded_objects[3]) layer4.__setstate__(loaded_objects[4]) # ####################### build done ######################## fromCam = False if fromCam: print 'using camera input' cap = cv2.VideoCapture(0) else: print 'using input file: ', inputfile cap = cv2.VideoCapture(inputfile) # my timing frame_rate = 5 frame_interval_ms = 1000 / frame_rate fourcc = cv2.VideoWriter_fourcc(*'XVID') video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc, frame_rate, (640, 480)) frame_counter = 0 (ret, frame) = cap.read() proFrame = process_single_frame(frame) # init detectors st_a_det = RepDetector(proFrame, detector_strides[0]) st_b_det = RepDetector(proFrame, detector_strides[1]) st_c_det = RepDetector(proFrame, detector_strides[2]) frame_wise_counts = [] while True: in_frame_num += 1 if in_frame_num % 2 == 1: continue (ret, frame) = cap.read() if ret == 0: print 'unable to read frame' break proFrame = process_single_frame(frame) # handle stride A.... 
if frame_counter % st_a_det.stride_number == 0: st_a_det.count(proFrame) # handle stride B if frame_counter % st_b_det.stride_number == 0: st_b_det.count(proFrame) # handle stride C if frame_counter % st_c_det.stride_number == 0: st_c_det.count(proFrame) # display result on video blue_color = (130, 0, 0) green_color = (0, 130, 0) red_color = (0, 0, 130) orange_color = (0, 140, 0xFF) out_time = in_frame_num / 60 if cur_state == state.IN_REP and (out_time - in_time < 4 or global_counter < 5): draw_str(frame, (20, 120), ' new hypothesis (%d) ' % global_counter, orange_color, 1.5) if cur_state == state.IN_REP and out_time - in_time >= 4 \ and global_counter >= 5: draw_str( frame, (20, 120), 'action %d: counting... %d' % (actions_counter, global_counter), green_color, 2) if cur_state == state.COOLDOWN and global_counter >= 5: draw_str( frame, (20, 120), 'action %d: done. final count: %d' % (actions_counter, global_counter), blue_color, 2) frame_wise_counts.append(global_counter) # print 'action %d: done. final count: %d' % (actions_counter, global_counter) print "frame-wise counts:", frame_wise_counts, "final count:", global_counter return frame_wise_counts
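# ------------------------------------------------------------------
# The loading loop in start() above expects 'weights.save' to hold the
# five layer states pickled in order (layer0 .. layer4).  Below is a
# sketch of the matching save step -- hypothetical, for illustration
# only; it assumes each layer implements __getstate__ symmetrically to
# the __setstate__ calls used above.
# ------------------------------------------------------------------
def save_network_state(layers, path='weights.save'):
    f = file(path, 'wb')
    for layer in layers:                          # e.g. (layer0, ..., layer4)
        cPickle.dump(layer.__getstate__(), f, -1)  # -1: highest pickle protocol
    f.close()

# save_network_state((layer0, layer1, layer2, layer3, layer4))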
def __init__(self, configfile, train = False): self.config = readConfig(configfile) self.addInputSize = 1 logger.info("additional mlp input") wordvectorfile = self.config["wordvectors"] logger.info("wordvectorfile " + str(wordvectorfile)) networkfile = self.config["net"] logger.info("networkfile " + str(networkfile)) hiddenunits = int(self.config["hidden"]) logger.info("hidden units " + str(hiddenunits)) hiddenunitsNER = 50 if "hiddenunitsNER" in self.config: hiddenunitsNER = int(self.config["hiddenunitsNER"]) logger.info("hidden units NER " + str(hiddenunitsNER)) learning_rate = float(self.config["lrate"]) logger.info("learning rate " + str(learning_rate)) if train: self.batch_size = int(self.config["batchsize"]) else: self.batch_size = 1 logger.info("batch size " + str(self.batch_size)) self.filtersize = [1,int(self.config["filtersize"])] nkerns = [int(self.config["nkerns"])] logger.info("nkerns " + str(nkerns)) pool = [1, int(self.config["kmax"])] self.contextsize = int(self.config["contextsize"]) logger.info("contextsize " + str(self.contextsize)) if self.contextsize < self.filtersize[1]: logger.info("setting filtersize to " + str(self.contextsize)) self.filtersize[1] = self.contextsize logger.info("filtersize " + str(self.filtersize)) sizeAfterConv = self.contextsize - self.filtersize[1] + 1 sizeAfterPooling = -1 if sizeAfterConv < pool[1]: logger.info("setting poolsize to " + str(sizeAfterConv)) pool[1] = sizeAfterConv sizeAfterPooling = pool[1] logger.info("kmax pooling: k = " + str(pool[1])) # reading word vectors self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile) self.representationsize = self.vectorsize + 1 rng = numpy.random.RandomState(23455) if train: seed = rng.get_state()[1][0] logger.info("seed: " + str(seed)) # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.xa = T.matrix('xa') # left context self.xb = T.matrix('xb') # middle context self.xc = T.matrix('xc') # right context self.y = T.imatrix('y') # label (only present in training) self.yNER1 = T.imatrix('yNER1') # label for first entity self.yNER2 = T.imatrix('yNER2') # label for second entity ishape = [self.representationsize, self.contextsize] # this is the size of context matrizes ###################### # BUILD ACTUAL MODEL # ###################### logger.info('... building the model') # Reshape input matrix to be compatible with our LeNetConvPoolLayer layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1])) self.y_reshaped = self.y.reshape((self.batch_size, 1)) yNER1reshaped = self.yNER1.reshape((self.batch_size, 1)) yNER2reshaped = self.yNER2.reshape((self.batch_size, 1)) # Construct convolutional pooling layer: filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1]) poolsize=(pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)) W_bound = numpy.sqrt(6. 
/ (fan_in + fan_out)) # the convolution weight matrix convW = theano.shared(numpy.asarray( rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) # the bias is a 1D tensor -- one bias per output feature map b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) convB = theano.shared(value=b_values, borrow=True) self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3) layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1) self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh) self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b) layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1) layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER self.additionalFeatures = T.matrix('additionalFeatures') additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1)) layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1) layer2_inputSize += self.addInputSize self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize, n_out=hiddenunits, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23) # create a list of all model parameters self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params] self.params = [] for p in self.paramList: self.params += p logger.info(p) if not train: self.gotNetwork = 1 # load parameters if not os.path.isfile(networkfile): logger.error("network file does not exist") self.gotNetwork = 0 else: save_file = open(networkfile, 'rb') for p in self.params: p.set_value(cPickle.load(save_file), borrow=False) save_file.close()
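# ------------------------------------------------------------------
# The clamping logic in the constructor above (the filter width and the
# k-max pool width are both reduced when the context is too small) can
# be summarised in one place.  A compact restatement -- hypothetical
# helper, illustration only; it returns the width after k-max pooling:
# ------------------------------------------------------------------
def kmax_pool_width(contextsize, filtersize, kmax):
    filtersize = min(filtersize, contextsize)        # filter cannot exceed the context
    size_after_conv = contextsize - filtersize + 1   # width after a valid convolution
    return min(kmax, size_after_conv)                # k-max pooling keeps at most k columns

# e.g. kmax_pool_width(contextsize=50, filtersize=3, kmax=3) == 3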
in_channels = 20 in_width = 50 in_height = 50 #filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2)) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h , n_out=500, activation=T.tanh)