def __init__(self, K, node_num, nfeat, nhid, nclass, sampleSize, dropout, trainAttention):
    super(GAT, self).__init__()
    self.gc1 = GraphConvolution(K, node_num, nfeat, nhid, sampleSize[1],
                                'False', 'True', trainAttention)
    self.gc2 = GraphConvolution(1, node_num, K * nhid, 14 * nclass, sampleSize[0],
                                'False', 'False', trainAttention)
    #self.gc3 = GraphConvolution(1, node_num, 4*7*nclass, 7*nclass, 'False', 'False')
    self.gc6 = LogisticRegression(14 * nclass, 1)
    self.dropout = dropout
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    batch_size = params["batch_size"]
    n_output = params['n_output']
    corruption_level = params["corruption_level"]

    X = T.matrix(name="input", dtype=dtype)   # batch of sequence of vectors
    Y = T.matrix(name="output", dtype=dtype)  # batch of sequence of vectors
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    # masking noise: drop whole 3-d joints at once, then add small Gaussian noise
    bin_noise = rng.binomial(size=(batch_size, n_output / 3, 1), n=1,
                             p=1 - corruption_level, dtype=theano.config.floatX)
    #bin_noise_3d = T.reshape(T.concatenate((bin_noise, bin_noise, bin_noise), axis=1), (batch_size, n_output/3, 3))
    bin_noise_3d = T.concatenate((bin_noise, bin_noise, bin_noise), axis=2)
    noise = rng.normal(size=(batch_size, n_output), std=0.03, avg=0.0,
                       dtype=theano.config.floatX)
    noise_bin = T.reshape(noise, (batch_size, n_output / 3, 3)) * bin_noise_3d
    X_train = T.reshape(noise_bin, (batch_size, n_output)) + X
    X_tilde = T.switch(T.neq(is_train, 0), X_train, X)  # corrupt only at training time

    # tied-weight encoder/decoder: n_output -> 1024 -> 2048 -> 1024 -> n_output
    W_1_e = u.init_weight(shape=(n_output, 1024), rng=rng, name="w_hid", sample="glorot")
    b_1_e = u.init_bias(1024, rng)
    W_2_e = u.init_weight(shape=(1024, 2048), rng=rng, name="w_hid", sample="glorot")
    b_2_e = u.init_bias(2048, rng)
    W_2_d = W_2_e.T
    b_2_d = u.init_bias(1024, rng)
    W_1_d = W_1_e.T
    b_1_d = u.init_bias(n_output, rng)

    h_1_e = HiddenLayer(rng, X_tilde, 0, 0, W=W_1_e, b=b_1_e, activation=nn.relu)
    h_2_e = HiddenLayer(rng, h_1_e.output, 0, 0, W=W_2_e, b=b_2_e, activation=nn.relu)
    h_2_d = HiddenLayer(rng, h_2_e.output, 0, 0, W=W_2_d, b=b_2_d, activation=u.do_nothing)
    h_1_d = LogisticRegression(rng, h_2_d.output, 0, 0, W=W_1_d, b=b_1_d)

    self.output = h_1_d.y_pred
    self.params = h_1_e.params + h_2_e.params
    self.params.append(b_2_d)
    self.params.append(b_1_d)

    cost = get_err_fn(self, cost_function, Y)
    # L2 weight decay
    L2_reg = 0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += (T.sum(param[0] ** 2) + T.sum(param[1] ** 2))
    cost += L2_reg * L2_sqr

    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train], outputs=cost,
                                 updates=_optimizer.getUpdates(),
                                 allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train], outputs=self.output,
                                       allow_input_downcast=True)
    self.mid_layer = theano.function(inputs=[X, is_train], outputs=h_2_e.output,
                                     allow_input_downcast=True)
    self.n_param = count_params(self.params)
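# Hedged usage sketch (not from the original source): `rng` must be a Theano
# RandomStreams object (the constructor calls rng.binomial/rng.normal with a
# dtype argument); x_batch/y_batch are hypothetical float arrays of shape
# (batch_size, n_output), and the class name `SAE` is assumed.
from theano.sandbox.rng_mrg import MRG_RandomStreams
params = {"lr": 1e-4, "batch_size": 64, "n_output": 54, "corruption_level": 0.1}
ae = SAE(MRG_RandomStreams(1234), params)
loss = ae.train(x_batch, y_batch, 1)   # is_train=1: inputs get corrupted
codes = ae.mid_layer(x_batch, 0)       # 2048-d bottleneck features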
def __init__(self, nkerns=[48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)

    nClasses = 2
    nMaxPool = 2
    nHidden = 200

    self.p = 65
    #self.x = T.tensor3('x')  # membrane data set
    self.x = T.tensor4('x')   # membrane mini-batch
    self.y = T.ivector('y')   # 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images
    # to a 4D tensor,
    # compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 65 -> 62 -> 31
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 31)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # Fully connected sigmoidal layer, goes from
    # X -> 200
    #--------------------------------------------------
    layer1_input = layer0.output.flatten(2)
    layer1 = HiddenLayer(rng, input=layer1_input,
                         n_in=nkerns[0] * os0 * os0,
                         n_out=nHidden, activation=T.tanh)

    #--------------------------------------------------
    # LAYER 2
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output, n_in=nHidden, n_out=nClasses)

    self.layers = (layer0, layer1, layer2)
def load_cls_for_simclr(cfg):
    simCLR, _, _ = load_simclr(cfg)
    logit = LogisticRegression(simCLR, cfg.cls.dataset.n_classes)
    if cfg.cls.load:
        model_fp = os.path.join(
            cfg.cls.model_path, "checkpoint_{}.tar".format(cfg.cls.epoch_num)
        )
        logit.load_state_dict(torch.load(model_fp, map_location=cfg.cls.device.type))
    cfg_adam = cfg.cls.optim.adam
    optimizer = torch.optim.Adam(logit.parameters(), lr=cfg_adam.lr)  # TODO: LARS
    scheduler = None
    return logit, optimizer, scheduler
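# Hedged usage sketch (not from the original source): wiring the returned
# triple into a linear-evaluation training step. `train_loader` and the
# device move are assumptions; the rest follows load_cls_for_simclr above.
logit, optimizer, scheduler = load_cls_for_simclr(cfg)
logit = logit.to(cfg.cls.device)
criterion = torch.nn.CrossEntropyLoss()
for x, y in train_loader:
    optimizer.zero_grad()
    loss = criterion(logit(x.to(cfg.cls.device)), y.to(cfg.cls.device))
    loss.backward()
    optimizer.step()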
def fit_logistic(image_size=(28, 28),
                 datasets='../data/mnist.pkl.gz',
                 outpath='../output/mnist_logistic_regression.params',
                 learning_rate=0.13, n_epochs=1000, batch_size=600,
                 patience=5000, patience_increase=2,
                 improvement_threshold=0.995):
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    classifier = LogisticRegression(
        input=x,
        n_in=reduce(np.multiply, image_size),
        n_out=10
    )
    cost = classifier.negative_log_likelihood(y)
    learner = SupervisedMSGD(
        index, x, y, batch_size, learning_rate,
        load_data(datasets), outpath, classifier, cost
    )
    best_validation_loss, best_iter, epoch, elapsed_time = learner.fit(
        n_epochs=n_epochs,
        patience=patience,
        patience_increase=patience_increase,
        improvement_threshold=improvement_threshold
    )
    display_results(best_validation_loss, elapsed_time, epoch)
    return learner
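# Hedged usage sketch (illustrative): with the MNIST pickle at its default
# path, a quick smoke test only shortens the early-stopping budget; the
# keyword names are exactly those of fit_logistic above.
learner = fit_logistic(n_epochs=5, patience=1000)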
def __init__(self, nkerns=[48, 48, 48, 48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)

    nClasses = 2
    nMaxPool = 2
    nHidden = 200

    self.p = 95
    #self.x = T.tensor3('x')  # membrane data set
    self.x = T.tensor4('x')   # membrane mini-batch
    self.y = T.ivector('y')   # labels := 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images
    # to a 4D tensor,
    # compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 95 -> 92 -> 46
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 46)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # layer1 convolution+max pool reduces image dimensions by:
    # 46 -> 42 -> 21
    #--------------------------------------------------
    fs1 = 5                           # filter size, layer 1
    os1 = (os0 - fs1 + 1) / nMaxPool  # image out size 1
    assert (os1 == 21)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(self.miniBatchSize, nkerns[0], os0, os0),
                                filter_shape=(nkerns[1], nkerns[0], fs1, fs1),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 2
    # layer2 convolution+max pool reduces image dimensions by:
    # 21 -> 18 -> 9
    #--------------------------------------------------
    fs2 = 4
    os2 = (os1 - fs2 + 1) / nMaxPool
    assert (os2 == 9)
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(self.miniBatchSize, nkerns[0], os1, os1),
                                filter_shape=(nkerns[2], nkerns[1], fs2, fs2),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 3
    # layer3 convolution+max pool reduces image dimensions by:
    # 9 -> 6 -> 3
    #--------------------------------------------------
    fs3 = 4
    os3 = (os2 - fs3 + 1) / nMaxPool
    assert (os3 == 3)
    layer3 = LeNetConvPoolLayer(rng, input=layer2.output,
                                image_shape=(self.miniBatchSize, nkerns[0], os2, os2),
                                filter_shape=(nkerns[3], nkerns[2], fs3, fs3),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 4
    # Fully connected sigmoidal layer, goes from
    # 3*3*48 ~ 450 -> 200
    #--------------------------------------------------
    layer4_input = layer3.output.flatten(2)
    layer4 = HiddenLayer(rng, input=layer4_input,
                         n_in=nkerns[3] * os3 * os3,
                         n_out=nHidden, activation=T.tanh)

    #--------------------------------------------------
    # LAYER 5
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer5 = LogisticRegression(input=layer4.output, n_in=nHidden, n_out=nClasses)

    self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
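# Hedged usage sketch (illustrative): the constructors above only assemble
# self.layers; the caller is expected to build its own cost and updates.
# `MembraneCNN` is an assumed name for the class whose __init__ is shown above.
net = MembraneCNN(nkerns=[48, 48, 48, 48], miniBatchSize=200)
cost = net.layers[-1].negative_log_likelihood(net.y)
params = [p for layer in net.layers for p in layer.params]
updates = [(p, p - 0.01 * g) for p, g in zip(params, T.grad(cost, params))]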
                               poolsize=(1, kmaxEntities))
layers.append(cnnEntities)

hidden_in = 2 * (2 * nkernsContext * kmaxContext + nkernsEntities * kmaxEntities)
hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
layers.append(hiddenLayer)

hiddenLayerET = HiddenLayer(rng=rng,
                            n_in=2 * nkernsContext * kmaxContext + nkernsEntities * kmaxEntities,
                            n_out=hiddenUnitsET)
layers.append(hiddenLayerET)

# with a CRF on top, the output layers are randomly initialized
randomInit = False
if doCRF:
    randomInit = True

outputLayer = LogisticRegression(n_in=hiddenUnits, n_out=numClasses,
                                 rng=rng, randomInit=randomInit)
layers.append(outputLayer)
outputLayerET = LogisticRegression(n_in=hiddenUnitsET, n_out=numClassesET,
                                   rng=rng, randomInit=randomInit)
layers.append(outputLayerET)

if doCRF:
    crfLayer = CRF(numClasses=numClasses + numClassesET, rng=rng,
                   batchsizeVar=batchsizeVar, sequenceLength=3)
    layers.append(crfLayer)

x1_resh = x1.reshape((batchsizeVar * numPerBag, contextsize))
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    batch_size = params["batch_size"]
    sequence_length = params["seq_length"]  # minibatch

    X = T.matrix(name="input", dtype=dtype)   # batch of sequence of vectors
    Y = T.matrix(name="output", dtype=dtype)  # batch of sequence of vectors
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    # CNN global parameters.
    subsample = (1, 1)
    p_1 = 0.5
    border_mode = "same"
    cnn_batch_size = batch_size
    pool_size = (2, 2)

    # Layer1: conv2+pool+drop
    filter_shape = (128, 1, 10, 10)
    input_shape = (cnn_batch_size, 1, 144, 176)  # input_shape = (samples, channels, rows, cols)
    input = X.reshape(input_shape)
    c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode,
                   subsample, activation=nn.relu)
    p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
    dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

    # Layer2: conv2+pool
    subsample = (1, 1)
    filter_shape = (256, p1.output_shape[1], 3, 3)
    c2 = ConvLayer(rng, dl1.output, filter_shape, p1.output_shape, border_mode,
                   subsample, activation=nn.relu)
    p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

    # Layer3: conv2+pool
    filter_shape = (256, p2.output_shape[1], 3, 3)
    c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode,
                   subsample, activation=nn.relu)
    p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

    # Layer4: conv2+pool
    filter_shape = (128, p3.output_shape[1], 3, 3)
    c4 = ConvLayer(rng, p3.output, filter_shape, p3.output_shape, border_mode,
                   subsample, activation=nn.relu)
    p4 = PoolLayer(c4.output, pool_size=pool_size, input_shape=c4.output_shape)

    # Layer5: hidden
    n_in = reduce(lambda x, y: x * y, p4.output_shape[1:])
    x_flat = p4.output.flatten(2)
    h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)

    # Layer6: logistic regression output
    lreg = LogisticRegression(rng, h1.output, 1024, params['n_output'])
    self.output = lreg.y_pred

    self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params

    cost = get_err_fn(self, cost_function, Y)
    # L2 weight decay
    L2_reg = 0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += (T.sum(param[0] ** 2) + T.sum(param[1] ** 2))
    cost += L2_reg * L2_sqr

    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train], outputs=cost,
                                 updates=_optimizer.getUpdates(),
                                 allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train], outputs=self.output,
                                       allow_input_downcast=True)
    self.n_param = count_params(self.params)
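# Hedged usage sketch (illustrative): `params` carries the keys the
# constructor reads; x_batch/y_batch are hypothetical float arrays of shape
# (batch_size, 144*176) and (batch_size, n_output); the class name is assumed.
model = CNNRegressor(rng, params)
for epoch in range(10):
    loss = model.train(x_batch, y_batch, 1)   # is_train=1 enables dropout
preds = model.predictions(x_batch, 0)         # dropout off at inference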
def build_lenet(config):
    rng = np.random.RandomState(23455)

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector

    image_width = config.image_width
    batch_size = config.batch_size
    image_size = image_width ** 2

    # random shared inputs, bound into the compiled steps via `givens`
    x_shared = T.cast(theano.shared(np.random.rand(batch_size, image_size),
                                    borrow=True), theano.config.floatX)
    y_shared = T.cast(theano.shared(np.random.randint(config.ydim, size=batch_size),
                                    borrow=True), 'int32')

    layer0_input = x.reshape((batch_size, 1, image_width, image_width))

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, image_width, image_width),
        filter_shape=(config.num_kerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, config.num_kerns[0], 12, 12),
        filter_shape=(config.num_kerns[1], config.num_kerns[0], 5, 5),
        poolsize=(2, 2)
    )

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=config.num_kerns[1] * 4 * 4,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=config.ydim)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    params_W = [layer3.W, layer2.W, layer1.W, layer0.W]
    params_b = [layer3.b, layer2.b, layer1.b, layer0.b]
    params = params_W + params_b

    shared_cost = theano.shared(np.float32(0.0))
    grads_temp = T.grad(cost, params)

    start_compilation = time.time()
    forward_step = theano.function([], [], updates=[(shared_cost, cost)],
                                   givens={x: x_shared, y: y_shared})
    forward_backward_step = theano.function([], grads_temp,
                                            givens={x: x_shared, y: y_shared})
    print 'compilation time: %.4f s' % (time.time() - start_compilation)
    return forward_step, forward_backward_step
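# Hedged usage sketch (illustrative): `config` is assumed to expose
# image_width, batch_size, ydim and num_kerns; the two returned callables take
# no arguments because their inputs are baked in via `givens`.
forward_step, forward_backward_step = build_lenet(config)
t0 = time.time()
forward_step()                   # forward only; cost lands in a shared variable
grads = forward_backward_step()  # forward + backward; returns gradient arrays
print 'fwd+bwd time: %.4f s' % (time.time() - t0)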
                            input=layer1.output,
                            image_shape=(batch_size, 60, layer2_w, layer2_h),
                            filter_shape=(90, 60, 3, 3),
                            poolsize=(2, 2),
                            )

layer3_input = layer2.output.flatten(2)
layer3 = HiddenLayer(
    rng,
    input=layer3_input,
    n_in=90 * layer3_w * layer3_h,
    n_out=500,
    activation=T.tanh,
)
layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

cost = layer4.negative_log_likelihood(y)

classify = theano.function(
    [index],
    outputs=layer4.get_output_labels(y),
    givens={
        x: test_set_x[index * batch_size:(index + 1) * batch_size],
        y: test_set_y[index * batch_size:(index + 1) * batch_size],
    },
)

# load weights
print("loading weights state")
f = open("weights.save", "rb")
def __init__(self, input, output, n_in, hidden_layers_sizes, n_out,
             dropout=None, optimizer=SGD, is_train=0):
    self.dense_layers = []
    self.rbm_layers = []
    self.params = []
    self.consider_constants = []
    self.n_layers = len(hidden_layers_sizes)
    assert self.n_layers > 0

    self.rng = np.random.RandomState(888)
    self.theano_rng = RandomStreams(self.rng.randint(2 ** 30))

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_in
            layer_input = input
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.dense_layers[-1].output

        dense_layer = DenseLayer(rng=self.rng,
                                 theano_rng=self.theano_rng,
                                 input=layer_input,
                                 n_in=input_size,
                                 n_out=hidden_layers_sizes[i],
                                 activation=T.nnet.softplus,
                                 dropout=dropout,
                                 is_train=is_train)
        # the RBM shares W and hbias with the dense layer, so RBM
        # pretraining directly initializes the corresponding dense layer
        rbm_layer = RBM(input=layer_input,
                        rng=self.rng,
                        theano_rng=self.theano_rng,
                        n_visible=input_size,
                        n_hidden=hidden_layers_sizes[i],
                        W=dense_layer.W,
                        hbias=dense_layer.b,
                        dropout=dropout,
                        h_activation=T.nnet.softplus,
                        optimizer=optimizer,
                        is_train=is_train)

        self.dense_layers.append(dense_layer)
        self.rbm_layers.append(rbm_layer)
        self.params.extend(dense_layer.params)
        if dense_layer.consider_constant is not None:
            self.consider_constants.extend(dense_layer.consider_constant)
    # end-for

    self.logistic_layer = LogisticRegression(
        input=self.dense_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_out)
    self.params.extend(self.logistic_layer.params)

    self.finetune_cost = self.logistic_layer.negative_loglikelihood(output)
    self.finetune_errors = self.logistic_layer.errors(output)

    self.input = input
    self.output = output
    self.is_train = is_train

    # model updates
    self.finetune_opt = optimizer(self.params)
def __init__(self, configfile, train=False):
    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + str(wordvectorfile))
    networkfile = self.config["net"]
    logger.info("networkfile " + str(networkfile))
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNER = 50
    if "hiddenunitsNER" in self.config:
        hiddenunitsNER = int(self.config["hiddenunitsNER"])
    logger.info("hidden units NER " + str(hiddenunitsNER))
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
        self.batch_size = int(self.config["batchsize"])
    else:
        self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1, int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    if self.contextsize < self.filtersize[1]:
        logger.info("setting filtersize to " + str(self.contextsize))
        self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1

    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
        logger.info("setting poolsize to " + str(sizeAfterConv))
        pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)
    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)
    if train:
        seed = rng.get_state()[1][0]
        logger.info("seed: " + str(seed))

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.xa = T.matrix('xa')  # left context
    self.xb = T.matrix('xb')  # middle context
    self.xc = T.matrix('xc')  # right context
    self.y = T.imatrix('y')   # label (only present in training)
    self.yNER1 = T.imatrix('yNER1')  # label for first entity
    self.yNER2 = T.imatrix('yNER2')  # label for second entity
    ishape = [self.representationsize, self.contextsize]  # this is the size of context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with our LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    self.y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize = (pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    # the three context windows share one convolution (convW, convB)
    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)

    #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis=3)
    layer0aflattened = self.layer0a.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0_output = T.concatenate(
        [layer0aflattened, layer0bflattened, layer0cflattened], axis=1)

    # entity-type representations; layer1b shares weights with layer1a
    self.layer1a = HiddenLayer(rng=rng, input=self.yNER1, n_in=6,
                               n_out=hiddenunitsNER, activation=T.tanh)
    self.layer1b = HiddenLayer(rng=rng, input=self.yNER2, n_in=6,
                               n_out=hiddenunitsNER, activation=T.tanh,
                               W=self.layer1a.W, b=self.layer1a.b)

    layer2_input = T.concatenate(
        [layer0_output, self.layer1a.output, self.layer1b.output], axis=1)
    layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER

    self.additionalFeatures = T.matrix('additionalFeatures')
    additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))
    layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis=1)
    layer2_inputSize += self.addInputSize

    self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                              n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output,
                                     n_in=hiddenunits, n_out=23)

    # create a list of all model parameters
    self.paramList = [self.layer3.params, self.layer2.params,
                      self.layer1a.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
        self.params += p
        logger.info(p)

    if not train:
        self.gotNetwork = 1
        # load parameters
        if not os.path.isfile(networkfile):
            logger.error("network file does not exist")
            self.gotNetwork = 0
        else:
            save_file = open(networkfile, 'rb')
            for p in self.params:
                p.set_value(cPickle.load(save_file), borrow=False)
            save_file.close()
def start(inputfile):
    global in_time, out_time, cooldown_in_time, cooldown_out_time, classify
    global global_counter, winner_stride, cur_state, in_frame_num, actions_counter
    global test_set_x, test_set_y, shared_test_set_y

    rng = numpy.random.RandomState(23455)

    # ####################### build start ########################
    # create empty shared variables to be filled later
    data_x = numpy.zeros([1, 20 * 50 * 50])
    data_y = numpy.zeros(20)
    train_set = (data_x, data_y)
    (test_set_x, test_set_y, shared_test_set_y) = shared_dataset(train_set)

    print 'building ... '
    batch_size = 1

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) / 2
    layer1_h = (layer0_h - 4) / 2
    layer2_w = (layer1_w - 2) / 2
    layer2_h = (layer1_h - 2) / 2
    layer3_w = (layer2_w - 2) / 2
    layer3_h = (layer2_h - 2) / 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batch_size, 60, layer2_w, layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)
    layer3 = HiddenLayer(rng, input=layer3_input,
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500, activation=T.tanh)
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # load weights
    print 'loading weights state'
    f = file('weights.save', 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(cPickle.load(f))
    f.close()
    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    # ####################### build done ########################

    fromCam = False
    if fromCam:
        print 'using camera input'
        cap = cv2.VideoCapture(0)
    else:
        print 'using input file: ', inputfile
        cap = cv2.VideoCapture(inputfile)

    # timing
    frame_rate = 5
    frame_interval_ms = 1000 / frame_rate

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc, frame_rate, (640, 480))

    frame_counter = 0

    (ret, frame) = cap.read()
    proFrame = process_single_frame(frame)

    # init detectors
    st_a_det = RepDetector(proFrame, detector_strides[0])
    st_b_det = RepDetector(proFrame, detector_strides[1])
    st_c_det = RepDetector(proFrame, detector_strides[2])

    frame_wise_counts = []
    while True:
        in_frame_num += 1
        if in_frame_num % 2 == 1:
            continue
        (ret, frame) = cap.read()
        if ret == 0:
            print 'unable to read frame'
            break
        proFrame = process_single_frame(frame)

        # handle stride A
        if frame_counter % st_a_det.stride_number == 0:
            st_a_det.count(proFrame)
        # handle stride B
        if frame_counter % st_b_det.stride_number == 0:
            st_b_det.count(proFrame)
        # handle stride C
        if frame_counter % st_c_det.stride_number == 0:
            st_c_det.count(proFrame)

        # display result on video
        blue_color = (130, 0, 0)
        green_color = (0, 130, 0)
        red_color = (0, 0, 130)
        orange_color = (0, 140, 0xFF)
        out_time = in_frame_num / 60
        if cur_state == state.IN_REP and (out_time - in_time < 4 or global_counter < 5):
            draw_str(frame, (20, 120), ' new hypothesis (%d) ' % global_counter,
                     orange_color, 1.5)
        if cur_state == state.IN_REP and out_time - in_time >= 4 \
                and global_counter >= 5:
            draw_str(frame, (20, 120),
                     'action %d: counting... %d' % (actions_counter, global_counter),
                     green_color, 2)
        if cur_state == state.COOLDOWN and global_counter >= 5:
            draw_str(frame, (20, 120),
                     'action %d: done. final counting: %d' % (actions_counter, global_counter),
                     blue_color, 2)
            frame_wise_counts.append(global_counter)

    print 'frame-wise counts:', frame_wise_counts, global_counter
    return frame_wise_counts
def __init__(self, numpy_rng, train_set_x, train_set_y, hidden_layers_sizes,
             n_ins=784, n_outs=10):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: np.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial weights

    :type train_set_x: theano.shared, float32
    :param train_set_x: training data set, shape (n_samples, n_pixels)

    :type train_set_y: theano.shared, int32
    :param train_set_y: ground truth for the training data, shape (n_samples,)

    :type n_ins: int
    :param n_ins: dimension of the input to the SAE

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layer sizes, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network
    """
    self.sigmoid_layers = []
    self.AE_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    self.train_set_x = train_set_x
    self.train_set_y = train_set_y

    assert self.n_layers > 0

    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector

    for i in xrange(self.n_layers):
        # construct the sigmoid layer = encoder stack
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(
            rng=numpy_rng,
            input=layer_input,
            n_in=(n_ins if i == 0 else hidden_layers_sizes[i - 1]),
            n_out=hidden_layers_sizes[i],
            activation=T.nnet.sigmoid)

        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        # init the AE layer; it takes its weights from the sigmoid layer
        AE_layer = AutoEncoder(
            numpy_rng=numpy_rng,
            input=layer_input,
            n_visible=(n_ins if i == 0 else hidden_layers_sizes[i - 1]),
            n_hidden=hidden_layers_sizes[i],
            W=sigmoid_layer.W,
            bhid=sigmoid_layer.b)
        self.AE_layers.append(AE_layer)

    # on top of the layers: log layer for fine-tuning
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
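# Hedged usage sketch (illustrative): greedy layer-wise pretraining typically
# compiles one update function per AE_layer; get_cost_updates is the
# tutorial-style AutoEncoder interface and is assumed here, as are `sae`,
# `index`, `bs` and `n_batches`.
for ae in sae.AE_layers:
    cost, updates = ae.get_cost_updates(corruption_level=0.1, learning_rate=0.01)
    pretrain = theano.function(
        [index], cost, updates=updates,
        givens={sae.x: sae.train_set_x[index * bs:(index + 1) * bs]})
    for i in xrange(n_batches):
        pretrain(i)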
    layer2_inputSize = layer1outputsize
    layer2_input = layer1flattened
elif combinationMethod == "noAtt":
    layer2_inputSize = layer0outputsize
    layer2_input = layer0flattened
else:  # concatenation
    layer2_inputSize = layer0outputsize + layer1outputsize
    layer2_input = T.concatenate([layer0flattened, layer1flattened], axis=1)

if useHiddenLayer:
    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)
    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=hiddenunits, n_out=2)
else:
    # classify the input representation directly
    layer3 = LogisticRegression(input=layer2_input, n_in=layer2_inputSize, n_out=2)

# create a list of all model non-bricks parameters
paramList = [layer3.params]
if useHiddenLayer:
    paramList.append(layer2.params)
if combinationMethod != "noAtt":
    paramList.append(layer1.params)
# params from layer0 already have the blocks role
params = []
for p in paramList:
    for i, p_part in enumerate(p):
        if i == 0:
def __init__(self, numpy_rng, theano_rng=None, y=None, alpha=0.9,
             sample_rate=0.1, n_ins=784, hidden_layers_sizes=[500, 500],
             n_outs=10, corruption_levels=[0.1, 0.1],
             allX=None, allY=None, srng=None):
    self.sigmoid_layers = []
    self.sugar_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    self.allXs = []
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    self.x = tensor.matrix('x')
    if y is None:
        self.y = tensor.ivector(name='y')
    else:
        self.y = y

    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_ins
            layer_input = self.x
            self.allXs.append(allX)
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output
            self.allXs.append(
                tensor.dot(self.allXs[i - 1], self.sigmoid_layers[-1].W) +
                self.sigmoid_layers[-1].b)

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=tensor.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        sugar_layer = sugar(numpy_rng=numpy_rng,
                            alpha=alpha,
                            sample_rate=sample_rate,
                            x=layer_input,
                            y=self.y,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            bhid=sigmoid_layer.b,
                            allX=self.allXs[i],
                            allY=allY,
                            srng=srng)
        self.sugar_layers.append(sugar_layer)

    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
def build_model(self, flag_preserve_params=False):
    ###################
    # build the model #
    ###################
    logging.info('... building the model')

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.x = T.matrix('x')    # the data is presented as rasterized images
    # self.y = T.ivector('y') # the labels are presented as a 1D vector of
    #                         # [int] labels given by the data
    # the y as features, used for taking in intermediate layer "y" values
    self.y = T.matrix('y')

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    self.layer0_input = self.x.reshape((self.batch_size, self.img_dim,
                                        self.img_size, self.img_size))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    self.layer0 = LeNetConvPoolLayer(
        self.rng,
        input=self.layer0_input,
        image_shape=(self.batch_size, self.img_dim, self.img_size, self.img_size),
        filter_shape=(self.nkerns[0], self.img_dim, self.filtersize[0], self.filtersize[0]),
        poolsize=(self.poolsize[0], self.poolsize[0]),
        activation=self.conv_activation)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0]
    self.layer1 = LeNetConvPoolLayer(
        self.rng,
        input=self.layer0.output,
        image_shape=(self.batch_size, self.nkerns[0], self.img_size1, self.img_size1),
        filter_shape=(self.nkerns[1], self.nkerns[0], self.filtersize[1], self.filtersize[1]),
        poolsize=(self.poolsize[1], self.poolsize[1]),
        activation=self.conv_activation)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (20, 32*4*4) = (20, 512)
    self.layer2_input = self.layer1.output.flatten(2)
    self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1]

    # construct a fully-connected sigmoidal layer
    self.layer2 = HiddenLayer(self.rng,
                              input=self.layer2_input,
                              n_in=self.nkerns[1] * self.img_size2 * self.img_size2,
                              n_out=self.num_hidden,
                              activation=self.hidden_activation)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output,
                                     n_in=self.num_hidden,
                                     n_out=self.num_class,
                                     activation=self.logreg_activation)

    # regularization terms
    self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \
        self.alpha_l2 * (self.layer2.W ** 2).sum()
    self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \
        self.alpha_l2 * (self.layer3.W ** 2).sum()

    # there are different choices of cost models
    if self.cost_type == 'nll_softmax':
        # the cost we minimize during training is the NLL of the model
        self.y = T.ivector('y')  # indexing involved, so it has to be an integer vector
        self.cost = self.layer3.negative_log_likelihood(self.y) + \
            self.decay_hidden + self.decay_softmax + \
            self.alpha_entropy * self.layer3.p_y_entropy
    elif self.cost_type == 'ssd_softmax':
        self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \
            self.decay_hidden + self.decay_softmax
    elif self.cost_type == 'ssd_hidden':
        self.cost = T.mean((self.layer2.output - self.y) ** 2) + \
            self.decay_hidden
    elif self.cost_type == 'ssd_conv':
        self.cost = T.mean((self.layer2_input - self.y) ** 2)

    # Preserve parameters if they exist; used to keep parameters while
    # rebuilding some of the theano functions. Note that parameters should
    # be kept only if the network structure doesn't change.
    if flag_preserve_params and hasattr(self, 'params'):
        params_temp = copy.deepcopy(self.params)
    else:
        params_temp = None

    self.params = self.layer3.params + self.layer2.params + \
        self.layer1.params + self.layer0.params

    # if needed, assign old parameters
    if flag_preserve_params and (params_temp is not None):
        for ind in range(len(params_temp)):
            self.params[ind].set_value(params_temp[ind].get_value(), borrow=True)

    # create a list of gradients for all model parameters
    self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn')

    # error function from the last layer logistic regression
    self.errors = self.layer3.errors
def __init__(self, nkerns=[48, 48, 48], miniBatchSize=200, nHidden=200,
             nClasses=2, nMaxPool=2, nChannels=1):
    """
    nClasses  : the number of target classes (e.g. 2 for binary classification)
    nMaxPool  : number of pixels to max pool
    nChannels : number of input channels (e.g. 1 for single grayscale channel)
    """
    rng = numpy.random.RandomState(23455)

    self.p = 65
    self.miniBatchSize = miniBatchSize

    # Note: self.x and self.y will be re-bound to a subset of the
    # training/validation/test data dynamically by the update
    # stage of the appropriate function.
    self.x = T.tensor4('x')  # membrane mini-batch
    self.y = T.ivector('y')  # 1D vector of [int] labels

    # We now assume the input will already be reshaped to the
    # proper size (i.e. we don't need a theano resize op here).
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 65 -> 62 -> 31
    #--------------------------------------------------
    fs0 = 4                              # conv. filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 31)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, nChannels, self.p, self.p),
                                filter_shape=(nkerns[0], nChannels, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # layer1 convolution+max pool reduces image dimensions by:
    # 31 -> 28 -> 14
    #--------------------------------------------------
    fs1 = 4                           # filter size, layer 1
    os1 = (os0 - fs1 + 1) / nMaxPool  # image out size 1
    assert (os1 == 14)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(self.miniBatchSize, nkerns[0], os0, os0),
                                filter_shape=(nkerns[1], nkerns[0], fs1, fs1),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 2
    # layer2 convolution+max pool reduces image dimensions by:
    # 14 -> 10 -> 5
    #--------------------------------------------------
    fs2 = 5
    os2 = (os1 - fs2 + 1) / nMaxPool
    assert (os2 == 5)
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(self.miniBatchSize, nkerns[1], os1, os1),
                                filter_shape=(nkerns[2], nkerns[1], fs2, fs2),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 3
    # Fully connected sigmoidal layer, goes from
    # 5*5*48 -> 200
    #--------------------------------------------------
    layer3_input = layer2.output.flatten(2)
    layer3 = HiddenLayer(rng, input=layer3_input,
                         n_in=nkerns[2] * os2 * os2,
                         n_out=nHidden, activation=T.tanh)

    #--------------------------------------------------
    # LAYER 4
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=nHidden, n_out=nClasses)

    self.layers = (layer0, layer1, layer2, layer3, layer4)
def prepare_network():
    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    #   BUILD NETWORK    #
    ######################

    # image sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w, layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)
    layer3 = HiddenLayer(rng, input=layer3_input,
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500, activation=T.tanh)
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize]
        })

    print('Loading network weights...')
    weightFile = '../live_count/weights.save'
    f = open(weightFile, 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()
    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
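# Hedged usage sketch (illustrative): iterating the returned classifier over
# the whole test set; get_output_labels(y) is assumed to return per-minibatch
# label information, as in the classify function above.
test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize = \
    prepare_network()
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batchsize
results = [classify(i) for i in range(n_test_batches)]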
def train_rep(learning_rate=0.002, L1_reg=0.0002, L2_reg=0.005,
              n_epochs=200, nkerns=[20, 50], batch_size=25):
    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"
    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    train_set_x, train_set_y, shared_train_set_y = datasets

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    valid_set_x, valid_set_y, shared_valid_set_y = datasets

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # compute number of minibatches for training, validation and testing
    n_all_train_batches = 30000
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_all_train_batches /= batch_size
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")    # the data is presented as rasterized images
    y = T.ivector("y")   # the labels are presented as a 1D vector of [int] labels

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) / 2
    layer1_h = (layer0_h - 4) / 2
    layer2_w = (layer1_w - 2) / 2
    layer2_h = (layer1_h - 2) / 2
    layer3_w = (layer2_w - 2) / 2
    layer3_h = (layer2_h - 2) / 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )
    # TODO: in case of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )

    layer3_input = layer2.output.flatten(2)
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )
    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer4.params + layer3.params + layer2.params +
              layer1.params + layer0.params)

    # symbolic Theano variable that represents the L1 regularization term
    L1 = (T.sum(abs(layer4.params[0])) + T.sum(abs(layer3.params[0])) +
          T.sum(abs(layer2.params[0])) + T.sum(abs(layer1.params[0])) +
          T.sum(abs(layer0.params[0])))

    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = (T.sum(layer4.params[0] ** 2) + T.sum(layer3.params[0] ** 2) +
              T.sum(layer2.params[0] ** 2) + T.sum(layer1.params[0] ** 2) +
              T.sum(layer0.params[0] ** 2))

    # the loss
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    start_time = time.clock()
    epoch = 0
    done_looping = False
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100

    startc = time.clock()
    while (epoch < n_epochs) and (not done_looping):
        endc = time.clock()
        print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0)))
        startc = time.clock()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load next train data
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet = ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_train_set_x, ns_train_set_y = datasets
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX),
                borrow=True)
            n_train_batches = train_set_x.get_value(borrow=True).shape[0]
            n_train_batches /= batch_size

            # train
            for minibatch_index in range(n_train_batches):
                cost_ij = train_model(minibatch_index)

        # at the end of each epoch run validation
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_valid_set_x, ns_valid_set_y = datasets
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX),
                borrow=True)
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
            n_valid_batches /= batch_size

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        print(("epoch %i, minibatch %i/%i, validation error %f %%" %
               (epoch, minibatch_index + 1, n_train_batches,
                this_validation_loss * 100.0)))

        # save snapshots
        print("saving weights state, epoch = ", epoch)
        f = file(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb")
        state_L0 = layer0.__getstate__()
        pickle.dump(state_L0, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L1 = layer1.__getstate__()
        pickle.dump(state_L1, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L2 = layer2.__getstate__()
        pickle.dump(state_L2, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L3 = layer3.__getstate__()
        pickle.dump(state_L3, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L4 = layer4.__getstate__()
        pickle.dump(state_L4, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()

    end_time = time.clock()
    print("Optimization complete.")
    print(("The code for file " + os.path.split(__file__)[1] +
           " ran for %.2fm" % ((end_time - start_time) / 60.0)),
          file=sys.stderr)
def build_model(self, flag_preserve_params=False):
    logging.info('... building the model')

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.x = T.matrix('x')    # the data is presented as rasterized images
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # Since we are dealing with a one hidden layer MLP, this will translate
    # into a HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = HiddenLayer(rng=self.rng, input=self.x,
                                   n_in=self.n_in, n_out=self.n_hidden,
                                   activation=self.hidden_activation)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=self.n_hidden,
        n_out=self.n_out,
        activation=self.logreg_activation)

    # L1 norm; one regularization option is to enforce the L1 norm to be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer.W).sum()

    # square of L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer.W ** 2).sum()

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers it is
    # made out of
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params

    self.cost = self.negative_log_likelihood(self.y) \
        + self.alpha_l1 * self.L1 \
        + self.alpha_l2 * self.L2_sqr

    self.grads = T.grad(self.cost, self.params)

    # fixed batch size based prediction
    self.predict_proba_batch = theano.function(
        [self.x], self.logRegressionLayer.p_y_given_x)
    self.predict_batch = theano.function(
        [self.x], T.argmax(self.logRegressionLayer.p_y_given_x, axis=1))
    self.predict_cost_batch = theano.function(
        [self.x, self.y], self.cost, allow_input_downcast=True)
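# Hedged usage sketch (illustrative): once build_model has run, the compiled
# batch predictors can be called directly on float32 design matrices
# (`mlp`, `x_batch` and `y_batch` are assumed).
mlp.build_model()
proba = mlp.predict_proba_batch(x_batch)  # class posteriors, shape (n, n_out)
y_hat = mlp.predict_batch(x_batch)        # argmax labels
nll = mlp.predict_cost_batch(x_batch, y_batch)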
layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                            image_shape=(batch_size, 60, layer2_w, layer2_h),
                            filter_shape=(90, 60, 3, 3),
                            poolsize=(2, 2))

layer3_input = layer2.output.flatten(2)
layer3 = HiddenLayer(rng, input=layer3_input,
                     n_in=90 * layer3_w * layer3_h,
                     n_out=500, activation=T.tanh)
layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)  # change the number of output labels

cost = layer4.negative_log_likelihood(y)

classify = theano.function(
    [index],
    outputs=layer4.get_output_labels(y),
    givens={
        x: test_set_x[index * batch_size:(index + 1) * batch_size],
        y: test_set_y[index * batch_size:(index + 1) * batch_size]
    })

# load weights
print('loading weights state')
loaded_objects = []
with open('weights.save', 'rb') as f:
def __init__(self, configfile, train=False):
    self.slotList = [
        "N", "per:age", "per:alternate_names", "per:children",
        "per:cause_of_death", "per:date_of_birth", "per:date_of_death",
        "per:employee_or_member_of", "per:location_of_birth",
        "per:location_of_death", "per:locations_of_residence", "per:origin",
        "per:schools_attended", "per:siblings", "per:spouse", "per:title",
        "org:alternate_names", "org:date_founded", "org:founded_by",
        "org:location_of_headquarters", "org:members", "org:parents",
        "org:top_members_employees"
    ]
    typeList = ["O", "PERSON", "LOCATION", "ORGANIZATION", "DATE", "NUMBER"]

    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + wordvectorfile)
    networkfile = self.config["net"]
    logger.info("networkfile " + networkfile)
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNer = hiddenunits
    if "hiddenunitsNER" in self.config:
        hiddenunitsNer = int(self.config["hiddenunitsNER"])
    representationsizeNER = 50
    if "representationsizeNER" in self.config:
        representationsizeNER = int(self.config["representationsizeNER"])
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
        self.batch_size = int(self.config["batchsize"])
    else:
        self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1, int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    if self.contextsize < self.filtersize[1]:
        logger.info("setting filtersize to " + str(self.contextsize))
        self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1

    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
        logger.info("setting poolsize to " + str(sizeAfterConv))
        pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)
    self.representationsize = self.vectorsize + 1

    # seed not relevant here: parameters will be overwritten by the stored model anyway
    rng = numpy.random.RandomState(23455)
    if train:
        seed = rng.get_state()[1][0]
        logger.info("seed: " + str(seed))

    numSFclasses = 23
    numNERclasses = 6

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.xa = T.matrix('xa')  # left context
    self.xb = T.matrix('xb')  # middle context
    self.xc = T.matrix('xc')  # right context
    self.y = T.imatrix('y')   # label (only present in training)
    self.yNER1 = T.imatrix('yNER1')  # label for first entity (only present in training)
    self.yNER2 = T.imatrix('yNER2')  # label for second entity (only present in training)
    ishape = [self.representationsize, self.contextsize]  # this is the size of context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize = (pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    # the three context windows share one convolution (convW, convB)
    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)

    layer0aflattened = self.layer0a.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))

    # slot-filling representation: all three contexts
    layer0outputSF = T.concatenate(
        [layer0aflattened, layer0bflattened, layer0cflattened], axis=1)
    layer0outputSFsize = 3 * (nkerns[0] * sizeAfterPooling)

    # entity-typing representations: the two contexts around each entity
    layer0outputNER1 = T.concatenate([layer0aflattened, layer0bflattened], axis=1)
    layer0outputNER2 = T.concatenate([layer0bflattened, layer0cflattened], axis=1)
    layer0outputNERsize = 2 * (nkerns[0] * sizeAfterPooling)

    layer2ner1 = HiddenLayer(rng, input=layer0outputNER1,
                             n_in=layer0outputNERsize,
                             n_out=hiddenunitsNer, activation=T.tanh)
    layer2ner2 = HiddenLayer(rng, input=layer0outputNER2,
                             n_in=layer0outputNERsize,
                             n_out=hiddenunitsNer, activation=T.tanh,
                             W=layer2ner1.W, b=layer2ner1.b)

    # concatenate additional features to the sentence representation
    self.additionalFeatures = T.matrix('additionalFeatures')
    self.additionalFeatsShaped = self.additionalFeatures.reshape(
        (self.batch_size, 1))

    layer2SFinput = T.concatenate(
        [layer0outputSF, self.additionalFeatsShaped], axis=1)
    layer2SFinputSize = layer0outputSFsize + self.addInputSize

    layer2SF = HiddenLayer(rng, input=layer2SFinput, n_in=layer2SFinputSize,
                           n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layers
    layer3rel = LogisticRegression(input=layer2SF.output,
                                   n_in=hiddenunits, n_out=numSFclasses)
    layer3et = LogisticRegression(input=layer2ner1.output,
                                  n_in=hiddenunitsNer, n_out=numNERclasses)

    scoresForR1 = layer3rel.getScores(layer2SF.output)
    scoresForE1 = layer3et.getScores(layer2ner1.output)
    scoresForE2 = layer3et.getScores(layer2ner2.output)

    self.crfLayer = CRF(numClasses=numSFclasses + numNERclasses, rng=rng,
                        batchsizeVar=self.batch_size, sequenceLength=3)

    # score sequence for the CRF: entity 1, relation, entity 2
    scores = T.zeros((self.batch_size, 3, numSFclasses + numNERclasses))
    scores = T.set_subtensor(scores[:, 0, numSFclasses:], scoresForE1)
    scores = T.set_subtensor(scores[:, 1, :numSFclasses], scoresForR1)
    scores = T.set_subtensor(scores[:, 2, numSFclasses:], scoresForE2)
    self.scores = scores
    self.y_conc = T.concatenate([yNER1reshaped + numSFclasses,
                                 y_reshaped,
                                 yNER2reshaped + numSFclasses], axis=1)

    # create a list of all model parameters
    self.paramList = [self.crfLayer.params, layer3rel.params, layer3et.params,
                      layer2SF.params, layer2ner1.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
        self.params += p
        logger.info(p)

    if not train:
        self.gotNetwork = 1
        # load parameters
        if not os.path.isfile(networkfile):
            logger.error("network file does not exist")
            self.gotNetwork = 0
        else:
            save_file = open(networkfile, 'rb')
            for p in self.params:
                p.set_value(cPickle.load(save_file), borrow=False)
            save_file.close()

    self.relation_scores_global = self.crfLayer.getProbForClass(
        self.scores, numSFclasses)
    self.predictions_global = self.crfLayer.getPrediction(self.scores)
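# Hedged usage sketch (illustrative): the CRF outputs above are symbolic, so a
# prediction function is compiled against the model's input matrices; `model`
# is an instance of the class whose constructor is shown above.
predict = theano.function(
    [model.xa, model.xb, model.xc, model.additionalFeatures],
    [model.predictions_global, model.relation_scores_global])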