class ReadLayer(object):
    def __init__(self, rng, h_shape, image_shape, N, name='Default_readlayer'):
        print('Building layer: ' + name)
        self.lin_transform = HiddenLayer(
            rng,
            n_in=h_shape[0] * h_shape[1],
            n_out=4,
            activation=None,
            irange=0.001,
            name='readlayer: linear transformation')
        self.reader = Reader(
            rng,
            image_shape=image_shape,
            N=N,
            name='readlayer: reader')
        self.params = self.lin_transform.params

    def one_step(self, h, image):
        linear = self.lin_transform.one_step(h)
        read, g_x, g_y, delta, sigma_sq = self.reader.one_step(linear, image)
        return read, g_x, g_y, delta, sigma_sq
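
# The 4 linear outputs above parameterize the Reader's attention window. A
# minimal numpy sketch of a DRAW-style mapping from raw outputs to grid
# centre, stride, and variance (an assumption for illustration; the actual
# Reader implementation is not shown in this file):
import numpy as np

def attention_params(lin, img_h, img_w, N):
    gx_raw, gy_raw, log_delta, log_sigma_sq = lin
    g_x = (img_w + 1) * (gx_raw + 1) / 2.0                # window centre, x
    g_y = (img_h + 1) * (gy_raw + 1) / 2.0                # window centre, y
    delta = (max(img_h, img_w) - 1) / (N - 1.0) * np.exp(log_delta)  # stride
    sigma_sq = np.exp(log_sigma_sq)                       # Gaussian variance
    return g_x, g_y, delta, sigma_sq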
def __init__(self, input, input_dims, target):
    self.input = input
    self.target = target
    conv_compat_shape = (1, 1, input_dims[0], input_dims[1])
    conv_input = self.input.reshape(conv_compat_shape)
    layer1 = ConvPoolLayer(rng,
                           input=conv_input,
                           name='C1',
                           filter_shape=(20, 1, 3, 3),
                           input_shape=conv_compat_shape,
                           poolsize=(2, 2))
    layer2 = ConvPoolLayer(rng,
                           input=layer1.output,
                           name='C2',
                           filter_shape=(20, 20, 3, 3),
                           input_shape=layer1.output_shape,
                           poolsize=(2, 2))
    layer3 = HiddenLayer(rng,
                         input=layer2.output.flatten(ndim=2),
                         n_in=reduce(lambda x, y: x * y, layer2.output_shape),
                         n_out=10,
                         activation=T.tanh,
                         name='output')
    self.output = T.nnet.softmax(layer3.output)
    self.params = layer1.params + layer2.params + layer3.params
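
# A quick sanity check (hypothetical input size; not from the original source)
# of the feature-map sizes the two ConvPoolLayers above produce, assuming
# "valid" 3x3 convolutions followed by 2x2 max-pooling:
def conv_pool_out(size, filt=3, pool=2):
    # a valid convolution shrinks each side by filt - 1; pooling floor-divides
    return (size - (filt - 1)) // pool

side = 28                      # e.g. an MNIST-sized input
side = conv_pool_out(side)     # C1: (28 - 2) // 2 = 13
side = conv_pool_out(side)     # C2: (13 - 2) // 2 = 5
print(20 * side * side)        # 500 flattened inputs to the final HiddenLayer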
def initialize(self):
    self.hiddenLayers = []
    self.params = []
    input = self.input
    rng = self.rng
    n_out = self.n_out
    path = self.get_path()
    fromFile = (path is not None) and os.path.exists(path)
    if fromFile:
        # Load previously pickled layers (binary mode for pickle data).
        with open(path, 'rb') as file:
            print('loading mlp file from file... ' + path)
            d = cPickle.load(file)
            savedhiddenLayers = d[0]
            saved_logRegressionLayer = d[1]
            self.n_in = d[2]
            self.n_hidden = d[3]
    next_input = input
    next_n_in = self.n_in
    print('self.n_hidden: ' + str(self.n_hidden))
    for n_h in self.n_hidden:
        hl = HiddenLayer(rng=rng,
                         input=next_input,
                         n_in=next_n_in,
                         n_out=n_h,
                         activation=self.activation)
        next_input = hl.output
        next_n_in = n_h
        self.hiddenLayers.append(hl)
        self.params += hl.params
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayers[-1].output,
        n_in=self.n_hidden[-1],
        n_out=n_out)
    self.params += self.logRegressionLayer.params
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    self.errors = self.logRegressionLayer.errors
    self.p_y_given_x = self.logRegressionLayer.p_y_given_x
    self.y_pred = self.logRegressionLayer.y_pred
    if fromFile:
        # Copy the saved parameters into the freshly built layers.
        for hl, shl in zip(self.hiddenLayers, savedhiddenLayers):
            hl.W.set_value(shl.W.get_value())
            hl.b.set_value(shl.b.get_value())
        self.logRegressionLayer.W.set_value(saved_logRegressionLayer.W.get_value())
        self.logRegressionLayer.b.set_value(saved_logRegressionLayer.b.get_value())
    self.cost = self.negative_log_likelihood
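
# The load branch above implies the pickle layout
# [hiddenLayers, logRegressionLayer, n_in, n_hidden]. A hypothetical save
# counterpart (not in the original source, and assuming the layer objects are
# picklable, as the load branch implies):
def save(self):
    path = self.get_path()
    if path is not None:
        with open(path, 'wb') as f:
            cPickle.dump([self.hiddenLayers, self.logRegressionLayer,
                          self.n_in, self.n_hidden], f)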
def __init__(self, input, input_dims, target):
    self.input = input
    self.target = target
    num_in = input_dims[0] * input_dims[1]
    layer1 = HiddenLayer(rng,
                         input=input.flatten(),
                         n_in=num_in,
                         n_out=10,
                         activation=T.tanh,
                         name='FC1')
    layer2 = HiddenLayer(rng,
                         input=layer1.output,
                         n_in=10,
                         n_out=10,
                         activation=T.tanh,
                         name='output')
    self.output = T.nnet.softmax(layer2.output)
    self.params = layer1.params + layer2.params
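
# Hypothetical usage sketch (the enclosing class name TestFC and the
# module-level rng are assumptions; neither is shown in the snippet above):
# build the symbolic graph, then compile a predictor.
x = T.matrix('x')
y = T.matrix('y')
net = TestFC(input=x, input_dims=(28, 28), target=y)
predict = theano.function(inputs=[x], outputs=net.output,
                          allow_input_downcast=True)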
def __init__(self, rng, input1, input2, n_in, n_hidden1, n_hidden2, n_out,
             model=None, gamma=0.99):
    self.rng = rng
    self.n_in = n_in
    self.n_hidden1 = n_hidden1
    self.n_hidden2 = n_hidden2
    self.n_out = n_out
    if model is None:
        model = self.init_model()

    # Online network (fed input1): two ReLU hidden layers into the Q layer.
    self.hiddenLayer1 = HiddenLayer(
        input1=input1,
        n_in=n_in,
        n_out=n_hidden1,
        activation=T.nnet.relu,
        # activation=T.tanh,
        W_values=model[0],
        b_values=model[2])
    self.hiddenLayer2 = HiddenLayer(
        input1=self.hiddenLayer1.output1,
        n_in=n_hidden1,
        n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[1],
        b_values=model[3])
    self.logRegressionLayer = LogisticRegression(
        input1=self.hiddenLayer2.output1,
        n_in=n_hidden2,
        n_out=n_out,
        W_values=model[4],
        b_values=model[5])

    # Double-DQN action-selection copy (fed input2, same initial weights).
    self.hiddenLayer1_ddqn = HiddenLayer(
        input1=input2,
        n_in=n_in,
        n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[0],
        b_values=model[2])
    self.hiddenLayer2_ddqn = HiddenLayer(
        input1=self.hiddenLayer1_ddqn.output1,
        n_in=n_hidden1,
        n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[1],
        b_values=model[3])
    self.logRegressionLayer_ddqn = LogisticRegression(
        input1=self.hiddenLayer2_ddqn.output1,
        n_in=n_hidden2,
        n_out=n_out,
        W_values=model[4],
        b_values=model[5])

    # Target network copy (fed input2).
    self.hiddenLayer1_t = HiddenLayer(
        input1=input2,
        n_in=n_in,
        n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[0],
        b_values=model[2])
    self.hiddenLayer2_t = HiddenLayer(
        input1=self.hiddenLayer1_t.output1,
        n_in=n_hidden1,
        n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[1],
        b_values=model[3])
    self.logRegressionLayer_t = LogisticRegression(
        input1=self.hiddenLayer2_t.output1,
        n_in=n_hidden2,
        n_out=n_out,
        W_values=model[4],
        b_values=model[5])

    self.L1 = (abs(self.hiddenLayer1.W).sum()
               + abs(self.hiddenLayer2.W).sum()
               + abs(self.logRegressionLayer.W).sum())
    # square of L2 norm; one regularization option is to enforce
    # square of L2 norm to be small
    self.L2_sqr = ((self.hiddenLayer1.W ** 2).sum()
                   + (self.hiddenLayer2.W ** 2).sum()
                   + (self.logRegressionLayer.W ** 2).sum())

    # Only the online network's parameters are trained.
    self.params = (self.hiddenLayer1.params
                   + self.hiddenLayer2.params
                   + self.logRegressionLayer.params)

    # Keep track of the model inputs and per-network Q outputs.
    self.Qs = self.logRegressionLayer.Q
    self.Qddqn = self.logRegressionLayer_ddqn.Q
    self.Qsp = self.logRegressionLayer_t.Q
    self.input1 = input1
    self.input2 = input2

    # Index of the action actually taken, recovered from the input encoding.
    self.aidx = T.cast(input1[:, 5] + 1, 'int32')

    # Double-DQN target: the online copy picks the action, the target copy
    # evaluates it. input1[:, 0] carries the reward.
    self.target = input1[:, 0] + gamma * T.max(self.Qsp, axis=1)
    self.action_ddqn = T.argmax(self.Qddqn, axis=1)
    self.target_ddqn = input1[:, 0] + gamma * self.Qsp[
        T.arange(self.action_ddqn.shape[0]), self.action_ddqn]
    self.Qcost = T.mean(
        0.5 * (self.target_ddqn
               - self.Qs[T.arange(self.aidx.shape[0]), self.aidx]) ** 2)
    self.Qcost_v = 0.5 * (self.target_ddqn
                          - self.Qs[T.arange(self.aidx.shape[0]), self.aidx]) ** 2
    self.cost = self.Qcost  # + 0.0001 * self.L2_sqr
    self.cost_v = self.Qcost_v
    # self.errors = T.sqrt(T.mean(((input1[:, 0]
    #     + 0.97 * T.max(self.logRegressionLayer.Qsp, axis=1)
    #     - T.max(self.logRegressionLayer.Qs, axis=1))
    #     / (input1[:, 0]
    #        + 0.95 * T.max(self.logRegressionLayer.Qsp, axis=1))) ** 2))

    # Parameter aliases.
    self.Wh1 = self.hiddenLayer1.W
    self.Wh2 = self.hiddenLayer2.W
    self.bh1 = self.hiddenLayer1.b
    self.bh2 = self.hiddenLayer2.b
    self.OW = self.logRegressionLayer.W
    self.Ob = self.logRegressionLayer.b
    self.Wh1t = self.hiddenLayer1_t.W
    self.Wh2t = self.hiddenLayer2_t.W
    self.bh1t = self.hiddenLayer1_t.b
    self.bh2t = self.hiddenLayer2_t.b
    self.OWt = self.logRegressionLayer_t.W
    self.Obt = self.logRegressionLayer_t.b
    self.Wh1ddqn = self.hiddenLayer1_ddqn.W
    self.Wh2ddqn = self.hiddenLayer2_ddqn.W
    self.bh1ddqn = self.hiddenLayer1_ddqn.b
    self.bh2ddqn = self.hiddenLayer2_ddqn.b
    self.OWddqn = self.logRegressionLayer_ddqn.W
    self.Obddqn = self.logRegressionLayer_ddqn.b
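
# A minimal numpy sketch (not from the original source) of the double-DQN
# target constructed symbolically above: the online network selects the
# action, the target network evaluates it; input1[:, 0] plays the role of r.
import numpy as np

def double_dqn_target(r, q_online_next, q_target_next, gamma=0.99):
    # q_online_next, q_target_next: (batch, n_actions) Q-values at s'
    a_star = np.argmax(q_online_next, axis=1)
    return r + gamma * q_target_next[np.arange(len(r)), a_star]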
print(layer.feedforward(input))
# print(epoch)  # to avoid unused variable error; delete later

# testing
np.random.seed(1)

# data
x = np.random.rand(1, 10)
y = np.random.rand(1, 3)

# network
NN = NeuralNetwork(x, y)
NN.add(Inputlayer(x.shape, (1, 5)))
NN.add(HiddenLayer((1, 5), (1, 9), tanh))
NN.add(HiddenLayer((1, 9), (1, 3), tanh))
NN.add(OutputLayer((1, 3), y.shape))

# feedforward, layer by layer
output1 = NN.layers[0].feedforward(x)
output2 = NN.layers[1].feedforward(output1)
output3 = NN.layers[2].feedforward(output2)
output4 = NN.layers[3].feedforward(output3)
# print(NN.fit(x, y, 1, 1))
"""
print("output1", output1)
print("output2", output2)
print("output3", output3)
print("output4", output4)
"""
def __init__(self, rng, input1, input2, n_in, n_hidden1, n_hidden2, n_out,
             model=None, gamma=0.99):
    self.rng = rng
    self.n_in = n_in
    self.n_hidden1 = n_hidden1
    self.n_hidden2 = n_hidden2
    self.n_out = n_out
    if model is None:
        model = self.init_model()

    # Online value stream V(s) (fed input1): scalar output.
    self.VhiddenLayer1 = HiddenLayer(
        input1=input1, n_in=n_in, n_out=n_hidden1,
        activation=T.nnet.relu,
        # activation=T.tanh,
        W_values=model[0], b_values=model[2])
    self.VhiddenLayer2 = HiddenLayer(
        input1=self.VhiddenLayer1.output1, n_in=n_hidden1, n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[1], b_values=model[3])
    self.VlogRegressionLayer = LogisticRegression(
        input1=self.VhiddenLayer2.output1, n_in=n_hidden2, n_out=1,
        W_values=model[4], b_values=model[5])

    # Online advantage stream A(s, a) (fed input1): one output per action.
    self.AhiddenLayer1 = HiddenLayer(
        input1=input1, n_in=n_in, n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[6], b_values=model[8])
    self.AhiddenLayer2 = HiddenLayer(
        input1=self.AhiddenLayer1.output1, n_in=n_hidden1, n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[7], b_values=model[9])
    self.AlogRegressionLayer = LogisticRegression(
        input1=self.AhiddenLayer2.output1, n_in=n_hidden2, n_out=n_out,
        W_values=model[10], b_values=model[11])

    ####### ddqn ##########
    self.VhiddenLayer1_ddqn = HiddenLayer(
        input1=input2, n_in=n_in, n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[0], b_values=model[2])
    self.VhiddenLayer2_ddqn = HiddenLayer(
        input1=self.VhiddenLayer1_ddqn.output1, n_in=n_hidden1, n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[1], b_values=model[3])
    self.VlogRegressionLayer_ddqn = LogisticRegression(
        input1=self.VhiddenLayer2_ddqn.output1, n_in=n_hidden2, n_out=1,
        W_values=model[4], b_values=model[5])
    self.AhiddenLayer1_ddqn = HiddenLayer(
        input1=input2, n_in=n_in, n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[6], b_values=model[8])
    self.AhiddenLayer2_ddqn = HiddenLayer(
        input1=self.AhiddenLayer1_ddqn.output1, n_in=n_hidden1, n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[7], b_values=model[9])
    self.AlogRegressionLayer_ddqn = LogisticRegression(
        input1=self.AhiddenLayer2_ddqn.output1, n_in=n_hidden2, n_out=n_out,
        W_values=model[10], b_values=model[11])

    ###### target ##########
    self.VhiddenLayer1_t = HiddenLayer(
        input1=input2, n_in=n_in, n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[0], b_values=model[2])
    self.VhiddenLayer2_t = HiddenLayer(
        input1=self.VhiddenLayer1_t.output1, n_in=n_hidden1, n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[1], b_values=model[3])
    self.VlogRegressionLayer_t = LogisticRegression(
        input1=self.VhiddenLayer2_t.output1, n_in=n_hidden2, n_out=1,
        W_values=model[4], b_values=model[5])
    self.AhiddenLayer1_t = HiddenLayer(
        input1=input2, n_in=n_in, n_out=n_hidden1,
        activation=T.nnet.relu,
        W_values=model[6], b_values=model[8])
    self.AhiddenLayer2_t = HiddenLayer(
        input1=self.AhiddenLayer1_t.output1, n_in=n_hidden1, n_out=n_hidden2,
        activation=T.nnet.relu,
        W_values=model[7], b_values=model[9])
    self.AlogRegressionLayer_t = LogisticRegression(
        input1=self.AhiddenLayer2_t.output1, n_in=n_hidden2, n_out=n_out,
        W_values=model[10], b_values=model[11])

    # Only the online streams are trained.
    self.params = (self.VhiddenLayer1.params + self.VhiddenLayer2.params
                   + self.VlogRegressionLayer.params
                   + self.AhiddenLayer1.params + self.AhiddenLayer2.params
                   + self.AlogRegressionLayer.params)

    # Dueling aggregation: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)),
    # with the scalar V broadcast across the n_out action columns.
    self.Qs = T.extra_ops.repeat(
        self.VlogRegressionLayer.Q, n_out, axis=1) + (
            self.AlogRegressionLayer.Q
            - T.mean(self.AlogRegressionLayer.Q, axis=1, keepdims=True))
    self.Qddqn = T.extra_ops.repeat(
        self.VlogRegressionLayer_ddqn.Q, n_out, axis=1) + (
            self.AlogRegressionLayer_ddqn.Q
            - T.mean(self.AlogRegressionLayer_ddqn.Q, axis=1, keepdims=True))
    self.Qsp = T.extra_ops.repeat(
        self.VlogRegressionLayer_t.Q, n_out, axis=1) + (
            self.AlogRegressionLayer_t.Q
            - T.mean(self.AlogRegressionLayer_t.Q, axis=1, keepdims=True))

    self.input1 = input1
    self.input2 = input2

    # Index of the action actually taken, recovered from the input encoding.
    self.aidx = T.cast(
        T.round((input1[:, 64] * 4.0 + 4.0) * 7.0
                + (input1[:, 65] * 3.5 + 3.5)), 'int32')

    # Double-DQN target: the online copy picks the action, the target copy
    # evaluates it. input1[:, 0] carries the reward.
    self.target = input1[:, 0] + gamma * T.max(self.Qsp, axis=1)
    self.action_ddqn = T.argmax(self.Qddqn, axis=1)
    self.target_ddqn = input1[:, 0] + gamma * self.Qsp[
        T.arange(self.action_ddqn.shape[0]), self.action_ddqn]
    self.Qcost = T.mean(
        0.5 * (self.target_ddqn
               - self.Qs[T.arange(self.aidx.shape[0]), self.aidx]) ** 2)
    self.cost = self.Qcost  # + 0.0001 * self.L2_sqr

    # Parameter aliases.
    self.VWh1 = self.VhiddenLayer1.W
    self.VWh2 = self.VhiddenLayer2.W
    self.Vbh1 = self.VhiddenLayer1.b
    self.Vbh2 = self.VhiddenLayer2.b
    self.VOW = self.VlogRegressionLayer.W
    self.VOb = self.VlogRegressionLayer.b
    self.AWh1 = self.AhiddenLayer1.W
    self.AWh2 = self.AhiddenLayer2.W
    self.Abh1 = self.AhiddenLayer1.b
    self.Abh2 = self.AhiddenLayer2.b
    self.AOW = self.AlogRegressionLayer.W
    self.AOb = self.AlogRegressionLayer.b
    self.VWh1t = self.VhiddenLayer1_t.W
    self.VWh2t = self.VhiddenLayer2_t.W
    self.Vbh1t = self.VhiddenLayer1_t.b
    self.Vbh2t = self.VhiddenLayer2_t.b
    self.VOWt = self.VlogRegressionLayer_t.W
    self.VObt = self.VlogRegressionLayer_t.b
    self.AWh1t = self.AhiddenLayer1_t.W
    self.AWh2t = self.AhiddenLayer2_t.W
    self.Abh1t = self.AhiddenLayer1_t.b
    self.Abh2t = self.AhiddenLayer2_t.b
    self.AOWt = self.AlogRegressionLayer_t.W
    self.AObt = self.AlogRegressionLayer_t.b
    self.VWh1ddqn = self.VhiddenLayer1_ddqn.W
    self.VWh2ddqn = self.VhiddenLayer2_ddqn.W
    self.Vbh1ddqn = self.VhiddenLayer1_ddqn.b
    self.Vbh2ddqn = self.VhiddenLayer2_ddqn.b
    self.VOWddqn = self.VlogRegressionLayer_ddqn.W
    self.VObddqn = self.VlogRegressionLayer_ddqn.b
    self.AWh1ddqn = self.AhiddenLayer1_ddqn.W
    self.AWh2ddqn = self.AhiddenLayer2_ddqn.W
    self.Abh1ddqn = self.AhiddenLayer1_ddqn.b
    self.Abh2ddqn = self.AhiddenLayer2_ddqn.b
    self.AOWddqn = self.AlogRegressionLayer_ddqn.W
    self.AObddqn = self.AlogRegressionLayer_ddqn.b
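
# A minimal numpy sketch (not from the original source) of the dueling
# aggregation implemented with T.extra_ops.repeat above:
# Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
import numpy as np

def dueling_q(v, a):
    # v: (batch, 1) state values; a: (batch, n_actions) advantages
    return v + (a - a.mean(axis=1, keepdims=True))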
class TestLSTM(AbstractModel):
    def __init__(self, input_dims, learning_rate, batch_size):
        self.input = T.tensor3(name='input', dtype=theano.config.floatX)
        self.target = T.matrix(name="target", dtype=theano.config.floatX)
        self.h_tm1 = T.matrix(name="hidden_output", dtype=theano.config.floatX)
        self.c_tm1 = T.matrix(name="hidden_state", dtype=theano.config.floatX)
        self.learning_rate = learning_rate
        N = 12
        self.lstm_layer_sizes = [128, 128]
        self.read_layer = ReadLayer(
            rng,
            h_shape=(reduce(lambda x, y: x + y, self.lstm_layer_sizes), 1),
            image_shape=input_dims,
            N=N,
            name='Read Layer')
        self.conv_layer = ConvPoolLayer(
            rng,
            filter_shape=(30, 1, 3, 3),
            input_shape=(1, N, N),
        )
        self.lstm_layer1 = LSTMLayer(
            rng, n_in=N * N, n_out=self.lstm_layer_sizes[0], name='LSTM1')
        self.lstm_layer2 = LSTMLayer(
            rng, n_in=self.lstm_layer_sizes[0],
            n_out=self.lstm_layer_sizes[1], name='LSTM2')
        self.output_layer = HiddenLayer(
            rng,
            n_in=self.lstm_layer_sizes[0] + self.lstm_layer_sizes[1] + 5 * 5 * 30,
            n_out=10,
            activation=None,
            name='output')
        self.params = (self.read_layer.params + self.lstm_layer1.params
                       + self.lstm_layer2.params + self.output_layer.params)

    def get_predict_output(self, input, h_tm1, c_tm1):
        h, c, output, g_y, g_x, read, delta, sigma_sq = self.recurrent_step(
            input, h_tm1, c_tm1)
        return output, h, c, read, g_x, g_y, delta, sigma_sq

    def get_train_output(self, images, batch_size):
        # Scan over time: move the time axis to the front.
        images = images.dimshuffle([1, 0, 2, 3])
        h0, c0 = self.get_initial_state(batch_size)
        [h, c, output, g_y, g_x, _, _, _], _ = theano.scan(
            fn=self.recurrent_step,
            outputs_info=[h0, c0, None, None, None, None, None, None],
            sequences=images,
        )
        return output, g_y, g_x

    def recurrent_step(self, image, h_tm1, c_tm1):
        # Attend to the image, then feed the glimpse through two stacked LSTMs.
        read, g_x, g_y, delta, sigma_sq = self.read_layer.one_step(h_tm1, image)
        read_ = read.flatten(ndim=2)
        h_1, c_1 = self.lstm_layer1.one_step(
            read_,
            h_tm1[:, 0:self.lstm_layer_sizes[0]],
            c_tm1[:, 0:self.lstm_layer_sizes[0]])
        h_2, c_2 = self.lstm_layer2.one_step(
            h_1,
            h_tm1[:, self.lstm_layer_sizes[0]:],
            c_tm1[:, self.lstm_layer_sizes[0]:])
        h = T.concatenate([h_1, h_2], axis=1)
        c = T.concatenate([c_1, c_2], axis=1)
        conv = self.conv_layer.one_step(read.dimshuffle([0, 'x', 1, 2]))
        conv = conv.flatten(ndim=2)
        lin_output = self.output_layer.one_step(
            T.concatenate([h_1, h_2, conv], axis=1))
        output = T.nnet.softmax(lin_output)
        return [h, c, output, g_y, g_x, read, delta, sigma_sq]

    def step_with_att(self, h_tm1, c_tm1, image):
        read, g_x, g_y, delta, sigma_sq = self.read_layer.one_step(h_tm1, image)
        read_ = read.flatten(ndim=2)
        h_1, c_1 = self.lstm_layer1.one_step(
            read_,
            h_tm1[:, 0:self.lstm_layer_sizes[0]],
            c_tm1[:, 0:self.lstm_layer_sizes[0]])
        h_2, c_2 = self.lstm_layer2.one_step(
            h_1,
            h_tm1[:, self.lstm_layer_sizes[0]:],
            c_tm1[:, self.lstm_layer_sizes[0]:])
        h = T.concatenate([h_1, h_2], axis=1)
        c = T.concatenate([c_1, c_2], axis=1)
        return [h, c, read, g_x, g_y, delta, sigma_sq]

    def compile(self, train_batch_size):
        print("Compiling functions...")
        train_input = T.tensor4()
        target_y = T.matrix()
        target_x = T.matrix()
        train_output, g_y, g_x = self.get_train_output(train_input,
                                                       train_batch_size)
        classification_loss = self.get_NLL_cost(train_output[-1], self.target)
        tracking_loss = self.get_tracking_cost(g_y, g_x, target_y, target_x)
        loss = 5 * classification_loss + tracking_loss
        updates = Adam(loss, self.params, lr=self.learning_rate)
        # updates = self.get_updates(loss, self.params, self.learning_rate)
        self.train_func = theano.function(
            inputs=[train_input, self.target, target_y, target_x],
            outputs=[train_output[-1], loss],
            updates=updates,
            allow_input_downcast=True)
        h_tm1 = T.matrix()
        c_tm1 = T.matrix()
        predict_output, h, c, read, g_x, g_y, delta, sigma_sq = \
            self.get_predict_output(self.input, h_tm1, c_tm1)
        self.predict_func = theano.function(
            inputs=[self.input, h_tm1, c_tm1],
            outputs=[predict_output, h, c, read, g_x, g_y, delta, sigma_sq],
            allow_input_downcast=True)
        print("Done!")

    def train(self, x, y, target_y, target_x):
        '''
        x is in the form of [batch, time, height, width]
        y is [batch, target]
        '''
        prediction, loss = self.train_func(x, y, target_y, target_x)
        return prediction, loss

    def get_initial_state(self, batch_size, shared=True):
        total_states = reduce(lambda x, y: x + y, self.lstm_layer_sizes)
        h0 = np.zeros((batch_size, total_states), dtype=theano.config.floatX)
        c0 = np.zeros((batch_size, total_states), dtype=theano.config.floatX)
        if shared:
            h0 = theano.shared(h0, name='h0', borrow=True)
            c0 = theano.shared(c0, name='c0', borrow=True)
        return h0, c0

    def predict(self, x, reset=True, batch_size=1):
        if reset:
            self.predict_h, self.predict_c = self.get_initial_state(
                batch_size, shared=False)
        if len(x.shape) == 2:
            x = np.expand_dims(x, axis=0)
        prediction, self.predict_h, self.predict_c, read, g_x, g_y, delta, sigma_sq = \
            self.predict_func(x, self.predict_h, self.predict_c)
        return prediction, [read, g_x, g_y, delta, sigma_sq]

    def get_NLL_cost(self, output, target):
        # Cross-entropy against one-hot (or soft) targets.
        NLL = -T.sum(T.log(output) * target, axis=1)
        return NLL.mean()

    def get_tracking_cost(self, g_y, g_x, target_y, target_x):
        loss = ((target_y - g_y) ** 2) + ((target_x - g_x) ** 2)
        loss = T.sqrt(loss + 1e-4)
        return loss.mean()

    def get_updates(self, cost, params, learning_rate):
        # Plain SGD updates.
        gradients = T.grad(cost, params)
        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(params, gradients)]
        return updates

    def deserialize(self, hidden):
        result = []
        start = 0
        for size in self.lstm_layer_sizes:
            # Take this layer's slab of the concatenated hidden vector.
            result.append(hidden[start:start + size].reshape((size, 1)))
            start = start + size
        return result
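
# Hypothetical usage sketch (all sizes and array contents below are assumed;
# nothing here is in the original source): compile once, train on
# [batch, time, height, width] clips, then step frame-by-frame at test time
# with a persistent hidden state.
import numpy as np

model = TestLSTM(input_dims=(100, 100), learning_rate=1e-3, batch_size=32)
model.compile(train_batch_size=32)

clips = np.random.rand(32, 10, 100, 100)              # [batch, time, h, w]
labels = np.eye(10)[np.random.randint(10, size=32)]   # one-hot class targets
ty = np.random.rand(10, 32)                           # per-step y tracking targets
tx = np.random.rand(10, 32)                           # per-step x tracking targets
prediction, loss = model.train(clips, labels, ty, tx)

frame = np.random.rand(100, 100)                      # single test frame
pred, (read, g_x, g_y, delta, sigma_sq) = model.predict(frame, reset=True)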
class Network:
    numOfHiddens = 8
    # numOfOutputs = 4
    bias = 0.1
    lrate = 0.01
    sumError = 0
    successRateLastTurn = .5
    precision = 0.00001
    continueTraining = True
    numOfSuccess = 1
    numOfFailure = 0

    def __init__(self):
        types, groundTruths, dataVectors = self.getData()
        # create all layers
        self.inputLayer = InputLayer(len(dataVectors[0]))
        self.hiddenLayer = HiddenLayer(len(dataVectors[0]),
                                       self.numOfHiddens, "lrelu")
        self.outputLayer = OutputLayer(self.numOfHiddens, len(types), "lrelu")
        t0 = time.time()
        self.trainNetwork(types, groundTruths, dataVectors)
        t1 = time.time()
        print("\nTime: " + str(t1 - t0))
        print(self.hiddenLayer.weights)
        print("------------------------------------")
        print(self.outputLayer.weights)

    def getData(self):
        rawData = self.readCSV()
        shuffle(rawData)
        types, groundTruths = self.getGTs(rawData)
        dataVectors = np.array([self._assignBias(row) for row in rawData],
                               float)
        return types, groundTruths, dataVectors

    def _assignBias(self, vector):
        # Overwrite the label column with the constant bias input.
        vector[-1] = self.bias
        return vector

    def readCSV(self):
        with open("samples_4_classes_normalized.csv", mode="r") as dataFile:
            return list(csv.reader(dataFile))[1:]  # skip the header row

    def getGTs(self, rawData):
        types = list({row[-1] for row in rawData})
        groundTruths = np.full((len(rawData), len(types)), 0, int)
        for i, row in enumerate(rawData):
            groundTruths[i][types.index(row[-1])] = 1
        return types, groundTruths

    def trainNetwork(self, types, groundTruths, dataVectors):
        epoCounter = 0
        while self.continueTraining:
            epoCounter += 1
            self.trainEpoch(types, groundTruths, dataVectors, str(epoCounter))

    def trainEpoch(self, types, groundTruths, dataVectors, epoCounter):
        counter = 0
        for vector, groundTruth in zip(dataVectors, groundTruths):
            self.feedSample(vector, groundTruth)
            # control operation
            counter += 1
            if counter % 100 == 0:
                print("Epo: " + epoCounter
                      + " Data: " + str(counter) + "/" + str(len(dataVectors))
                      + " Prec: " + str(self.precision)
                      + " TErr: " + str(1 - self.sumError / counter)
                      + " Clf SR: " + str(self.numOfSuccess /
                                          (self.numOfSuccess + self.numOfFailure)),
                      end="\r")
                # Stop once the mean error changes by less than `precision`.
                if abs(self.successRateLastTurn
                       - self.sumError / counter) < self.precision:
                    self.continueTraining = False
                    return
                self.successRateLastTurn = self.sumError / counter

    def feedSample(self, dataVector, groundTruth):
        actVectorInput = self.inputLayer.feedSample(dataVector)
        actVectorHidden = self.hiddenLayer.feedSample(actVectorInput)
        actVectorOutput = self.outputLayer.feedSample(actVectorHidden)
        errorVector = self.getErrorVector(actVectorOutput, groundTruth)
        self.sumError += sum(errorVector) / len(errorVector)
        self.predict(actVectorOutput, groundTruth)
        self.backprop(errorVector, actVectorHidden, actVectorInput)

    def getErrorVector(self, actVectorOutput, groundTruth):
        return [truth - act
                for act, truth in zip(actVectorOutput, groundTruth)]

    def backprop(self, errorVector, actVectorHidden, actVectorInput):
        self.hiddenLayer.backprop(self.lrate, actVectorInput, errorVector,
                                  self.outputLayer)
        self.outputLayer.backprop(self.lrate, errorVector, actVectorHidden)

    def predict(self, actVectorOutput, groundTruth):
        if groundTruth[np.argmax(actVectorOutput)] > 0:
            self.numOfSuccess += 1
        else:
            self.numOfFailure += 1
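
# The layer constructors above take an activation tag ("lrelu"). A minimal
# numpy sketch of leaky ReLU and the derivative the backprop pass would use,
# assuming the common 0.01 negative slope (the actual slope is defined inside
# the layer classes, which are not shown here):
import numpy as np

def lrelu(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)

def lrelu_grad(x, alpha=0.01):
    return np.where(x > 0, 1.0, alpha)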