Example #1
class ReadLayer(object):

    def __init__(self, rng, h_shape, image_shape, N, name='Default_readlayer'):
        print('Building layer: ' + name)

        self.lin_transform = HiddenLayer(
            rng,
            n_in=h_shape[0] * h_shape[1],
            n_out=4,
            activation=None,
            irange=0.001,
            name='readlayer: linear transformation')

        self.reader = Reader(
            rng,
            image_shape=image_shape,
            N=N,
            name='readlayer: reader')

        self.params = self.lin_transform.params

    def one_step(self, h, image):
        linear = self.lin_transform.one_step(h)

        read, g_x, g_y, delta, sigma_sq = self.reader.one_step(linear, image)
        return read, g_x, g_y, delta, sigma_sq
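
These examples all consume a Theano-style HiddenLayer. For orientation, here is a minimal self-contained sketch of the interface they appear to assume (signatures vary between the repositories these snippets come from; irange, name, and one_step are inferred from usage, and some variants also take the input tensor in the constructor):

import numpy as np
import theano
import theano.tensor as T

class HiddenLayer(object):
    """Minimal sketch of the fully connected layer assumed by these examples."""

    def __init__(self, rng, n_in, n_out, activation=T.tanh, irange=0.01,
                 name='hidden'):
        # Small uniform random weights and a zero bias, held in shared variables.
        W_values = np.asarray(
            rng.uniform(low=-irange, high=irange, size=(n_in, n_out)),
            dtype=theano.config.floatX)
        self.W = theano.shared(W_values, name=name + '_W', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name=name + '_b', borrow=True)
        self.params = [self.W, self.b]
        self.activation = activation

    def one_step(self, x):
        # Affine transform followed by the optional nonlinearity.
        lin = T.dot(x, self.W) + self.b
        return lin if self.activation is None else self.activation(lin)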
Example #2
    def __init__(self, input, input_dims, target):
        self.input = input
        self.target = target
        conv_compat_shape = (1, 1, input_dims[0], input_dims[1])
        conv_input = self.input.reshape(conv_compat_shape)

        layer1 = ConvPoolLayer(rng,
                               input=conv_input,
                               name='C1',
                               filter_shape=(20, 1, 3, 3),
                               input_shape=conv_compat_shape,
                               poolsize=(2, 2))

        layer2 = ConvPoolLayer(rng,
                               input=layer1.output,
                               name='C2',
                               filter_shape=(20, 20, 3, 3),
                               input_shape=layer1.output_shape,
                               poolsize=(2, 2))

        layer3 = HiddenLayer(rng,
                             input=layer2.output.flatten(ndim=2),
                             n_in=reduce(lambda x, y: x * y,
                                         layer2.output_shape),
                             n_out=10,
                             activation=T.tanh,
                             name='output')
        self.output = T.nnet.softmax(layer3.output)
        self.params = layer1.params + layer2.params + layer3.params
Example #3
    def initialize(self):

        self.hiddenLayers = []
        self.params = []
        input = self.input
        rng = self.rng
        n_out = self.n_out

        path = self.get_path()

        fromFile = (path is not None) and os.path.exists(path)

        if fromFile:
            # Note: on Python 3, use pickle in place of cPickle.
            with open(path, 'rb') as file:
                print('loading mlp file from file...', path)
                d = cPickle.load(file)
                savedhiddenLayers        = d[0]
                saved_logRegressionLayer = d[1]
                self.n_in                = d[2]
                self.n_hidden            = d[3]

        next_input = input
        next_n_in = self.n_in

        print('self.n_hidden:', self.n_hidden)
         
        for n_h in self.n_hidden:
            hl = HiddenLayer(rng=rng, input=next_input,
                             n_in=next_n_in, n_out=n_h,
                             activation=self.activation)
            next_input = hl.output
            next_n_in = n_h
            self.hiddenLayers.append(hl)
            self.params += hl.params
            
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayers[-1].output,
            n_in=self.n_hidden[-1],
            n_out=n_out)
        
        self.params += self.logRegressionLayer.params
        
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

        self.errors = self.logRegressionLayer.errors
        self.p_y_given_x = self.logRegressionLayer.p_y_given_x
        self.y_pred = self.logRegressionLayer.y_pred

        if fromFile:
            for hl, shl in zip(self.hiddenLayers, savedhiddenLayers):
                hl.W.set_value(shl.W.get_value())
                hl.b.set_value(shl.b.get_value())

            self.logRegressionLayer.W.set_value(saved_logRegressionLayer.W.get_value())
            self.logRegressionLayer.b.set_value(saved_logRegressionLayer.b.get_value())

        self.cost = self.negative_log_likelihood 
Example #4
    def __init__(self, input, input_dims, target):
        self.input = input
        self.target = target
        num_in = input_dims[0] * input_dims[1]
        layer1 = HiddenLayer(rng,
                             input=input.flatten(),
                             n_in=num_in,
                             n_out=10,
                             activation=T.tanh,
                             name='FC1')
        layer2 = HiddenLayer(rng,
                             input=layer1.output,
                             n_in=10,
                             n_out=10,
                             activation=T.tanh,
                             name='output')
        self.output = T.nnet.softmax(layer2.output)
        self.params = layer1.params + layer2.params
Example #5
    def __init__(self, input_dims, learning_rate, batch_size):
        self.input = T.tensor3(name='input', dtype=theano.config.floatX)
        self.target = T.matrix(name="target", dtype=theano.config.floatX)
        self.h_tm1 = T.matrix(name="hidden_output", dtype=theano.config.floatX)
        self.c_tm1 = T.matrix(name="hidden_state", dtype=theano.config.floatX)
        self.learning_rate = learning_rate

        N = 12
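        # N x N is the size of the square attention glimpse extracted by the
        # read layer (see the (1, N, N) conv input shape below).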

        self.lstm_layer_sizes = [128, 128]
        self.read_layer = ReadLayer(rng,
                                    h_shape=(reduce(lambda x, y: x + y,
                                                    self.lstm_layer_sizes), 1),
                                    image_shape=input_dims,
                                    N=N,
                                    name='Read Layer')
        self.conv_layer = ConvPoolLayer(
            rng,
            filter_shape=(30, 1, 3, 3),
            input_shape=(1, N, N),
        )

        self.lstm_layer1 = LSTMLayer(rng,
                                     n_in=N * N,
                                     n_out=self.lstm_layer_sizes[0],
                                     name='LSTM1')
        self.lstm_layer2 = LSTMLayer(rng,
                                     n_in=self.lstm_layer_sizes[0],
                                     n_out=self.lstm_layer_sizes[1],
                                     name='LSTM2')

        self.output_layer = HiddenLayer(rng,
                                        n_in=self.lstm_layer_sizes[0] +
                                        self.lstm_layer_sizes[1] + 5 * 5 * 30,
                                        n_out=10,
                                        activation=None,
                                        name='output')

        self.params = self.read_layer.params + self.lstm_layer1.params +\
            self.lstm_layer2.params + self.output_layer.params
Example #6
    def __init__(self,
                 rng,
                 input1,
                 input2,
                 n_in,
                 n_hidden1,
                 n_hidden2,
                 n_out,
                 model=None,
                 gamma=0.99):
        self.rng = rng
        self.n_in = n_in
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_out = n_out
        if model is None:
            model = self.init_model()

        self.hiddenLayer1 = HiddenLayer(
            input1=input1,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[0],
            b_values=model[2])

        self.hiddenLayer2 = HiddenLayer(
            input1=self.hiddenLayer1.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[1],
            b_values=model[3])
        self.logRegressionLayer = LogisticRegression(
            input1=self.hiddenLayer2.output1,
            n_in=n_hidden2,
            n_out=n_out,
            W_values=model[4],
            b_values=model[5])

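        # Second tower evaluated on the next state (input2), built from the
        # same model values; used below to select the Double-DQN action.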
        self.hiddenLayer1_ddqn = HiddenLayer(
            input1=input2,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[0],
            b_values=model[2])

        self.hiddenLayer2_ddqn = HiddenLayer(
            input1=self.hiddenLayer1_ddqn.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[1],
            b_values=model[3])
        self.logRegressionLayer_ddqn = LogisticRegression(
            input1=self.hiddenLayer2_ddqn.output1,
            n_in=n_hidden2,
            n_out=n_out,
            W_values=model[4],
            b_values=model[5])

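        # Target-network tower, also evaluated on input2; its Q values (Qsp)
        # provide the bootstrap target.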
        self.hiddenLayer1_t = HiddenLayer(input1=input2,
                                          n_in=n_in,
                                          n_out=n_hidden1,
                                          activation=T.nnet.relu,
                                          W_values=model[0],
                                          b_values=model[2])

        self.hiddenLayer2_t = HiddenLayer(input1=self.hiddenLayer1_t.output1,
                                          n_in=n_hidden1,
                                          n_out=n_hidden2,
                                          activation=T.nnet.relu,
                                          W_values=model[1],
                                          b_values=model[3])
        self.logRegressionLayer_t = LogisticRegression(
            input1=self.hiddenLayer2_t.output1,
            n_in=n_hidden2,
            n_out=n_out,
            W_values=model[4],
            b_values=model[5])

        self.L1 = (abs(self.hiddenLayer1.W).sum() +
                   abs(self.hiddenLayer2.W).sum() +
                   abs(self.logRegressionLayer.W).sum())

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = ((self.hiddenLayer1.W**2).sum() +
                       (self.hiddenLayer2.W**2).sum() +
                       (self.logRegressionLayer.W**2).sum())

        self.params = self.hiddenLayer1.params + self.hiddenLayer2.params + self.logRegressionLayer.params
        #        self.params = self.hiddenLayer1.params+ self.logRegressionLayer.params
        # end-snippet-3
        # keep track of model input
        self.Qs = self.logRegressionLayer.Q
        self.Qddqn = self.logRegressionLayer_ddqn.Q
        self.Qsp = self.logRegressionLayer_t.Q
        self.input1 = input1
        self.input2 = input2
        #####################
        #        self.aidx = T.cast(T.round((input1[:,64]*4.0+4.0)*7.0+(input1[:,65]*3.5+3.5)), 'int32')
        #        self.aidx = T.cast(T.argmax(input1[:,64:73],axis=1)*7+T.argmax(input1[:,73:80],axis=1), 'int32')
        self.aidx = T.cast(input1[:, 5] + 1, 'int32')
        ##################
        #        self.cost = T.mean(T.max(self.logRegressionLayer.Qsp,axis=1))
        #        self.cost = T.mean(T.max(self.logRegressionLayer.Qsp,axis=1)-T.max(self.logRegressionLayer.Qs,axis=1))
        #        self.cost = self.Qs[0,0]-self.Qsp[0,0]

        self.target = input1[:, 0] + gamma * T.max(self.Qsp, axis=1)
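        # Double DQN: the online tower (Qddqn) selects the greedy action and
        # the target tower (Qsp) evaluates it.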
        self.action_ddqn = T.argmax(self.Qddqn, axis=1)
        self.target_ddqn = input1[:, 0] + gamma * self.Qsp[
            T.arange(self.action_ddqn.shape[0]), self.action_ddqn]
        self.Qcost = T.mean(
            0.5 * (self.target_ddqn -
                   self.Qs[T.arange(self.aidx.shape[0]), self.aidx])**2)
        self.Qcost_v = 0.5 * (
            self.target_ddqn -
            self.Qs[T.arange(self.aidx.shape[0]), self.aidx])**2
        #        self.Qcost = T.mean(0.5*(self.target-self.Qs[T.arange(self.aidx.shape[0]),self.aidx])**2)
        self.cost = self.Qcost  #+0.0001*self.L2_sqr
        self.cost_v = self.Qcost_v
        #        self.errors = T.sqrt(T.mean(((input1[:,0]+0.97*T.max(self.logRegressionLayer.Qsp,axis=1)-T.max(self.logRegressionLayer.Qs,axis=1))/(input1[:,0]+0.95*T.max(self.logRegressionLayer.Qsp,axis=1)))**2))
        #######parameters
        self.Wh1 = self.hiddenLayer1.W
        self.Wh2 = self.hiddenLayer2.W
        self.bh1 = self.hiddenLayer1.b
        self.bh2 = self.hiddenLayer2.b
        self.OW = self.logRegressionLayer.W
        self.Ob = self.logRegressionLayer.b
        self.Wh1t = self.hiddenLayer1_t.W
        self.Wh2t = self.hiddenLayer2_t.W
        self.bh1t = self.hiddenLayer1_t.b
        self.bh2t = self.hiddenLayer2_t.b
        self.OWt = self.logRegressionLayer_t.W
        self.Obt = self.logRegressionLayer_t.b
        self.Wh1ddqn = self.hiddenLayer1_ddqn.W
        self.Wh2ddqn = self.hiddenLayer2_ddqn.W
        self.bh1ddqn = self.hiddenLayer1_ddqn.b
        self.bh2ddqn = self.hiddenLayer2_ddqn.b
        self.OWddqn = self.logRegressionLayer_ddqn.W
        self.Obddqn = self.logRegressionLayer_ddqn.b
Example #7
                print(layer.feedforward(input))

        #print(epoch) # to avoid unused variable error; delete later


# testing
np.random.seed(1)

# data
x = np.random.rand(1, 10)
y = np.random.rand(1, 3)

# network
NN = NeuralNetwork(x, y)
NN.add(Inputlayer(x.shape, (1, 5)))
NN.add(HiddenLayer((1, 5), (1, 9), tanh))
NN.add(HiddenLayer((1, 9), (1, 3), tanh))
NN.add(OutputLayer((1, 3), y.shape))

# feedforward
output1 = NN.layers[0].feedforward(x)
output2 = NN.layers[1].feedforward(output1)
output3 = NN.layers[2].feedforward(output2)
output4 = NN.layers[3].feedforward(output3)

#print(NN.fit(x,y,1,1))
"""
print("outout1",output1)
print("outout2",output2)
print("outout3",output3)
print("outout4",output4)
Example #8
    def __init__(self,
                 rng,
                 input1,
                 input2,
                 n_in,
                 n_hidden1,
                 n_hidden2,
                 n_out,
                 model=None,
                 gamma=0.99):
        self.rng = rng
        self.n_in = n_in
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_out = n_out
        if model is None:
            model = self.init_model()

        self.VhiddenLayer1 = HiddenLayer(
            input1=input1,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[0],
            b_values=model[2])
        self.VhiddenLayer2 = HiddenLayer(
            input1=self.VhiddenLayer1.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[1],
            b_values=model[3])
        self.VlogRegressionLayer = LogisticRegression(
            input1=self.VhiddenLayer2.output1,
            n_in=n_hidden2,
            n_out=1,
            W_values=model[4],
            b_values=model[5])
        self.AhiddenLayer1 = HiddenLayer(
            input1=input1,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[6],
            b_values=model[8])
        self.AhiddenLayer2 = HiddenLayer(
            input1=self.AhiddenLayer1.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[7],
            b_values=model[9])
        self.AlogRegressionLayer = LogisticRegression(
            input1=self.AhiddenLayer2.output1,
            n_in=n_hidden2,
            n_out=n_out,
            W_values=model[10],
            b_values=model[11])
        #######ddqn##########
        self.VhiddenLayer1_ddqn = HiddenLayer(
            input1=input2,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[0],
            b_values=model[2])
        self.VhiddenLayer2_ddqn = HiddenLayer(
            input1=self.VhiddenLayer1_ddqn.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[1],
            b_values=model[3])
        self.VlogRegressionLayer_ddqn = LogisticRegression(
            input1=self.VhiddenLayer2_ddqn.output1,
            n_in=n_hidden2,
            n_out=1,
            W_values=model[4],
            b_values=model[5])
        self.AhiddenLayer1_ddqn = HiddenLayer(
            input1=input2,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[6],
            b_values=model[8])
        self.AhiddenLayer2_ddqn = HiddenLayer(
            input1=self.AhiddenLayer1_ddqn.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[7],
            b_values=model[9])
        self.AlogRegressionLayer_ddqn = LogisticRegression(
            input1=self.AhiddenLayer2_ddqn.output1,
            n_in=n_hidden2,
            n_out=n_out,
            W_values=model[10],
            b_values=model[11])

        ######target##########
        self.VhiddenLayer1_t = HiddenLayer(
            input1=input2,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[0],
            b_values=model[2])
        self.VhiddenLayer2_t = HiddenLayer(
            input1=self.VhiddenLayer1_t.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[1],
            b_values=model[3])
        self.VlogRegressionLayer_t = LogisticRegression(
            input1=self.VhiddenLayer2_t.output1,
            n_in=n_hidden2,
            n_out=1,
            W_values=model[4],
            b_values=model[5])
        self.AhiddenLayer1_t = HiddenLayer(
            input1=input2,
            n_in=n_in,
            n_out=n_hidden1,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[6],
            b_values=model[8])
        self.AhiddenLayer2_t = HiddenLayer(
            input1=self.AhiddenLayer1_t.output1,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=T.nnet.relu,
            #            activation=T.tanh,
            W_values=model[7],
            b_values=model[9])
        self.AlogRegressionLayer_t = LogisticRegression(
            input1=self.AhiddenLayer2_t.output1,
            n_in=n_hidden2,
            n_out=n_out,
            W_values=model[10],
            b_values=model[11])

        self.params = self.VhiddenLayer1.params + self.VhiddenLayer2.params + self.VlogRegressionLayer.params + self.AhiddenLayer1.params + self.AhiddenLayer2.params + self.AlogRegressionLayer.params
        # Dueling aggregation: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a));
        # the scalar V is broadcast across the n_out actions via repeat.
        self.Qs = T.extra_ops.repeat(
            self.VlogRegressionLayer.Q, n_out, axis=1) + (
                self.AlogRegressionLayer.Q -
                T.mean(self.AlogRegressionLayer.Q, axis=1, keepdims=True))
        self.Qddqn = T.extra_ops.repeat(
            self.VlogRegressionLayer_ddqn.Q, n_out, axis=1) + (
                self.AlogRegressionLayer_ddqn.Q -
                T.mean(self.AlogRegressionLayer_ddqn.Q, axis=1, keepdims=True))
        self.Qsp = T.extra_ops.repeat(
            self.VlogRegressionLayer_t.Q, n_out, axis=1) + (
                self.AlogRegressionLayer_t.Q -
                T.mean(self.AlogRegressionLayer_t.Q, axis=1, keepdims=True))

        # self.Qs =  (self.AlogRegressionLayer.Q - T.mean(self.AlogRegressionLayer.Q,axis=1,keepdims=True)) + self.VlogRegressionLayer.Q
        # self.Qddqn =  (self.AlogRegressionLayer_ddqn.Q - T.mean(self.AlogRegressionLayer_ddqn.Q,axis=1,keepdims=True)) + self.VlogRegressionLayer_ddqn.Q
        # self.Qsp = (self.AlogRegressionLayer_t.Q - T.mean(self.AlogRegressionLayer_t.Q,axis=1,keepdims=True)) + self.VlogRegressionLayer_t.Q

        # self.Qs = T.extra_ops.repeat(self.VlogRegressionLayer.Q,n_out,axis=1) + (self.AlogRegressionLayer.Q - T.extra_ops.repeat(T.mean(self.AlogRegressionLayer.Q,axis=1,keepdims=True),n_out,axis=1))
        # self.Qddqn = T.extra_ops.repeat(self.VlogRegressionLayer_ddqn.Q,n_out,axis=1) + (self.AlogRegressionLayer_ddqn.Q - T.extra_ops.repeat(T.mean(self.AlogRegressionLayer_ddqn.Q,axis=1,keepdims=True),n_out,axis=1))
        # self.Qsp = T.extra_ops.repeat(self.VlogRegressionLayer_t.Q,n_out,axis=1) + (self.AlogRegressionLayer_t.Q - T.extra_ops.repeat(T.mean(self.AlogRegressionLayer_t.Q,axis=1,keepdims=True),n_out,axis=1))
        self.input1 = input1
        self.input2 = input2
        self.aidx = T.cast(
            T.round((input1[:, 64] * 4.0 + 4.0) * 7.0 +
                    (input1[:, 65] * 3.5 + 3.5)), 'int32')
        #        self.cost = T.mean(T.max(self.logRegressionLayer.Qsp,axis=1))
        #        self.cost = T.mean(T.max(self.logRegressionLayer.Qsp,axis=1)-T.max(self.logRegressionLayer.Qs,axis=1))
        #        self.cost = self.Qs[0,0]-self.Qsp[0,0]

        self.target = input1[:, 0] + gamma * T.max(self.Qsp, axis=1)
        self.action_ddqn = T.argmax(self.Qddqn, axis=1)
        self.target_ddqn = input1[:, 0] + gamma * self.Qsp[
            T.arange(self.action_ddqn.shape[0]), self.action_ddqn]
        self.Qcost = T.mean(
            0.5 * (self.target_ddqn -
                   self.Qs[T.arange(self.aidx.shape[0]), self.aidx])**2)
        #        self.Qcost = T.mean(0.5*(self.target-self.Qs[T.arange(self.aidx.shape[0]),self.aidx])**2)
        self.cost = self.Qcost  #+0.0001*self.L2_sqr
        #        self.errors = T.sqrt(T.mean(((input1[:,0]+0.97*T.max(self.logRegressionLayer.Qsp,axis=1)-T.max(self.logRegressionLayer.Qs,axis=1))/(input1[:,0]+0.95*T.max(self.logRegressionLayer.Qsp,axis=1)))**2))
        #######parameters
        self.VWh1 = self.VhiddenLayer1.W
        self.VWh2 = self.VhiddenLayer2.W
        self.Vbh1 = self.VhiddenLayer1.b
        self.Vbh2 = self.VhiddenLayer2.b
        self.VOW = self.VlogRegressionLayer.W
        self.VOb = self.VlogRegressionLayer.b
        self.AWh1 = self.AhiddenLayer1.W
        self.AWh2 = self.AhiddenLayer2.W
        self.Abh1 = self.AhiddenLayer1.b
        self.Abh2 = self.AhiddenLayer2.b
        self.AOW = self.AlogRegressionLayer.W
        self.AOb = self.AlogRegressionLayer.b
        self.VWh1t = self.VhiddenLayer1_t.W
        self.VWh2t = self.VhiddenLayer2_t.W
        self.Vbh1t = self.VhiddenLayer1_t.b
        self.Vbh2t = self.VhiddenLayer2_t.b
        self.VOWt = self.VlogRegressionLayer_t.W
        self.VObt = self.VlogRegressionLayer_t.b
        self.AWh1t = self.AhiddenLayer1_t.W
        self.AWh2t = self.AhiddenLayer2_t.W
        self.Abh1t = self.AhiddenLayer1_t.b
        self.Abh2t = self.AhiddenLayer2_t.b
        self.AOWt = self.AlogRegressionLayer_t.W
        self.AObt = self.AlogRegressionLayer_t.b
        self.VWh1ddqn = self.VhiddenLayer1_ddqn.W
        self.VWh2ddqn = self.VhiddenLayer2_ddqn.W
        self.Vbh1ddqn = self.VhiddenLayer1_ddqn.b
        self.Vbh2ddqn = self.VhiddenLayer2_ddqn.b
        self.VOWddqn = self.VlogRegressionLayer_ddqn.W
        self.VObddqn = self.VlogRegressionLayer_ddqn.b
        self.AWh1ddqn = self.AhiddenLayer1_ddqn.W
        self.AWh2ddqn = self.AhiddenLayer2_ddqn.W
        self.Abh1ddqn = self.AhiddenLayer1_ddqn.b
        self.Abh2ddqn = self.AhiddenLayer2_ddqn.b
        self.AOWddqn = self.AlogRegressionLayer_ddqn.W
        self.AObddqn = self.AlogRegressionLayer_ddqn.b
Example #9
class TestLSTM(AbstractModel):

    def __init__(self, input_dims, learning_rate, batch_size):
        self.input = T.tensor3(name='input', dtype=theano.config.floatX)
        self.target = T.matrix(name="target", dtype=theano.config.floatX)
        self.h_tm1 = T.matrix(name="hidden_output", dtype=theano.config.floatX)
        self.c_tm1 = T.matrix(name="hidden_state", dtype=theano.config.floatX)
        self.learning_rate = learning_rate

        N = 12

        self.lstm_layer_sizes = [128, 128]
        self.read_layer = ReadLayer(
            rng,
            h_shape=(reduce(lambda x, y: x + y, self.lstm_layer_sizes), 1),
            image_shape=input_dims,
            N=N,
            name='Read Layer'
        )
        self.conv_layer = ConvPoolLayer(
            rng,
            filter_shape=(30, 1, 3, 3),
            input_shape=(1, N, N),
        )

        self.lstm_layer1 = LSTMLayer(
            rng,
            n_in=N*N,
            n_out=self.lstm_layer_sizes[0],
            name='LSTM1'
        )
        self.lstm_layer2 = LSTMLayer(
            rng,
            n_in=self.lstm_layer_sizes[0],
            n_out=self.lstm_layer_sizes[1],
            name='LSTM2'
        )

        self.output_layer = HiddenLayer(
            rng,
            n_in=self.lstm_layer_sizes[0] + self.lstm_layer_sizes[1] + 5*5*30,
            n_out=10,
            activation=None,
            name='output'
        )

        self.params = self.read_layer.params + self.lstm_layer1.params +\
            self.lstm_layer2.params + self.output_layer.params

    def get_predict_output(self, input, h_tm1, c_tm1):

        h, c, output, g_y, g_x, read, delta, sigma_sq = self.recurrent_step(input,
            h_tm1, c_tm1)
        return output, h, c, read, g_x, g_y, delta, sigma_sq

    def get_train_output(self, images, batch_size):

        images = images.dimshuffle([1, 0, 2, 3])
        h0, c0 = self.get_initial_state(batch_size)
        [h, c, output, g_y, g_x, _, _, _], _ = theano.scan(fn=self.recurrent_step,
                                                  outputs_info=[
                                                      h0, c0, None, None, None, None, None, None],
                                                  sequences=images,
                                                  )
        return output, g_y, g_x

    def recurrent_step(self, image, h_tm1, c_tm1):
        read, g_x, g_y, delta, sigma_sq = self.read_layer.one_step(h_tm1, image)
        
        read_ = read.flatten(ndim=2)

        h_1, c_1 =\
            self.lstm_layer1.one_step(read_,
                                      h_tm1[:, 0:self.lstm_layer_sizes[0]],
                                      c_tm1[:, 0:self.lstm_layer_sizes[0]])
        h_2, c_2 =\
            self.lstm_layer2.one_step(h_1,
                                      h_tm1[:, self.lstm_layer_sizes[0]:],
                                      c_tm1[:, self.lstm_layer_sizes[0]:]
                                      )
        h = T.concatenate([h_1, h_2], axis=1)
        c = T.concatenate([c_1, c_2], axis=1)
        conv = self.conv_layer.one_step(read.dimshuffle([0, 'x', 1, 2]))
        conv = conv.flatten(ndim=2)
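        # Classify from both LSTM hidden states plus conv features of the
        # current glimpse.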
        lin_output = self.output_layer.one_step(T.concatenate([h_1, h_2, conv], axis=1))
        output = T.nnet.softmax(lin_output)
        return [h, c, output, g_y, g_x, read, delta, sigma_sq]

    def step_with_att(self, h_tm1, c_tm1, image):
        read, g_x, g_y, delta, sigma_sq = self.read_layer.one_step(
            h_tm1, image)
        read_ = read.flatten(ndim=2)
        h_1, c_1 =\
            self.lstm_layer1.one_step(read_,
                                      h_tm1[:, 0:self.lstm_layer_sizes[0]],
                                      c_tm1[:, 0:self.lstm_layer_sizes[0]])
        h_2, c_2 =\
            self.lstm_layer2.one_step(h_1,
                                      h_tm1[:, self.lstm_layer_sizes[0]:],
                                      c_tm1[:, self.lstm_layer_sizes[0]:]
                                      )

        h = T.concatenate([h_1, h_2], axis=1)
        c = T.concatenate([c_1, c_2], axis=1)
        return [h, c, read, g_x, g_y, delta, sigma_sq]

    def compile(self, train_batch_size):
        print("Compiling functions...")
        train_input = T.tensor4()
        target_y = T.matrix()
        target_x = T.matrix()
        train_output, g_y, g_x = self.get_train_output(train_input,
                                                       train_batch_size)
        classification_loss = self.get_NLL_cost(train_output[-1], self.target)
        tracking_loss = self.get_tracking_cost(g_y, g_x, target_y, target_x)
        loss = 5 * classification_loss + tracking_loss
        updates = Adam(loss, self.params, lr=self.learning_rate)
        # updates = self.get_updates(loss, self.params, self.learning_rate)
        self.train_func = theano.function(
            inputs=[train_input, self.target, target_y, target_x],
            outputs=[train_output[-1], loss],
            updates=updates,
            allow_input_downcast=True
        )

        h_tm1 = T.matrix()
        c_tm1 = T.matrix()
        predict_output, h, c, read, g_x, g_y, delta, sigma_sq = \
            self.get_predict_output(self.input, h_tm1, c_tm1)

        self.predict_func = theano.function(inputs=[self.input, h_tm1, c_tm1],
                                            outputs=[predict_output,
                                                     h,
                                                     c,
                                                     read,
                                                     g_x,
                                                     g_y,
                                                     delta,
                                                     sigma_sq],
                                            allow_input_downcast=True)
        print("Done!")

    def train(self, x, y, target_y, target_x):
        '''
        x is in the form of [batch, time, height, width]
        y is [batch, target]
        '''
        prediction, loss = self.train_func(x, y, target_y, target_x)
        return prediction, loss

    def get_initial_state(self, batch_size, shared=True):
        total_states = reduce(lambda x, y: x + y, self.lstm_layer_sizes)
        h0 = np.zeros((batch_size, total_states), dtype=theano.config.floatX)
        c0 = np.zeros((batch_size, total_states), dtype=theano.config.floatX)
        if shared:
            h0 = theano.shared(
                h0,
                name='h0',
                borrow=True)
            c0 = theano.shared(
                c0,
                name='c0',
                borrow=True)
        return h0, c0
        # initial_state = self.lstm_layer1.initial_hidden_state
        # initial_state = initial_state.dimshuffle(
        #     ['x', 0]).repeat(batch_size, axis=0)
        # return initial_state

    def predict(self, x, reset=True, batch_size=1):
        if reset:
            self.predict_h, self.predict_c = self.get_initial_state(
                batch_size, shared=False)

        if len(x.shape) == 2:
            x = np.expand_dims(x, axis=0)

        prediction, self.predict_h, self.predict_c, read, g_x, g_y, delta, sigma_sq =\
            self.predict_func(x, self.predict_h, self.predict_c)

        return prediction, [read, g_x, g_y, delta, sigma_sq]

    def get_NLL_cost(self, output, target):
        NLL = -T.sum((T.log(output) * target), axis=1)
        return NLL.mean()

    def get_tracking_cost(self, g_y, g_x, target_y, target_x):
        loss = (
            (target_y - g_y) ** 2) + ((target_x - g_x) ** 2)
        loss = T.sqrt(loss + 1e-4)
        return loss.mean()

    def get_updates(self, cost, params, learning_rate):
        gradients = T.grad(cost, params)
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, gradients)
        ]
        return updates

    def deserialize(self, hidden):
        result = []
        start = 0
        for size in self.lstm_layer_sizes:
            # Slice out each layer's consecutive chunk of the hidden vector.
            result.append(hidden[start:start + size].reshape((size, 1)))
            start = start + size
        return result
Example #10
class Network:
    numOfHiddens = 8
    # numOfOutputs = 4
    bias = 0.1
    lrate = 0.01

    sumError = 0
    successRateLastTurn = .5
    precision = 0.00001
    continueTraining = True

    numOfSuccess = 1
    numOfFailure = 0

    def __init__(self):
        types, groundTruths, dataVectors = self.getData()
        # create all layers
        self.inputLayer = InputLayer(len(dataVectors[0]))
        self.hiddenLayer = HiddenLayer(len(dataVectors[0]), self.numOfHiddens,
                                       "lrelu")
        self.outputLayer = OutputLayer(self.numOfHiddens, len(types), "lrelu")

        t0 = time.time()
        self.trainNetwork(types, groundTruths, dataVectors)
        t1 = time.time()
        print("\nTime: " + str(t1 - t0))
        print(self.hiddenLayer.weights)
        print("------------------------------------")
        print(self.outputLayer.weights)

    def getData(self):
        rawData = self.readCSV()
        shuffle(rawData)
        types, groundTruths = self.getGTs(rawData)
        dataVectors = np.array([self._assignBias(row) for row in rawData],
                               float)
        return types, groundTruths, dataVectors

    def _assignBias(self, vector):
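        # The label column was already consumed by getGTs, so its slot is
        # reused as a constant bias input.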
        vector[-1] = self.bias
        return vector

    def readCSV(self):
        with open("samples_4_classes_normalized.csv", mode="r") as dataFile:
            return list(csv.reader(dataFile))[1:]

    def getGTs(self, rawData):
        types = list({row[-1] for row in rawData})
        groundTruths = np.full((len(rawData), len(types)), 0, int)

        for i, row in enumerate(rawData):
            groundTruths[i][types.index(row[-1])] = 1
        return types, groundTruths

    def trainNetwork(self, types, groundTruths, dataVectors):
        epoCounter = 0
        while self.continueTraining:
            epoCounter += 1
            self.trainEpoch(types, groundTruths, dataVectors, str(epoCounter))

    def trainEpoch(self, types, groundTruths, dataVectors, epoCounter):
        counter = 0
        for vector, groundTruth in zip(dataVectors, groundTruths):
            self.feedSample(vector, groundTruth)

            # control operation
            counter += 1
            if counter % 100 == 0:
                print("Epo: " + epoCounter + " Data: " + str(counter) +
                      "/40000 Prec: " + " TErr: " +
                      str(1 - self.sumError / counter) + " Clf SR: " +
                      str(1 - self.numOfFailure / self.numOfSuccess),
                      end="\r")
                if abs(self.successRateLastTurn -
                       self.sumError / counter) < self.precision:
                    self.continueTraining = False
                    return
                self.successRateLastTurn = self.sumError / counter

    def feedSample(self, dataVector, groundTruth):
        actVectorInput = self.inputLayer.feedSample(dataVector)
        actVectorHidden = self.hiddenLayer.feedSample(actVectorInput)
        actVectorOutput = self.outputLayer.feedSample(actVectorHidden)

        errorVector = self.getErrorVector(actVectorOutput, groundTruth)
        self.sumError += sum(errorVector) / len(errorVector)
        self.predict(actVectorOutput, groundTruth)
        self.backprop(errorVector, actVectorHidden, actVectorInput)

    def getErrorVector(self, actVectorOutput, groundTruth):
        return [
            truth - act for act, truth in zip(actVectorOutput, groundTruth)
        ]

    def backprop(self, errorVector, actVectorHidden, actVectorInput):
        self.hiddenLayer.backprop(self.lrate, actVectorInput, errorVector,
                                  self.outputLayer)
        self.outputLayer.backprop(self.lrate, errorVector, actVectorHidden)

    def predict(self, actVectorOutput, groundTruth):
        if groundTruth[np.argmax(actVectorOutput)] > 0:
            self.numOfSuccess += 1
        else:
            self.numOfFailure += 1
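
A minimal way to exercise this class (a sketch; it assumes the CSV file and the InputLayer/HiddenLayer/OutputLayer implementations referenced above are available):

if __name__ == "__main__":
    # Builds the three layers, trains until the epoch error plateaus, and
    # prints the learned hidden and output weight matrices.
    network = Network()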