Code example #1
File: test_pfunc.py Project: gwtaylor/Theano
    def test_param_allow_downcast_int(self):
        a = tensor.wvector('a')  # int16
        b = tensor.bvector('b')  # int8
        c = tensor.bscalar('c')  # int8
        f = pfunc([Param(a, allow_downcast=True),
                   Param(b, allow_downcast=False),
                   Param(c, allow_downcast=None)],
                  (a + b + c))

        # Both values are in range. Since they're not ndarrays (but lists),
        # they will be converted, and their value checked.
        assert numpy.all(f([3], [6], 1) == 10)

        # Values are in range, but a dtype too large has explicitly been given
        # For performance reasons, no check of the data is explicitly performed
        # (It might be OK to change this in the future.)
        self.assertRaises(TypeError, f,
                [3], numpy.array([6], dtype='int16'), 1)

        # Value too big for a, silently ignored
        assert numpy.all(f([2 ** 20], numpy.ones(1, dtype='int8'), 1) == 2)

        # Value too big for b, raises TypeError
        self.assertRaises(TypeError, f, [3], [312], 1)

        # Value too big for c, raises TypeError
        self.assertRaises(TypeError, f, [3], [6], 806)
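
With allow_downcast=True, an out-of-range input is cast with NumPy's modular
wraparound semantics rather than rejected, which is where the
f([2 ** 20], ..., 1) == 2 assertion above comes from. A minimal check of that
arithmetic (plain NumPy, no Theano required):

import numpy as np

a = np.array([2 ** 20]).astype('int16')  # 2**20 is a multiple of 2**16, so it wraps to 0
print(a)                                 # [0]
print(int(a[0]) + 1 + 1)                 # 2, matching the assertion above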
Code example #2
    def test_param_allow_downcast_int(self):
        a = tensor.wvector("a")  # int16
        b = tensor.bvector("b")  # int8
        c = tensor.bscalar("c")  # int8
        f = pfunc(
            [
                In(a, allow_downcast=True),
                In(b, allow_downcast=False),
                In(c, allow_downcast=None),
            ],
            (a + b + c),
        )

        # Both values are in range. Since they're not ndarrays (but lists),
        # they will be converted, and their value checked.
        assert np.all(f([3], [6], 1) == 10)

        # Values are in range, but a dtype too large has explicitly been given
        # For performance reasons, no check of the data is explicitly performed
        # (It might be OK to change this in the future.)
        with pytest.raises(TypeError):
            f([3], np.array([6], dtype="int16"), 1)

        # Value too big for a, silently ignored
        assert np.all(f([2**20], np.ones(1, dtype="int8"), 1) == 2)

        # Value too big for b, raises TypeError
        with pytest.raises(TypeError):
            f([3], [312], 1)

        # Value too big for c, raises TypeError
        with pytest.raises(TypeError):
            f([3], [6], 806)
Code example #3
    def test_param_allow_downcast_int(self):
        a = tensor.wvector('a')  # int16
        b = tensor.bvector('b')  # int8
        c = tensor.bscalar('c')  # int8
        f = pfunc([
            Param(a, allow_downcast=True),
            Param(b, allow_downcast=False),
            Param(c, allow_downcast=None)
        ], (a + b + c))

        # Both values are in range. Since they're not ndarrays (but lists),
        # they will be converted, and their value checked.
        assert numpy.all(f([3], [6], 1) == 10)

        # Values are in range, but a dtype too large has explicitly been given
        # For performance reasons, no check of the data is explicitly performed
        # (It might be OK to change this in the future.)
        self.assertRaises(TypeError, f, [3], numpy.array([6], dtype='int16'),
                          1)

        # Value too big for a, silently ignored
        assert numpy.all(f([2**20], numpy.ones(1, dtype='int8'), 1) == 2)

        # Value too big for b, raises TypeError
        self.assertRaises(TypeError, f, [3], [312], 1)

        # Value too big for c, raises TypeError
        self.assertRaises(TypeError, f, [3], [6], 806)
Code example #4
    def build_2048_ann(self, nb, nh, nh2):
        '''
        nb  = input nodes
        nh  = first hidden layer size
        nh2 = second hidden layer size
        '''
        print("building")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
        w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 4)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
        x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
        x3 = Tann.softmax(T.dot(x2, w3))
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate * g)
                     for p, g in zip(params, gradients)]

        self.trainer = theano.function(inputs=[input, target],
                                       outputs=error,
                                       updates=backprops,
                                       allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input],
                                         outputs=x3,
                                         allow_input_downcast=True)
        print("Built")
Code example #5
    def test_allow_input_downcast_int(self):
        a = tensor.wvector("a")  # int16
        b = tensor.bvector("b")  # int8
        c = tensor.bscalar("c")  # int8

        f = pfunc([a, b, c], (a + b + c), allow_input_downcast=True)
        # Value too big for a, b, or c, silently ignored
        assert f([2**20], [1], 0) == 1
        assert f([3], [312], 0) == 59
        assert f([3], [1], 806) == 42

        g = pfunc([a, b, c], (a + b + c), allow_input_downcast=False)
        # All values are in range. Since they're not ndarrays (but lists
        # or scalars), they will be converted, and their value checked.
        assert np.all(g([3], [6], 0) == 9)

        # Values are in range, but a dtype too large has explicitly been given
        # For performance reasons, no check of the data is explicitly performed
        # (It might be OK to change this in the future.)
        with pytest.raises(TypeError):
            g([3], np.array([6], dtype="int16"), 0)

        # Value too big for b, raises TypeError
        with pytest.raises(TypeError):
            g([3], [312], 0)

        h = pfunc([a, b, c], (a + b + c))  # Default: allow_input_downcast=None
        # Everything here should behave like with False
        assert np.all(h([3], [6], 0) == 9)
        with pytest.raises(TypeError):
            h([3], np.array([6], dtype="int16"), 0)
        with pytest.raises(TypeError):
            h([3], [312], 0)
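
The asserted constants in the allow_input_downcast=True branch all come from the
same modular casting; a small check of the individual downcasts in plain NumPy:

import numpy as np

print(np.array([312]).astype('int8'))   # [56]  -> 3 + 56 + 0 == 59
print(np.array([806]).astype('int8'))   # [38]  -> 3 + 1 + 38 == 42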
Code example #6
File: ann.py Project: Bergalerga/AIProg
    def build_custom_ann(self, layer_list, ann_type = "rlu", nb = 784):
        '''
        Builds a fully connected network from the layer sizes in layer_list,
        using the activation function named by ann_type ("rlu", "sigmoid"
        or "ht").
        '''
        layer_list = [nb] + layer_list
        input = T.dvector('input')
        target = T.wvector('target')
        w_list = []
        x_list = []
        w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[0],layer_list[1]))))
        if ann_type == "rlu":
            x_list.append(T.switch(T.dot(input,w_list[0]) > 0, T.dot(input,w_list[0]), 0))
        elif ann_type == "sigmoid":
            x_list.append(Tann.sigmoid(T.dot(input, w_list[0])))
        elif ann_type == "ht":
            x_list.append(T.tanh(T.dot(input, w_list[0])))

        for count in range(0, len(layer_list) - 2):
            w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[count + 1],layer_list[count + 2]))))
            if ann_type=="rlu":
                x_list.append(T.switch(T.dot(x_list[count],w_list[count + 1]) > 0, T.dot(x_list[count], w_list[count + 1]), 0))
            elif ann_type == "sigmoid":
                x_list.append(Tann.sigmoid(T.dot(x_list[count],w_list[count + 1])))
            elif ann_type == "ht":
                x_list.append(T.tanh(T.dot(x_list[count],w_list[count + 1])))
        w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[-1], 10))))
        x_list.append(T.switch(T.dot(x_list[-1],w_list[-1]) > 0, T.dot(x_list[-1],w_list[-1]), 0))

        error = T.sum(pow((target - x_list[-1]), 2))
        params = w_list
        gradients = T.grad(error, params) 
        backprops = [(p, p - self.lrate*g) for p,g in zip(params,gradients)]

        self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input], outputs=x_list[-1], allow_input_downcast=True)
Code example #7
File: main.py Project: talepre/AIProg
    def build_rectified_linear2_ann(self, nb, nh, nh2):
        '''
        Builds a neural network with two hidden layers of rectified linear
        units as the activation function.
        '''
        print("Building rectified linear ann")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
        w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 10)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
        x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
        x3 = T.switch(T.dot(x2, w3) > 0, T.dot(x2, w3), 0)
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate * g)
                     for p, g in zip(params, gradients)]

        self.trainer = theano.function(inputs=[input, target],
                                       outputs=error,
                                       updates=backprops,
                                       allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input],
                                         outputs=x3,
                                         allow_input_downcast=True)
Code example #8
File: test_pfunc.py Project: gwtaylor/Theano
    def test_allow_input_downcast_int(self):
        a = tensor.wvector('a')  # int16
        b = tensor.bvector('b')  # int8
        c = tensor.bscalar('c')  # int8

        f = pfunc([a, b, c], (a + b + c), allow_input_downcast=True)
        # Value too big for a, b, or c, silently ignored
        assert f([2 ** 20], [1], 0) == 1
        assert f([3], [312], 0) == 59
        assert f([3], [1], 806) == 42

        g = pfunc([a, b, c], (a + b + c), allow_input_downcast=False)
        # All values are in range. Since they're not ndarrays (but lists
        # or scalars), they will be converted, and their value checked.
        assert numpy.all(g([3], [6], 0) == 9)

        # Values are in range, but a dtype too large has explicitly been given
        # For performance reasons, no check of the data is explicitly performed
        # (It might be OK to change this in the future.)
        self.assertRaises(TypeError, g,
                [3], numpy.array([6], dtype='int16'), 0)

        # Value too big for b, raises TypeError
        self.assertRaises(TypeError, g, [3], [312], 0)

        h = pfunc([a, b, c], (a + b + c))  # Default: allow_input_downcast=None
        # Everything here should behave like with False
        assert numpy.all(h([3], [6], 0) == 9)
        self.assertRaises(TypeError, h,
                [3], numpy.array([6], dtype='int16'), 0)
        self.assertRaises(TypeError, h, [3], [312], 0)
Code example #9
File: memory_network.py Project: JimStearns206/taxi
 def inputs(self):
     return {
         "call_type": tensor.bvector("call_type"),
         "origin_call": tensor.ivector("origin_call"),
         "origin_stand": tensor.bvector("origin_stand"),
         "taxi_id": tensor.wvector("taxi_id"),
         "timestamp": tensor.ivector("timestamp"),
         "day_type": tensor.bvector("day_type"),
         "missing_data": tensor.bvector("missing_data"),
         "latitude": tensor.matrix("latitude"),
         "longitude": tensor.matrix("longitude"),
         "destination_latitude": tensor.vector("destination_latitude"),
         "destination_longitude": tensor.vector("destination_longitude"),
         "travel_time": tensor.ivector("travel_time"),
         "first_k_latitude": tensor.matrix("first_k_latitude"),
         "first_k_longitude": tensor.matrix("first_k_longitude"),
         "last_k_latitude": tensor.matrix("last_k_latitude"),
         "last_k_longitude": tensor.matrix("last_k_longitude"),
         "input_time": tensor.ivector("input_time"),
         "week_of_year": tensor.bvector("week_of_year"),
         "day_of_week": tensor.bvector("day_of_week"),
         "qhour_of_day": tensor.bvector("qhour_of_day"),
         "candidate_call_type": tensor.bvector("candidate_call_type"),
         "candidate_origin_call": tensor.ivector("candidate_origin_call"),
         "candidate_origin_stand": tensor.bvector("candidate_origin_stand"),
         "candidate_taxi_id": tensor.wvector("candidate_taxi_id"),
         "candidate_timestamp": tensor.ivector("candidate_timestamp"),
         "candidate_day_type": tensor.bvector("candidate_day_type"),
         "candidate_missing_data": tensor.bvector("candidate_missing_data"),
         "candidate_latitude": tensor.matrix("candidate_latitude"),
         "candidate_longitude": tensor.matrix("candidate_longitude"),
         "candidate_destination_latitude": tensor.vector("candidate_destination_latitude"),
         "candidate_destination_longitude": tensor.vector("candidate_destination_longitude"),
         "candidate_travel_time": tensor.ivector("candidate_travel_time"),
         "candidate_first_k_latitude": tensor.matrix("candidate_first_k_latitude"),
         "candidate_first_k_longitude": tensor.matrix("candidate_first_k_longitude"),
         "candidate_last_k_latitude": tensor.matrix("candidate_last_k_latitude"),
         "candidate_last_k_longitude": tensor.matrix("candidate_last_k_longitude"),
         "candidate_input_time": tensor.ivector("candidate_input_time"),
         "candidate_week_of_year": tensor.bvector("candidate_week_of_year"),
         "candidate_day_of_week": tensor.bvector("candidate_day_of_week"),
         "candidate_qhour_of_day": tensor.bvector("candidate_qhour_of_day"),
     }
Code example #10
File: rnn.py Project: JimStearns206/taxi
 def inputs(self):
     return {'call_type': tensor.bvector('call_type'),
             'origin_call': tensor.ivector('origin_call'),
             'origin_stand': tensor.bvector('origin_stand'),
             'taxi_id': tensor.wvector('taxi_id'),
             'timestamp': tensor.ivector('timestamp'),
             'day_type': tensor.bvector('day_type'),
             'missing_data': tensor.bvector('missing_data'),
             'latitude': tensor.matrix('latitude'),
             'longitude': tensor.matrix('longitude'),
             'latitude_mask': tensor.matrix('latitude_mask'),
             'longitude_mask': tensor.matrix('longitude_mask'),
             'week_of_year': tensor.bvector('week_of_year'),
             'day_of_week': tensor.bvector('day_of_week'),
             'qhour_of_day': tensor.bvector('qhour_of_day'),
             'destination_latitude': tensor.vector('destination_latitude'),
             'destination_longitude': tensor.vector('destination_longitude')}
Code example #11
    def create_models(self):

        if self.verbose:
            print("Creating Training model...")

        x = T.tensor3('x', dtype=theano.config.floatX)
        y = T.wvector('y')  # int16

        self.model.create_computational_graph(x, y)

        index = T.wscalar()  # int16

        self.train_model = \
            theano.function(
                [index],
                [self.model.cost, self.model.error,
                 self.model.negative_log_likelihood, self.model.penalty,
                 self.model.sensitivity, self.model.specificity],
                updates=self.model.updates,
                givens={x:
                        self.data_handler.training_data[
                            index * self.mini_batch_size:
                            (index + 1) * self.mini_batch_size],
                        y:
                        self.data_handler.training_labels[
                            index * self.mini_batch_size:
                            (index + 1) * self.mini_batch_size]
                        }
            )
        if self.verbose:
            print("Training model created.")

        if self.verbose:
            print("Creating Test model...")
        self.test_model = \
            theano.function(
                [x, y],
                [self.model.error,
                 self.model.sensitivity, self.model.specificity,
                 self.model.fully_connected_layer_output])

        if self.verbose:
            print("Test model created.")

        self.feature_extractor = theano.function(
            [x], self.model.fully_connected_layer_output)
Code example #12
File: main.py Project: Bergalerga/AIProg
    def build_sigmoid_ann(self,nb,nh):
        '''
        Builds a neural network, using sigmoids as the activation function.
        '''
        print("Building sigmoid ann")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb,nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh,10)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = Tann.sigmoid(T.dot(input,w1))
        x2 = Tann.sigmoid(T.dot(x1,w2))
        error = T.sum(pow((target - x2), 2))
        params = [w1, w2]
        gradients = T.grad(error, params)
        backprop_acts = [(p, p - self.lrate*g) for p,g in zip(params,gradients)]

        self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprop_acts, allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input], outputs=x2, allow_input_downcast=True)
Code example #13
 def inputs(self):
     return {
         'call_type': tensor.bvector('call_type'),
         'origin_call': tensor.ivector('origin_call'),
         'origin_stand': tensor.bvector('origin_stand'),
         'taxi_id': tensor.wvector('taxi_id'),
         'timestamp': tensor.ivector('timestamp'),
         'day_type': tensor.bvector('day_type'),
         'missing_data': tensor.bvector('missing_data'),
         'latitude': tensor.matrix('latitude'),
         'longitude': tensor.matrix('longitude'),
         'latitude_mask': tensor.matrix('latitude_mask'),
         'longitude_mask': tensor.matrix('longitude_mask'),
         'week_of_year': tensor.bvector('week_of_year'),
         'day_of_week': tensor.bvector('day_of_week'),
         'qhour_of_day': tensor.bvector('qhour_of_day'),
         'destination_latitude': tensor.vector('destination_latitude'),
         'destination_longitude': tensor.vector('destination_longitude')
     }
Code example #14
File: test_matrixmul.py Project: yo-ga/TextDetector
def test_matrixmul():
    """
    Tests for projection
    """
    rng = np.random.RandomState(222)
    dtypes = [
        'int16', 'int32', 'int64'
    ]
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.wvector(),
        tensor.ivector(),
        tensor.lvector()
    ]
    np_W, np_x = [], []
    for dtype in dtypes:
        np_W.append(rng.rand(10, np.random.randint(1, 10)))
        np_x.append(rng.randint(
            0, 10, (rng.random_integers(5),
                    rng.random_integers(5))
        ).astype(dtype))
    for dtype in dtypes:
        np_W.append(rng.rand(10, np.random.randint(1, 10)))
        np_x.append(
            rng.randint(0, 10, (rng.random_integers(5),)).astype(dtype)
        )

    tensor_W = [sharedX(W) for W in np_W]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [theano.function([x], mm.project(x))
          for x, mm in zip(tensor_x, matrixmul)]
    for W, x, f in zip(np_W, np_x, fn):
        W_x = W[x]
        if x.ndim == 2:
            W_x = W_x.reshape((W_x.shape[0], np.prod(W_x.shape[1:])))
        else:
            W_x = W_x.flatten()
        np.testing.assert_allclose(f(x), W_x)
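
The reference computation above treats projection as an embedding lookup:
integer indices select rows of W, and a 2-D index array gets its per-row
lookups flattened into a single feature axis. The same computation in isolation:

import numpy as np

rng = np.random.RandomState(0)
W = rng.rand(10, 4)
x = rng.randint(0, 10, (3, 5)).astype('int16')
W_x = W[x]                                   # shape (3, 5, 4): one row of W per index
W_x = W_x.reshape((W_x.shape[0], np.prod(W_x.shape[1:])))
print(W_x.shape)                             # (3, 20)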
Code example #15
File: main.py Project: Bergalerga/AIProg
    def build_rectified_linear2_ann(self, nb, nh, nh2):
        '''
        Builds a neural network with two hidden layers of rectified linear
        units as the activation function.
        '''
        print("Building rectified linear ann")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb,nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh,nh2)))
        w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 10)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = T.switch(T.dot(input,w1) > 0, T.dot(input,w1), 0)
        x2 = T.switch(T.dot(x1,w2) > 0, T.dot(x1,w2), 0)
        x3 = T.switch(T.dot(x2, w3) > 0, T.dot(x2, w3), 0)
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate*g) for p,g in zip(params,gradients)]

        self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input], outputs=x3, allow_input_downcast=True)
Code example #16
def compileModel(data, nInputs, nOutputs, hiddenLayersSize = [1200, 1200], dropoutRates = [0.2, 0.5, 0.5],
                  activation = 'relu', weightInitMode = 'normal', regularizer = 0.0001):
    """
    Creates a symbolic model given the specified parameters using Theano
    
    Output:
    A list containing three the training, validation and test compiled functions of Theano
    """
    
    
    np.random.seed(815)
    
    x = T.matrix('x')
    y = T.wvector('y')
    learningRate = T.scalar('learningRate')
    regularization = T.scalar('regularization')
    
    #Data sets
    train_x, train_y = data[0]
    valid_x, valid_y = data[1]
    test_x, test_y = data[2]
    
    nnet = MLP(x, nInputs, hiddenLayersSize, nOutputs, dropoutRates = dropoutRates,
                activation = activation, weightInitMode = weightInitMode)
    
    loss = nnet.loss(y, regularization)
    error = nnet.error(y)
    
    gParams = T.grad(loss, nnet.params)
    
    weightUpdates = [(param, param - learningRate * gParam) for param, gParam in zip(nnet.params, gParams)]    
    
    
    batchIndicesVecctor = T.ivector('batchIndicesVecctor')
    trainF = function([batchIndicesVecctor, learningRate, regularization], Out(sbasic.gpu_from_host(loss), borrow = True), updates = weightUpdates, givens = {x: train_x[batchIndicesVecctor], y: train_y[batchIndicesVecctor]})
    validF = function([batchIndicesVecctor], Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)), borrow = True), givens = {x: valid_x[batchIndicesVecctor], y: valid_y[batchIndicesVecctor]})
    testF = function([batchIndicesVecctor], Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)), borrow = True), givens = {x: test_x[batchIndicesVecctor], y: test_y[batchIndicesVecctor]})
    
    return [trainF, validF, testF]
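
The givens={x: train_x[batchIndicesVecctor], ...} substitutions above replace
the symbolic x and y with fancy-indexed slices of the data set, so each call
picks a minibatch by row index. The selection itself, sketched with stand-in
NumPy arrays:

import numpy as np

train_x = np.random.rand(1000, 20)           # stand-in for the shared training set
batch_indices = np.array([3, 7, 42], dtype='int32')
minibatch = train_x[batch_indices]           # rows 3, 7 and 42 -> shape (3, 20)
print(minibatch.shape)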
Code example #17
File: main.py Project: talepre/AIProg
    def build_sigmoid_ann(self, nb, nh):
        '''
        Builds a neural network, using sigmoids as the activation function.
        '''
        print("Building sigmoid ann")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, 10)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = Tann.sigmoid(T.dot(input, w1))
        x2 = Tann.sigmoid(T.dot(x1, w2))
        error = T.sum(pow((target - x2), 2))
        params = [w1, w2]
        gradients = T.grad(error, params)
        backprop_acts = [(p, p - self.lrate * g)
                         for p, g in zip(params, gradients)]

        self.trainer = theano.function(inputs=[input, target],
                                       outputs=error,
                                       updates=backprop_acts,
                                       allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input],
                                         outputs=x2,
                                         allow_input_downcast=True)
Code example #18
File: ann.py Project: Bergalerga/AIProg
    def build_2048_ann(self, nb, nh, nh2):
        '''
        nb  = input nodes
        nh  = first hidden layer size
        nh2 = second hidden layer size
        '''
        print("building")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb,nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh,nh2)))
        w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 4)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = T.switch(T.dot(input,w1) > 0, T.dot(input,w1), 0)
        x2 = T.switch(T.dot(x1,w2) > 0, T.dot(x1,w2), 0)
        x3 = Tann.softmax(T.dot(x2, w3))
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate*g) for p,g in zip(params,gradients)]

        self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input], outputs=x3, allow_input_downcast=True)
        print("Built")
Code example #19
    def __init__(self,input_image_size, batchsize=None, ImageDepth = 1,
                 InputImageDimensions = None, bSupportVariableBatchsize=True,
                 bDropoutEnabled_ = False, bInputIsFlattened=False, verbose = 1, bWeightDecay = False):
        """ Otherwise:
            Assuming that <input_image_size> == Image_width == Image_height UNLESS it is a tuple
            <InputImageDimensions> my be

            <ImageDepth> is 1 by default, but change it to 3 if you use RGB images, 4 if you use RGB-D images etc.

            bDropoutEnabled_ must be set to True if it is to be used anywhere in the network!
            You can disable it at any time in the future (incurring a speed-performance loss as compared to disabling it right here)
        """
        if bSupportVariableBatchsize==True:
            batchsize = None
            self.batchsize = None
            #print "bSupportVariableBatchsize is in EXPERIMENTAL stage!"

        else:
            self.batchsize = batchsize
        if not isinstance(InputImageDimensions, list) and not isinstance(InputImageDimensions, tuple):
            if InputImageDimensions is None:
                print "assuming input dimension==1 (if wrong: specify <InputImageDimensions> or set <input_image_size> as a tuple)"
                InputImageDimensions=1
            input_image_size = (int(input_image_size),)*InputImageDimensions

        self.y = T.wvector('y_cnn_labels')   # the labels are presented as 1D vector (int16) (ivector is int32)
        self.rng = numpy.random.RandomState(int(time.time()))
        self.layers = [] #will contain all layers ([0] input layer ---> [-1] output layer)
        self.autoencoderChains=[]
        self.output_layers = [] # this will stay empty, UNLESS you use addOutputFunction ... these layers will NOT be included in self.layers!
        self.SGD_global_LR_output_layers_multiplicator = theano.shared(np.float32(1.0))
        self.TotalForwardPassCost = 0 # number of multiplications done
        self.verbose = verbose
        self.output = None
        #self.output_layers_params = []
        self.params=[] # after calling CompileOutputFunctions():
        self.bDropoutEnabled = bDropoutEnabled_

        # Reshape matrix of rasterized images of shape (batch_size,input_image_size*input_image_size)
        # to a 4D tensor, compatible with our ConvPoolLayer

        if ImageDepth==1 and InputImageDimensions!=3:
            if bInputIsFlattened or InputImageDimensions==1:
                self.x = T.fmatrix('x_cnn_input')   # the data is presented as rasterized images (np.float32)
            else:
                self.x = T.ftensor4('x_cnn_input')
        else:
            if InputImageDimensions==3:
                self.x = T.TensorType('float32',(False,)*5,name='x_cnn_input')('x_cnn_input')
            else:
                self.x = T.ftensor4('x_cnn_input')   #

        assert InputImageDimensions in [1,2,3],"MixedConvNN::InputImageDimensions  currently unsupported"

        if InputImageDimensions==2:
            if self.batchsize != None:
                self.layer0_input = self.x.reshape((batchsize, ImageDepth, input_image_size[0], input_image_size[1])) #1st entry is batch_size, but it is 1 for the all-pure-convolutional net
            else:
                self.layer0_input = self.x
            self.input_shape = (batchsize, ImageDepth, input_image_size[0], input_image_size[1]) # valid for FIRST LAYER only. each layer has one entry called like this
        elif InputImageDimensions==3:
            if self.batchsize != None:

                self.layer0_input = self.x.reshape((batchsize, input_image_size[0], ImageDepth, input_image_size[1], input_image_size[2])) #1st entry is batch_size, but it is 1 for the all-pure-convolutional net
            else:
                self.layer0_input = self.x
            self.input_shape = (batchsize, input_image_size[0], ImageDepth, input_image_size[1], input_image_size[2])
        else:
            if self.batchsize != None:
                self.layer0_input = self.x.reshape((batchsize, input_image_size[0]))
            else:
                self.layer0_input = self.x
            self.input_shape = (batchsize, input_image_size[0]) # valid for FIRST LAYER only. each layer has one entry called like this

        self.SGD_global_LR = theano.shared(np.float32(1e-3))
        self.SGD_momentum = theano.shared(np.float32(0.9))
        

        self.debug_functions=[]
        self.debug_functions_conv_output=[]
        self.debug_gradients_function=None
        self.debug_lgradients_function=None

        self.output_stride = 1 #for fragment-max-pooling (fast segmentation/sliding window)

        self.bWeightDecay = bWeightDecay
        self.CompileSGD   = NN_Optimizers.CompileSGD
        self.CompileRPROP = NN_Optimizers.CompileRPROP
        #self.compileCG    = NN_Optimizers.compileCG
        #self.CompileARP   = NN_Optimizers.CompileARP
        self.CompileADADELTA   = NN_Optimizers.CompileADADELTA
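
The layer0_input reshape above converts rasterized image rows into the
(batchsize, ImageDepth, height, width) layout the conv layers expect. A NumPy
sketch of the 2-D case with made-up sizes:

import numpy as np

batchsize, ImageDepth, height, width = 2, 1, 28, 28
flat = np.random.rand(batchsize, ImageDepth * height * width).astype('float32')
layer0_input = flat.reshape((batchsize, ImageDepth, height, width))
print(layer0_input.shape)                    # (2, 1, 28, 28)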
Code example #20
File: memory_network.py Project: JimStearns206/taxi
 def inputs(self):
     return {
         'call_type': tensor.bvector('call_type'),
         'origin_call': tensor.ivector('origin_call'),
         'origin_stand': tensor.bvector('origin_stand'),
         'taxi_id': tensor.wvector('taxi_id'),
         'timestamp': tensor.ivector('timestamp'),
         'day_type': tensor.bvector('day_type'),
         'missing_data': tensor.bvector('missing_data'),
         'latitude': tensor.matrix('latitude'),
         'longitude': tensor.matrix('longitude'),
         'destination_latitude': tensor.vector('destination_latitude'),
         'destination_longitude': tensor.vector('destination_longitude'),
         'travel_time': tensor.ivector('travel_time'),
         'first_k_latitude': tensor.matrix('first_k_latitude'),
         'first_k_longitude': tensor.matrix('first_k_longitude'),
         'last_k_latitude': tensor.matrix('last_k_latitude'),
         'last_k_longitude': tensor.matrix('last_k_longitude'),
         'input_time': tensor.ivector('input_time'),
         'week_of_year': tensor.bvector('week_of_year'),
         'day_of_week': tensor.bvector('day_of_week'),
         'qhour_of_day': tensor.bvector('qhour_of_day'),
         'candidate_call_type': tensor.bvector('candidate_call_type'),
         'candidate_origin_call': tensor.ivector('candidate_origin_call'),
         'candidate_origin_stand': tensor.bvector('candidate_origin_stand'),
         'candidate_taxi_id': tensor.wvector('candidate_taxi_id'),
         'candidate_timestamp': tensor.ivector('candidate_timestamp'),
         'candidate_day_type': tensor.bvector('candidate_day_type'),
         'candidate_missing_data': tensor.bvector('candidate_missing_data'),
         'candidate_latitude': tensor.matrix('candidate_latitude'),
         'candidate_longitude': tensor.matrix('candidate_longitude'),
         'candidate_destination_latitude': tensor.vector('candidate_destination_latitude'),
         'candidate_destination_longitude': tensor.vector('candidate_destination_longitude'),
         'candidate_travel_time': tensor.ivector('candidate_travel_time'),
         'candidate_first_k_latitude': tensor.matrix('candidate_first_k_latitude'),
         'candidate_first_k_longitude': tensor.matrix('candidate_first_k_longitude'),
         'candidate_last_k_latitude': tensor.matrix('candidate_last_k_latitude'),
         'candidate_last_k_longitude': tensor.matrix('candidate_last_k_longitude'),
         'candidate_input_time': tensor.ivector('candidate_input_time'),
         'candidate_week_of_year': tensor.bvector('candidate_week_of_year'),
         'candidate_day_of_week': tensor.bvector('candidate_day_of_week'),
         'candidate_qhour_of_day': tensor.bvector('candidate_qhour_of_day')
     }
Code example #21
def compileModel(data,
                 nInputs,
                 nOutputs,
                 hiddenLayersSize=[1200, 1200],
                 dropoutRates=[0.2, 0.5, 0.5],
                 activation='relu',
                 weightInitMode='normal',
                 regularizer=0.0001):
    """
    Creates a symbolic model given the specified parameters using Theano
    
    Output:
    A list containing three the training, validation and test compiled functions of Theano
    """

    np.random.seed(815)

    x = T.matrix('x')
    y = T.wvector('y')
    learningRate = T.scalar('learningRate')
    regularization = T.scalar('regularization')

    #Data sets
    train_x, train_y = data[0]
    valid_x, valid_y = data[1]
    test_x, test_y = data[2]

    nnet = MLP(x,
               nInputs,
               hiddenLayersSize,
               nOutputs,
               dropoutRates=dropoutRates,
               activation=activation,
               weightInitMode=weightInitMode)

    loss = nnet.loss(y, regularization)
    error = nnet.error(y)

    gParams = T.grad(loss, nnet.params)

    weightUpdates = [(param, param - learningRate * gParam)
                     for param, gParam in zip(nnet.params, gParams)]

    batchIndicesVecctor = T.ivector('batchIndicesVecctor')
    trainF = function([batchIndicesVecctor, learningRate, regularization],
                      Out(sbasic.gpu_from_host(loss), borrow=True),
                      updates=weightUpdates,
                      givens={
                          x: train_x[batchIndicesVecctor],
                          y: train_y[batchIndicesVecctor]
                      })
    validF = function([batchIndicesVecctor],
                      Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)),
                          borrow=True),
                      givens={
                          x: valid_x[batchIndicesVecctor],
                          y: valid_y[batchIndicesVecctor]
                      })
    testF = function([batchIndicesVecctor],
                     Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)),
                         borrow=True),
                     givens={
                         x: test_x[batchIndicesVecctor],
                         y: test_y[batchIndicesVecctor]
                     })

    return [trainF, validF, testF]
Code example #22
    path)  # the artificial data is set up in dlp_art_data.py

#dlp.neglogl(theta0, W, X, ZA, ZB, ZE, S, setup)

import theano
import theano.tensor as t
#f = theano.function()

thetat = t.fvector()
Wt = t.fmatrix()
Xt = t.fvector()
ZAt = t.fmatrix()
ZBt = t.fmatrix()
ZEt = t.fmatrix()
St = t.fvector()
setupt = t.wvector()

nlogl = dlp.neglogl(thetat, Wt, Xt, ZAt, ZBt, ZEt, St, setup)


class linreg(object):
    def __init__(self, beta, y, x):
        self.beta = beta
        self.y = y
        self.x = x

    def mu(self):
        return t.dot(self.beta, self.x.T)

    def rss(self):
        diff = (self.y - self.mu())**2
Code example #23
    if len(valid_data[bb]) >= batch_size:
        valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove, \
            sequence_length, stride_length, buckets[bb], batch_size))

#for i in range(len(train_gens)):
#    train_gen = train_gens[i]
#    for index in range(train_gen.max_index):
#        # run minibatch
#        for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#            print(i, index)

#================Build graph================#

x = T.ftensor3('X')  # (batch_size, sequence_length, 300)
m = T.wmatrix('M')  # (batch_size, sequence_length)
r = T.wvector('r')  # (batch_size,)
x_ext = T.ftensor3('X_ext')
m_ext = T.wmatrix('M_ext')
y_ext = T.imatrix('Y_ext')
r_ext = T.wvector('r_ext')

encoder = SimpleGraph(experiment_name + '_enc', batch_size)
encoder.add_layer(LSTMRecurrentLayer(input_shape=(300, ),
                                     output_shape=(512, ),
                                     forget_bias_one=True,
                                     peephole=True,
                                     output_return_index=[-1],
                                     save_state_index=stride_length - 1,
                                     also_return_cell=True,
                                     precompute=False,
                                     unroll=False,
Code example #24
    def __init__(self,
                 input_image_size,
                 batchsize=None,
                 ImageDepth=1,
                 InputImageDimensions=None,
                 bSupportVariableBatchsize=True,
                 bDropoutEnabled_=False,
                 bInputIsFlattened=False,
                 verbose=1,
                 bWeightDecay=False):
        """ Otherwise:
            Assuming that <input_image_size> == Image_width == Image_height UNLESS it is a tuple
            <InputImageDimensions> my be

            <ImageDepth> is 1 by default, but change it to 3 if you use RGB images, 4 if you use RGB-D images etc.

            bDropoutEnabled_ must be set to True if it is to be used anywhere in the network!
            You can disable it at any time in the future (incurring a speed-performance loss as compared to disabling it right here)
        """
        if bSupportVariableBatchsize == True:
            batchsize = None
            self.batchsize = None
            #print "bSupportVariableBatchsize is in EXPERIMENTAL stage!"

        else:
            self.batchsize = batchsize
        if not isinstance(InputImageDimensions, list) and not isinstance(
                InputImageDimensions, tuple):
            if InputImageDimensions is None:
                print(
                    "assuming input dimension==1 (if wrong: specify <InputImageDimensions> or set <input_image_size> as a tuple)"
                )
                InputImageDimensions = 1
            input_image_size = (int(input_image_size), ) * InputImageDimensions

        self.y = T.wvector('y_cnn_labels')  # the labels are presented as 1D vector (int16) (ivector is int32)
        self.rng = numpy.random.RandomState(int(time.time()))
        self.layers = []  # will contain all layers ([0] input layer ---> [-1] output layer)
        self.autoencoderChains = []
        self.output_layers = []  # this will stay empty, UNLESS you use addOutputFunction ... these layers will NOT be included in self.layers!
        self.SGD_global_LR_output_layers_multiplicator = theano.shared(
            np.float32(1.0))
        self.TotalForwardPassCost = 0  # number of multiplications done
        self.verbose = verbose
        self.output = None
        #self.output_layers_params = []
        self.params = []  # after calling CompileOutputFunctions():
        self.bDropoutEnabled = bDropoutEnabled_

        # Reshape matrix of rasterized images of shape (batch_size,input_image_size*input_image_size)
        # to a 4D tensor, compatible with our ConvPoolLayer

        if ImageDepth == 1 and InputImageDimensions != 3:
            if bInputIsFlattened or InputImageDimensions == 1:
                self.x = T.fmatrix('x_cnn_input')  # the data is presented as rasterized images (np.float32)
            else:
                self.x = T.ftensor4('x_cnn_input')
        else:
            if InputImageDimensions == 3:
                self.x = T.TensorType('float32', (False, ) * 5,
                                      name='x_cnn_input')('x_cnn_input')
            else:
                self.x = T.ftensor4('x_cnn_input')  #

        assert InputImageDimensions in [1, 2, 3], \
            "MixedConvNN::InputImageDimensions  currently unsupported"

        if InputImageDimensions == 2:
            if self.batchsize != None:
                self.layer0_input = self.x.reshape(
                    (batchsize, ImageDepth, input_image_size[0],
                     input_image_size[1])
                )  #1st entry is batch_size, but it is 1 for the all-pure-convolutional net
            else:
                self.layer0_input = self.x
            self.input_shape = (
                batchsize, ImageDepth, input_image_size[0], input_image_size[1]
            )  # valid for FIRST LAYER only. each layer has one entry called like this
        elif InputImageDimensions == 3:
            if self.batchsize != None:

                self.layer0_input = self.x.reshape(
                    (batchsize, input_image_size[0], ImageDepth,
                     input_image_size[1], input_image_size[2])
                )  #1st entry is batch_size, but it is 1 for the all-pure-convolutional net
            else:
                self.layer0_input = self.x
            self.input_shape = (batchsize, input_image_size[0], ImageDepth,
                                input_image_size[1], input_image_size[2])
        else:
            if self.batchsize != None:
                self.layer0_input = self.x.reshape(
                    (batchsize, input_image_size[0]))
            else:
                self.layer0_input = self.x
            self.input_shape = (
                batchsize, input_image_size[0]
            )  # valid for FIRST LAYER only. each layer has one entry called like this

        self.SGD_global_LR = theano.shared(np.float32(1e-3))
        self.SGD_momentum = theano.shared(np.float32(0.9))

        self.debug_functions = []
        self.debug_functions_conv_output = []
        self.debug_gradients_function = None
        self.debug_lgradients_function = None

        self.output_stride = 1  #for fragment-max-pooling (fast segmentation/sliding window)

        self.bWeightDecay = bWeightDecay
        self.CompileSGD = NN_Optimizers.CompileSGD
        self.CompileRPROP = NN_Optimizers.CompileRPROP
        #self.compileCG    = NN_Optimizers.compileCG
        #self.CompileARP   = NN_Optimizers.CompileARP
        self.CompileADADELTA = NN_Optimizers.CompileADADELTA
Code example #25
def objective_train_model(params):
    # Initialise parameters
    start = timeit.default_timer()
    print(params)
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']
    #%%
    # Load training data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(os.path.join(path,
                                         'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))
    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()
    #%%
    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)
    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')
    # Build network
    network = build_nn(input_var,
                       mask,
                       num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)
    # This function returns the values of the parameters
    # of all layers below one or more given Layer instances,
    # including the layer(s) itself.

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):

    prediction = lasagne.layers.get_output(network)

    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation

    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty

    # We could add some weight decay as well here, see lasagne.regularization.
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Adadelta
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss,
                                      parameters,
                                      learning_rate=learn_rate)
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    test_loss = lasagne.objectives.categorical_crossentropy(
        prediction, target_var)
    test_loss = test_loss * rmdoublesmask
    test_loss = lasagne.objectives.aggregate(test_loss, mask.flatten())

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, mask, rmdoublesmask, target_var],
                             [test_loss, test_prediction],
                             on_unused_input='warn')
    #%%
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray,
                                         train_mask,
                                         train_rmdoublemask,
                                         train_label,
                                         mb_size,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)

    #%%
    # Load development data
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(os.path.join(path,
                                       'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))

    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()

    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

    #%%
    # get predictions for development set
    err, val_ypred = val_fn(dev_brancharray, dev_mask, dev_rmdoublemask,
                            dev_label.flatten())
    val_ypred = numpy.argmax(val_ypred, axis=1).astype(numpy.int32)

    acv_label = dev_label.flatten()
    acv_prediction = numpy.asarray(val_ypred)
    acv_mask = dev_mask.flatten()
    clip_dev_label = [o for o, m in zip(acv_label, acv_mask) if m == 1]
    clip_dev_ids = [o for o, m in zip(dev_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [
        o for o, m in zip(acv_prediction, acv_mask) if m == 1
    ]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_label = [clip_dev_label[i] for i in uindices2]
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]
    dev_accuracy = accuracy_score(uniq_dev_label, uniq_dev_prediction)
    mactest_P, mactest_R, mactest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='macro')
    mictest_P, mictest_R, mictest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='micro')
    test_P, test_R, test_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction)
    # to change scoring objective you need to change 'loss'
    output = {
        'loss': 1 - dev_accuracy,
        'status': STATUS_OK,
        'Params': params,
        'Macro': {
            'Macro_Precision': mactest_P,
            'Macro_Recall': mactest_R,
            'macro_F_score': mactest_F
        },
        'Micro': {
            'Micro_Precision': mictest_P,
            'Micro_Recall': mictest_R,
            'micro_F_score': mictest_F
        },
        'Support': {
            'Support_Precision': test_P[0],
            'Support_Recall': test_R[0],
            'Support_F_score': test_F[0]
        },
        'Comment': {
            'Comment_Precision': test_P[1],
            'Comment_Recall': test_R[1],
            'Comment_F_score': test_F[1]
        },
        'Deny': {
            'Deny_Precision': test_P[2],
            'Deny_Recall': test_R[2],
            'Deny_F_score': test_F[2]
        },
        'Appeal': {
            'Appeal_Precision': test_P[3],
            'Appeal_Recall': test_R[3],
            'Appeal_F_score': test_F[3]
        },
        'attachments': {
            'Labels': pickle.dumps(uniq_dev_label),
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }

    print("1-accuracy loss = ", output['loss'])

    stop = timeit.default_timer()
    print("Time: ", stop - start)
    return output
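
The evaluation block above keeps only positions where the mask is 1 and then
drops repeated ids with numpy.unique(..., return_index=True), which returns the
index of the first occurrence of each unique id. A minimal sketch of that
filtering with made-up values:

import numpy as np

labels = [2, 0, 1, 2, 1]
ids = ['a', 'b', 'a', 'c', 'b']
mask = [1, 1, 1, 0, 1]
clip_ids = [i for i, m in zip(ids, mask) if m == 1]        # ['a', 'b', 'a', 'b']
clip_labels = [l for l, m in zip(labels, mask) if m == 1]  # [2, 0, 1, 1]
uniq_ids, first = np.unique(clip_ids, return_index=True)
print(uniq_ids, [clip_labels[j] for j in first])           # ['a' 'b'] [2, 0]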
Code example #26
File: main.py Project: talepre/AIProg
    def build_custom_ann(self, layer_list, ann_type="rlu", nb=784):
        '''
        Builds a fully connected network from the layer sizes in layer_list,
        using the activation function named by ann_type ("rlu", "sigmoid"
        or "ht").
        '''
        print(ann_type)
        layer_list = [nb] + layer_list
        input = T.dvector('input')
        target = T.wvector('target')
        w_list = []
        x_list = []
        w_list.append(
            theano.shared(
                np.random.uniform(low=-.1,
                                  high=.1,
                                  size=(layer_list[0], layer_list[1]))))
        if ann_type == "rlu":
            x_list.append(
                T.switch(
                    T.dot(input, w_list[0]) > 0, T.dot(input, w_list[0]), 0))
        elif ann_type == "sigmoid":
            x_list.append(Tann.sigmoid(T.dot(input, w_list[0])))
        elif ann_type == "ht":
            x_list.append(T.tanh(T.dot(input, w_list[0])))

        for count in range(0, len(layer_list) - 2):
            print("looping")
            w_list.append(
                theano.shared(
                    np.random.uniform(low=-.1,
                                      high=.1,
                                      size=(layer_list[count + 1],
                                            layer_list[count + 2]))))
            if ann_type == "rlu":
                x_list.append(
                    T.switch(
                        T.dot(x_list[count], w_list[count + 1]) > 0,
                        T.dot(x_list[count], w_list[count + 1]), 0))
            elif ann_type == "sigmoid":
                x_list.append(
                    Tann.sigmoid(T.dot(x_list[count], w_list[count + 1])))
            elif ann_type == "ht":
                x_list.append(T.tanh(T.dot(x_list[count], w_list[count + 1])))

        print(len(x_list))
        print(len(w_list))
        w_list.append(
            theano.shared(
                np.random.uniform(low=-.1, high=.1,
                                  size=(layer_list[-1], 10))))
        x_list.append(
            T.switch(
                T.dot(x_list[-1], w_list[-1]) > 0,
                T.dot(x_list[-1], w_list[-1]), 0))

        error = T.sum(pow((target - x_list[-1]), 2))
        params = w_list
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate * g)
                     for p, g in zip(params, gradients)]

        self.trainer = theano.function(inputs=[input, target],
                                       outputs=error,
                                       updates=backprops,
                                       allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input],
                                         outputs=x_list[-1],
                                         allow_input_downcast=True)
Code example #27
File: predict.py Project: martintoreilly/branchLSTM
def eval_train_model(params):
    print("Retrain model on train+dev set and evaluate on testing set")
    # Initialise parameters
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']
    #%%
    # Load data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(os.path.join(path,
                                         'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))

    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()
    #%%
    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)
    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')
    # Build network
    network = build_nn(input_var,
                       mask,
                       num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)
    # This function returns the values of the parameters of all
    # layers below one or more given Layer instances,
    # including the layer(s) itself.

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation
    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty

    # We could add some weight decay as well here, see lasagne.regularization.
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step.
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss,
                                      parameters,
                                      learning_rate=learn_rate)
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, mask],
                             test_prediction,
                             on_unused_input='warn')
    #%%
    # READ THE DATA
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(os.path.join(path,
                                       'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))

    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()

    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

    test_brancharray = numpy.load(os.path.join(path, 'test/branch_arrays.npy'))
    test_mask = numpy.load(os.path.join(path,
                                        'test/mask.npy')).astype(numpy.int16)

    test_rmdoublemask = numpy.load(os.path.join(
        path, 'test/rmdoublemask.npy')).astype(numpy.int16).flatten()

    with open(os.path.join(path, 'test/ids.pkl'), 'rb') as handle:
        test_ids_padarray = pickle.load(handle)

    #%%
    # start training loop
    # We iterate over epochs:
    for epoch in range(num_epochs):
        #print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray,
                                         train_mask,
                                         train_rmdoublemask,
                                         train_label,
                                         mb_size,
                                         max_seq_len=25,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
        for batch in iterate_minibatches(dev_brancharray,
                                         dev_mask,
                                         dev_rmdoublemask,
                                         dev_label,
                                         mb_size,
                                         max_seq_len=20,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
    # And a full pass over the test data:
    test_ypred = val_fn(test_brancharray, test_mask)
    # get class label instead of probabilities
    new_test_ypred = numpy.argmax(test_ypred, axis=1).astype(numpy.int32)

    #Take mask into account
    acv_prediction = numpy.asarray(new_test_ypred)
    acv_mask = test_mask.flatten()
    clip_dev_ids = [o for o, m in zip(test_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [
        o for o, m in zip(acv_prediction, acv_mask) if m == 1
    ]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]
    output = {
        'status': STATUS_OK,
        'Params': params,
        'attachments': {
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }

    return output
Code example #28
        valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove, \
            sequence_length, stride_length, buckets[bb], batch_size))

#for i in range(len(train_gens)):
#    train_gen = train_gens[i]
#    for index in range(train_gen.max_index):
#        # run minibatch
#        for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#            print(i, index)

#================Build graph================#

x = T.ftensor3('X')  # (batch_size, sequence_length, 300)
m = T.wmatrix('M')  # (batch_size, sequence_length)
y = T.imatrix('Y')  # (batch_size, sequence_length)
r = T.wvector('r')  # (batch_size,)

graph = SimpleGraph(experiment_name, batch_size)
graph.add_layer(LSTMRecurrentLayer(input_shape=(300,),
                                   output_shape=(1024,),
                                   forget_bias_one=True,
                                   peephole=True,
                                   output_return_index=None,
                                   save_state_index=stride_length-1,
                                   precompute=False,
                                   unroll=False,
                                   backward=False), is_start=True)
# graph.add_layer(TimeDistributedDenseLayer((1024,), (512,)))  # not much time difference, and less memory
graph.add_layer(DenseLayer((1024,), (512,)))
graph.add_layer(TimeDistributedDenseLayerSCP((512,), (glove.vocabulary,)))