Example #1
    def __init__(self, input_dim, output_dim, truncate_gradient=-1, return_sequences=True,
                 weight_init=Orthogonal(mean=0, std=0.1), inner_init=Gaussian(mean=0, std=0.1)):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.W_i = weight_init((self.input_dim, self.output_dim))
        self.U_i = inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = weight_init((self.input_dim, self.output_dim))
        self.U_f = inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_ones((self.output_dim))  # forget-gate bias initialised to ones

        self.W_c = weight_init((self.input_dim, self.output_dim))
        self.U_c = inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = weight_init((self.input_dim, self.output_dim))
        self.U_o = inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
        ]
Example #2
    def __init__(self, input_dim, output_dim, truncate_gradient=-1, return_sequences=True,
                weight_init=OrthogonalWeight(), inner_init=GaussianWeight(mean=0, std=0.1)):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.W_i = weight_init((self.input_dim, self.output_dim))
        self.U_i = inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim), name='b_i')

        self.W_f = weight_init((self.input_dim, self.output_dim))
        self.U_f = inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_ones((self.output_dim), name='b_f')

        self.W_c = weight_init((self.input_dim, self.output_dim))
        self.U_c = inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim), name='b_c')

        self.W_o = weight_init((self.input_dim, self.output_dim))
        self.U_o = inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim), name='b_o')

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
        ]
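For context, a minimal NumPy sketch of the single recurrence step these gate parameters feed (the step function, the sigmoid helper, and the argument names are illustrative assumptions; the original class would drive this through theano.scan):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_tm1, c_tm1, p):
    # each gate combines input-to-hidden (W), hidden-to-hidden (U) and bias (b)
    i = sigmoid(x_t @ p['W_i'] + h_tm1 @ p['U_i'] + p['b_i'])        # input gate
    f = sigmoid(x_t @ p['W_f'] + h_tm1 @ p['U_f'] + p['b_f'])        # forget gate
    c_tilde = np.tanh(x_t @ p['W_c'] + h_tm1 @ p['U_c'] + p['b_c'])  # candidate cell
    o = sigmoid(x_t @ p['W_o'] + h_tm1 @ p['U_o'] + p['b_o'])        # output gate
    c_t = f * c_tm1 + i * c_tilde
    h_t = o * np.tanh(c_t)
    return h_t, c_t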
Example #3
    def __init__(self,
                 input_dim,
                 bottlenet_dim,
                 z_dim,
                 weight_init=GaussianWeight(mean=0, std=0.01)):

        self.input_dim = input_dim
        self.bottlenet_dim = bottlenet_dim

        # encoder
        self.W_e = weight_init((input_dim, bottlenet_dim), name='W_e')
        self.b_e = shared_zeros(shape=bottlenet_dim, name='b_e')
        self.W_miu = weight_init((bottlenet_dim, z_dim), name='W_miu')
        self.b_miu = shared_zeros(shape=z_dim, name='b_miu')
        self.W_sig = weight_init((bottlenet_dim, z_dim), name='W_sig')
        self.b_sig = shared_zeros(shape=z_dim, name='b_sig')
        # decoder
        self.W1_d = weight_init((z_dim, bottlenet_dim), name='W1_d')
        self.b1_d = shared_zeros(shape=bottlenet_dim, name='b1_d')
        self.W2_d = weight_init((bottlenet_dim, input_dim), name='W2_d')
        self.b2_d = shared_zeros(shape=input_dim, name='b2_d')

        self.params = [
            self.W_e, self.b_e, self.W_miu, self.b_miu, self.W_sig, self.b_sig,
            self.W1_d, self.b1_d, self.W2_d, self.b2_d
        ]
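A hedged NumPy sketch of the forward pass these parameters imply (the tanh/sigmoid nonlinearities, treating the sig head as a log standard deviation, and the standard reparameterisation z = miu + sigma * eps are assumptions; the original class presumably expresses this in Theano):

import numpy as np

def vae_forward(x, p, rng=np.random.default_rng(0)):
    # encoder: x -> bottleneck -> (miu, log sigma)
    h_e = np.tanh(x @ p['W_e'] + p['b_e'])
    miu = h_e @ p['W_miu'] + p['b_miu']
    log_sig = h_e @ p['W_sig'] + p['b_sig']
    # reparameterisation trick: sample z ~ N(miu, sigma^2)
    z = miu + np.exp(log_sig) * rng.standard_normal(miu.shape)
    # decoder: z -> bottleneck -> reconstruction
    h_d = np.tanh(z @ p['W1_d'] + p['b1_d'])
    x_hat = 1.0 / (1.0 + np.exp(-(h_d @ p['W2_d'] + p['b2_d'])))  # sigmoid output
    return x_hat, miu, log_sig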
Example #4
    def __init__(self, input_shape, gamma_init=UniformWeight(), short_memory=0.1):
        '''
        REFERENCE:
            Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
                                 http://arxiv.org/pdf/1502.03167v3.pdf
        PARAMS:
            short_memory: short-term memory
                y_t is the latest value; the moving average x_tp1 is calculated as
                x_tp1 = memory * y_t + (1 - memory) * x_t. The larger the short-term
                memory, the more weight is put on the contemporary value.
            epsilon:
                denominator min value for preventing division by zero in computing std
        '''
        # assert len(input_shape) == 2
        self.epsilon = 1e-6
        self.input_shape = input_shape
        self.mem = short_memory

        self.gamma = gamma_init(self.input_shape, name='gamma')
        self.beta = shared_zeros(self.input_shape, name='beta')

        self.moving_mean = 0
        self.moving_var = 1

        self.params = [self.gamma, self.beta]
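A worked example of the moving-average rule from the docstring, x_tp1 = memory * y_t + (1 - memory) * x_t (plain Python; the batch statistics are made-up values):

def update_moving(x_t, y_t, memory=0.1):
    # larger memory -> more weight on the latest batch statistic y_t
    return memory * y_t + (1.0 - memory) * x_t

moving_mean = 0.0
for batch_mean in [0.5, 0.6, 0.4]:
    moving_mean = update_moving(moving_mean, batch_mean)
# after three batches: 0.05 -> 0.105 -> 0.1345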
Example #5
    def __init__(self, input_channels, filters, kernel_size=(3,3),
        stride=(1,1), W=None, b=None, weight_init=GaussianWeight(mean=0, std=0.1),
        image_shape=None, border_mode='valid'):
        '''
        PARAM:
            border_mode: (from theano)
                valid: only apply filter to complete patches of the image. Generates
                output of shape: image_shape - filter_shape + 1
                full: zero-pads image to multiple of filter shape to generate output
                of shape: image_shape + filter_shape - 1
        '''
        self.input_var = T.tensor4()
        self.input_channels = input_channels
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        self.border_mode = border_mode
        self.image_shape = image_shape

        self.W_shape = (self.filters, self.input_channels) + self.kernel_size
        self.W = W
        if self.W is None:
            self.W = weight_init(self.W_shape, name='W_'+self.__class__.__name__)

        self.b = b
        if self.b is None:
            self.b = shared_zeros(shape=(self.filters,), name='b_'+self.__class__.__name__)

        self.params = [self.W, self.b]
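The border_mode arithmetic from the docstring, as a small helper (a sketch for checking shapes; Theano's conv2d handles this internally):

def conv_output_shape(image_hw, kernel_hw, border_mode='valid'):
    # 'valid': image - kernel + 1; 'full': image + kernel - 1, per spatial dim
    if border_mode == 'valid':
        return tuple(i - k + 1 for i, k in zip(image_hw, kernel_hw))
    elif border_mode == 'full':
        return tuple(i + k - 1 for i, k in zip(image_hw, kernel_hw))
    raise ValueError(border_mode)

assert conv_output_shape((28, 28), (3, 3), 'valid') == (26, 26)
assert conv_output_shape((28, 28), (3, 3), 'full') == (30, 30)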
Example #6
    def __init__(self,
                 prev_dim=None,
                 this_dim=None,
                 W=None,
                 b=None,
                 weight_init=GaussianWeight(mean=0, std=0.1)):
        """
        DESCRIPTION:
            This is a fully connected layer
        PARAM:
            prev_dim(int): dimension of previous layer
            this_dim(int): dimension of this layer
            name(string): name of the layer
            W(tensor variable): weight matrix (2D tensor)
            b(tensor variable): bias vector (1D tensor)
            params(list): a list of params in layer that can be updated
        """

        self.prev_dim = prev_dim
        self.this_dim = this_dim

        self.W = W
        if self.W is None:
            self.W = weight_init((prev_dim, this_dim), name='W')

        self.b = b
        if self.b is None:
            self.b = shared_zeros(shape=this_dim, name='b')

        self.params = [self.W, self.b]
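The forward pass this layer implies is a plain affine map; a NumPy sketch (the actual class would express this with Theano tensors):

import numpy as np

def linear_fprop(x, W, b):
    # x: (batch, prev_dim), W: (prev_dim, this_dim), b: (this_dim,)
    return x @ W + b

x = np.ones((4, 3))
W = np.zeros((3, 2))
b = np.zeros(2)
assert linear_fprop(x, W, b).shape == (4, 2)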
Example #7
    def __init__(self,
                 input_channels,
                 filters,
                 kernel_size=(3, 3),
                 stride=(1, 1),
                 W=None,
                 b=None,
                 weight_init=GaussianWeight(mean=0, std=0.1),
                 border_mode='valid'):
        '''
        PARAM:
            border_mode: (from theano)
                valid: only apply filter to complete patches of the image. Generates
                output of shape: image_shape - filter_shape + 1
                full: zero-pads image to multiple of filter shape to generate output
                of shape: image_shape + filter_shape - 1
        '''
        self.input_channels = input_channels
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        self.border_mode = border_mode

        self.W_shape = (self.filters, self.input_channels) + self.kernel_size
        self.W = W
        if self.W is None:
            self.W = weight_init(self.W_shape, name='W')

        self.b = b
        if self.b is None:
            self.b = shared_zeros(shape=(self.filters, ), name='b')

        self.params = [self.W, self.b]
Example #8
    def __init__(self, dim, layer_type, gamma_init=UniformWeight(), short_memory=0.01):
        """
        REFERENCE:
            Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
        PARAMS:
            short_memory: short-term memory
                y_t is the latest value; the moving average x_tp1 is calculated as
                x_tp1 = memory * y_t + (1 - memory) * x_t. The larger the short-term
                memory, the more weight is put on the contemporary value.
            layer_type: fc or conv
            epsilon:
                denominator min value for preventing division by zero in computing std
            dim: for fc layers, shape is the layer dimension, for conv layers,
                shape is the number of feature maps
        """

        assert layer_type in ["fc", "conv"]
        self.layer_type = layer_type
        self.epsilon = 1e-6
        self.dim = dim
        self.mem = short_memory

        if self.layer_type == "fc":
            input_shape = (1, dim)
            self.broadcastable = (True, False)
        elif self.layer_type == "conv":
            input_shape = (1, dim, 1, 1)
            self.broadcastable = (True, False, True, True)

        self.gamma = gamma_init(input_shape, name="gamma")
        self.beta = shared_zeros(input_shape, name="beta")
        self.params = [self.gamma, self.beta]
        self.moving_mean = 0
        self.moving_var = 1
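Why the (1, dim) and (1, dim, 1, 1) shapes: gamma and beta broadcast across the batch axis (and, for conv layers, the spatial axes). A NumPy sketch of the same broadcasting (NumPy broadcasts size-1 axes implicitly, which is what the Theano broadcastable pattern declares):

import numpy as np

dim = 8
fc_x = np.random.randn(32, dim)          # (batch, dim)
conv_x = np.random.randn(32, dim, 5, 5)  # (batch, feature_maps, h, w)

gamma_fc = np.ones((1, dim))
gamma_conv = np.ones((1, dim, 1, 1))

assert (gamma_fc * fc_x).shape == fc_x.shape
assert (gamma_conv * conv_x).shape == conv_x.shape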
Example #9
    def __init__(self, input_dim, output_dim=128, weight_init=Orthogonal(mean=0, std=0.1),
                 inner_init=Gaussian(mean=0, std=0.1), truncate_gradient=-1,
                 output_mode='sum', return_sequences=False):

        super(BiDirectionLSTM, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.output_mode = output_mode # output_mode is either sum or concatenate
        self.return_sequences = return_sequences

        # forward weights
        self.W_i = weight_init((self.input_dim, self.output_dim))
        self.U_i = inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = weight_init((self.input_dim, self.output_dim))
        self.U_f = inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_zeros((self.output_dim))

        self.W_c = weight_init((self.input_dim, self.output_dim))
        self.U_c = inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = weight_init((self.input_dim, self.output_dim))
        self.U_o = inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        # backward weights
        self.Wb_i = weight_init((self.input_dim, self.output_dim))
        self.Ub_i = inner_init((self.output_dim, self.output_dim))
        self.bb_i = shared_zeros((self.output_dim))

        self.Wb_f = weight_init((self.input_dim, self.output_dim))
        self.Ub_f = inner_init((self.output_dim, self.output_dim))
        self.bb_f = shared_zeros((self.output_dim))

        self.Wb_c = weight_init((self.input_dim, self.output_dim))
        self.Ub_c = inner_init((self.output_dim, self.output_dim))
        self.bb_c = shared_zeros((self.output_dim))

        self.Wb_o = weight_init((self.input_dim, self.output_dim))
        self.Ub_o = inner_init((self.output_dim, self.output_dim))
        self.bb_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,

            self.Wb_i, self.Ub_i, self.bb_i,
            self.Wb_c, self.Ub_c, self.bb_c,
            self.Wb_f, self.Ub_f, self.bb_f,
            self.Wb_o, self.Ub_o, self.bb_o,
        ]
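A sketch of the two output_mode options, assuming the forward and backward passes have produced h_f and h_b of shape (time, batch, output_dim) (the names and shapes are illustrative):

import numpy as np

h_f = np.zeros((10, 4, 16))  # forward hidden states
h_b = np.zeros((10, 4, 16))  # backward hidden states

summed = h_f + h_b                            # 'sum': keeps output_dim
concat = np.concatenate([h_f, h_b], axis=-1)  # 'concat': doubles the last dim
assert summed.shape == (10, 4, 16)
assert concat.shape == (10, 4, 32)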
Example #10
    def __init__(self, input_shape, epsilon=1e-6, mode=0, momentum=0.9):
        self.input_shape = input_shape
        self.epsilon = epsilon
        self.mode = mode
        self.momentum = momentum

        self.init = UniformWeight()
        self.gamma = self.init((self.input_shape), name='gamma')
        self.beta = shared_zeros(self.input_shape, name='beta')

        self.running_mean = None
        self.running_std = None

        self.params = [self.gamma, self.beta]
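This variant tracks running statistics with a momentum coefficient rather than the short_memory used above; a plausible update rule (an assumption based on the attribute names, following the usual convention of initialising on the first batch):

def update_running(running, batch_stat, momentum=0.9):
    # first batch initialises the statistic; afterwards decay with momentum
    if running is None:
        return batch_stat
    return momentum * running + (1.0 - momentum) * batch_stat

running_mean = None
running_mean = update_running(running_mean, 0.5)  # -> 0.5
running_mean = update_running(running_mean, 0.7)  # -> 0.52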
Example #11
    def __init__(self, input_dim, output_dim, weight_init=OrthogonalWeight(),
                 inner_init=GaussianWeight(mean=0, std=0.1), truncate_gradient=-1,
                 output_mode='concat', return_sequences=False):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.output_mode = output_mode # output_mode is either sum or concatenate
        self.return_sequences = return_sequences

        # forward weights
        self.W_i = weight_init((self.input_dim, self.output_dim))
        self.U_i = inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim), name='b_i')

        self.W_f = weight_init((self.input_dim, self.output_dim))
        self.U_f = inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_ones((self.output_dim), name='b_f')

        self.W_c = weight_init((self.input_dim, self.output_dim))
        self.U_c = inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim), name='b_c')

        self.W_o = weight_init((self.input_dim, self.output_dim))
        self.U_o = inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim), name='b_o')

        # backward weights
        self.Wb_i = weight_init((self.input_dim, self.output_dim))
        self.Ub_i = inner_init((self.output_dim, self.output_dim))
        self.bb_i = shared_zeros((self.output_dim), name='bb_i')

        self.Wb_f = weight_init((self.input_dim, self.output_dim))
        self.Ub_f = inner_init((self.output_dim, self.output_dim))
        self.bb_f = shared_ones((self.output_dim), name='bb_f')

        self.Wb_c = weight_init((self.input_dim, self.output_dim))
        self.Ub_c = inner_init((self.output_dim, self.output_dim))
        self.bb_c = shared_zeros((self.output_dim), name='bb_c')

        self.Wb_o = weight_init((self.input_dim, self.output_dim))
        self.Ub_o = inner_init((self.output_dim, self.output_dim))
        self.bb_o = shared_zeros((self.output_dim), name='bb_o')

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,

            self.Wb_i, self.Ub_i, self.bb_i,
            self.Wb_c, self.Ub_c, self.bb_c,
            self.Wb_f, self.Ub_f, self.bb_f,
            self.Wb_o, self.Ub_o, self.bb_o,
        ]
Example #12
    def __init__(self,
                 dim,
                 layer_type,
                 gamma_init=UniformWeight(),
                 short_memory=0.9):
        '''
        REFERENCE:
            Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
        PARAMS:
            short_memory: short-term memory
                y_t is the latest value; the moving average x_tp1 is calculated as
                x_tp1 = memory * y_t + (1 - memory) * x_t. The larger the short-term
                memory, the more weight is put on the contemporary value.
            layer_type: fc or conv
            epsilon:
                denominator min value for preventing division by zero in computing std
            dim: for fc layers, shape is the layer dimension, for conv layers,
                shape is the number of feature maps
        '''

        assert layer_type in ['fc', 'conv']
        self.layer_type = layer_type
        self.epsilon = 1e-6
        self.dim = dim
        self.mem = short_memory

        if self.layer_type == 'fc':
            input_shape = (1, dim)
            self.broadcastable = (True, False)
        elif self.layer_type == 'conv':
            input_shape = (1, dim, 1, 1)
            self.broadcastable = (True, False, True, True)

        self.gamma = gamma_init(input_shape, name='gamma')
        self.beta = shared_zeros(input_shape, name='beta')
        self.params = [self.gamma, self.beta]
        self.moving_mean = 0
        self.moving_var = 1
Example #13
    def __init__(
        self,
        input_channels,
        filters,
        stride,
        kernel_size=(3, 3),
        W=None,
        b=None,
        weight_init=GaussianWeight(mean=0, std=0.1),
        image_shape=None,
        border_mode="valid",
        pad_last_dim=False,
    ):
        """
        PARAM:
            border_mode: (from theano)
                valid: only apply filter to complete patches of the image. Generates
                output of shape: image_shape - filter_shape + 1
                full: zero-pads image to multiple of filter shape to generate output
                of shape: image_shape + filter_shape - 1
        """
        self.input_channels = input_channels
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        self.border_mode = border_mode
        self.image_shape = image_shape
        self.pad_last_dim = pad_last_dim

        self.W_shape = (self.filters, self.input_channels) + self.kernel_size
        self.W = W
        if self.W is None:
            self.W = weight_init(self.W_shape, name="W")

        self.b = b
        if self.b is None:
            self.b = shared_zeros(shape=(self.filters,), name="b")

        self.params = [self.W, self.b]
Example #14
    def __init__(self, input_shape, gamma_init=UniformWeight(), short_memory=0.9):
        '''
        REFERENCE:
            Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
                                 http://arxiv.org/pdf/1502.03167v3.pdf
        PARAMS:
            short_memory: short-term memory
                y_t is the latest value; the moving average x_tp1 is calculated as
                x_tp1 = memory * y_t + (1 - memory) * x_t. The larger the short-term
                memory, the more weight is put on the contemporary value.
            epsilon:
                denominator min value for preventing division by zero in computing std
        '''
        self.epsilon = 1e-6
        self.input_shape = input_shape
        self.mem = short_memory

        self.gamma = gamma_init(self.input_shape, name='gamma')
        self.beta = shared_zeros(self.input_shape, name='beta')

        self.moving_mean = 0
        self.moving_std = 1

        self.params = [self.gamma, self.beta]
Example #15
    def __init__(self,
                 input_dim,
                 output_dim,
                 weight_init=OrthogonalWeight(),
                 inner_init=GaussianWeight(mean=0, std=0.1),
                 truncate_gradient=-1,
                 output_mode='concat',
                 return_sequences=False,
                 return_idx=-1):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.output_mode = output_mode  # output_mode is either sum or concatenate
        self.return_sequences = return_sequences
        self.return_idx = return_idx
        # forward weights
        self.W_i = weight_init((self.input_dim, self.output_dim))
        self.U_i = inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim), name='b_i')

        self.W_f = weight_init((self.input_dim, self.output_dim))
        self.U_f = inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_ones((self.output_dim), name='b_f')

        self.W_c = weight_init((self.input_dim, self.output_dim))
        self.U_c = inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim), name='b_c')

        self.W_o = weight_init((self.input_dim, self.output_dim))
        self.U_o = inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim), name='b_o')

        # backward weights
        self.Wb_i = weight_init((self.input_dim, self.output_dim))
        self.Ub_i = inner_init((self.output_dim, self.output_dim))
        self.bb_i = shared_zeros((self.output_dim), name='bb_i')

        self.Wb_f = weight_init((self.input_dim, self.output_dim))
        self.Ub_f = inner_init((self.output_dim, self.output_dim))
        self.bb_f = shared_ones((self.output_dim), name='bb_f')

        self.Wb_c = weight_init((self.input_dim, self.output_dim))
        self.Ub_c = inner_init((self.output_dim, self.output_dim))
        self.bb_c = shared_zeros((self.output_dim), name='bb_c')

        self.Wb_o = weight_init((self.input_dim, self.output_dim))
        self.Ub_o = inner_init((self.output_dim, self.output_dim))
        self.bb_o = shared_zeros((self.output_dim), name='bb_o')

        self.params = [
            self.W_i,
            self.U_i,
            self.b_i,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_f,
            self.U_f,
            self.b_f,
            self.W_o,
            self.U_o,
            self.b_o,
            self.Wb_i,
            self.Ub_i,
            self.bb_i,
            self.Wb_c,
            self.Ub_c,
            self.bb_c,
            self.Wb_f,
            self.Ub_f,
            self.bb_f,
            self.Wb_o,
            self.Ub_o,
            self.bb_o,
        ]
Example #16
    def setup(self):

        self.log.info('..begin setting up train object')

        #===================[ build params and deltas list ]==================#

        params = []
        deltas = []

        for layer in self.model.layers:
            for param in layer.params:
                # check that the param to be updated is a shared variable
                if is_shared_var(param):
                    param.name += '_' + layer.__class__.__name__
                    params += [param]
                    deltas += [shared_zeros(shape=param.shape.eval())]

        #=====================[ training params updates ]=====================#

        self.log.info("..update params: " + str(params))
        train_y_pred, train_layers_stats = self.model.train_fprop(self.model.input_var)
        train_cost = self.train_cost(self.model.output_var, train_y_pred).astype(floatX)

        train_updates = []
        gparams = T.grad(train_cost, params)
        for delta, param, gparam in zip(deltas, params, gparams):
            train_updates += self.learning_method.update(delta, gparam)
            train_updates += [(param, param+delta)]

        #----[ append updates of stats from each layer to train updates ]-----#

        self.train_stats_names, train_stats_vars = split_list(train_layers_stats)
        train_stats_vars = [var.astype(floatX) for var in train_stats_vars]
        self.train_stats_shared = generate_shared_list(train_stats_vars)
        train_stats_updates = merge_lists(self.train_stats_shared, train_stats_vars)
        train_updates += train_stats_updates

        #-------------------------[ train functions ]-------------------------#

        self.log.info('..begin compiling functions')
        self.training = theano.function(inputs=[self.model.input_var, self.model.output_var],
                                        outputs=train_cost,
                                        updates=train_updates,
                                        on_unused_input='warn',
                                        allow_input_downcast=True)

        self.log.info('..training function compiled')

        #=============================[ testing ]=============================#

        test_y_pred, test_layers_stats = self.model.test_fprop(self.model.input_var)

        #-----[ append updates of stats from each layer to test updates ]-----#

        self.test_stats_names, test_stats_vars = split_list(test_layers_stats)
        test_stats_vars = [var.astype(floatX) for var in test_stats_vars]
        self.test_stats_shared = generate_shared_list(test_stats_vars)
        test_stats_updates = merge_lists(self.test_stats_shared, test_stats_vars)

        #-------------------------[ test functions ]--------------------------#

        test_stopping_error = self.valid_cost(self.model.output_var, test_y_pred).astype(floatX)
        test_cost = self.train_cost(self.model.output_var, test_y_pred).astype(floatX)

        self.testing = theano.function(inputs=[self.model.input_var, self.model.output_var],
                                       outputs=(test_stopping_error, test_cost),
                                       updates=test_stats_updates,
                                       on_unused_input='warn',
                                       allow_input_downcast=True)

        self.log.info('..testing function compiled')
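The (delta, param) bookkeeping above pairs each parameter with a persistent delta buffer, so a learning method with state (momentum and the like) can be expressed purely as Theano update pairs. A minimal sketch of one learning method under that contract (plain SGD with momentum; the class name and the update(delta, gparam) signature mirror the self.learning_method.update call above, but are assumptions about the interface):

class SGDMomentum(object):
    def __init__(self, learning_rate=0.01, momentum=0.9):
        self.lr = learning_rate
        self.mom = momentum

    def update(self, delta, gparam):
        # return update pairs for the shared delta buffer; the caller then
        # applies (param, param + delta), as in setup() above
        return [(delta, self.mom * delta - self.lr * gparam)]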