def __init__(self, input_dim, output_dim, truncate_gradient=-1, return_sequences=True,
             weight_init=Orthogonal(mean=0, std=0.1),
             inner_init=Gaussian(mean=0, std=0.1)):
    # truncate_gradient and return_sequences were assigned below but missing
    # from the original signature; defaults follow the later variant of this
    # constructor.
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.W_i = weight_init((self.input_dim, self.output_dim))
    self.U_i = inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))

    self.W_f = weight_init((self.input_dim, self.output_dim))
    self.U_f = inner_init((self.output_dim, self.output_dim))
    # forget-gate bias starts at ones so the gate is initially open; the
    # original applied Identity() to a 1-D shape, which cannot produce a
    # valid bias vector (the later variants below use shared_ones here too).
    self.b_f = shared_ones((self.output_dim))

    self.W_c = weight_init((self.input_dim, self.output_dim))
    self.U_c = inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = weight_init((self.input_dim, self.output_dim))
    self.U_o = inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]

def __init__(self, input_dim, output_dim, truncate_gradient=-1, return_sequences=True,
             weight_init=OrthogonalWeight(),
             inner_init=GaussianWeight(mean=0, std=0.1)):
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.W_i = weight_init((self.input_dim, self.output_dim))
    self.U_i = inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim), name='b_i')

    self.W_f = weight_init((self.input_dim, self.output_dim))
    self.U_f = inner_init((self.output_dim, self.output_dim))
    self.b_f = shared_ones((self.output_dim), name='b_f')

    self.W_c = weight_init((self.input_dim, self.output_dim))
    self.U_c = inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim), name='b_c')

    self.W_o = weight_init((self.input_dim, self.output_dim))
    self.U_o = inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim), name='b_o')

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]

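# The parameters above implement the standard LSTM recurrence. Below is a
# minimal single-step sketch of those equations for reference (an
# illustration, not the library's actual step function; the function and
# argument names here are assumptions):

import theano.tensor as T

def lstm_step(x_t, h_tm1, c_tm1,
              W_i, U_i, b_i, W_f, U_f, b_f,
              W_c, U_c, b_c, W_o, U_o, b_o):
    i_t = T.nnet.sigmoid(T.dot(x_t, W_i) + T.dot(h_tm1, U_i) + b_i)  # input gate
    f_t = T.nnet.sigmoid(T.dot(x_t, W_f) + T.dot(h_tm1, U_f) + b_f)  # forget gate
    c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, W_c) + T.dot(h_tm1, U_c) + b_c)
    o_t = T.nnet.sigmoid(T.dot(x_t, W_o) + T.dot(h_tm1, U_o) + b_o)  # output gate
    h_t = o_t * T.tanh(c_t)                                          # new hidden state
    return h_t, c_t
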
def __init__(self, input_dim, bottlenet_dim, z_dim,
             weight_init=GaussianWeight(mean=0, std=0.01)):
    self.input_dim = input_dim
    self.bottlenet_dim = bottlenet_dim

    # encoder
    self.W_e = weight_init((input_dim, bottlenet_dim), name='W_e')
    self.b_e = shared_zeros(shape=bottlenet_dim, name='b_e')
    self.W_miu = weight_init((bottlenet_dim, z_dim), name='W_miu')
    self.b_miu = shared_zeros(shape=z_dim, name='b_miu')
    self.W_sig = weight_init((bottlenet_dim, z_dim), name='W_sig')
    self.b_sig = shared_zeros(shape=z_dim, name='b_sig')

    # decoder
    self.W1_d = weight_init((z_dim, bottlenet_dim), name='W1_d')
    self.b1_d = shared_zeros(shape=bottlenet_dim, name='b1_d')
    self.W2_d = weight_init((bottlenet_dim, input_dim), name='W2_d')
    self.b2_d = shared_zeros(shape=input_dim, name='b2_d')

    self.params = [
        self.W_e, self.b_e,
        self.W_miu, self.b_miu,
        self.W_sig, self.b_sig,
        self.W1_d, self.b1_d,
        self.W2_d, self.b2_d,
    ]

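# The W_miu/W_sig heads above are the mean and spread outputs of a
# variational autoencoder's encoder. A minimal numpy sketch of the
# reparameterization step they typically feed (whether the sig head
# predicts sigma or log-sigma is an assumption here):

import numpy as np

def sample_z(miu, log_sig, rng=np.random):
    eps = rng.standard_normal(miu.shape)  # eps ~ N(0, I)
    # z ~ N(miu, sig^2), expressed so gradients flow through miu and sig
    return miu + np.exp(log_sig) * eps
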
def __init__(self, input_shape, gamma_init=UniformWeight(), short_memory=0.1):
    '''
    REFERENCE:
        Batch Normalization: Accelerating Deep Network Training by
        Reducing Internal Covariate Shift
        http://arxiv.org/pdf/1502.03167v3.pdf

    PARAMS:
        short_memory: short-term memory. If y_t is the latest value, the
            moving average x_tp1 is calculated as
            x_tp1 = memory * y_t + (1 - memory) * x_t;
            the larger the short-term memory, the more weight is put on
            the contemporary value.
        epsilon: minimum denominator value, to prevent division by zero
            when computing the std.
    '''
    # assert len(input_shape) == 2
    self.epsilon = 1e-6
    self.input_shape = input_shape
    self.mem = short_memory

    self.gamma = gamma_init(self.input_shape, name='gamma')
    self.beta = shared_zeros(self.input_shape, name='beta')
    self.moving_mean = 0
    self.moving_var = 1

    self.params = [self.gamma, self.beta]

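# A tiny numeric check of the moving-average rule in the docstring,
# x_tp1 = mem * y_t + (1 - mem) * x_t (standalone illustration, not part
# of the layer):

def update_moving(x_t, y_t, mem=0.1):
    return mem * y_t + (1.0 - mem) * x_t

# with mem=0.1 the running statistic moves slowly toward the latest batch value
assert update_moving(x_t=0.0, y_t=1.0, mem=0.1) == 0.1
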
def __init__(self, input_channels, filters, kernel_size=(3, 3), stride=(1, 1),
             W=None, b=None, weight_init=GaussianWeight(mean=0, std=0.1),
             image_shape=None, border_mode='valid'):
    '''
    PARAM:
        border_mode: (from theano)
            valid: only apply filter to complete patches of the image.
                Generates output of shape: image_shape - filter_shape + 1.
            full: zero-pads image to multiple of filter shape to generate
                output of shape: image_shape + filter_shape - 1.
    '''
    self.input_var = T.tensor4()
    self.input_channels = input_channels
    self.filters = filters
    self.kernel_size = kernel_size
    self.stride = stride
    self.border_mode = border_mode
    self.image_shape = image_shape

    self.W_shape = (self.filters, self.input_channels) + self.kernel_size
    self.W = W
    if self.W is None:
        self.W = weight_init(self.W_shape, name='W_' + self.__class__.__name__)
    self.b = b
    if self.b is None:
        self.b = shared_zeros(shape=(self.filters,), name='b_' + self.__class__.__name__)

    self.params = [self.W, self.b]

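# A quick check of the border_mode arithmetic from the docstring, with
# illustrative numbers (a 28x28 image and a 3x3 kernel); this helper is
# not part of the layer:

def conv_output_size(image, kernel, border_mode='valid'):
    if border_mode == 'valid':
        return image - kernel + 1  # complete patches only
    return image + kernel - 1      # 'full': zero-padded

assert conv_output_size(28, 3, 'valid') == 26
assert conv_output_size(28, 3, 'full') == 30
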
def __init__(self, prev_dim=None, this_dim=None, W=None, b=None,
             weight_init=GaussianWeight(mean=0, std=0.1)):
    """
    DESCRIPTION:
        This is a fully connected layer.

    PARAM:
        prev_dim(int): dimension of the previous layer
        this_dim(int): dimension of this layer
        W(tensor variable): weight matrix (2D tensor)
        b(tensor variable): bias vector
        params(list): list of the layer's parameters that can be updated
    """
    self.prev_dim = prev_dim
    self.this_dim = this_dim

    self.W = W
    if self.W is None:
        self.W = weight_init((prev_dim, this_dim), name='W')
    self.b = b
    if self.b is None:
        self.b = shared_zeros(shape=this_dim, name='b')

    self.params = [self.W, self.b]

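# A minimal sketch of the affine transform a fully connected layer with
# these parameters computes, assuming the usual y = xW + b forward pass
# (the library's actual method name and signature may differ):

import theano.tensor as T

def fprop(self, state_below):
    # state_below: (batch, prev_dim) -> output: (batch, this_dim)
    return T.dot(state_below, self.W) + self.b
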
def __init__(self, input_channels, filters, kernel_size=(3, 3), stride=(1, 1),
             W=None, b=None, weight_init=GaussianWeight(mean=0, std=0.1),
             border_mode='valid'):
    '''
    PARAM:
        border_mode: (from theano)
            valid: only apply filter to complete patches of the image.
                Generates output of shape: image_shape - filter_shape + 1.
            full: zero-pads image to multiple of filter shape to generate
                output of shape: image_shape + filter_shape - 1.
    '''
    self.input_channels = input_channels
    self.filters = filters
    self.kernel_size = kernel_size
    self.stride = stride
    self.border_mode = border_mode

    self.W_shape = (self.filters, self.input_channels) + self.kernel_size
    self.W = W
    if self.W is None:
        self.W = weight_init(self.W_shape, name='W')
    self.b = b
    if self.b is None:
        self.b = shared_zeros(shape=(self.filters,), name='b')

    self.params = [self.W, self.b]

def __init__(self, dim, layer_type, gamma_init=UniformWeight(), short_memory=0.01):
    """
    REFERENCE:
        Batch Normalization: Accelerating Deep Network Training by
        Reducing Internal Covariate Shift

    PARAMS:
        short_memory: short-term memory. If y_t is the latest value, the
            moving average x_tp1 is calculated as
            x_tp1 = memory * y_t + (1 - memory) * x_t;
            the larger the short-term memory, the more weight is put on
            the contemporary value.
        layer_type: "fc" or "conv"
        epsilon: minimum denominator value, to prevent division by zero
            when computing the std
        dim: for fc layers, the layer dimension; for conv layers, the
            number of feature maps
    """
    assert layer_type in ["fc", "conv"]
    self.layer_type = layer_type
    self.epsilon = 1e-6
    self.dim = dim
    self.mem = short_memory

    if self.layer_type == "fc":
        input_shape = (1, dim)
        self.broadcastable = (True, False)
    elif self.layer_type == "conv":
        input_shape = (1, dim, 1, 1)
        self.broadcastable = (True, False, True, True)

    self.gamma = gamma_init(input_shape, name="gamma")
    self.beta = shared_zeros(input_shape, name="beta")
    self.params = [self.gamma, self.beta]

    self.moving_mean = 0
    self.moving_var = 1

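# Sketch of the normalization that the (1, dim, 1, 1) broadcast pattern
# above supports for the "conv" case: statistics per feature map, taken
# over the batch and spatial axes (the exact fprop in the library is an
# assumption here):

import theano.tensor as T

def bn_conv(x, gamma, beta, epsilon=1e-6):
    # x: (batch, dim, height, width); gamma/beta broadcast as (1, dim, 1, 1)
    mean = x.mean(axis=(0, 2, 3), keepdims=True)
    var = x.var(axis=(0, 2, 3), keepdims=True)
    return gamma * (x - mean) / T.sqrt(var + epsilon) + beta
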
def __init__(self, input_dim, output_dim=128,
             weight_init=Orthogonal(mean=0, std=0.1),
             inner_init=Gaussian(mean=0, std=0.1),
             truncate_gradient=-1, output_mode='sum', return_sequences=False):
    super(BiDirectionLSTM, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.output_mode = output_mode  # output_mode is either sum or concatenate
    self.return_sequences = return_sequences

    # forward weights
    self.W_i = weight_init((self.input_dim, self.output_dim))
    self.U_i = inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))

    self.W_f = weight_init((self.input_dim, self.output_dim))
    self.U_f = inner_init((self.output_dim, self.output_dim))
    self.b_f = shared_zeros((self.output_dim))

    self.W_c = weight_init((self.input_dim, self.output_dim))
    self.U_c = inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = weight_init((self.input_dim, self.output_dim))
    self.U_o = inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    # backward weights
    self.Wb_i = weight_init((self.input_dim, self.output_dim))
    self.Ub_i = inner_init((self.output_dim, self.output_dim))
    self.bb_i = shared_zeros((self.output_dim))

    self.Wb_f = weight_init((self.input_dim, self.output_dim))
    self.Ub_f = inner_init((self.output_dim, self.output_dim))
    self.bb_f = shared_zeros((self.output_dim))

    self.Wb_c = weight_init((self.input_dim, self.output_dim))
    self.Ub_c = inner_init((self.output_dim, self.output_dim))
    self.bb_c = shared_zeros((self.output_dim))

    self.Wb_o = weight_init((self.input_dim, self.output_dim))
    self.Ub_o = inner_init((self.output_dim, self.output_dim))
    self.bb_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
        self.Wb_i, self.Ub_i, self.bb_i,
        self.Wb_c, self.Ub_c, self.bb_c,
        self.Wb_f, self.Ub_f, self.bb_f,
        self.Wb_o, self.Ub_o, self.bb_o,
    ]

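# A hypothetical merge of the forward and backward passes, following the
# "output_mode is either sum or concatenate" comment above (the library's
# actual merge code is not shown, so this is a sketch):

import theano.tensor as T

def merge_directions(forward, backward, output_mode='sum'):
    if output_mode == 'sum':
        # both directions must share the same output_dim
        return forward + backward
    # 'concat': doubles the feature dimension on the last axis
    return T.concatenate([forward, backward], axis=forward.ndim - 1)
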
def __init__(self, input_shape, epsilon=1e-6, mode=0, momentum=0.9):
    self.input_shape = input_shape
    self.epsilon = epsilon
    self.mode = mode
    self.momentum = momentum
    self.init = UniformWeight()

    self.gamma = self.init(self.input_shape, name='gamma')
    self.beta = shared_zeros(self.input_shape, name='beta')

    self.running_mean = None
    self.running_std = None
    self.params = [self.gamma, self.beta]

def __init__(self, input_dim, output_dim, weight_init=OrthogonalWeight(),
             inner_init=GaussianWeight(mean=0, std=0.1),
             truncate_gradient=-1, output_mode='concat', return_sequences=False):
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.output_mode = output_mode  # output_mode is either sum or concatenate
    self.return_sequences = return_sequences

    # forward weights
    self.W_i = weight_init((self.input_dim, self.output_dim))
    self.U_i = inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim), name='b_i')

    self.W_f = weight_init((self.input_dim, self.output_dim))
    self.U_f = inner_init((self.output_dim, self.output_dim))
    self.b_f = shared_ones((self.output_dim), name='b_f')

    self.W_c = weight_init((self.input_dim, self.output_dim))
    self.U_c = inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim), name='b_c')

    self.W_o = weight_init((self.input_dim, self.output_dim))
    self.U_o = inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim), name='b_o')

    # backward weights
    self.Wb_i = weight_init((self.input_dim, self.output_dim))
    self.Ub_i = inner_init((self.output_dim, self.output_dim))
    self.bb_i = shared_zeros((self.output_dim), name='bb_i')

    self.Wb_f = weight_init((self.input_dim, self.output_dim))
    self.Ub_f = inner_init((self.output_dim, self.output_dim))
    self.bb_f = shared_ones((self.output_dim), name='bb_f')

    self.Wb_c = weight_init((self.input_dim, self.output_dim))
    self.Ub_c = inner_init((self.output_dim, self.output_dim))
    self.bb_c = shared_zeros((self.output_dim), name='bb_c')

    self.Wb_o = weight_init((self.input_dim, self.output_dim))
    self.Ub_o = inner_init((self.output_dim, self.output_dim))
    self.bb_o = shared_zeros((self.output_dim), name='bb_o')

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
        self.Wb_i, self.Ub_i, self.bb_i,
        self.Wb_c, self.Ub_c, self.bb_c,
        self.Wb_f, self.Ub_f, self.bb_f,
        self.Wb_o, self.Ub_o, self.bb_o,
    ]

def __init__(self, dim, layer_type, gamma_init=UniformWeight(), short_memory=0.9):
    '''
    REFERENCE:
        Batch Normalization: Accelerating Deep Network Training by
        Reducing Internal Covariate Shift

    PARAMS:
        short_memory: short-term memory. If y_t is the latest value, the
            moving average x_tp1 is calculated as
            x_tp1 = memory * y_t + (1 - memory) * x_t;
            the larger the short-term memory, the more weight is put on
            the contemporary value.
        layer_type: 'fc' or 'conv'
        epsilon: minimum denominator value, to prevent division by zero
            when computing the std
        dim: for fc layers, the layer dimension; for conv layers, the
            number of feature maps
    '''
    assert layer_type in ['fc', 'conv']
    self.layer_type = layer_type
    self.epsilon = 1e-6
    self.dim = dim
    self.mem = short_memory

    if self.layer_type == 'fc':
        input_shape = (1, dim)
        self.broadcastable = (True, False)
    elif self.layer_type == 'conv':
        input_shape = (1, dim, 1, 1)
        self.broadcastable = (True, False, True, True)

    self.gamma = gamma_init(input_shape, name='gamma')
    self.beta = shared_zeros(input_shape, name='beta')
    self.params = [self.gamma, self.beta]

    self.moving_mean = 0
    self.moving_var = 1

def __init__(self, input_channels, filters, stride, kernel_size=(3, 3),
             W=None, b=None, weight_init=GaussianWeight(mean=0, std=0.1),
             image_shape=None, border_mode="valid", pad_last_dim=False):
    """
    PARAM:
        border_mode: (from theano)
            valid: only apply filter to complete patches of the image.
                Generates output of shape: image_shape - filter_shape + 1.
            full: zero-pads image to multiple of filter shape to generate
                output of shape: image_shape + filter_shape - 1.
    """
    self.input_channels = input_channels
    self.filters = filters
    self.kernel_size = kernel_size
    # the stride argument was accepted but never stored in the original
    self.stride = stride
    self.border_mode = border_mode
    self.image_shape = image_shape
    self.pad_last_dim = pad_last_dim

    self.W_shape = (self.filters, self.input_channels) + self.kernel_size
    self.W = W
    if self.W is None:
        self.W = weight_init(self.W_shape, name="W")
    self.b = b
    if self.b is None:
        self.b = shared_zeros(shape=(self.filters,), name="b")

    self.params = [self.W, self.b]

def __init__(self, input_shape, gamma_init=UniformWeight(), short_memory=0.9):
    '''
    REFERENCE:
        Batch Normalization: Accelerating Deep Network Training by
        Reducing Internal Covariate Shift
        http://arxiv.org/pdf/1502.03167v3.pdf

    PARAMS:
        short_memory: short-term memory. If y_t is the latest value, the
            moving average x_tp1 is calculated as
            x_tp1 = memory * y_t + (1 - memory) * x_t;
            the larger the short-term memory, the more weight is put on
            the contemporary value.
        epsilon: minimum denominator value, to prevent division by zero
            when computing the std.
    '''
    self.epsilon = 1e-6
    self.input_shape = input_shape
    self.mem = short_memory

    self.gamma = gamma_init(self.input_shape, name='gamma')
    self.beta = shared_zeros(self.input_shape, name='beta')
    self.moving_mean = 0
    self.moving_std = 1

    self.params = [self.gamma, self.beta]

def __init__(self, input_dim, output_dim, weight_init=OrthogonalWeight(),
             inner_init=GaussianWeight(mean=0, std=0.1),
             truncate_gradient=-1, output_mode='concat',
             return_sequences=False, return_idx=-1):
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.output_mode = output_mode  # output_mode is either sum or concatenate
    self.return_sequences = return_sequences
    self.return_idx = return_idx

    # forward weights
    self.W_i = weight_init((self.input_dim, self.output_dim))
    self.U_i = inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim), name='b_i')

    self.W_f = weight_init((self.input_dim, self.output_dim))
    self.U_f = inner_init((self.output_dim, self.output_dim))
    self.b_f = shared_ones((self.output_dim), name='b_f')

    self.W_c = weight_init((self.input_dim, self.output_dim))
    self.U_c = inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim), name='b_c')

    self.W_o = weight_init((self.input_dim, self.output_dim))
    self.U_o = inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim), name='b_o')

    # backward weights
    self.Wb_i = weight_init((self.input_dim, self.output_dim))
    self.Ub_i = inner_init((self.output_dim, self.output_dim))
    self.bb_i = shared_zeros((self.output_dim), name='bb_i')

    self.Wb_f = weight_init((self.input_dim, self.output_dim))
    self.Ub_f = inner_init((self.output_dim, self.output_dim))
    self.bb_f = shared_ones((self.output_dim), name='bb_f')

    self.Wb_c = weight_init((self.input_dim, self.output_dim))
    self.Ub_c = inner_init((self.output_dim, self.output_dim))
    self.bb_c = shared_zeros((self.output_dim), name='bb_c')

    self.Wb_o = weight_init((self.input_dim, self.output_dim))
    self.Ub_o = inner_init((self.output_dim, self.output_dim))
    self.bb_o = shared_zeros((self.output_dim), name='bb_o')

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
        self.Wb_i, self.Ub_i, self.bb_i,
        self.Wb_c, self.Ub_c, self.bb_c,
        self.Wb_f, self.Ub_f, self.bb_f,
        self.Wb_o, self.Ub_o, self.bb_o,
    ]

def setup(self):
    self.log.info('..begin setting up train object')

    #===================[ build params and deltas list ]==================#
    params = []
    deltas = []
    for layer in self.model.layers:
        for param in layer.params:
            # check that the param to be updated is a shared variable
            if is_shared_var(param):
                param.name += '_' + layer.__class__.__name__
                params += [param]
                deltas += [shared_zeros(shape=param.shape.eval())]

    #=====================[ training params updates ]=====================#
    self.log.info("..update params: " + str(params))
    train_y_pred, train_layers_stats = self.model.train_fprop(self.model.input_var)
    train_cost = self.train_cost(self.model.output_var, train_y_pred).astype(floatX)

    train_updates = []
    gparams = T.grad(train_cost, params)
    for delta, param, gparam in zip(deltas, params, gparams):
        train_updates += self.learning_method.update(delta, gparam)
        train_updates += [(param, param + delta)]

    #----[ append updates of stats from each layer to train updates ]-----#
    self.train_stats_names, train_stats_vars = split_list(train_layers_stats)
    train_stats_vars = [var.astype(floatX) for var in train_stats_vars]
    self.train_stats_shared = generate_shared_list(train_stats_vars)
    train_stats_updates = merge_lists(self.train_stats_shared, train_stats_vars)
    train_updates += train_stats_updates

    #-------------------------[ train functions ]-------------------------#
    self.log.info('..begin compiling functions')
    self.training = theano.function(
        inputs=[self.model.input_var, self.model.output_var],
        outputs=train_cost,
        updates=train_updates,
        on_unused_input='warn',
        allow_input_downcast=True)
    self.log.info('..training function compiled')

    #=============================[ testing ]=============================#
    test_y_pred, test_layers_stats = self.model.test_fprop(self.model.input_var)

    #-----[ append updates of stats from each layer to test updates ]-----#
    self.test_stats_names, test_stats_vars = split_list(test_layers_stats)
    test_stats_vars = [var.astype(floatX) for var in test_stats_vars]
    self.test_stats_shared = generate_shared_list(test_stats_vars)
    test_stats_updates = merge_lists(self.test_stats_shared, test_stats_vars)

    #-------------------------[ test functions ]--------------------------#
    test_stopping_error = self.valid_cost(self.model.output_var, test_y_pred).astype(floatX)
    test_cost = self.train_cost(self.model.output_var, test_y_pred).astype(floatX)
    self.testing = theano.function(
        inputs=[self.model.input_var, self.model.output_var],
        outputs=(test_stopping_error, test_cost),
        updates=test_stats_updates,
        on_unused_input='warn',
        allow_input_downcast=True)
    self.log.info('..testing function compiled')

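# A hypothetical call site for the two compiled functions above. The
# trainer object and array names here are illustrative; X and y must be
# numpy arrays matching model.input_var and model.output_var in shape and
# dtype (allow_input_downcast handles float precision):
#
#     cost = trainer.training(X_batch, y_batch)
#     stopping_error, cost = trainer.testing(X_valid, y_valid)
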