def __init__(self, w, b, w_rev, b_rev, backward_activation='tanh', forward_activation='tanh', rng=None, noise=1,
             optimizer_constructor=lambda: SimpleGradientDescent(0.01), cost_function=mean_squared_error, use_bias=True):
    self.noise = noise
    self.rng = get_theano_rng(rng)
    self.w = theano.shared(w, name='w')
    self.b = theano.shared(b, name='b')
    self.w_rev = theano.shared(w_rev, name='w_rev')
    self.b_rev = theano.shared(b_rev, name='b_rev')
    self.backward_activation = get_named_activation_function(backward_activation) if backward_activation is not None else None
    self.forward_activation = get_named_activation_function(forward_activation)
    self.forward_optimizer = optimizer_constructor()
    self.backward_optimizer = optimizer_constructor()
    self.cost_function = cost_function
    self.use_bias = use_bias
def __init__(self, w, b, w_rev, b_rev, backward_activation='tanh', forward_activation='tanh', rng=None, noise=1,
             optimizer_constructor=lambda: SimpleGradientDescent(0.01), cost_function=mean_squared_error):
    self.noise = noise
    self.rng = get_theano_rng(rng)
    self.w = theano.shared(w, name='w')
    self.b = theano.shared(b, name='b')
    self.w_rev = theano.shared(w_rev, name='w_rev')
    self.b_rev = theano.shared(b_rev, name='b_rev')
    self.backward_activation = get_named_activation_function(backward_activation) if backward_activation is not None else None
    self.forward_activation = get_named_activation_function(forward_activation)
    self.forward_optimizer = optimizer_constructor()
    self.backward_optimizer = optimizer_constructor()
    self.cost_function = cost_function
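# The two constructors above pair a forward mapping (w, b, forward_activation) with a learned
# reverse mapping (w_rev, b_rev, backward_activation), each updated by its own optimizer.
# A minimal numpy sketch of what such a pair of parameter sets typically computes; the helper
# names below are hypothetical, and the classes' actual forward/backward methods are not shown here.
import numpy as np

def forward(x, w, b, act=np.tanh):
    return act(x.dot(w) + b)          # forward pass through the layer

def backward(y, w_rev, b_rev, act=np.tanh):
    return act(y.dot(w_rev) + b_rev)  # learned approximate inverse of the forward pass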
def __init__(self, input_size, hidden_sizes, output_size, distribution='gaussian', hidden_activation='sig',
             w_init=lambda n_in, n_out: 0.01*np.random.randn(n_in, n_out)):
    """
    :param input_size: The dimensionality of the input
    :param hidden_sizes: A list indicating the sizes of each hidden layer.
    :param output_size: The dimensionality of the output
    :param distribution: The form of the output distribution (currently 'gaussian' or 'bernoulli')
    :param hidden_activation: A string indicating the type of each hidden layer.
        {'sig', 'tanh', 'rect-lin', 'lin', 'softmax'}
    :param w_init: A function which, given (input dims, output dims), returns an initial weight matrix.
    """
    all_layer_sizes = [input_size] + hidden_sizes
    all_layer_activations = [hidden_activation] * len(hidden_sizes)
    # One (linear transform, nonlinearity) pair per hidden layer, flattened into a single list.
    processing_chain = sum([
        [FullyConnectedTransform(w=w_init(pre_size, post_size)), get_named_activation_function(activation_fcn)]
        for (pre_size, post_size), activation_fcn in zip(zip(all_layer_sizes[:-1], all_layer_sizes[1:]), all_layer_activations)
        ], [])
    # Output layer: two parallel linear transforms for a Gaussian output distribution,
    # or a single sigmoid layer for a Bernoulli output distribution.
    distribution_function = \
        Branch(
            FullyConnectedTransform(w=w_init(all_layer_sizes[-1], output_size)),
            FullyConnectedTransform(w=w_init(all_layer_sizes[-1], output_size))) \
        if distribution == 'gaussian' else \
        Chain(FullyConnectedTransform(w=w_init(all_layer_sizes[-1], output_size)), get_named_activation_function('sig')) \
        if distribution == 'bernoulli' else \
        bad_value(distribution)
    self.distribution = distribution
    self.chain = Chain(*(processing_chain + [distribution_function]))
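# A minimal, self-contained sketch (plain Python, hypothetical sizes) of how the constructor above
# pairs consecutive layer sizes and flattens the per-layer (transform, activation) pairs into one
# chain. 'FC' and 'ACT' are placeholders standing in for FullyConnectedTransform and the named
# activation function; they are not the library's classes.
input_size, hidden_sizes, hidden_activation = 784, [200, 100], 'sig'
all_layer_sizes = [input_size] + hidden_sizes                          # [784, 200, 100]
size_pairs = list(zip(all_layer_sizes[:-1], all_layer_sizes[1:]))      # [(784, 200), (200, 100)]
chain = sum([[('FC', pre, post), ('ACT', hidden_activation)] for pre, post in size_pairs], [])
# chain == [('FC', 784, 200), ('ACT', 'sig'), ('FC', 200, 100), ('ACT', 'sig')]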
def compute_activations(self, input_data, do_round=True):
    layer_input = input_data
    layer_signals = []
    for i, (w, b, k) in enumerate(zip(self.ws, self.bs, self.get_scales())):
        scaled_input = layer_input * k
        if not do_round:
            eta = None
            spikes = scaled_input
        else:
            # Round the scaled input, but pass the gradient straight through the rounding:
            # the rounding residual eta is disconnected from the gradient graph.
            eta = tt.round(scaled_input) - scaled_input
            spikes = scaled_input + disconnected_grad(eta)
        nonlinearity = get_named_activation_function(
            self.hidden_activations if i < len(self.ws) - 1 else self.output_activation)
        output = nonlinearity((spikes / k).dot(w) + b)
        layer_signals.append({
            'input': layer_input,
            'scaled_input': scaled_input,
            'eta': eta,
            'spikes': spikes,
            'output': output
            })
        layer_input = output
    return layer_signals
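# A rough numpy illustration (not the Theano graph above) of the straight-through rounding trick:
# the forward value of 'spikes' equals the rounded, scaled input, while disconnected_grad(eta) in
# the Theano version means the backward pass sees 'spikes' as if it were just 'scaled_input'
# (gradient of 1 through the rounding).
import numpy as np

scaled_input = np.array([0.2, 1.7, -0.4])
eta = np.round(scaled_input) - scaled_input   # rounding residual; its gradient is blocked in the Theano version
spikes = scaled_input + eta                   # forward value: exactly np.round(scaled_input)
assert np.allclose(spikes, np.round(scaled_input))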
def __init__(self, activation):
    """
    activation: a name for the activation function. {'relu', 'sig', 'tanh', ...}
    """
    self._activation_name = activation
    self.activation = get_named_activation_function(activation) if isinstance(activation, basestring) else activation
def __init__(self, linear_transform, nonlinearity):
    """
    linear_transform: Can be:
        A callable (e.g. FullyConnectedBridge/ConvolutionalBridge) which does a linear transform on the data.
        A numpy array - in which case it will be used to instantiate a linear transform.
    """
    if isinstance(linear_transform, np.ndarray):
        assert (linear_transform.ndim == 2 and nonlinearity != 'maxout') or (linear_transform.ndim == 3 and nonlinearity == 'maxout'), \
            'Your weight matrix must be 2-D (or 3-D if you have maxout units)'
        linear_transform = FullyConnectedTransform(w=linear_transform)
    if isinstance(nonlinearity, str):
        nonlinearity = get_named_activation_function(nonlinearity)
    self.linear_transform = linear_transform
    self.nonlinearity = nonlinearity
def __init__(self, w_xi, w_xf, w_xc, w_xo, w_hi, w_hf, w_hc, w_ho, w_co, b_i, b_f, b_c, b_o, hidden_layer_type='tanh'):
    """
    :param w_xi: Input-to-input-gate weights (shared variable of shape (n_inputs, n_hidden))
    :param w_xf: Input-to-forget-gate weights
    :param w_xc: Input-to-cell weights
    :param w_xo: Input-to-output-gate weights
    :param w_hi: Hidden-to-input-gate weights
    :param w_hf: Hidden-to-forget-gate weights
    :param w_hc: Hidden-to-cell weights
    :param w_ho: Hidden-to-output-gate weights
    :param w_co: Cell-to-output-gate (peephole) weights
    :param b_i: Input gate bias
    :param b_f: Forget gate bias
    :param b_c: Cell bias
    :param b_o: Output gate bias
    """
    self.n_inputs, self.n_hidden = w_xi.get_value().shape
    self.w_xi = w_xi
    self.w_xf = w_xf
    self.w_xc = w_xc
    self.w_xo = w_xo
    self.w_hi = w_hi
    self.w_hf = w_hf
    self.w_hc = w_hc
    self.w_ho = w_ho
    self.w_co = w_co
    self.b_i = b_i
    self.b_f = b_f
    self.b_c = b_c
    self.b_o = b_o
    self._hidden_activation = get_named_activation_function(hidden_layer_type)
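# The step function is not shown here; the parameter names suggest a conventional LSTM update
# roughly like the numpy sketch below (with a peephole term w_co from the cell to the output gate).
# This is an assumption for illustration only, not the class's actual update; 'sigm' and 'lstm_step'
# are hypothetical helpers.
import numpy as np

def sigm(u):
    return 1.0 / (1.0 + np.exp(-u))

def lstm_step(x, h, c, p, act=np.tanh):
    # p is a dict holding weights/biases named as in the constructor above.
    i = sigm(x.dot(p['w_xi']) + h.dot(p['w_hi']) + p['b_i'])                      # input gate
    f = sigm(x.dot(p['w_xf']) + h.dot(p['w_hf']) + p['b_f'])                      # forget gate
    c_new = f * c + i * act(x.dot(p['w_xc']) + h.dot(p['w_hc']) + p['b_c'])       # cell update
    o = sigm(x.dot(p['w_xo']) + h.dot(p['w_ho']) + c_new.dot(p['w_co']) + p['b_o'])  # output gate with peephole
    h_new = o * act(c_new)
    return h_new, c_new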
def __init__(self, n_input, n_hidden, initializer_fcn, input_layer_type='softmax', hidden_layer_type='tanh'):
    self.lstm = LSTMLayer.from_initializer(n_input=n_input, n_hidden=n_hidden, initializer_fcn=initializer_fcn,
        hidden_layer_type=hidden_layer_type)
    # Hidden-to-output projection, mapping the LSTM hidden state back onto the input space.
    self.w_hz = create_shared_variable(initializer_fcn, (n_hidden, n_input))
    self.b_z = create_shared_variable(0, n_input)
    self.output_activation = mysoftmax if input_layer_type == 'softmax' else get_named_activation_function(input_layer_type)
def __init__(self, activation):
    """
    activation: a name for the activation function. {'relu', 'sig', 'tanh', ...}
    """
    self.activation = get_named_activation_function(activation)