Example #1
    def __init__(self,
                 input_shape,
                 eps=1e-4,
                 momentum=0.1,
                 estimate=True,
                 axes="per-activation"):
        """

        Args:
            input_shape:
            eps:
            momentum:
            estimate:
            axes: ('per-activation', 'spatial' or a tuple of ints)
                The axes along which the input should be normalized. 'per-activation' normalizes per activation
                and is equal to axes=(0,). 'spatial' shares normalization factors across spatial dimensions
                (i.e., all dimensions past the second), which for 4D inputs would be equal to axes=(0, 2, 3).
        """
        self.axes = axes
        self.eps = eps
        self.momentum = momentum
        self.estimate = estimate
        self.running_mean = Constant(0.0)(input_shape)
        self.running_var = Constant(1.0)(input_shape)
        if self.estimate:
            self.gamma = Uniform(0.0, 1.0)(input_shape)
            self.beta = Constant(0.0)(input_shape)
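
The docstring above only names the two string shortcuts for axes; a minimal sketch of how they could resolve to concrete axis tuples (the helper name resolve_axes is hypothetical, not part of the class):

def resolve_axes(axes, ndim):
    # 'per-activation' normalizes over the batch axis only
    if axes == "per-activation":
        return (0,)
    # 'spatial' shares statistics across all dimensions past the second,
    # e.g. (0, 2, 3) for 4D inputs
    if axes == "spatial":
        return (0,) + tuple(range(2, ndim))
    # otherwise a tuple of ints is used as given
    return tuple(axes)

resolve_axes("spatial", 4)  # -> (0, 2, 3)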
Example #2
    def __init__(self,
                 n_input_ch,
                 n_output_ch,
                 kernel,
                 stride=(1, 1),
                 pad=(0, 0),
                 dilation=(1, 1),
                 input_shape=None,
                 weight_init=XavierNormal(),
                 bias_init=Constant(0.0),
                 use_bias=True):
        assert len(kernel) == 2
        assert len(stride) == 2
        self.n_input_ch = n_input_ch
        self.n_output_ch = n_output_ch
        self.kernel = kernel
        self.stride = stride
        self.pad = pad
        self.dilation = dilation
        self.input_shape = input_shape
        self.filter = theano.shared(
            weight_init((n_input_ch, n_output_ch) + kernel))
        self.use_bias = use_bias
        if use_bias:
            self.bias = theano.shared(bias_init(n_output_ch))
Example #3
    def updates(self, params, cost: tt.Variable) -> OrderedDict:
        updates = OrderedDict()
        params = list(params)
        sum_grad = 0.0
        for p in params:
            d = None
            if hasattr(self, "state_keys"):
                # per-parameter optimizer state, initialized to zeros
                d = {
                    k: theano.shared(Constant(0.0)(p.get_value().shape))
                    for k in self.state_keys
                }

            g = tt.grad(cost=cost, wrt=p)
            # accumulate the gradient norm over all parameters
            sum_grad += g.norm(self.norm)
            us = self.base_optimizer.update_one(p, g, d)
            updates.update(us)

        # rescale every update when the accumulated norm exceeds the threshold
        scale = ifelse(sum_grad > self.threshold, self.threshold / sum_grad,
                       1.0)
        for p in params:
            updates[p] *= scale
        return updates
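
The ifelse above implements norm-based gradient clipping: once the accumulated gradient norm exceeds the threshold, every update is rescaled by threshold / norm. A minimal sketch of the same rule in plain Python (the names below are illustrative, not taken from the class):

def clip_scale(grad_norm, threshold):
    # identity below the threshold, threshold / norm above it,
    # so the effective norm never exceeds the threshold
    return threshold / grad_norm if grad_norm > threshold else 1.0

clip_scale(10.0, 5.0)  # -> 0.5
clip_scale(2.0, 5.0)   # -> 1.0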

    # class YellowFin(Optimizer):


#     def __init__(self, lr=0.1, mu=0.0, clip_thresh=None, weight_decay=0.0, beta=0.999,
#                  curv_win_width=20, zero_debias=True, delta_mu=0.0, auto_clip_fac=None):
#         self.base_optimizer = SGD(lr=lr, momentum=)
#         self.state_keys =
Example #4
    def __init__(self,
                 n_input,
                 n_output,
                 weight_init=XavierNormal(1.0),
                 bias_init=Constant(0.0)):
        self.n_input = n_input
        self.n_output = n_output
        self.weight = theano.shared(weight_init((n_input, n_output)),
                                    name="weight")
        self.bias = theano.shared(bias_init(n_output), name="bias")
Example #5
    def __init__(self,
                 input_dim,
                 output_dim,
                 weight_init=XavierNormal(),
                 bias_init=Constant(0.0),
                 name="",
                 impl=RNNImpl.auto,
                 n_batch=1):
        self.n_batch = n_batch
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.params = []  # TODO: cuDNN conversion
        self.non_cudnn_params = []

        def register(init, shape, name):
            v = theano.shared(init(shape), name=name)
            self.params.append(v)
            self.non_cudnn_params.append(v)
            return v

        self.W_i = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_i")
        self.b_wi = register(bias_init, (output_dim, ), name=name + ".b_wi")

        self.W_f = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_f")
        self.b_wf = register(bias_init, (output_dim, ), name=name + ".b_wf")

        self.W_c = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_c")
        self.b_wc = register(bias_init, (output_dim, ), name=name + ".b_wc")

        self.W_o = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_o")
        self.b_wo = register(bias_init, (output_dim, ), name=name + ".b_wo")

        self.R_i = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_i")
        self.b_ri = register(bias_init, (output_dim, ), name=name + ".b_ri")

        self.R_f = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_f")
        self.b_rf = register(bias_init, (output_dim, ), name=name + ".b_rf")

        self.R_c = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_c")
        self.b_rc = register(bias_init, (output_dim, ), name=name + ".b_rc")

        self.R_o = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_o")
        self.b_ro = register(bias_init, (output_dim, ), name=name + ".b_ro")

        self.impl = impl  # NOTE: set this only after all parameters have been initialized
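
The registration order above yields one input weight, one recurrent weight and two biases per gate. Assuming these parameters feed the standard LSTM step, a rough sketch of that wiring (the equations below are the textbook formulation, not code from this class):

import theano.tensor as tt

def lstm_step(x, h_prev, c_prev, p):
    # p is an instance of the layer above; i, f, o are the input, forget
    # and output gates, c_tilde the candidate cell state
    sigmoid = tt.nnet.sigmoid
    i = sigmoid(tt.dot(x, p.W_i) + tt.dot(h_prev, p.R_i) + p.b_wi + p.b_ri)
    f = sigmoid(tt.dot(x, p.W_f) + tt.dot(h_prev, p.R_f) + p.b_wf + p.b_rf)
    o = sigmoid(tt.dot(x, p.W_o) + tt.dot(h_prev, p.R_o) + p.b_wo + p.b_ro)
    c_tilde = tt.tanh(tt.dot(x, p.W_c) + tt.dot(h_prev, p.R_c) + p.b_wc + p.b_rc)
    c = f * c_prev + i * c_tilde
    h = o * tt.tanh(c)
    return h, c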
Example #6
    def updates(self, params, cost: tt.Variable) -> OrderedDict:
        updates = OrderedDict()
        for p in params:
            d = None
            if hasattr(self, "state_keys"):
                d = {
                    k: theano.shared(Constant(0.0)(p.get_value().shape))
                    for k in self.state_keys
                }

            g = tt.grad(cost=cost, wrt=p)
            us = self.update_one(p, g, d)
            updates.update(us)
        return updates
Example #7
    def __init__(self,
                 input_dim,
                 output_dim,
                 weight_init=XavierNormal(),
                 bias_init=Constant(0.0),
                 name="",
                 impl=RNNImpl.auto,
                 n_batch=1):
        self.n_batch = n_batch
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.params = []  # TODO: cuDNN conversion
        self.non_cudnn_params = []

        def register(init, shape, name):
            v = theano.shared(init(shape), name=name)
            self.params.append(v)
            self.non_cudnn_params.append(v)
            return v

        # NOTE: do not change this initialization order; the cuDNN parameter transfer relies on it
        self.W_r = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_r")
        self.b_wr = register(bias_init, (output_dim, ), name=name + ".b_wr")

        self.W_i = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_i")
        self.b_wi = register(bias_init, (output_dim, ), name=name + ".b_wi")

        self.W_h = register(weight_init, (input_dim, output_dim),
                            name=name + ".W_h")
        self.b_wh = register(bias_init, (output_dim, ), name=name + ".b_wh")

        self.R_r = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_r")
        self.b_rr = register(bias_init, (output_dim, ), name=name + ".b_rr")

        self.R_i = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_i")
        self.b_ru = register(bias_init, (output_dim, ), name=name + ".b_ru")

        self.R_h = register(weight_init, (output_dim, output_dim),
                            name=name + ".R_h")
        self.b_rh = register(bias_init, (output_dim, ), name=name + ".b_rh")

        self.impl = impl  # NOTE: set this only after all parameters have been initialized
Example #8
    def __init__(self,
                 n_input,
                 n_output,
                 activation=tt.tanh,
                 weight_init=XavierNormal(1.0),
                 bias_init=Constant(0.0),
                 impl=RNNImpl.auto):
        self.impl = impl
        self.n_input = n_input
        self.n_output = n_output
        self.activation = activation
        self.weight_hx = theano.shared(weight_init((n_input, n_output)),
                                       name="weight_hx")
        self.weight_hh = theano.shared(weight_init((n_output, n_output)),
                                       name="weight_hh")
        self.bias = theano.shared(bias_init(n_output), name="bias")
        self.state = None
Example #9
    def _params_to_cudnn(self):
        from theano.gpuarray import dnn
        from theano.gpuarray.type import gpuarray_shared_constructor
        assert dnn.dnn_available(None)
        self._rnn_block = dnn.RNNBlock(theano.config.floatX,
                                       self.hidden_dim,
                                       num_layers=1,
                                       input_mode="linear",
                                       rnn_mode=self.rnn_type,
                                       direction_mode="unidirectional")
        param_size = self._rnn_block.get_param_size(
            [self.n_batch, self.input_dim])  # TODO: investigate n_batch handling
        self.params = [gpuarray_shared_constructor(Constant(0.0)(param_size))]
        cs = self._rnn_block.split_params(self.params[0],
                                          layer=0,
                                          input_size=[
                                              self.n_batch, self.input_dim
                                          ])  # TODO: multi-layer support
        # copy each registered parameter into the packed cuDNN buffer;
        # this relies on the registration order matching cuDNN's layout
        for c, p in zip(cs, self.non_cudnn_params):
            c[:] = p.get_value(borrow=True, return_internal_type=True)
Example #10
    def __init__(self,
                 n_input_ch,
                 n_output_ch,
                 kernel,
                 weight_init=XavierNormal(),
                 bias_init=Constant(0.0)):
        assert kernel % 2 == 1
        self.kernel = kernel
        self.pad = (kernel - 1) // 2
        self.n_output_ch = n_output_ch
        self.conv_wx = Conv1D(n_input_ch,
                              n_output_ch * 3,
                              kernel=self.kernel,
                              pad=self.pad,
                              weight_init=weight_init,
                              bias_init=bias_init)
        self.conv_wh = Conv1D(n_output_ch,
                              n_output_ch * 3,
                              kernel=self.kernel,
                              pad=self.pad,
                              weight_init=weight_init,
                              bias_init=bias_init)
Example #11
    def __init__(self,
                 n_input_ch,
                 n_output_ch,
                 kernel,
                 stride=1,
                 pad=0,
                 dilation=1,
                 input_shape=None,
                 weight_init=XavierNormal(),
                 bias_init=Constant(0.0),
                 use_bias=True):
        self.n_input_ch = n_input_ch
        self.n_output_ch = n_output_ch
        self.kernel = kernel
        self.stride = stride
        self.pad = pad
        self.dilation = dilation
        self.input_shape = input_shape
        w = weight_init(
            (n_input_ch, n_output_ch, kernel, 1)).transpose(1, 0, 2, 3)
        self.filter = theano.shared(w)
        self.use_bias = use_bias
        if use_bias:
            self.bias = theano.shared(bias_init(n_output_ch))
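
This signature matches the Conv1D calls in Example #10, so a hedged usage sketch (the channel counts are made up) would be:

conv = Conv1D(n_input_ch=64, n_output_ch=128, kernel=3, pad=1)  # same-length output at stride 1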