Example #1
 def __init__(self,
              controller,
              num_shifts=3,
              memory_shape=(128, 20),
              W_hid_to_sign=None,
              b_hid_to_sign=lasagne.init.Constant(0.),
              nonlinearity_sign=nonlinearities.ClippedLinear(low=-1.,
                                                             high=1.),
              W_hid_to_key=lasagne.init.GlorotUniform(),
              b_hid_to_key=lasagne.init.Constant(0.),
              nonlinearity_key=nonlinearities.ClippedLinear(low=0.,
                                                            high=1.),
              W_hid_to_beta=lasagne.init.GlorotUniform(),
              b_hid_to_beta=lasagne.init.Constant(0.),
              nonlinearity_beta=lasagne.nonlinearities.rectify,
              W_hid_to_gate=lasagne.init.GlorotUniform(),
              b_hid_to_gate=lasagne.init.Constant(0.),
              nonlinearity_gate=T.nnet.hard_sigmoid,
              W_hid_to_shift=lasagne.init.GlorotUniform(),
              b_hid_to_shift=lasagne.init.Constant(0.),
              nonlinearity_shift=lasagne.nonlinearities.softmax,
              W_hid_to_gamma=lasagne.init.GlorotUniform(),
              b_hid_to_gamma=lasagne.init.Constant(0.),
              nonlinearity_gamma=lambda x: 1. + lasagne.nonlinearities.
              rectify(x),
              weights_init=init.OneHot(),
              learn_init=False,
              **kwargs):
     super(ReadHead, self).__init__(controller,
                                    num_shifts=num_shifts,
                                    memory_shape=memory_shape,
                                    W_hid_to_sign=W_hid_to_sign,
                                    b_hid_to_sign=b_hid_to_sign,
                                    nonlinearity_sign=nonlinearity_sign,
                                    W_hid_to_key=W_hid_to_key,
                                    b_hid_to_key=b_hid_to_key,
                                    nonlinearity_key=nonlinearity_key,
                                    W_hid_to_beta=W_hid_to_beta,
                                    b_hid_to_beta=b_hid_to_beta,
                                    nonlinearity_beta=nonlinearity_beta,
                                    W_hid_to_gate=W_hid_to_gate,
                                    b_hid_to_gate=b_hid_to_gate,
                                    nonlinearity_gate=nonlinearity_gate,
                                    W_hid_to_shift=W_hid_to_shift,
                                    b_hid_to_shift=b_hid_to_shift,
                                    nonlinearity_shift=nonlinearity_shift,
                                    W_hid_to_gamma=W_hid_to_gamma,
                                    b_hid_to_gamma=b_hid_to_gamma,
                                    nonlinearity_gamma=nonlinearity_gamma,
                                    weights_init=weights_init,
                                    learn_init=learn_init,
                                    **kwargs)
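
A minimal NumPy sketch (not part of the library) of what a ReadHead's attention weights are used for: the read vector is the weighted sum of the memory rows, as in the NTM paper. The memory contents and weights below are placeholders.

import numpy as np

memory_shape = (128, 20)               # (slots, slot size), matching the default above
M = np.random.randn(*memory_shape)     # placeholder memory state
w = np.zeros(memory_shape[0])          # attention weights over the 128 slots
w[0] = 1.                              # e.g. the OneHot() pattern used for weights_init
r = w.dot(M)                           # read vector, shape (20,)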
Example #2
    def __init__(self,
                 controller,
                 num_shifts=3,
                 memory_shape=(128, 20),
                 W_hid_to_key=lasagne.init.GlorotUniform(),
                 b_hid_to_key=lasagne.init.Constant(0.),
                 nonlinearity_key=nonlinearities.ClippedLinear(low=0.,
                                                               high=1.),
                 W_hid_to_beta=lasagne.init.GlorotUniform(),
                 b_hid_to_beta=lasagne.init.Constant(0.),
                 nonlinearity_beta=lasagne.nonlinearities.rectify,
                 W_hid_to_gate=lasagne.init.GlorotUniform(),
                 b_hid_to_gate=lasagne.init.Constant(0.),
                 nonlinearity_gate=nonlinearities.hard_sigmoid,
                 W_hid_to_shift=lasagne.init.GlorotUniform(),
                 b_hid_to_shift=lasagne.init.Constant(0.),
                 nonlinearity_shift=lasagne.nonlinearities.softmax,
                 W_hid_to_gamma=lasagne.init.GlorotUniform(),
                 b_hid_to_gamma=lasagne.init.Constant(0.),
                 nonlinearity_gamma=lambda x: 1. + lasagne.nonlinearities.
                 rectify(x),
                 weights_init=init.OneHot(),
                 learn_init=False,
                 **kwargs):
        super(Head, self).__init__(controller, **kwargs)

        self.memory_shape = memory_shape
        self.name = kwargs.get('name', 'head')
        self.learn_init = learn_init

        # Key
        self.W_hid_to_key = self.add_param(W_hid_to_key, (1, self.input_shape[1], \
            self.memory_shape[1]), name=self.name + '.key.W')
        self.b_hid_to_key = self.add_param(b_hid_to_key, (1, self.memory_shape[1]), \
            name=self.name + '.key.b', regularizable=False)
        self.nonlinearity_key = nonlinearity_key
        # Beta
        self.W_hid_to_beta = self.add_param(W_hid_to_beta, (1, self.input_shape[1], \
            1), name=self.name + '.beta.W')
        self.b_hid_to_beta = self.add_param(b_hid_to_beta, (1, 1), \
            name=self.name + '.beta.b', regularizable=False)
        self.nonlinearity_beta = nonlinearity_beta
        # Gate
        self.W_hid_to_gate = self.add_param(W_hid_to_gate, (1, self.input_shape[1], \
            1), name=self.name + '.gate.W')
        self.b_hid_to_gate = self.add_param(b_hid_to_gate, (1, 1), \
            name=self.name + '.gate.b', regularizable=False)
        self.nonlinearity_gate = nonlinearity_gate
        # Shift
        self.num_shifts = num_shifts
        self.W_hid_to_shift = self.add_param(W_hid_to_shift, (1, self.input_shape[1], \
            self.num_shifts), name=self.name + '.shift.W')
        self.b_hid_to_shift = self.add_param(b_hid_to_shift, (1, self.num_shifts), \
            name=self.name + '.shift.b', regularizable=False)
        self.nonlinearity_shift = nonlinearity_shift
        # Gamma
        self.W_hid_to_gamma = self.add_param(W_hid_to_gamma, (1, self.input_shape[1], \
            1), name=self.name + '.gamma.W')
        self.b_hid_to_gamma = self.add_param(b_hid_to_gamma, (1, 1), \
            name=self.name + '.gamma.b', regularizable=False)
        self.nonlinearity_gamma = nonlinearity_gamma

        self.weights_init = self.add_param(weights_init,
                                           (1, self.memory_shape[0]),
                                           name='weights_init',
                                           trainable=learn_init,
                                           regularizable=False)
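
The key, beta, gate, shift and gamma parameters created above feed the standard NTM addressing pipeline: content addressing sharpened by beta, interpolation with the previous weights via the gate, a circular shift, and sharpening by gamma. A self-contained NumPy sketch of those equations (an illustration only, not the library's Theano implementation):

import numpy as np

def address(M, w_prev, key, beta, g, s, gamma, eps=1e-6):
    # Content addressing: cosine similarity between the key and each memory row,
    # sharpened by beta and normalised with a softmax.
    sim = M.dot(key) / (np.linalg.norm(M, axis=1) * np.linalg.norm(key) + eps)
    w_c = np.exp(beta * sim)
    w_c /= w_c.sum()
    # Interpolation with the previous weights, controlled by the gate g in [0, 1].
    w_g = g * w_c + (1. - g) * w_prev
    # Location addressing: circular convolution with the shift distribution s
    # (num_shifts entries centred on zero).
    shifts = np.arange(len(s)) - len(s) // 2
    w_s = np.zeros_like(w_g)
    for k, s_k in zip(shifts, s):
        w_s += s_k * np.roll(w_g, k)
    # Sharpening with gamma >= 1 (cf. nonlinearity_gamma = 1 + rectify above).
    w = w_s ** gamma
    return w / w.sum()

M = np.random.randn(128, 20)
w_prev = np.full(128, 1. / 128)
w = address(M, w_prev, key=np.random.rand(20), beta=2., g=0.9,
            s=np.array([0.1, 0.8, 0.1]), gamma=1.5)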
Example #3
 def __init__(self,
              controller,
              num_shifts=3,
              memory_shape=(128, 20),
              W_hid_to_key=lasagne.init.GlorotUniform(),
              b_hid_to_key=lasagne.init.Constant(0.),
              nonlinearity_key=nonlinearities.ClippedLinear(low=0.,
                                                            high=1.),
              W_hid_to_beta=lasagne.init.GlorotUniform(),
              b_hid_to_beta=lasagne.init.Constant(0.),
              nonlinearity_beta=lasagne.nonlinearities.rectify,
              W_hid_to_gate=lasagne.init.GlorotUniform(),
              b_hid_to_gate=lasagne.init.Constant(0.),
              nonlinearity_gate=nonlinearities.hard_sigmoid,
              W_hid_to_shift=lasagne.init.GlorotUniform(),
              b_hid_to_shift=lasagne.init.Constant(0.),
              nonlinearity_shift=lasagne.nonlinearities.softmax,
              W_hid_to_gamma=lasagne.init.GlorotUniform(),
              b_hid_to_gamma=lasagne.init.Constant(0.),
              nonlinearity_gamma=lambda x: 1. + lasagne.nonlinearities.
              rectify(x),
              W_hid_to_erase=lasagne.init.GlorotUniform(),
              b_hid_to_erase=lasagne.init.Constant(0.),
              nonlinearity_erase=nonlinearities.hard_sigmoid,
              W_hid_to_add=lasagne.init.GlorotUniform(),
              b_hid_to_add=lasagne.init.Constant(0.),
              nonlinearity_add=nonlinearities.ClippedLinear(low=0.,
                                                            high=1.),
              weights_init=init.OneHot(),
              learn_init=False,
              **kwargs):
     super(WriteHead, self).__init__(controller,
                                     num_shifts=num_shifts,
                                     memory_shape=memory_shape,
                                     W_hid_to_key=W_hid_to_key,
                                     b_hid_to_key=b_hid_to_key,
                                     nonlinearity_key=nonlinearity_key,
                                     W_hid_to_beta=W_hid_to_beta,
                                     b_hid_to_beta=b_hid_to_beta,
                                     nonlinearity_beta=nonlinearity_beta,
                                     W_hid_to_gate=W_hid_to_gate,
                                     b_hid_to_gate=b_hid_to_gate,
                                     nonlinearity_gate=nonlinearity_gate,
                                     W_hid_to_shift=W_hid_to_shift,
                                     b_hid_to_shift=b_hid_to_shift,
                                     nonlinearity_shift=nonlinearity_shift,
                                     W_hid_to_gamma=W_hid_to_gamma,
                                     b_hid_to_gamma=b_hid_to_gamma,
                                     nonlinearity_gamma=nonlinearity_gamma,
                                     weights_init=weights_init,
                                     learn_init=learn_init,
                                     **kwargs)
     # Erase
     self.W_hid_to_erase = self.add_param(W_hid_to_erase, (1, self.input_shape[1], \
         self.memory_shape[1]), name=self.name + '.erase.W')
     self.b_hid_to_erase = self.add_param(b_hid_to_erase, (1, self.memory_shape[1]), \
         name=self.name + '.erase.b', regularizable=False)
     self.nonlinearity_erase = nonlinearity_erase
     # Add
     self.W_hid_to_add = self.add_param(W_hid_to_add, (1, self.input_shape[1], \
         self.memory_shape[1]), name=self.name + '.add.W')
     self.b_hid_to_add = self.add_param(b_hid_to_add, (1, self.memory_shape[1]), \
         name=self.name + '.add.b', regularizable=False)
     self.nonlinearity_add = nonlinearity_add
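
The erase and add parameters defined above drive the NTM write update M_t = M_{t-1} * (1 - w (x) e) + w (x) a, where (x) denotes an outer product with the write weights. A NumPy illustration with placeholder values (not the library code):

import numpy as np

memory_shape = (128, 20)
M = np.random.randn(*memory_shape)                  # previous memory state (placeholder)
w = np.random.dirichlet(np.ones(memory_shape[0]))   # write weights over the slots
e = np.random.rand(memory_shape[1])                 # erase vector in [0, 1] (hard_sigmoid range)
a = np.random.rand(memory_shape[1])                 # add vector in [0, 1] (ClippedLinear range)

M_new = M * (1. - np.outer(w, e)) + np.outer(w, a)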
Example #4
    def __init__(self,
                 controller,
                 num_shifts=3,
                 memory_shape=(128, 20),
                 W_hid_to_sign=None,
                 b_hid_to_sign=lasagne.init.Constant(0.),
                 nonlinearity_sign=nonlinearities.ClippedLinear(low=-1.,
                                                                high=1.),
                 W_hid_to_key=lasagne.init.GlorotUniform(),
                 b_hid_to_key=lasagne.init.Constant(0.),
                 nonlinearity_key=nonlinearities.ClippedLinear(low=0.,
                                                               high=1.),
                 W_hid_to_beta=lasagne.init.GlorotUniform(),
                 b_hid_to_beta=lasagne.init.Constant(0.),
                 nonlinearity_beta=lasagne.nonlinearities.rectify,
                 W_hid_to_gate=lasagne.init.GlorotUniform(),
                 b_hid_to_gate=lasagne.init.Constant(0.),
                 nonlinearity_gate=nonlinearities.hard_sigmoid,
                 W_hid_to_shift=lasagne.init.GlorotUniform(),
                 b_hid_to_shift=lasagne.init.Constant(0.),
                 nonlinearity_shift=lasagne.nonlinearities.softmax,
                 W_hid_to_gamma=lasagne.init.GlorotUniform(),
                 b_hid_to_gamma=lasagne.init.Constant(0.),
                 nonlinearity_gamma=lambda x: 1. + lasagne.nonlinearities.
                 rectify(x),
                 weights_init=init.OneHot(),
                 learn_init=False,
                 **kwargs):
        super(Head, self).__init__(controller, **kwargs)

        self.memory_shape = memory_shape
        self.basename = kwargs.get('name', 'head')
        self.learn_init = learn_init

        if W_hid_to_sign is not None:
            self.sign = DenseLayer(controller,
                                   num_units=self.memory_shape[1],
                                   W=W_hid_to_sign,
                                   b=b_hid_to_sign,
                                   nonlinearity=nonlinearity_sign,
                                   name=self.basename + '.sign')
            self.W_hid_to_sign, self.b_hid_to_sign = self.sign.W, self.sign.b
        else:
            self.sign = None
            self.W_hid_to_sign, self.b_hid_to_sign = None, None

        self.key = DenseLayer(controller,
                              num_units=self.memory_shape[1],
                              W=W_hid_to_key,
                              b=b_hid_to_key,
                              nonlinearity=nonlinearity_key,
                              name=self.basename + '.key')
        self.W_hid_to_key, self.b_hid_to_key = self.key.W, self.key.b

        self.beta = DenseLayer(controller,
                               num_units=1,
                               W=W_hid_to_beta,
                               b=b_hid_to_beta,
                               nonlinearity=nonlinearity_beta,
                               name=self.basename + '.beta')
        self.W_hid_to_beta, self.b_hid_to_beta = self.beta.W, self.beta.b

        self.gate = DenseLayer(controller,
                               num_units=1,
                               W=W_hid_to_gate,
                               b=b_hid_to_gate,
                               nonlinearity=nonlinearity_gate,
                               name=self.basename + '.gate')
        self.W_hid_to_gate, self.b_hid_to_gate = self.gate.W, self.gate.b

        self.num_shifts = num_shifts
        self.shift = DenseLayer(controller,
                                num_units=num_shifts,
                                W=W_hid_to_shift,
                                b=b_hid_to_shift,
                                nonlinearity=nonlinearity_shift,
                                name=self.basename + '.shift')
        self.W_hid_to_shift, self.b_hid_to_shift = self.shift.W, self.shift.b

        self.gamma = DenseLayer(controller,
                                num_units=1,
                                W=W_hid_to_gamma,
                                b=b_hid_to_gamma,
                                nonlinearity=nonlinearity_gamma,
                                name=self.basename + '.gamma')
        self.W_hid_to_gamma, self.b_hid_to_gamma = self.gamma.W, self.gamma.b

        self.weights_init = self.add_param(weights_init,
                                           (1, self.memory_shape[0]),
                                           name='weights_init',
                                           trainable=learn_init,
                                           regularizable=False)
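
Example #4 expresses each head parameter as a separate Lasagne DenseLayer on top of the controller. A minimal standalone sketch of that pattern, using an InputLayer as a stand-in for the controller and standard Lasagne nonlinearities in place of the library's ClippedLinear:

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer, get_output

controller = InputLayer((1, 100))      # stand-in for the controller's hidden state
key = DenseLayer(controller, num_units=20,
                 nonlinearity=lasagne.nonlinearities.rectify, name='head.key')
beta = DenseLayer(controller, num_units=1,
                  nonlinearity=lasagne.nonlinearities.rectify, name='head.beta')

hid = T.matrix('hid')
fn = theano.function([hid], get_output([key, beta], inputs=hid))
k, b = fn(np.random.rand(1, 100).astype(theano.config.floatX))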
Example #5
    def __init__(self,
                 controller,
                 num_shifts=3,
                 memory_shape=(128, 20),
                 W_hid_to_sign=None,
                 b_hid_to_sign=lasagne.init.Constant(0.),
                 nonlinearity_sign=nonlinearities.ClippedLinear(low=-1.,
                                                                high=1.),
                 W_hid_to_key=lasagne.init.GlorotUniform(),
                 b_hid_to_key=lasagne.init.Constant(0.),
                 nonlinearity_key=nonlinearities.ClippedLinear(low=0.,
                                                               high=1.),
                 W_hid_to_beta=lasagne.init.GlorotUniform(),
                 b_hid_to_beta=lasagne.init.Constant(0.),
                 nonlinearity_beta=lasagne.nonlinearities.rectify,
                 W_hid_to_gate=lasagne.init.GlorotUniform(),
                 b_hid_to_gate=lasagne.init.Constant(0.),
                 nonlinearity_gate=nonlinearities.hard_sigmoid,
                 W_hid_to_shift=lasagne.init.GlorotUniform(),
                 b_hid_to_shift=lasagne.init.Constant(0.),
                 nonlinearity_shift=lasagne.nonlinearities.softmax,
                 W_hid_to_gamma=lasagne.init.GlorotUniform(),
                 b_hid_to_gamma=lasagne.init.Constant(0.),
                 nonlinearity_gamma=lambda x: 1. + lasagne.nonlinearities.
                 rectify(x),
                 W_hid_to_erase=lasagne.init.GlorotUniform(),
                 b_hid_to_erase=lasagne.init.Constant(0.),
                 nonlinearity_erase=nonlinearities.hard_sigmoid,
                 W_hid_to_add=lasagne.init.GlorotUniform(),
                 b_hid_to_add=lasagne.init.Constant(0.),
                 nonlinearity_add=nonlinearities.ClippedLinear(low=0.,
                                                               high=1.),
                 W_hid_to_sign_add=None,
                 b_hid_to_sign_add=lasagne.init.Constant(0.),
                 nonlinearity_sign_add=nonlinearities.ClippedLinear(low=-1.,
                                                                    high=1.),
                 weights_init=init.OneHot(),
                 learn_init=False,
                 **kwargs):
        super(WriteHead, self).__init__(controller,
                                        num_shifts=num_shifts,
                                        memory_shape=memory_shape,
                                        W_hid_to_sign=W_hid_to_sign,
                                        b_hid_to_sign=b_hid_to_sign,
                                        nonlinearity_sign=nonlinearity_sign,
                                        W_hid_to_key=W_hid_to_key,
                                        b_hid_to_key=b_hid_to_key,
                                        nonlinearity_key=nonlinearity_key,
                                        W_hid_to_beta=W_hid_to_beta,
                                        b_hid_to_beta=b_hid_to_beta,
                                        nonlinearity_beta=nonlinearity_beta,
                                        W_hid_to_gate=W_hid_to_gate,
                                        b_hid_to_gate=b_hid_to_gate,
                                        nonlinearity_gate=nonlinearity_gate,
                                        W_hid_to_shift=W_hid_to_shift,
                                        b_hid_to_shift=b_hid_to_shift,
                                        nonlinearity_shift=nonlinearity_shift,
                                        W_hid_to_gamma=W_hid_to_gamma,
                                        b_hid_to_gamma=b_hid_to_gamma,
                                        nonlinearity_gamma=nonlinearity_gamma,
                                        weights_init=weights_init,
                                        learn_init=learn_init,
                                        **kwargs)

        self.erase = DenseLayer(controller,
                                num_units=self.memory_shape[1],
                                W=W_hid_to_erase,
                                b=b_hid_to_erase,
                                nonlinearity=nonlinearity_erase,
                                name=self.basename + '.erase')
        self.W_hid_to_erase, self.b_hid_to_erase = self.erase.W, self.erase.b

        self.add = DenseLayer(controller,
                              num_units=self.memory_shape[1],
                              W=W_hid_to_add,
                              b=b_hid_to_add,
                              nonlinearity=nonlinearity_add,
                              name=self.basename + '.add')
        self.W_hid_to_add, self.b_hid_to_add = self.add.W, self.add.b

        if W_hid_to_sign_add is not None:
            self.sign_add = DenseLayer(controller,
                                       num_units=self.memory_shape[1],
                                       W=W_hid_to_sign_add,
                                       b=b_hid_to_sign_add,
                                       nonlinearity=nonlinearity_sign_add,
                                       name=self.basename + '.sign_add')
            self.W_hid_to_sign_add, self.b_hid_to_sign_add = self.sign_add.W, self.sign_add.b
        else:
            self.sign_add = None
            self.W_hid_to_sign_add, self.b_hid_to_sign_add = None, None
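
Example #5 adds optional sign and sign_add layers whose outputs are clipped to [-1, 1]. Below is a NumPy sketch of a signed write step; applying the sign gate as an elementwise factor on the add vector is an assumption made for illustration, not a statement of the library's exact semantics.

import numpy as np

memory_shape = (128, 20)
M = np.random.randn(*memory_shape)
w = np.random.dirichlet(np.ones(memory_shape[0]))
e = np.random.rand(memory_shape[1])                   # erase vector in [0, 1]
a = np.random.rand(memory_shape[1])                   # add vector in [0, 1]
s_add = np.random.uniform(-1., 1., memory_shape[1])   # assumed sign gate applied to `a`

M_new = M * (1. - np.outer(w, e)) + np.outer(w, s_add * a)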