 def initial_states(self, batch_size, **kwargs):
     return (pack(self.transition.initial_states(batch_size, **kwargs)) +
             pack(self.attention.initial_glimpses(
                 batch_size, kwargs[self.attended_name])) +
             pack(self.topical_attention.initial_glimpses(
                 batch_size, kwargs[self.topical_attended_name])))
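All of the snippets on this page revolve around pack from blocks.utils, which normalizes its argument to a list so that single variables and tuples of variables can be treated uniformly. A minimal sketch of its behavior (paraphrased, not the library source verbatim):

def pack(arg):
    # Lists and tuples are cast to a list; anything else is wrapped
    # in a singleton list.
    if isinstance(arg, (list, tuple)):
        return list(arg)
    return [arg]

# pack(x) == [x]; pack((a, b)) == [a, b]; pack([a, b]) == [a, b]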
Example 3
 def __init__(self, threshold, axis=None):
     axis = pack(axis) if axis is not None else ()
     self.axis = set(axis)
     self.threshold = shared_floatx(threshold, "threshold")
     add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
     if len(axis) != len(self.axis):
         raise ValueError("axis must be unique")
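A quick usage sketch of the constructor above (in Blocks this pattern appears in VariableClipping; treat the class name as illustrative): pack lets axis be a single int or a tuple, and the set comparison rejects duplicates.

clipper = VariableClipping(threshold=1.0, axis=0)       # pack(0) -> [0]
clipper = VariableClipping(threshold=1.0, axis=(0, 1))  # pack((0, 1)) -> [0, 1]
clipper = VariableClipping(threshold=1.0)               # no axis: empty set
# VariableClipping(1.0, axis=(1, 1)) raises ValueError("axis must be unique")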
Example 5
 def apply(self, input_):
     child_input = input_
     for child, application_method in zip(self.children,
                                          self.application_methods):
         output = getattr(child, application_method)(*pack(child_input))
         child_input = output
     return output
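The *pack(child_input) splat is what lets children with one output and children with several outputs be chained uniformly: a tuple from one child is unpacked into the positional arguments of the next. A self-contained sketch with plain functions standing in for bricks:

def double(x):
    return 2 * x

def split(x):
    return x // 3, x % 3          # two outputs, returned as a tuple

def add(a, b):
    return a + b

child_input = 10
for application_method in (double, split, add):
    output = application_method(*pack(child_input))
    child_input = output
assert output == 8                # add(*pack((6, 2)))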
Example 6
        def apply(self, application, *args, **kwargs):
            # extra_ndim is a mandatory parameter, but it is extracted
            # from **kwargs so that it is not confused with the
            # positional inputs
            extra_ndim = kwargs.get('extra_ndim', 0)

            inputs = dict(zip(application.inputs, args))
            inputs.update(dict_subset(kwargs, application.inputs,
                                      must_have=False))
            reshaped_inputs = inputs
            # To prevent pollution of the computation graph with no-ops
            if extra_ndim > 0:
                for name, input_ in inputs.items():
                    shape, ndim = input_.shape, input_.ndim
                    # Remember extra_dims for reshaping the outputs
                    # correctly. It does not matter which input we use,
                    # since the extra dimensions are assumed to match
                    # for all inputs.
                    extra_dims = shape[:extra_ndim]
                    new_first_dim = tensor.prod(shape[:extra_ndim + 1])
                    new_shape = tensor.join(
                        0, new_first_dim[None], shape[extra_ndim + 1:])
                    reshaped_inputs[name] = input_.reshape(
                        new_shape, ndim=ndim - extra_ndim)
            outputs = wrapped.__get__(self, None)(**reshaped_inputs)
            if extra_ndim == 0:
                return outputs
            reshaped_outputs = []
            for output in pack(outputs):
                shape, ndim = output.shape, output.ndim
                new_shape = tensor.join(
                    0, extra_dims, (shape[0] // tensor.prod(extra_dims))[None],
                    shape[1:])
                reshaped_outputs.append(
                    output.reshape(new_shape, ndim=ndim + extra_ndim))
            return reshaped_outputs
Example 7
        def apply(self, application, *args, **kwargs):
            # extra_ndim is a mandatory parameter, but it is extracted
            # from **kwargs so that it is not confused with the
            # positional inputs
            extra_ndim = kwargs.get("extra_ndim", 0)

            inputs = dict(zip(application.inputs, args))
            inputs.update(dict_subset(kwargs, application.inputs, must_have=False))
            reshaped_inputs = inputs
            # To prevent pollution of the computation graph with no-ops
            if extra_ndim > 0:
                for name, input_ in inputs.items():
                    shape, ndim = input_.shape, input_.ndim
                    # Remember extra_dims for reshaping the outputs
                    # correctly. It does not matter which input we use,
                    # since the extra dimensions are assumed to match
                    # for all inputs.
                    extra_dims = shape[:extra_ndim]
                    new_first_dim = tensor.prod(shape[: extra_ndim + 1])
                    new_shape = tensor.join(0, new_first_dim[None], shape[extra_ndim + 1 :])
                    reshaped_inputs[name] = input_.reshape(new_shape, ndim=ndim - extra_ndim)
            outputs = wrapped.__get__(self, None)(**reshaped_inputs)
            if extra_ndim == 0:
                return outputs
            reshaped_outputs = []
            for output in pack(outputs):
                shape, ndim = output.shape, output.ndim
                new_shape = tensor.join(0, extra_dims, (shape[0] // tensor.prod(extra_dims))[None], shape[1:])
                reshaped_outputs.append(output.reshape(new_shape, ndim=ndim + extra_ndim))
            return reshaped_outputs
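The wrapper above collapses the extra_ndim leading dimensions into the batch axis before the wrapped call and restores them afterwards. The same bookkeeping traced in NumPy (a sketch; the snippet does this symbolically with Theano):

import numpy

extra_ndim = 2
x = numpy.zeros((3, 4, 5, 6))                  # two extra dims, batch 5, features 6
extra_dims = x.shape[:extra_ndim]              # (3, 4)
flat = x.reshape((-1,) + x.shape[extra_ndim + 1:])    # (60, 6)
y = flat.sum(axis=1, keepdims=True)            # stand-in for the wrapped apply
restored = y.reshape(extra_dims + (-1,) + y.shape[1:])
assert restored.shape == (3, 4, 5, 1)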
Example 8
 def apply(self, input_):
     child_input = input_
     for _, application_method in zip(self.children,
                                      self.application_methods):
         output = application_method(*pack(child_input))
         child_input = output
     return output
Example 9
 def apply(self, states, attended):
     states = tensor.repeat(states[None, :, :], attended.shape[0], axis=0)
     match_vectors = tensor.concatenate([states, attended], axis=2)
     energies = self.shallow.apply(*pack(match_vectors))
     energies = energies.reshape(match_vectors.shape[:-1],
                                 ndim=match_vectors.ndim - 1)
     return energies
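Shape bookkeeping for the energy computation above, assuming attended has shape (length, batch, dim) and states has shape (batch, dim); a NumPy trace of the same operations:

import numpy

length, batch, dim = 7, 2, 5
states = numpy.zeros((batch, dim))
attended = numpy.zeros((length, batch, dim))
states = numpy.repeat(states[None, :, :], attended.shape[0], axis=0)
match_vectors = numpy.concatenate([states, attended], axis=2)
assert match_vectors.shape == (length, batch, 2 * dim)
# self.shallow maps the last axis to size 1, and the final reshape
# drops it, leaving energies of shape (length, batch)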
Example 10
 def expr(self, model, data, **kwargs):
     assert not model.supervised
     data = pack(data)
     data = [tensor.unbroadcast(var, *range(var.ndim))
             for var in data]
     return theano.clone(
         self.cost, replace=dict(zip(self.inputs.values(), data)))
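theano.clone rebuilds the cost expression with the stored placeholder inputs swapped for the incoming batch variables, without reconstructing the graph by hand. A minimal stand-alone use of the same call:

import theano
from theano import tensor

x = tensor.matrix('x')
cost = (x ** 2).sum()
x_new = tensor.matrix('x_new')
cost_new = theano.clone(cost, replace={x: x_new})   # same graph, new input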
Example 11
 def check_sparse(rng, axis, num_init, shape, weights_init=Constant(1.)):
     weights = SparseND(axis=axis, num_init=num_init,
                        weights_init=weights_init).generate(rng, shape)
     assert weights.shape == shape
     assert weights.dtype == theano.config.floatX
     if isinstance(num_init, numbers.Integral):
         nnz = numpy.prod([s for i, s in enumerate(shape)
                           if i in pack(axis)]) * num_init
         assert numpy.count_nonzero(weights) == nnz
     else:
         atom_size = numpy.prod([s for i, s in enumerate(shape)
                                 if i not in pack(axis)])
         nnz_atom = int(num_init * atom_size)
         num_atoms = numpy.prod([s for i, s in enumerate(shape)
                                 if i in pack(axis)])
         nnz = nnz_atom * num_atoms
         assert numpy.count_nonzero(weights) == nnz
Example 12
 def check_sparse(rng, axis, num_init, shape, weights_init=Constant(1.)):
     weights = SparseND(axis=axis,
                        num_init=num_init,
                        weights_init=weights_init).generate(rng, shape)
     assert weights.shape == shape
     assert weights.dtype == theano.config.floatX
     if isinstance(num_init, numbers.Integral):
         nnz = numpy.prod(
             [s for i, s in enumerate(shape) if i in pack(axis)]) * num_init
         assert numpy.count_nonzero(weights) == nnz
     else:
         atom_size = numpy.prod(
             [s for i, s in enumerate(shape) if i not in pack(axis)])
         nnz_atom = int(num_init * atom_size)
         num_atoms = numpy.prod(
             [s for i, s in enumerate(shape) if i in pack(axis)])
         nnz = nnz_atom * num_atoms
         assert numpy.count_nonzero(weights) == nnz
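For concreteness, take shape = (4, 6) and axis = 0 in the check above: an integral num_init = 2 gives nnz = 4 * 2 = 8, while a fractional num_init = 0.5 gives atom_size = 6, nnz_atom = int(0.5 * 6) = 3, num_atoms = 4, and hence nnz = 3 * 4 = 12.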
Example 13
 def apply(self, *args):
     child_input = args
     for application_method in self.application_methods:
         output = application_method(*pack(child_input))
         if not self.pruning_variables_initialized:
             self.layer_activities.append(output)
         child_input = output
     self.pruning_variables_initialized = True
     return output
Example 15
 def generate(self, rng, shape):
     axis_ind = pack(self.axis)
     other_ind = [i for i in range(len(shape)) if i not in axis_ind]
     axis_shapes = [shape[i] for i in axis_ind]
     other_shapes = [shape[i] for i in other_ind]
     matrix = super(SparseND, self).generate(rng,
                                             (numpy.prod(axis_shapes),
                                              numpy.prod(other_shapes)))
     unflattened = matrix.reshape(tuple(axis_shapes) + tuple(other_shapes))
     wrong_ind = axis_ind + other_ind
     transp_ind = [wrong_ind.index(i) for i in range(len(shape))]
     return unflattened.transpose(transp_ind)
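The final transpose undoes the axis reordering introduced by generating the matrix with the axis dimensions first. Traced in NumPy on a small example:

import numpy

shape = (2, 3, 4)
axis_ind, other_ind = [1], [0, 2]
matrix = numpy.arange(24).reshape((3, 8))            # (prod([3]), prod([2, 4]))
unflattened = matrix.reshape((3, 2, 4))              # axis dims come first
wrong_ind = axis_ind + other_ind                     # [1, 0, 2]
transp_ind = [wrong_ind.index(i) for i in range(3)]  # [1, 0, 2]
assert unflattened.transpose(transp_ind).shape == shape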
Example 16
 def scan_function(*args):
     args = list(args)
     arg_names = (list(sequences_given) + list(states_given) +
                  list(contexts_given))
     kwargs = dict(equizip(arg_names, args))
     kwargs.update(rest_kwargs)
     outputs = application(iterate=False, **kwargs)
     # We want to save the computation graph returned by the
     # `application_function` when it is called inside the
     # `theano.scan`.
     application_call.inner_inputs = args
     application_call.inner_outputs = pack(outputs)
     return outputs
Example 17
 def scan_function(*args):
     args = list(args)
     arg_names = (list(sequences_given) + list(states_given) +
                  list(contexts_given))
     kwargs = dict(zip(arg_names, args))
     kwargs.update(rest_kwargs)
     outputs = getattr(brick, application_function.__name__)(
         iterate=False, **kwargs)
     # We want to save the computation graph returned by the
     # `application_function` when it is called inside the
     # `theano.scan`.
     application_call.inner_inputs = args
     application_call.inner_outputs = pack(outputs)
     return outputs
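theano.scan hands the step function its arguments positionally, in the order: sequence slices, previous states (from outputs_info), then non-sequences; that is why arg_names is assembled in exactly that order before being zipped with *args. For comparison, a minimal self-contained scan:

import numpy
import theano
from theano import tensor

x = tensor.vector('x')
cumsum, _ = theano.scan(
    lambda el, acc: acc + el,        # (sequence slice, previous state)
    sequences=x,
    outputs_info=tensor.as_tensor_variable(
        numpy.asarray(0, dtype=theano.config.floatX)))
f = theano.function([x], cumsum)
# f([1., 2., 3.]) -> [1., 3., 6.]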
Example 18
 def apply(self, *args):
     child_input = args
     for application_method in self.application_methods:
         output = application_method(*pack(child_input))
         child_input = output
     return output
Example 19
 def apply(self, *args):
     output = args
     output = self.tanh.apply(*pack(output))
     output = self.linear.apply(*pack(output))
     return output
Example 20
    def __call__(self, *inputs, **kwargs):
        """Wraps an application method.

        This wrapper will provide some necessary pre- and post-processing
        of the Theano variables, such as tagging them with the brick that
        created them and naming them. These changes will apply to Theano
        variables given as positional arguments and keywords arguments.

        .. warning::

            Properly set tags are important for correct functioning of the
            framework. Do not provide inputs to your apply method in a way
            different than passing them as positional or keyword arguments,
            e.g. as list or tuple elements.

        Notes
        -----
        Application methods will allocate the brick parameters with a call
        :meth:`allocate` if they have not been allocated already.

        """
        last = Application._last_brick_applied
        if last and last != self.brick and self.brick not in last.children:
            raise ValueError("The brick {} called an apply method of the"
                             " brick {} without having it in the children"
                             " list."
                             .format(last, self.brick))

        return_dict = kwargs.pop('return_dict', False)
        return_list = kwargs.pop('return_list', False)
        assert not return_list or not return_dict

        arg_names, varargs_name, _, _ = inspect.getargspec(
            self.application_method)
        arg_names = arg_names[1:]

        call = ApplicationCall(self.brick, self)

        if 'application_call' in arg_names:
            kwargs['application_call'] = call

        def copy_and_tag(variable, role, name):
            if Brick.print_shapes:
                variable = put_hook(
                    variable, lambda x: logger.debug(
                        "{}.{}.{}.shape = {}".format(
                            self.brick.name, self.__name__, name, x.shape)))
            copy = variable.copy()
            copy.name = "{}_{}_{}".format(self.brick.name, self.__name__, name)
            copy.tag.application_call = call
            copy.tag.name = name
            copy.tag.role = role
            return copy

        if not self.brick.allocated:
            self.brick.allocate()
        if not self.brick.initialized and not self.brick.lazy:
            self.brick.initialize()
        inputs = list(inputs)
        for i, input_ in enumerate(inputs):
            name = (arg_names[i] if i < len(arg_names) else
                    "{}_{}".format(varargs_name, i - len(arg_names)))
            if isinstance(input_, tensor.Variable):
                inputs[i] = copy_and_tag(input_, VariableRole.INPUT,
                                         name)
        for key, value in kwargs.items():
            if isinstance(value, tensor.Variable):
                kwargs[key] = copy_and_tag(value, VariableRole.INPUT,
                                           key)
        Application._last_brick_applied = self.brick
        try:
            outputs = self.application_method(self.brick, *inputs, **kwargs)
        finally:
            Application._last_brick_applied = last
        # TODO allow user to return an OrderedDict
        outputs = pack(outputs)
        for i, output in enumerate(outputs):
            try:
                name = self.outputs[i]
            except (AttributeError, IndexError):
                name = "output_{}".format(i)
            if isinstance(output, tensor.Variable):
                # TODO Tag with dimensions, axes, etc. for error-checking
                outputs[i] = copy_and_tag(outputs[i],
                                          VariableRole.OUTPUT, name)
        if return_list:
            return outputs
        if return_dict:
            return OrderedDict(zip(self.outputs, outputs))
        return unpack(outputs)
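Typical call patterns for this wrapper, assuming a brick whose apply takes one input (the names here are illustrative):

y = brick.apply(x)                      # single output, unpacked from the list
ys = brick.apply(x, return_list=True)   # always a list of outputs
yd = brick.apply(x, return_dict=True)   # OrderedDict keyed by the declared output names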
Example 21
 def apply(self, states, attended, posTag):
     match_vectors = states + attended + posTag
     energies = self.shallow.apply(*pack(match_vectors))
     energies = energies.reshape(match_vectors.shape[:-1],
                                 ndim=match_vectors.ndim - 1)
     return energies
Example 22
        def recurrent_apply(brick, application, application_call, *args,
                            **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True``, the transition is iterated over the input
                sequences. ``True`` by default.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs,
                                          application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs,
                                         application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                shape = list(sequences_given.values())[0].shape
                n_steps = shape[0]
                batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {
                key: value
                for key, value in kwargs.items() if key not in scan_arguments
            }
            for value in rest_kwargs.values():
                if (isinstance(value, Variable)
                        and not is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            initial_states = brick.initial_states(batch_size,
                                                  as_dict=True,
                                                  *args,
                                                  **kwargs)
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        kwargs[state_name] = (kwargs[state_name](state_name,
                                                                 batch_size,
                                                                 *args,
                                                                 **kwargs))
                else:
                    try:
                        kwargs[state_name] = initial_states[state_name]
                    except KeyError:
                        raise KeyError(
                            "no initial state for '{}' of the brick {}".format(
                                state_name, brick.name))
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(
                    state, *range(state.ndim))

            def scan_function(*args):
                args = list(args)
                arg_names = (list(sequences_given) + [
                    output for output in application.outputs
                    if output in application.states
                ] + list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs

            outputs_info = [
                states_given[name] if name in application.states else None
                for name in application.outputs
            ]
            result, updates = theano.scan(
                scan_function,
                sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse,
                name='{}_{}_scan'.format(brick.name,
                                         application.application_name))
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result
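How the iteration keywords documented above are typically used, sketched with Blocks' SimpleRecurrent (input shapes are (time, batch, features)):

x = tensor.tensor3('inputs')
rnn = SimpleRecurrent(dim=100, activation=Tanh())
h = rnn.apply(inputs=x)                                   # scan over time
h_rev = rnn.apply(inputs=x, reverse=True)                 # process backwards
h_all = rnn.apply(inputs=x, return_initial_states=True)   # h_all[0] is the initial state
h1 = rnn.apply(inputs=x[0], states=h[-1], iterate=False)  # a single step, no scan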
Example 23
 def initial_states(self, batch_size, **kwargs):
     return (pack(self.transition.initial_states(
                  batch_size, **kwargs)) +
              pack([tensor.zeros((batch_size, self.representationDim // 2))]))
Example 24
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True``, the transition is iterated over the input
                sequences. ``True`` by default.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the :func:`theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs, application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs, application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                shape = list(sequences_given.values())[0].shape
                n_steps = shape[0]
                batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {key: value for key, value in kwargs.items()
                           if key not in scan_arguments}
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and not
                        is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        kwargs[state_name] = (
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs))
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = (
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs))
                    assert kwargs[state_name]
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(state,
                                                        *range(state.ndim))

            def scan_function(*args):
                args = list(args)
                arg_names = (list(sequences_given) +
                             [output for output in application.outputs
                              if output in application.states] +
                             list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs
            outputs_info = [
                states_given[name] if name in application.states
                else None
                for name in application.outputs]
            result, updates = theano.scan(
                scan_function, sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result
Example 25
 def apply(self, *args, **kwargs):
     outputs = super(Merge, self).apply(*args, **kwargs)
     outputs = pack(outputs)
     # Sum is often faster than tensor.sum(outputs, axis=0) for a
     # small number of outputs
     return sum(outputs)
Example 26
    def apply(self, bound_application, *args, **kwargs):
        as_dict = kwargs.pop('as_dict', False)
        as_list = kwargs.pop('as_list', False)
        call_id = kwargs.pop('call_id', None)
        if as_list and as_dict:
            raise ValueError

        brick = bound_application.brick

        # Find the names of the inputs to the application method
        args_names, varargs_name, _, _ = inspect.getargspec(
            self.application_function)
        args_names = args_names[1:]

        # Construct the ApplicationCall, used to store data in for this call
        call = ApplicationCall(bound_application)
        call.metadata['call_id'] = call_id
        args = list(args)
        if 'application' in args_names:
            args.insert(args_names.index('application'), bound_application)
        if 'application_call' in args_names:
            args.insert(args_names.index('application_call'), call)

        # Allocate before applying, and optionally initialize
        if not brick.allocated:
            brick.allocate()

        # Annotate all the input variables which are Theano variables

        for i, input_ in enumerate(args):
            if isinstance(input_, tensor.Variable):
                if i < len(args_names):
                    name = args_names[i]
                else:
                    name = "{}_{}".format(varargs_name, i - len(args_names))
                args[i] = copy_and_tag(input_, brick, call, INPUT,
                                       self.name, name)
        for name, input_ in kwargs.items():
            if isinstance(input_, tensor.Variable):
                kwargs[name] = copy_and_tag(input_, brick, call, INPUT,
                                            self.name, name)

        # Run the application method on the annotated variables
        last_brick = self.call_stack[-1] if self.call_stack else None
        if (last_brick and brick is not last_brick and
                brick not in last_brick.children):
            warnings.warn('Brick ' + str(last_brick) + ' tries '
                          'to call brick ' + str(brick) + ' which '
                          'is not in the list of its children. This could '
                          'be caused by a missing @application decorator.')
        self.call_stack.append(brick)
        try:
            outputs = self.application_function(brick, *args, **kwargs)
            outputs = pack(outputs)
        finally:
            self.call_stack.pop()

        # Rename and annotate output variables
        for i, output in enumerate(outputs):
            if isinstance(output, tensor.Variable):
                try:
                    name = bound_application.outputs[i]
                except AttributeError:
                    name = "output_{}".format(i)
                except IndexError:
                    reraise_as(ValueError("Unexpected outputs"))
                # TODO Tag with dimensions, axes, etc. for error-checking
                outputs[i] = copy_and_tag(outputs[i], brick, call,
                                          OUTPUT, self.name, name)

        # Return values
        if as_list:
            return outputs
        if as_dict:
            return OrderedDict(zip(bound_application.outputs, outputs))
        return unpack(outputs)
Example 27
    def __call__(self, *inputs, **kwargs):
        """Wraps an application method.

        This wrapper will provide some necessary pre- and post-processing
        of the Theano variables, such as tagging them with the brick that
        created them and naming them. These changes will apply to Theano
        variables given as positional arguments and keywords arguments.

        .. warning::

            Properly set tags are important for correct functioning of the
            framework. Do not provide inputs to your apply method in a way
            different than passing them as positional or keyword arguments,
            e.g. as list or tuple elements.

        Notes
        -----
        Application methods will allocate the brick parameters with a call
        :meth:`allocate` if they have not been allocated already.

        """
        last = Application._last_brick_applied
        if last and last != self.brick and self.brick not in last.children:
            raise ValueError("The brick {} called an apply method of the"
                             " brick {} without having it in the children"
                             " list.".format(last, self.brick))

        return_dict = kwargs.pop('return_dict', False)
        return_list = kwargs.pop('return_list', False)
        assert not return_list or not return_dict

        arg_names, varargs_name, _, _ = inspect.getargspec(
            self.application_method)
        arg_names = arg_names[1:]

        call = ApplicationCall(self.brick, self)

        if 'application_call' in arg_names:
            kwargs['application_call'] = call

        def copy_and_tag(variable, role, name):
            if Brick.print_shapes:
                variable = put_hook(
                    variable,
                    lambda x: logger.debug("{}.{}.{}.shape = {}".format(
                        self.brick.name, self.__name__, name, x.shape)))
            copy = variable.copy()
            copy.name = "{}_{}_{}".format(self.brick.name, self.__name__, name)
            copy.tag.application_call = call
            copy.tag.name = name
            copy.tag.role = role
            return copy

        if not self.brick.allocated:
            self.brick.allocate()
        if not self.brick.initialized and not self.brick.lazy:
            self.brick.initialize()
        inputs = list(inputs)
        for i, input_ in enumerate(inputs):
            name = (arg_names[i] if i < len(arg_names) else "{}_{}".format(
                varargs_name, i - len(arg_names)))
            if isinstance(input_, tensor.Variable):
                inputs[i] = copy_and_tag(input_, VariableRole.INPUT, name)
        for key, value in kwargs.items():
            if isinstance(value, tensor.Variable):
                kwargs[key] = copy_and_tag(value, VariableRole.INPUT, key)
        Application._last_brick_applied = self.brick
        try:
            outputs = self.application_method(self.brick, *inputs, **kwargs)
        finally:
            Application._last_brick_applied = last
        # TODO allow user to return an OrderedDict
        outputs = pack(outputs)
        for i, output in enumerate(outputs):
            try:
                name = self.outputs[i]
            except (AttributeError, IndexError):
                name = "output_{}".format(i)
            if isinstance(output, tensor.Variable):
                # TODO Tag with dimensions, axes, etc. for error-checking
                outputs[i] = copy_and_tag(outputs[i], VariableRole.OUTPUT,
                                          name)
        if return_list:
            return outputs
        if return_dict:
            return OrderedDict(zip(self.outputs, outputs))
        return unpack(outputs)
Example 28
 def expr(self, model, data, **kwargs):
     assert not model.supervised
     data = pack(data)
     data = [tensor.unbroadcast(var, *range(var.ndim)) for var in data]
     return theano.clone(self.cost,
                         replace=dict(zip(self.inputs.values(), data)))
Example 29
    def apply(self, bound_application, *args, **kwargs):
        as_dict = kwargs.pop('as_dict', False)
        as_list = kwargs.pop('as_list', False)
        if as_list and as_dict:
            raise ValueError

        brick = bound_application.brick

        # Find the names of the inputs to the application method
        args_names, varargs_name, _, _ = inspect.getargspec(
            self.application_function)
        args_names = args_names[1:]

        # Construct the ApplicationCall, used to store data in for this call
        call = ApplicationCall(brick, bound_application)
        args = list(args)
        if 'application' in args_names:
            args.insert(args_names.index('application'), bound_application)
        if 'application_call' in args_names:
            args.insert(args_names.index('application_call'), call)

        # Allocate before applying, and optionally initialize
        if not brick.allocated:
            brick.allocate()
        if not brick.initialized and not brick.lazy:
            brick.initialize()

        # Annotate all the input variables which are Theano variables
        def copy_and_tag(variable, role, name):
            """Helper method to copy a variable and annotate it."""
            copy = variable.copy()
            # Theano name
            copy.name = _variable_name(brick.name, self.name, name)
            add_annotation(copy, brick)
            add_annotation(copy, call)
            # Blocks name
            copy.tag.name = name
            add_role(copy, role)
            return copy

        for i, input_ in enumerate(args):
            if isinstance(input_, tensor.Variable):
                if i < len(args_names):
                    name = args_names[i]
                else:
                    name = "{}_{}".format(varargs_name, i - len(args_names))
                args[i] = copy_and_tag(input_, INPUT, name)
        for name, input_ in kwargs.items():
            if isinstance(input_, tensor.Variable):
                kwargs[name] = copy_and_tag(input_, INPUT, name)

        # Run the application method on the annotated variables
        if (self.call_stack and brick is not self.call_stack[-1] and
                brick not in self.call_stack[-1].children):
            raise ValueError('Brick ' + str(self.call_stack[-1]) + ' tries '
                             'to call brick ' + str(brick) + ' which '
                             'is not in the list of its children.')
        self.call_stack.append(brick)
        try:
            outputs = self.application_function(brick, *args, **kwargs)
            outputs = pack(outputs)
        finally:
            self.call_stack.pop()

        # Rename and annotate output variables
        for i, output in enumerate(outputs):
            if isinstance(output, tensor.Variable):
                try:
                    name = bound_application.outputs[i]
                except AttributeError:
                    name = "output_{}".format(i)
                except IndexError:
                    reraise_as(ValueError("Unexpected outputs"))
                # TODO Tag with dimensions, axes, etc. for error-checking
                outputs[i] = copy_and_tag(outputs[i], OUTPUT, name)

        # Return values
        if as_list:
            return outputs
        if as_dict:
            return OrderedDict(zip(bound_application.outputs, outputs))
        return unpack(outputs)
Example 30
 def apply(self, states, attended):
     states = self.linear.apply(*pack(states))
     match_vectors = tensor.tensordot(
         attended, states, axes=[2, 1])[:, 0, :]
     energies = tensor.exp(match_vectors)
     return energies
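Shape trace for this variant: with attended of shape (length, batch, dim) and states mapped by the linear brick to (batch, dim), tensordot with axes=[2, 1] contracts the feature axes and yields (length, batch, batch); the [:, 0, :] slice then keeps (length, batch) before the elementwise exp.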
Example 31
batch_size = 10
n_steps = 50
transitions = [SimpleRecurrent, GatedRecurrent, LSTM]
dims = [100, 250, 1000, 2000]


table = []
for transition in transitions:
    row = []
    for dim in dims:
        brick = transition(dim=dim, activation=Tanh())
        input_vars = {name: tensor.tensor3(name)
                      for name in brick.apply.sequences
                      if name != 'mask'}
        output_vars = pack(brick.apply(**input_vars))
        cost = sum(output.sum() for output in output_vars)
        grads = tensor.grad(cost, list(brick.params))
        function = theano.function(list(input_vars.values()), grads)
        inputs = {name: numpy.random.rand(
                      n_steps, batch_size, brick.get_dim(name))
                  .astype(theano.config.floatX)
                  for name in input_vars}

        result = timeit.timeit(lambda: function(**inputs), number=5) / 5
        print(transition.__name__, dim, result)
        row.append(result)
    table.append(row)

# Emit a Markdown table header row and its separator row
sep = '|'
print(sep, sep, sep.join(str(dim) for dim in dims), sep)
print('---'.join(sep * (len(dims) + 2)))
Example 32
 def __init__(self, threshold, axis=None):
     axis = pack(axis) if axis is not None else ()
     self.axis = set(axis)
     self.threshold = shared_floatx(threshold)
     if len(axis) != len(self.axis):
         raise ValueError("axis must be unique")
Example 33
    def apply(self, bound_application, *args, **kwargs):
        as_dict = kwargs.pop('as_dict', False)
        as_list = kwargs.pop('as_list', False)
        if as_list and as_dict:
            raise ValueError

        brick = bound_application.brick

        # Find the names of the inputs to the application method
        args_names, varargs_name, _, _ = inspect.getargspec(
            self.application_function)
        args_names = args_names[1:]

        # Construct the ApplicationCall, used to store data in for this call
        call = ApplicationCall(bound_application)
        args = list(args)
        if 'application' in args_names:
            args.insert(args_names.index('application'), bound_application)
        if 'application_call' in args_names:
            args.insert(args_names.index('application_call'), call)

        # Allocate before applying, and optionally initialize
        if not brick.allocated:
            brick.allocate()

        # Annotate all the input variables which are Theano variables
        def copy_and_tag(variable, role, name):
            """Helper method to copy a variable and annotate it."""
            copy = variable.copy()
            # Theano name
            copy.name = _variable_name(brick.name, self.name, name)
            add_annotation(copy, brick)
            add_annotation(copy, call)
            # Blocks name
            copy.tag.name = name
            add_role(copy, role)
            return copy

        for i, input_ in enumerate(args):
            if isinstance(input_, tensor.Variable):
                if i < len(args_names):
                    name = args_names[i]
                else:
                    name = "{}_{}".format(varargs_name, i - len(args_names))
                args[i] = copy_and_tag(input_, INPUT, name)
        for name, input_ in kwargs.items():
            if isinstance(input_, tensor.Variable):
                kwargs[name] = copy_and_tag(input_, INPUT, name)

        # Run the application method on the annotated variables
        last_brick = self.call_stack[-1] if self.call_stack else None
        if (last_brick and brick is not last_brick and
                brick not in last_brick.children):
            raise ValueError('Brick ' + str(self.call_stack[-1]) + ' tries '
                             'to call brick ' + str(brick) + ' which '
                             'is not in the list of its children.')
        self.call_stack.append(brick)
        try:
            outputs = self.application_function(brick, *args, **kwargs)
            outputs = pack(outputs)
        finally:
            self.call_stack.pop()

        # Rename and annotate output variables
        for i, output in enumerate(outputs):
            if isinstance(output, tensor.Variable):
                try:
                    name = bound_application.outputs[i]
                except AttributeError:
                    name = "output_{}".format(i)
                except IndexError:
                    reraise_as(ValueError("Unexpected outputs"))
                # TODO Tag with dimensions, axes, etc. for error-checking
                outputs[i] = copy_and_tag(outputs[i],
                                          OUTPUT, name)

        # Return values
        if as_list:
            return outputs
        if as_dict:
            return OrderedDict(zip(bound_application.outputs, outputs))
        return unpack(outputs)
Example 34
        def recurrent_apply(brick, *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True``, the transition is iterated over the input
                sequences. ``True`` by default.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the `theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration.
            iterate = kwargs.pop('iterate', True)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg
            # Separate kwargs that aren't sequence, context or state variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            rest_kwargs = {
                key: value
                for key, value in kwargs.items() if key not in scan_arguments
            }

            # Check what is given and what is not
            def only_given(arg_names):
                return OrderedDict((arg_name, kwargs[arg_name])
                                   for arg_name in arg_names
                                   if kwargs.get(arg_name))

            sequences_given = only_given(application.sequences)
            contexts_given = only_given(application.contexts)

            # TODO Assumes 1 time dim!
            if len(sequences_given):
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        kwargs[state_name] = \
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs)
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = \
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs)
                    assert kwargs[state_name]
            states_given = only_given(application.states)
            assert len(states_given) == len(application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(
                    state, *range(state.ndim))

            # Apply methods
            if not iterate:
                return application_method(brick, **kwargs)

            def scan_function(*args):
                args = list(args)
                arg_names = (list(sequences_given) + list(states_given) +
                             list(contexts_given))
                kwargs = dict(zip(arg_names, args))
                kwargs.update(rest_kwargs)
                return application_method(brick, **kwargs)

            outputs_info = (
                list(states_given.values()) + [None] *
                (len(application.outputs) - len(application.states)))
            result, updates = theano.scan(
                scan_function,
                sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                list(updates.values())[0].owner.tag.updates = updates
            return result
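A note on outputs_info as built here: each recurrent state is paired with its initial value so that scan feeds it back, while pure outputs are padded with None; for an application with states [states, cells] and outputs [states, cells, gates], outputs_info becomes [states0, cells0, None].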
Example 35
        def recurrent_apply(brick, *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True``, the transition is iterated over the input
                sequences. ``True`` by default.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the `theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration.
            iterate = kwargs.pop('iterate', True)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg
            # Separate kwargs that aren't sequence, context or state variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            rest_kwargs = {key: value for key, value in kwargs.items()
                           if key not in scan_arguments}

            # Check what is given and what is not
            def only_given(arg_names):
                return OrderedDict((arg_name, kwargs[arg_name])
                                   for arg_name in arg_names
                                   if kwargs.get(arg_name))
            sequences_given = only_given(application.sequences)
            contexts_given = only_given(application.contexts)

            # TODO Assumes 1 time dim!
            if len(sequences_given):
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        kwargs[state_name] = \
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs)
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = \
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs)
                    assert kwargs[state_name]
            states_given = only_given(application.states)
            assert len(states_given) == len(application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(state,
                                                        *range(state.ndim))

            # Apply methods
            if not iterate:
                return application_method(brick, **kwargs)

            def scan_function(*args):
                args = list(args)
                arg_names = (list(sequences_given) + list(states_given) +
                             list(contexts_given))
                kwargs = dict(zip(arg_names, args))
                kwargs.update(rest_kwargs)
                return application_method(brick, **kwargs)
            outputs_info = (list(states_given.values())
                            + [None] * (len(application.outputs) -
                                        len(application.states)))
            result, updates = theano.scan(
                scan_function, sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                list(updates.values())[0].owner.tag.updates = updates
            return result
Example 39
        def recurrent_apply(brick, application, application_call, *args,
                            **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True``, the transition is iterated over the input
                sequences. ``True`` by default.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the :func:`theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg
            # Separate sequences, states and contexts
            scan_arguments = (application.sequences + application.states +
                              application.contexts)

            # Check what is given and what is not
            def only_given(arg_names):
                return OrderedDict((arg_name, kwargs[arg_name])
                                   for arg_name in arg_names
                                   if kwargs.get(arg_name))

            sequences_given = only_given(application.sequences)
            contexts_given = only_given(application.contexts)

            # TODO Assumes 1 time dim!
            if len(sequences_given):
                shape = list(sequences_given.values())[0].shape
                n_steps = shape[0]
                batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {
                key: value
                for key, value in kwargs.items() if key not in scan_arguments
            }
            for value in rest_kwargs.values():
                if (isinstance(value, Variable)
                        and not is_shared_variable(value)):
                    warnings.warn(
                        'Your function uses a non-shared variable other than'
                        ' those given by scan explicitly. That can'
                        ' significantly slow down `tensor.grad` call.'
                        ' Did you forget to declare it in `contexts`?')

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        kwargs[state_name] = \
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs)
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = \
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs)
                    assert kwargs[state_name]
            states_given = only_given(application.states)
            assert len(states_given) == len(application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(
                    state, *range(state.ndim))

            def scan_function(*args):
                args = list(args)
                arg_names = (list(sequences_given) + list(states_given) +
                             list(contexts_given))
                kwargs = dict(zip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = getattr(brick,
                                  application_function.__name__)(iterate=False,
                                                                 **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs

            outputs_info = (
                list(states_given.values()) + [None] *
                (len(application.outputs) - len(application.states)))
            result, updates = theano.scan(
                scan_function,
                sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result