def initial_states(self, batch_size, **kwargs):
    return (pack(self.transition.initial_states(
                batch_size, **kwargs)) +
            pack(self.attention.initial_glimpses(
                batch_size, kwargs[self.attended_name])) +
            pack(self.topical_attention.initial_glimpses(
                batch_size, kwargs[self.topical_attended_name])))
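# Every snippet in this collection relies on Blocks' `pack` helper (and its
# inverse `unpack`) to treat single values and tuples of values uniformly.
# A minimal sketch of the assumed semantics (a list/tuple is copied into a
# plain list, anything else is wrapped in a one-element list):
from blocks.utils import pack, unpack

assert pack((1, 2)) == [1, 2]   # tuples/lists become plain lists
assert pack('x') == ['x']       # a single value is wrapped
assert unpack(['x']) == 'x'     # unpack undoes the wrapping for singletons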
def __init__(self, threshold, axis=None):
    axis = pack(axis) if axis is not None else ()
    self.axis = set(axis)
    self.threshold = shared_floatx(threshold, "threshold")
    add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
    if len(axis) != len(self.axis):
        raise ValueError("axis must be unique")
def apply(self, input_):
    child_input = input_
    for child, application_method in zip(self.children,
                                         self.application_methods):
        output = getattr(child, application_method)(*pack(child_input))
        child_input = output
    return output
def apply(self, application, *args, **kwargs):
    # extra_ndim is a mandatory parameter, but in order not to
    # confuse with positional inputs, it has to be extracted from
    # **kwargs
    extra_ndim = kwargs.get('extra_ndim', 0)

    inputs = dict(zip(application.inputs, args))
    inputs.update(dict_subset(kwargs, application.inputs,
                              must_have=False))
    reshaped_inputs = inputs
    # To prevent pollution of the computation graph with no-ops
    if extra_ndim > 0:
        for name, input_ in inputs.items():
            shape, ndim = input_.shape, input_.ndim
            # Remember extra_dims for reshaping the outputs correctly.
            # Does not matter from which input, since we assume
            # extra dimension match for all inputs.
            extra_dims = shape[:extra_ndim]
            new_first_dim = tensor.prod(shape[:extra_ndim + 1])
            new_shape = tensor.join(
                0, new_first_dim[None], shape[extra_ndim + 1:])
            reshaped_inputs[name] = input_.reshape(
                new_shape, ndim=ndim - extra_ndim)
    outputs = wrapped.__get__(self, None)(**reshaped_inputs)
    if extra_ndim == 0:
        return outputs
    reshaped_outputs = []
    for output in pack(outputs):
        shape, ndim = output.shape, output.ndim
        new_shape = tensor.join(
            0, extra_dims,
            (shape[0] // tensor.prod(extra_dims))[None],
            shape[1:])
        reshaped_outputs.append(
            output.reshape(new_shape, ndim=ndim + extra_ndim))
    return reshaped_outputs
def apply(self, input_):
    child_input = input_
    for _, application_method in zip(self.children,
                                     self.application_methods):
        output = application_method(*pack(child_input))
        child_input = output
    return output
def apply(self, states, attended):
    states = tensor.repeat(states[None, :, :], attended.shape[0], axis=0)
    match_vectors = tensor.concatenate([states, attended], axis=2)
    energies = self.shallow.apply(*pack(match_vectors))
    energies = energies.reshape(match_vectors.shape[:-1],
                                ndim=match_vectors.ndim - 1)
    return energies
def expr(self, model, data, **kwargs):
    assert not model.supervised
    data = pack(data)
    data = [tensor.unbroadcast(var, *range(var.ndim)) for var in data]
    return theano.clone(
        self.cost, replace=dict(zip(self.inputs.values(), data)))
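# A minimal sketch of the substitution `expr` performs above: theano.clone
# rebuilds the cost graph with the original input variables swapped for the
# packed data batch variables. The names below (`x`, `x_batch`) are
# hypothetical and only illustrate the `replace` mechanism.
import theano
from theano import tensor

x = tensor.matrix('x')               # input the cost was originally built on
cost = (x ** 2).sum()
x_batch = tensor.matrix('x_batch')   # stand-in for a data batch variable
cost_on_batch = theano.clone(cost, replace={x: x_batch})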
def check_sparse(rng, axis, num_init, shape, weights_init=Constant(1.)):
    weights = SparseND(axis=axis, num_init=num_init,
                       weights_init=weights_init).generate(rng, shape)
    assert weights.shape == shape
    assert weights.dtype == theano.config.floatX
    if isinstance(num_init, numbers.Integral):
        nnz = numpy.prod([s for i, s in enumerate(shape)
                          if i in pack(axis)]) * num_init
        assert numpy.count_nonzero(weights) == nnz
    else:
        atom_size = numpy.prod([s for i, s in enumerate(shape)
                                if i not in pack(axis)])
        nnz_atom = int(num_init * atom_size)
        num_atoms = numpy.prod([s for i, s in enumerate(shape)
                                if i in pack(axis)])
        nnz = nnz_atom * num_atoms
        assert numpy.count_nonzero(weights) == nnz
def apply(self, *args):
    child_input = args
    for application_method in self.application_methods:
        output = application_method(*pack(child_input))
        if not self.pruning_variables_initialized:
            self.layer_activities.append(output)
        child_input = output
    self.pruning_variables_initialized = True
    return output
def generate(self, rng, shape):
    axis_ind = pack(self.axis)
    other_ind = [i for i in range(len(shape)) if i not in axis_ind]
    axis_shapes = [shape[i] for i in axis_ind]
    other_shapes = [shape[i] for i in other_ind]
    matrix = super(SparseND, self).generate(
        rng, (numpy.prod(axis_shapes), numpy.prod(other_shapes)))
    unflattened = matrix.reshape(tuple(axis_shapes) + tuple(other_shapes))
    wrong_ind = axis_ind + other_ind
    transp_ind = [wrong_ind.index(i) for i in range(len(shape))]
    return unflattened.transpose(transp_ind)
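# A hedged usage sketch of the SparseND initializer above, mirroring the
# integral-num_init branch of check_sparse: generate a 4x5 weight matrix
# with `num_init` non-zero entries per slice along axis 0. The import path
# is an assumption (SparseND and Constant as in blocks.initialization).
import numpy
from blocks.initialization import Constant, SparseND  # assumed import path

rng = numpy.random.RandomState(1)
weights = SparseND(axis=0, num_init=2,
                   weights_init=Constant(1.)).generate(rng, (4, 5))
assert weights.shape == (4, 5)
assert numpy.count_nonzero(weights) == 4 * 2  # prod of axis dims * num_init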
def scan_function(*args):
    args = list(args)
    arg_names = (list(sequences_given) + list(states_given) +
                 list(contexts_given))
    kwargs = dict(equizip(arg_names, args))
    kwargs.update(rest_kwargs)
    outputs = application(iterate=False, **kwargs)
    # We want to save the computation graph returned by the
    # `application_function` when it is called inside the
    # `theano.scan`.
    application_call.inner_inputs = args
    application_call.inner_outputs = pack(outputs)
    return outputs
def scan_function(*args):
    args = list(args)
    arg_names = (list(sequences_given) + list(states_given) +
                 list(contexts_given))
    kwargs = dict(zip(arg_names, args))
    kwargs.update(rest_kwargs)
    outputs = getattr(brick, application_function.__name__)(
        iterate=False, **kwargs)
    # We want to save the computation graph returned by the
    # `application_function` when it is called inside the
    # `theano.scan`.
    application_call.inner_inputs = args
    application_call.inner_outputs = pack(outputs)
    return outputs
def apply(self, *args):
    child_input = args
    for application_method in self.application_methods:
        output = application_method(*pack(child_input))
        child_input = output
    return output
def apply(self, *args):
    output = args
    output = self.tanh.apply(*pack(output))
    output = self.linear.apply(*pack(output))
    return output
def __call__(self, *inputs, **kwargs):
    """Wraps an application method.

    This wrapper will provide some necessary pre- and post-processing
    of the Theano variables, such as tagging them with the brick that
    created them and naming them. These changes will apply to Theano
    variables given as positional arguments and keyword arguments.

    .. warning::

        Properly set tags are important for correct functioning of the
        framework. Do not provide inputs to your apply method in a way
        different than passing them as positional or keyword arguments,
        e.g. as list or tuple elements.

    Notes
    -----
    Application methods will allocate the brick parameters with a call
    to :meth:`allocate` if they have not been allocated already.

    """
    last = Application._last_brick_applied
    if last and last != self.brick and self.brick not in last.children:
        raise ValueError("The brick {} called an apply method of the"
                         " brick {} without having it in the children"
                         " list.".format(last, self.brick))
    return_dict = kwargs.pop('return_dict', False)
    return_list = kwargs.pop('return_list', False)
    assert not return_list or not return_dict

    arg_names, varargs_name, _, _ = inspect.getargspec(
        self.application_method)
    arg_names = arg_names[1:]

    call = ApplicationCall(self.brick, self)
    if 'application_call' in arg_names:
        kwargs['application_call'] = call

    def copy_and_tag(variable, role, name):
        if Brick.print_shapes:
            variable = put_hook(
                variable, lambda x: logger.debug(
                    "{}.{}.{}.shape = {}".format(
                        self.brick.name, self.__name__, name, x.shape)))
        copy = variable.copy()
        copy.name = "{}_{}_{}".format(self.brick.name, self.__name__, name)
        copy.tag.application_call = call
        copy.tag.name = name
        copy.tag.role = role
        return copy

    if not self.brick.allocated:
        self.brick.allocate()
    if not self.brick.initialized and not self.brick.lazy:
        self.brick.initialize()
    inputs = list(inputs)
    for i, input_ in enumerate(inputs):
        name = (arg_names[i] if i < len(arg_names)
                else "{}_{}".format(varargs_name, i - len(arg_names)))
        if isinstance(input_, tensor.Variable):
            inputs[i] = copy_and_tag(input_, VariableRole.INPUT, name)
    for key, value in kwargs.items():
        if isinstance(value, tensor.Variable):
            kwargs[key] = copy_and_tag(value, VariableRole.INPUT, key)

    Application._last_brick_applied = self.brick
    try:
        outputs = self.application_method(self.brick, *inputs, **kwargs)
    finally:
        Application._last_brick_applied = last

    # TODO allow user to return an OrderedDict
    outputs = pack(outputs)
    for i, output in enumerate(outputs):
        try:
            name = self.outputs[i]
        except IndexError:
            name = "output_{}".format(i)
        if isinstance(output, tensor.Variable):
            # TODO Tag with dimensions, axes, etc. for error-checking
            outputs[i] = copy_and_tag(outputs[i],
                                      VariableRole.OUTPUT, name)

    if return_list:
        return outputs
    if return_dict:
        return OrderedDict(zip(self.outputs, outputs))
    return unpack(outputs)
def apply(self, states, attended, posTag):
    match_vectors = states + attended + posTag
    energies = self.shallow.apply(*pack(match_vectors))
    energies = energies.reshape(match_vectors.shape[:-1],
                                ndim=match_vectors.ndim - 1)
    return energies
def recurrent_apply(brick, application, application_call,
                    *args, **kwargs):
    """Iterates a transition function.

    Parameters
    ----------
    iterate : bool
        If ``True`` iteration is made. By default ``True``.
    reverse : bool
        If ``True``, the sequences are processed in backward
        direction. ``False`` by default.
    return_initial_states : bool
        If ``True``, initial states are included in the returned
        state tensors. ``False`` by default.

    """
    # Extract arguments related to iteration and immediately relay the
    # call to the wrapped function if `iterate=False`
    iterate = kwargs.pop('iterate', True)
    if not iterate:
        return application_function(brick, *args, **kwargs)
    reverse = kwargs.pop('reverse', False)
    return_initial_states = kwargs.pop('return_initial_states', False)

    # Push everything to kwargs
    for arg, arg_name in zip(args, arg_names):
        kwargs[arg_name] = arg

    # Make sure that all arguments for scan are tensor variables
    scan_arguments = (application.sequences + application.states +
                      application.contexts)
    for arg in scan_arguments:
        if arg in kwargs:
            if kwargs[arg] is None:
                del kwargs[arg]
            else:
                kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

    # Check which sequence and contexts were provided
    sequences_given = dict_subset(kwargs, application.sequences,
                                  must_have=False)
    contexts_given = dict_subset(kwargs, application.contexts,
                                 must_have=False)

    # Determine number of steps and batch size.
    if len(sequences_given):
        # TODO Assumes 1 time dim!
        shape = list(sequences_given.values())[0].shape
        n_steps = shape[0]
        batch_size = shape[1]
    else:
        # TODO Raise error if n_steps and batch_size not found?
        n_steps = kwargs.pop('n_steps')
        batch_size = kwargs.pop('batch_size')

    # Handle the rest kwargs
    rest_kwargs = {key: value for key, value in kwargs.items()
                   if key not in scan_arguments}
    for value in rest_kwargs.values():
        if (isinstance(value, Variable) and
                not is_shared_variable(value)):
            logger.warning("unknown input {}".format(value) +
                           unknown_scan_input)

    # Ensure that all initial states are available.
    initial_states = brick.initial_states(batch_size, as_dict=True,
                                          *args, **kwargs)
    for state_name in application.states:
        dim = brick.get_dim(state_name)
        if state_name in kwargs:
            if isinstance(kwargs[state_name], NdarrayInitialization):
                kwargs[state_name] = tensor.alloc(
                    kwargs[state_name].generate(brick.rng, (1, dim)),
                    batch_size, dim)
            elif isinstance(kwargs[state_name], Application):
                kwargs[state_name] = (
                    kwargs[state_name](state_name, batch_size,
                                       *args, **kwargs))
        else:
            try:
                kwargs[state_name] = initial_states[state_name]
            except KeyError:
                raise KeyError(
                    "no initial state for '{}' of the brick {}".format(
                        state_name, brick.name))
    states_given = dict_subset(kwargs, application.states)

    # Theano issue 1772
    for name, state in states_given.items():
        states_given[name] = tensor.unbroadcast(state,
                                                *range(state.ndim))

    def scan_function(*args):
        args = list(args)
        arg_names = (list(sequences_given) +
                     [output for output in application.outputs
                      if output in application.states] +
                     list(contexts_given))
        kwargs = dict(equizip(arg_names, args))
        kwargs.update(rest_kwargs)
        outputs = application(iterate=False, **kwargs)
        # We want to save the computation graph returned by the
        # `application_function` when it is called inside the
        # `theano.scan`.
        application_call.inner_inputs = args
        application_call.inner_outputs = pack(outputs)
        return outputs

    outputs_info = [states_given[name] if name in application.states
                    else None
                    for name in application.outputs]
    result, updates = theano.scan(
        scan_function, sequences=list(sequences_given.values()),
        outputs_info=outputs_info,
        non_sequences=list(contexts_given.values()),
        n_steps=n_steps,
        go_backwards=reverse,
        name='{}_{}_scan'.format(
            brick.name, application.application_name))
    result = pack(result)
    if return_initial_states:
        # Undo Subtensor
        for i in range(len(states_given)):
            assert isinstance(result[i].owner.op,
                              tensor.subtensor.Subtensor)
            result[i] = result[i].owner.inputs[0]
    if updates:
        application_call.updates = dict_union(application_call.updates,
                                              updates)

    return result
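# A hedged usage sketch of the iteration flags documented in the docstring
# above, using a standard Blocks transition (SimpleRecurrent). `reverse`
# makes the scan run backwards and `return_initial_states` prepends the
# initial state to the returned state tensor; initialization of the brick's
# parameters is omitted here since only the graph is built.
from theano import tensor
from blocks.bricks import Tanh
from blocks.bricks.recurrent import SimpleRecurrent

transition = SimpleRecurrent(dim=3, activation=Tanh(), name='transition')
x = tensor.tensor3('x')  # (time, batch, dim)
states = transition.apply(inputs=x, reverse=True, return_initial_states=True)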
def initial_states(self, batch_size, **kwargs):
    return (pack(self.transition.initial_states(batch_size, **kwargs)) +
            pack([tensor.zeros((batch_size, self.representationDim // 2))]))
def recurrent_apply(brick, application, application_call,
                    *args, **kwargs):
    """Iterates a transition function.

    Parameters
    ----------
    iterate : bool
        If ``True`` iteration is made. By default ``True``.
    reverse : bool
        If ``True``, the sequences are processed in backward
        direction. ``False`` by default.
    return_initial_states : bool
        If ``True``, initial states are included in the returned
        state tensors. ``False`` by default.

    .. todo::

        * Handle `updates` returned by the :func:`theano.scan` routine.
        * ``kwargs`` has a random order; check if this is a problem.

    """
    # Extract arguments related to iteration and immediately relay the
    # call to the wrapped function if `iterate=False`
    iterate = kwargs.pop('iterate', True)
    if not iterate:
        return application_function(brick, *args, **kwargs)
    reverse = kwargs.pop('reverse', False)
    return_initial_states = kwargs.pop('return_initial_states', False)

    # Push everything to kwargs
    for arg, arg_name in zip(args, arg_names):
        kwargs[arg_name] = arg

    # Make sure that all arguments for scan are tensor variables
    scan_arguments = (application.sequences + application.states +
                      application.contexts)
    for arg in scan_arguments:
        if arg in kwargs:
            if kwargs[arg] is None:
                del kwargs[arg]
            else:
                kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

    # Check which sequence and contexts were provided
    sequences_given = dict_subset(kwargs, application.sequences,
                                  must_have=False)
    contexts_given = dict_subset(kwargs, application.contexts,
                                 must_have=False)

    # Determine number of steps and batch size.
    if len(sequences_given):
        # TODO Assumes 1 time dim!
        shape = list(sequences_given.values())[0].shape
        if not iterate:
            batch_size = shape[0]
        else:
            n_steps = shape[0]
            batch_size = shape[1]
    else:
        # TODO Raise error if n_steps and batch_size not found?
        n_steps = kwargs.pop('n_steps')
        batch_size = kwargs.pop('batch_size')

    # Handle the rest kwargs
    rest_kwargs = {key: value for key, value in kwargs.items()
                   if key not in scan_arguments}
    for value in rest_kwargs.values():
        if (isinstance(value, Variable) and
                not is_shared_variable(value)):
            logger.warning("unknown input {}".format(value) +
                           unknown_scan_input)

    # Ensure that all initial states are available.
    for state_name in application.states:
        dim = brick.get_dim(state_name)
        if state_name in kwargs:
            if isinstance(kwargs[state_name], NdarrayInitialization):
                kwargs[state_name] = tensor.alloc(
                    kwargs[state_name].generate(brick.rng, (1, dim)),
                    batch_size, dim)
            elif isinstance(kwargs[state_name], Application):
                kwargs[state_name] = (
                    kwargs[state_name](state_name, batch_size,
                                       *args, **kwargs))
        else:
            # TODO init_func returns 2D-tensor, fails for iterate=False
            kwargs[state_name] = (
                brick.initial_state(state_name, batch_size,
                                    *args, **kwargs))
            assert kwargs[state_name]
    states_given = dict_subset(kwargs, application.states)

    # Theano issue 1772
    for name, state in states_given.items():
        states_given[name] = tensor.unbroadcast(state,
                                                *range(state.ndim))

    def scan_function(*args):
        args = list(args)
        arg_names = (list(sequences_given) +
                     [output for output in application.outputs
                      if output in application.states] +
                     list(contexts_given))
        kwargs = dict(equizip(arg_names, args))
        kwargs.update(rest_kwargs)
        outputs = application(iterate=False, **kwargs)
        # We want to save the computation graph returned by the
        # `application_function` when it is called inside the
        # `theano.scan`.
        application_call.inner_inputs = args
        application_call.inner_outputs = pack(outputs)
        return outputs

    outputs_info = [states_given[name] if name in application.states
                    else None
                    for name in application.outputs]
    result, updates = theano.scan(
        scan_function, sequences=list(sequences_given.values()),
        outputs_info=outputs_info,
        non_sequences=list(contexts_given.values()),
        n_steps=n_steps,
        go_backwards=reverse)
    result = pack(result)
    if return_initial_states:
        # Undo Subtensor
        for i in range(len(states_given)):
            assert isinstance(result[i].owner.op,
                              tensor.subtensor.Subtensor)
            result[i] = result[i].owner.inputs[0]
    if updates:
        application_call.updates = dict_union(application_call.updates,
                                              updates)

    return result
def apply(self, *args, **kwargs):
    outputs = super(Merge, self).apply(*args, **kwargs)
    outputs = pack(outputs)
    # Sum is often faster than tensor.sum(outputs, axis=0) for a
    # small number of outputs
    return sum(outputs)
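# Why the plain builtin `sum` works above: it folds the packed list of Theano
# variables with `+`, yielding an element-wise addition graph rather than a
# stacked tensor reduced with tensor.sum(..., axis=0). A small illustration
# with hypothetical variable names:
from theano import tensor

a, b, c = tensor.matrix('a'), tensor.matrix('b'), tensor.matrix('c')
merged = sum([a, b, c])  # builds the same graph as a + b + c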
def apply(self, bound_application, *args, **kwargs):
    as_dict = kwargs.pop('as_dict', False)
    as_list = kwargs.pop('as_list', False)
    call_id = kwargs.pop('call_id', None)
    if as_list and as_dict:
        raise ValueError

    brick = bound_application.brick

    # Find the names of the inputs to the application method
    args_names, varargs_name, _, _ = inspect.getargspec(
        self.application_function)
    args_names = args_names[1:]

    # Construct the ApplicationCall, used to store data in for this call
    call = ApplicationCall(bound_application)
    call.metadata['call_id'] = call_id
    args = list(args)
    if 'application' in args_names:
        args.insert(args_names.index('application'), bound_application)
    if 'application_call' in args_names:
        args.insert(args_names.index('application_call'), call)

    # Allocate before applying, and optionally initialize
    if not brick.allocated:
        brick.allocate()

    # Annotate all the input variables which are Theano variables
    for i, input_ in enumerate(args):
        if isinstance(input_, tensor.Variable):
            if i < len(args_names):
                name = args_names[i]
            else:
                name = "{}_{}".format(varargs_name, i - len(args_names))
            args[i] = copy_and_tag(input_, brick, call, INPUT,
                                   self.name, name)
    for name, input_ in kwargs.items():
        if isinstance(input_, tensor.Variable):
            kwargs[name] = copy_and_tag(input_, brick, call, INPUT,
                                        self.name, name)

    # Run the application method on the annotated variables
    last_brick = self.call_stack[-1] if self.call_stack else None
    if (last_brick and brick is not last_brick and
            brick not in last_brick.children):
        warnings.warn('Brick ' + str(self.call_stack[-1]) + ' tries '
                      'to call brick ' + str(self.brick) + ' which '
                      'is not in the list of its children. This could '
                      'be caused because an @application decorator is '
                      'missing.')
    self.call_stack.append(brick)
    try:
        outputs = self.application_function(brick, *args, **kwargs)
        outputs = pack(outputs)
    finally:
        self.call_stack.pop()

    # Rename and annotate output variables
    for i, output in enumerate(outputs):
        if isinstance(output, tensor.Variable):
            try:
                name = bound_application.outputs[i]
            except AttributeError:
                name = "output_{}".format(i)
            except IndexError:
                reraise_as(ValueError("Unexpected outputs"))
            # TODO Tag with dimensions, axes, etc. for error-checking
            outputs[i] = copy_and_tag(outputs[i], brick, call, OUTPUT,
                                      self.name, name)

    # Return values
    if as_list:
        return outputs
    if as_dict:
        return OrderedDict(zip(bound_application.outputs, outputs))
    return unpack(outputs)
def apply(self, bound_application, *args, **kwargs):
    as_dict = kwargs.pop('as_dict', False)
    as_list = kwargs.pop('as_list', False)
    if as_list and as_dict:
        raise ValueError

    brick = bound_application.brick

    # Find the names of the inputs to the application method
    args_names, varargs_name, _, _ = inspect.getargspec(
        self.application_function)
    args_names = args_names[1:]

    # Construct the ApplicationCall, used to store data in for this call
    call = ApplicationCall(brick, bound_application)
    args = list(args)
    if 'application' in args_names:
        args.insert(args_names.index('application'), bound_application)
    if 'application_call' in args_names:
        args.insert(args_names.index('application_call'), call)

    # Allocate before applying, and optionally initialize
    if not brick.allocated:
        brick.allocate()
    if not brick.initialized and not brick.lazy:
        brick.initialize()

    # Annotate all the input variables which are Theano variables
    def copy_and_tag(variable, role, name):
        """Helper method to copy a variable and annotate it."""
        copy = variable.copy()
        # Theano name
        copy.name = _variable_name(brick.name, self.name, name)
        add_annotation(copy, brick)
        add_annotation(copy, call)
        # Blocks name
        copy.tag.name = name
        add_role(copy, role)
        return copy

    for i, input_ in enumerate(args):
        if isinstance(input_, tensor.Variable):
            if i < len(args_names):
                name = args_names[i]
            else:
                name = "{}_{}".format(varargs_name, i - len(args_names))
            args[i] = copy_and_tag(input_, INPUT, name)
    for name, input_ in kwargs.items():
        if isinstance(input_, tensor.Variable):
            kwargs[name] = copy_and_tag(input_, INPUT, name)

    # Run the application method on the annotated variables
    if self.call_stack and brick is not self.call_stack[-1] and \
            brick not in self.call_stack[-1].children:
        raise ValueError('Brick ' + str(self.call_stack[-1]) + ' tries '
                         'to call brick ' + str(self.brick) + ' which '
                         'is not in the list of its children.')
    self.call_stack.append(brick)
    try:
        outputs = self.application_function(brick, *args, **kwargs)
        outputs = pack(outputs)
    finally:
        self.call_stack.pop()

    # Rename and annotate output variables
    for i, output in enumerate(outputs):
        if isinstance(output, tensor.Variable):
            try:
                name = bound_application.outputs[i]
            except AttributeError:
                name = "output_{}".format(i)
            except IndexError:
                reraise_as(ValueError("Unexpected outputs"))
            # TODO Tag with dimensions, axes, etc. for error-checking
            outputs[i] = copy_and_tag(outputs[i], OUTPUT, name)

    # Return values
    if as_list:
        return outputs
    if as_dict:
        return OrderedDict(zip(bound_application.outputs, outputs))
    return unpack(outputs)
def apply(self, states, attended):
    states = self.linear.apply(*pack(states))
    match_vectors = tensor.tensordot(attended, states,
                                     axes=[2, 1])[:, 0, :]
    energies = tensor.exp(match_vectors)
    return energies
batch_size = 10
n_steps = 50

transitions = [SimpleRecurrent, GatedRecurrent, LSTM]
dims = [100, 250, 1000, 2000]

table = []
for transition in transitions:
    row = []
    for dim in dims:
        brick = transition(dim=dim, activation=Tanh())
        input_vars = {name: tensor.tensor3(name)
                      for name in brick.apply.sequences
                      if name != 'mask'}
        output_vars = pack(brick.apply(**input_vars))
        cost = sum(output.sum() for output in output_vars)
        grads = tensor.grad(cost, list(brick.params))
        function = theano.function(list(input_vars.values()), grads)
        inputs = {name: numpy.random.rand(n_steps, batch_size,
                                          brick.get_dim(name))
                  .astype(theano.config.floatX)
                  for name in input_vars}
        result = timeit.timeit(lambda: function(**inputs), number=5) / 5
        print(transition.__name__, dim, result)
        row.append(result)
    table.append(row)

sep = '|'
print(sep, sep, sep.join(str(dim) for dim in dims), sep)
print('---'.join(sep * (len(dims) + 2)))
def __init__(self, threshold, axis=None):
    axis = pack(axis) if axis is not None else ()
    self.axis = set(axis)
    self.threshold = shared_floatx(threshold)
    if len(axis) != len(self.axis):
        raise ValueError("axis must be unique")
def apply(self, bound_application, *args, **kwargs):
    as_dict = kwargs.pop('as_dict', False)
    as_list = kwargs.pop('as_list', False)
    if as_list and as_dict:
        raise ValueError

    brick = bound_application.brick

    # Find the names of the inputs to the application method
    args_names, varargs_name, _, _ = inspect.getargspec(
        self.application_function)
    args_names = args_names[1:]

    # Construct the ApplicationCall, used to store data in for this call
    call = ApplicationCall(bound_application)
    args = list(args)
    if 'application' in args_names:
        args.insert(args_names.index('application'), bound_application)
    if 'application_call' in args_names:
        args.insert(args_names.index('application_call'), call)

    # Allocate before applying, and optionally initialize
    if not brick.allocated:
        brick.allocate()

    # Annotate all the input variables which are Theano variables
    def copy_and_tag(variable, role, name):
        """Helper method to copy a variable and annotate it."""
        copy = variable.copy()
        # Theano name
        copy.name = _variable_name(brick.name, self.name, name)
        add_annotation(copy, brick)
        add_annotation(copy, call)
        # Blocks name
        copy.tag.name = name
        add_role(copy, role)
        return copy

    for i, input_ in enumerate(args):
        if isinstance(input_, tensor.Variable):
            if i < len(args_names):
                name = args_names[i]
            else:
                name = "{}_{}".format(varargs_name, i - len(args_names))
            args[i] = copy_and_tag(input_, INPUT, name)
    for name, input_ in kwargs.items():
        if isinstance(input_, tensor.Variable):
            kwargs[name] = copy_and_tag(input_, INPUT, name)

    # Run the application method on the annotated variables
    last_brick = self.call_stack[-1] if self.call_stack else None
    if (last_brick and brick is not last_brick and
            brick not in last_brick.children):
        raise ValueError('Brick ' + str(self.call_stack[-1]) + ' tries '
                         'to call brick ' + str(self.brick) + ' which '
                         'is not in the list of its children.')
    self.call_stack.append(brick)
    try:
        outputs = self.application_function(brick, *args, **kwargs)
        outputs = pack(outputs)
    finally:
        self.call_stack.pop()

    # Rename and annotate output variables
    for i, output in enumerate(outputs):
        if isinstance(output, tensor.Variable):
            try:
                name = bound_application.outputs[i]
            except AttributeError:
                name = "output_{}".format(i)
            except IndexError:
                reraise_as(ValueError("Unexpected outputs"))
            # TODO Tag with dimensions, axes, etc. for error-checking
            outputs[i] = copy_and_tag(outputs[i], OUTPUT, name)

    # Return values
    if as_list:
        return outputs
    if as_dict:
        return OrderedDict(zip(bound_application.outputs, outputs))
    return unpack(outputs)
def recurrent_apply(brick, *args, **kwargs):
    """Iterates a transition function.

    Parameters
    ----------
    iterate : bool
        If ``True`` iteration is made. By default ``True``.
    reverse : bool
        If ``True``, the sequences are processed in backward
        direction. ``False`` by default.
    return_initial_states : bool
        If ``True``, initial states are included in the returned
        state tensors. ``False`` by default.

    .. todo::

        * Handle `updates` returned by the `theano.scan` routine.
        * ``kwargs`` has a random order; check if this is a problem.

    """
    # Extract arguments related to iteration.
    iterate = kwargs.pop('iterate', True)
    reverse = kwargs.pop('reverse', False)
    return_initial_states = kwargs.pop('return_initial_states', False)

    # Push everything to kwargs
    for arg, arg_name in zip(args, arg_names):
        kwargs[arg_name] = arg

    # Separate kwargs that aren't sequence, context or state variables
    scan_arguments = (application.sequences + application.states +
                      application.contexts)
    rest_kwargs = {key: value for key, value in kwargs.items()
                   if key not in scan_arguments}

    # Check what is given and what is not
    def only_given(arg_names):
        return OrderedDict((arg_name, kwargs[arg_name])
                           for arg_name in arg_names
                           if kwargs.get(arg_name))
    sequences_given = only_given(application.sequences)
    contexts_given = only_given(application.contexts)

    # TODO Assumes 1 time dim!
    if len(sequences_given):
        shape = list(sequences_given.values())[0].shape
        if not iterate:
            batch_size = shape[0]
        else:
            n_steps = shape[0]
            batch_size = shape[1]
    else:
        # TODO Raise error if n_steps and batch_size not found?
        n_steps = kwargs.pop('n_steps')
        batch_size = kwargs.pop('batch_size')

    # Ensure that all initial states are available.
    for state_name in application.states:
        dim = brick.get_dim(state_name)
        if state_name in kwargs:
            if isinstance(kwargs[state_name], NdarrayInitialization):
                kwargs[state_name] = tensor.alloc(
                    kwargs[state_name].generate(brick.rng, (1, dim)),
                    batch_size, dim)
            elif isinstance(kwargs[state_name], Application):
                kwargs[state_name] = \
                    kwargs[state_name](state_name, batch_size,
                                       *args, **kwargs)
        else:
            # TODO init_func returns 2D-tensor, fails for iterate=False
            kwargs[state_name] = \
                brick.initial_state(state_name, batch_size,
                                    *args, **kwargs)
        assert kwargs[state_name]
    states_given = only_given(application.states)
    assert len(states_given) == len(application.states)

    # Theano issue 1772
    for name, state in states_given.items():
        states_given[name] = tensor.unbroadcast(state,
                                                *range(state.ndim))

    # Apply methods
    if not iterate:
        return application_method(brick, **kwargs)

    def scan_function(*args):
        args = list(args)
        arg_names = (list(sequences_given) + list(states_given) +
                     list(contexts_given))
        kwargs = dict(zip(arg_names, args))
        kwargs.update(rest_kwargs)
        return application_method(brick, **kwargs)
    outputs_info = (list(states_given.values()) +
                    [None] * (len(application.outputs) -
                              len(application.states)))
    result, updates = theano.scan(
        scan_function, sequences=list(sequences_given.values()),
        outputs_info=outputs_info,
        non_sequences=list(contexts_given.values()),
        n_steps=n_steps,
        go_backwards=reverse)
    result = pack(result)
    if return_initial_states:
        # Undo Subtensor
        for i in range(len(states_given)):
            assert isinstance(result[i].owner.op,
                              tensor.subtensor.Subtensor)
            result[i] = result[i].owner.inputs[0]
    if updates:
        list(updates.values())[0].owner.tag.updates = updates
    return result
def recurrent_apply(brick, application, application_call,
                    *args, **kwargs):
    """Iterates a transition function.

    Parameters
    ----------
    iterate : bool
        If ``True`` iteration is made. By default ``True``.
    reverse : bool
        If ``True``, the sequences are processed in backward
        direction. ``False`` by default.
    return_initial_states : bool
        If ``True``, initial states are included in the returned
        state tensors. ``False`` by default.

    .. todo::

        * Handle `updates` returned by the :func:`theano.scan` routine.
        * ``kwargs`` has a random order; check if this is a problem.

    """
    # Extract arguments related to iteration and immediately relay the
    # call to the wrapped function if `iterate=False`
    iterate = kwargs.pop('iterate', True)
    if not iterate:
        return application_function(brick, *args, **kwargs)
    reverse = kwargs.pop('reverse', False)
    return_initial_states = kwargs.pop('return_initial_states', False)

    # Push everything to kwargs
    for arg, arg_name in zip(args, arg_names):
        kwargs[arg_name] = arg

    # Separate sequences, states and contexts
    scan_arguments = (application.sequences + application.states +
                      application.contexts)

    # Check what is given and what is not
    def only_given(arg_names):
        return OrderedDict((arg_name, kwargs[arg_name])
                           for arg_name in arg_names
                           if kwargs.get(arg_name))
    sequences_given = only_given(application.sequences)
    contexts_given = only_given(application.contexts)

    # TODO Assumes 1 time dim!
    if len(sequences_given):
        shape = list(sequences_given.values())[0].shape
        if not iterate:
            batch_size = shape[0]
        else:
            n_steps = shape[0]
            batch_size = shape[1]
    else:
        # TODO Raise error if n_steps and batch_size not found?
        n_steps = kwargs.pop('n_steps')
        batch_size = kwargs.pop('batch_size')

    # Handle the rest kwargs
    rest_kwargs = {key: value for key, value in kwargs.items()
                   if key not in scan_arguments}
    for value in rest_kwargs.values():
        if (isinstance(value, Variable) and
                not is_shared_variable(value)):
            warnings.warn(
                'Your function uses a non-shared variable other than'
                ' those given by scan explicitly. That can'
                ' significantly slow down `tensor.grad` call.'
                ' Did you forget to declare it in `contexts`?')

    # Ensure that all initial states are available.
    for state_name in application.states:
        dim = brick.get_dim(state_name)
        if state_name in kwargs:
            if isinstance(kwargs[state_name], NdarrayInitialization):
                kwargs[state_name] = tensor.alloc(
                    kwargs[state_name].generate(brick.rng, (1, dim)),
                    batch_size, dim)
            elif isinstance(kwargs[state_name], Application):
                kwargs[state_name] = \
                    kwargs[state_name](state_name, batch_size,
                                       *args, **kwargs)
        else:
            # TODO init_func returns 2D-tensor, fails for iterate=False
            kwargs[state_name] = \
                brick.initial_state(state_name, batch_size,
                                    *args, **kwargs)
        assert kwargs[state_name]
    states_given = only_given(application.states)
    assert len(states_given) == len(application.states)

    # Theano issue 1772
    for name, state in states_given.items():
        states_given[name] = tensor.unbroadcast(state,
                                                *range(state.ndim))

    def scan_function(*args):
        args = list(args)
        arg_names = (list(sequences_given) + list(states_given) +
                     list(contexts_given))
        kwargs = dict(zip(arg_names, args))
        kwargs.update(rest_kwargs)
        outputs = getattr(brick, application_function.__name__)(
            iterate=False, **kwargs)
        # We want to save the computation graph returned by the
        # `application_function` when it is called inside the
        # `theano.scan`.
        application_call.inner_inputs = args
        application_call.inner_outputs = pack(outputs)
        return outputs

    outputs_info = (list(states_given.values()) +
                    [None] * (len(application.outputs) -
                              len(application.states)))
    result, updates = theano.scan(
        scan_function, sequences=list(sequences_given.values()),
        outputs_info=outputs_info,
        non_sequences=list(contexts_given.values()),
        n_steps=n_steps,
        go_backwards=reverse)
    result = pack(result)
    if return_initial_states:
        # Undo Subtensor
        for i in range(len(states_given)):
            assert isinstance(result[i].owner.op,
                              tensor.subtensor.Subtensor)
            result[i] = result[i].owner.inputs[0]
    if updates:
        application_call.updates = dict_union(application_call.updates,
                                              updates)
    return result