def _compute_over_subset(self, subset, inputs, targets,
                         monitors_dict, monitor_function, monitors_outservice_dict,
                         plot):
        """
        Run the monitor function over every minibatch of a data subset and report the mean of each monitor.

        Parameters
        ----------
        subset : object
            Label for the subset; it is put in the log line and handed to each outservice's `write()`.
        inputs : object
            Input data source(s). Raised to a list; each entry is minibatched with `self.batch_size` and
            `self.min_batch_size`. Nothing happens when this is None.
        targets : object
            Target data source(s). Only minibatched when not None and `self.unsupervised` is falsy.
        monitors_dict : dict
            Its keys name the monitors; they are paired positionally with the outputs of `monitor_function`.
            Nothing happens when it is empty.
        monitor_function : callable
            Called with each batch splatted as positional arguments.
        monitors_outservice_dict : dict
            Maps monitor names to an outservice object (or a falsy value to skip writing).
        plot : object
            When truthy, its `update_plots()` receives the mean monitor values and `self.epoch_counter`.
        """
        inputs = raise_to_list(inputs)
        targets = raise_to_list(targets)
        # nothing to compute without data or without monitors
        if inputs is None or len(monitors_dict) == 0:
            return

        collected = {name: [] for name in monitors_dict.keys()}
        streams = [minibatch(source, self.batch_size, self.min_batch_size) for source in inputs]
        if targets is not None and not self.unsupervised:
            streams.extend([minibatch(source, self.batch_size, self.min_batch_size) for source in targets])

        # accumulate each monitor's value for every batch
        for batch in min_normalized_izip(*streams):
            outputs = raise_to_list(monitor_function(*batch))
            for name, value in zip(monitors_dict.keys(), outputs):
                collected[name].append(numpy.asarray(value))

        # average over the batch axis
        means = {name: numpy.mean(values, 0) for name, values in collected.items()}
        log.info('%s monitors: %s', subset, str(means))

        # forward the averages to whichever outservices are configured
        for name, service in monitors_outservice_dict.items():
            if name not in means or not service:
                continue
            service.write(means[name], subset)

        # and to the plot, when there is one
        if plot:
            plot.update_plots(epoch=self.epoch_counter, monitors=means)
Exemple #2
0
    def compile_run_fn(self):
        """
        Compile the f_run function used by `run()` for computing the model's outputs given inputs.

        It sets the `self.f_run` attribute to the compiled function. If `self.f_run` already exists,
        nothing is compiled and a warning is logged instead.

        .. note::
            The run function defaults like so (when there is exactly one output it is passed on its own
            rather than inside a one-element list)::

                self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                      outputs = raise_to_list(self.get_outputs()),
                                      updates = self.get_updates(),
                                      name    = 'f_run')
        """
        if hasattr(self, 'f_run'):
            # Logger.warn() is a deprecated alias that newer Python versions removed; warning() is the
            # supported spelling and is what the rest of this code uses.
            log.warning('f_run already exists!')
            return

        log.debug("Compiling f_run...")
        t = time.time()
        outputs = raise_to_list(self.get_outputs())
        # unwrap a single output so the compiled function returns it directly
        if outputs is not None and len(outputs) == 1:
            outputs = outputs[0]
        self.f_run = function(inputs=raise_to_list(self.get_inputs()),
                              outputs=outputs,
                              updates=self.get_updates(),
                              name='f_run')
        log.debug("Compilation done. Took %s",
                  make_time_units_string(time.time() - t))
Exemple #3
0
    def _compute_over_subset(self, subset, inputs, targets,
                             monitors_dict, monitor_function, monitors_outservice_dict,
                             plot):
        """
        Evaluate the monitors over all minibatches of one data subset and publish their mean values.

        Parameters
        ----------
        subset : object
            Label for the subset; used in the log line and passed to each outservice's `write()`.
        inputs : object
            Input data source(s), raised to a list and minibatched using `self.batch_size` and
            `self.min_batch_size`. When None, the method does nothing.
        targets : object
            Target data source(s); minibatched only when not None and `self.unsupervised` is falsy.
        monitors_dict : dict
            Keys are the monitor names, matched by position with the outputs of `monitor_function`.
            When empty, the method does nothing.
        monitor_function : callable
            Invoked once per batch with the batch splatted as positional arguments.
        monitors_outservice_dict : dict
            Monitor name -> outservice (falsy entries are skipped).
        plot : object
            If truthy, `plot.update_plots()` is called with the means and `self.epoch_counter`.
        """
        inputs = raise_to_list(inputs)
        targets = raise_to_list(targets)
        if inputs is None or len(monitors_dict) == 0:
            # no data or no monitors: nothing to report
            return

        per_monitor = {key: [] for key in monitors_dict.keys()}
        batched = [minibatch(item, self.batch_size, self.min_batch_size) for item in inputs]
        if targets is not None and not self.unsupervised:
            batched.extend([minibatch(item, self.batch_size, self.min_batch_size) for item in targets])

        for batch in min_normalized_izip(*batched):
            results = raise_to_list(monitor_function(*batch))
            for key, result in zip(monitors_dict.keys(), results):
                per_monitor[key].append(numpy.asarray(result))

        # mean over the batches for every monitor
        averaged = {key: numpy.mean(results, 0) for key, results in per_monitor.items()}
        log.info('%s monitors: %s', subset, str(averaged))

        # hand the means to their outservices
        for key, service in monitors_outservice_dict.items():
            if key not in averaged or not service:
                continue
            service.write(averaged[key], subset)

        # update the plot too, if one was given
        if plot:
            plot.update_plots(epoch=self.epoch_counter, monitors=averaged)
Exemple #4
0
    def compile_run_fn(self):
        """
        Compile the f_run function used by `run()` for computing the model's outputs given inputs.

        It sets the `self.f_run` attribute to the compiled function. If `self.f_run` already exists,
        nothing is compiled and a warning is logged instead.

        .. note::
            The run function defaults like so (when there is exactly one output it is passed on its own
            rather than inside a one-element list)::

                self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                      outputs = raise_to_list(self.get_outputs()),
                                      updates = self.get_updates(),
                                      name    = 'f_run')
        """
        if hasattr(self, 'f_run'):
            # Logger.warn() is a deprecated alias that newer Python versions removed; warning() is the
            # supported spelling and is what the rest of this code uses.
            log.warning('f_run already exists!')
            return

        log.debug("Compiling f_run...")
        t = time.time()
        outputs = raise_to_list(self.get_outputs())
        # unwrap a single output so the compiled function returns it directly
        if outputs is not None and len(outputs) == 1:
            outputs = outputs[0]
        self.f_run = function(inputs=raise_to_list(self.get_inputs()),
                              outputs=outputs,
                              updates=self.get_updates(),
                              name='f_run')
        log.debug("Compilation done. Took %s", make_time_units_string(time.time() - t))
Exemple #5
0
    def __init__(self, inputs=None, outputs=None, function=None, **kwargs):
        """
        Store the layer's inputs, output size and optional function, and record them in `self.args`.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            List of [tuple(shape, `Theano.TensorType`)] or None describing the inputs to use for this layer.
            `shape` is a tuple with one entry per dimension of the `Theano.TensorType`: an integer for a known
            size, or None when the size isn't known. For example, a matrix with unknown batch size but a fixed
            feature size of 784 has `shape` (None, 784), and the full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : List of [int or shape tuple]
            The dimensionality of the output(s) for this model, using the same shape convention as `inputs`.
            An `output_size` keyword argument, when given, is used instead of this value.
        function : theano expression, optional
            A custom Theano expression to run on the inputs. This is mostly to enable a wrapper for processing
            data when adding layers to a :class:`opendeep.models.container` object, such as a
            :class:`opendeep.models.Prototype`.
        """
        self._classname = self.__class__.__name__
        self.inputs = raise_to_list(inputs)
        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.function = function

        # remember everything this layer was configured with
        self.args = add_kwargs_to_dict(kwargs.copy(), {})
        self.args['inputs'] = self.inputs
        optional_args = (('output_size', self.output_size), ('function', self.function))
        for key, value in optional_args:
            if value is not None:
                self.args[key] = value

        # Don't know the position of switches!
        self.switches_on = None

        log.debug("Creating a new ModifyLayer: %s with args: %s" % (self._classname, str(self.args)))
Exemple #6
0
    def get_gradient(self,
                     starting_gradient=None,
                     cost=None,
                     additional_cost=None):
        """
        This method allows you to define the gradient for this model manually. It should either work with a provided
        starting gradient (from upstream layers/models), or grab the training cost if no start gradient is provided.

        Theano's subgraph gradient function specified here:
        http://deeplearning.net/software/theano/library/gradient.html#theano.gradient.subgraph_grad

        .. warning::
            If the gradients of cost with respect to any of the start variables is already part of the
            start dictionary, then it may be counted twice with respect
            to wrt (`get_params()`) and end (`get_inputs()`).

        You should only implement this method if you want to manually define your gradients for the model.

        Parameters
        ----------
        starting_gradient : dictionary of {variable: known_gradient}, optional
            The starting, known gradients for parameters.
        cost : theano expression, optional
            The cost expression to use when calculating the gradients. Defaults to `get_train_cost()`.
            Ignored when `starting_gradient` is given.
        additional_cost : theano expression, optional
            Any additional cost to add to the gradient.

        Returns
        -------
        tuple
            (Gradient with respect to params, gradient with respect to inputs), each as an OrderedDict
            keyed by the corresponding variable.
        """
        params = self.get_params()
        inputs = raise_to_list(self.get_inputs())

        # if there are known gradients to start, use those instead of the cost for this model
        if starting_gradient is not None:
            params_grad, next_starting_grad = theano.subgraph_grad(
                wrt=params,
                end=inputs,
                start=starting_gradient,
                cost=additional_cost,
                details=False)
        # otherwise, just use this model's cost to determine gradient
        else:
            # Compare against None explicitly: `cost or ...` would truth-test a symbolic expression,
            # which is not a meaningful (or supported) operation.
            if cost is None:
                cost = self.get_train_cost()
            if additional_cost is not None:
                # Add the two costs. T.sum(cost, additional_cost) would have treated the additional
                # cost as the `axis` argument of a reduction.
                cost = cost + additional_cost
            params_grad, next_starting_grad = theano.subgraph_grad(
                wrt=params,
                end=inputs,
                cost=cost,
                details=False)
        return (OrderedDict(zip(params, params_grad)),
                OrderedDict(zip(inputs, next_starting_grad)))
Exemple #7
0
    def add(self, layer, **kwargs):
        """
        This adds a :class:`Model` (or list of models) to the sequence that the :class:`Prototype` holds.
        Also handles :class:`ModifyLayer`s.

        By default, we want single models added sequentially to use the outputs of the previous model as its
        `inputs` (if no `inputs` was defined by the user).

        Examples
        --------
        Here is the sequential creation of an MLP (no `inputs` have to be defined, `add()` takes care
        of it automatically)::

            from theano.tensor import matrix
            from opendeep.models.container import Prototype
            from opendeep.models.single_layer.basic import Dense, Softmax
            mlp = Prototype()
            mlp.add(Dense(inputs=(28*28, matrix('x')), outputs=1000, activation='relu'))
            mlp.add(Dense, outputs=512, activation='relu')
            mlp.add(Softmax, outputs=10)

        Parameters
        ----------
        layer : :class:`Model` or list(:class:`Model`) or :class:`ModifyLayer` or list(:class:`ModifyLayer`) or
        type(:class:`Model`) or type(:class:`ModifyLayer`)
            The model (or list of models) to add to the Prototype as the next layer(s). If you want the inputs to
            be automatically updated with the outputs of the previous layer, specify the uninstantiated class type
            as the `layer` argument, and then the respective parameters as `kwargs` to instantiate the
            class (except for the `inputs` parameter). It will automatically route the `inputs` parameter when
            initializing the class to the `get_outputs()` method from the previous layer.
            A class given while the Prototype is still empty is instantiated from `kwargs` as-is.

        Raises
        ------
        AssertionError
            If any resulting layer is not a :class:`Model` or :class:`ModifyLayer`.
        """
        if isclass(layer):
            # only route in the previous outputs when there is a previous layer and the user gave no inputs
            if len(self.models) > 0 and not kwargs.get('inputs', False):
                # get the previous layer output size and expression
                previous_out_sizes = raise_to_list(self.models[-1].output_size)
                previous_outs = raise_to_list(self.models[-1].get_outputs())
                # materialize the pairs: zip() is a one-shot iterator on Python 3
                kwargs['inputs'] = list(zip(previous_out_sizes, previous_outs))
            # instantiate even when this is the first layer; otherwise the bare class would reach the
            # type check below and always fail it
            layer = layer(**kwargs)

        # we want to be able to add multiple layers at a time (in a list), so using extend.
        # make sure the model or modifylayer is in a list
        layers = raise_to_list(layer)
        for l in layers:
            assert isinstance(l, Model) or isinstance(l, ModifyLayer), \
                "Expected layer input to be Model or ModifyLayer, found %s" % str(type(l))
        self.models.extend(layers)
    def __init__(self, inputs=None, outputs=None, function=None, **kwargs):
        """
        Store the layer's inputs, output size and optional function, and record them in `self.args`.

        Any entry of `inputs` that exposes both `output_size` and `get_outputs` (i.e. looks like a Model or
        ModifyLayer) is expanded into (size, output) pairs built from those two attributes.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            List of [tuple(shape, `Theano.TensorType`)] or None describing the inputs to use for this layer.
            `shape` will be a monad tuple representing known sizes for each dimension in the `Theano.TensorType`.
            The length of `shape` should be equal to number of dimensions in `Theano.TensorType`, where the shape
            element is an integer representing the size for its dimension, or None if the shape isn't known.
            For example, if you have a matrix with unknown batch size but fixed feature size of 784, `shape` would
            be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : List of [int or shape tuple]
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
            An `output_size` keyword argument, when given, is used instead of this value.
        function : theano expression, optional
            If you want to pass a custom Theano expression to run on the inputs, you can do that here. This is
            mostly to enable a wrapper for processing data when adding layers to a :class:`opendeep.models.container`
            object, such as a :class:`opendeep.models.Prototype`.
        """
        self._classname = self.__class__.__name__
        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            ins = []
            # deal with Models or ModifyLayers being passed as an input.
            for item in self.inputs:
                if hasattr(item, 'output_size') and hasattr(item, 'get_outputs'):
                    sizes = raise_to_list(item.output_size)
                    outs = raise_to_list(item.get_outputs())
                    # a single size is shared by every output
                    if len(sizes) == 1 and len(sizes) < len(outs):
                        sizes = sizes * len(outs)
                    # extend() consumes the zip directly, so this works whether zip() returns a list
                    # (Python 2) or a lazy iterator (Python 3)
                    ins.extend(zip(sizes, outs))
                else:
                    ins.append(item)
            # replace self.inputs
            self.inputs = ins

        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.function = function
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)
        self.args['inputs'] = self.inputs
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        if self.function is not None:
            self.args['function'] = self.function
        # Don't know the position of switches!
        self.switches_on = None

        log.debug("Creating a new ModifyLayer: %s with args: %s" % (self._classname, str(self.args)))
Exemple #9
0
    def run(self, input):
        """
        This method will return the Prototype's output (run through the `f_run` function), given an input. The input
        comes from all unique inputs to the models in the Prototype as calculated from `get_inputs()` and the outputs
        computed similarly from `get_outputs`.

        The `f_run` function is compiled on first use and cached on `self.f_run`. Every switch returned by
        `get_switches()` is set to 0 for the duration of the call and restored to its previous value afterwards,
        even if compiling or running raises.

        Parameters
        ----------
        input: array_like
            Theano/numpy tensor-like object that is the input into the model's computation graph.

        Returns
        -------
        array_like
            Theano/numpy tensor-like object that is the output of the model's computation graph.
        """
        # set the noise switches off for running! we assume unseen data is noisy anyway :)
        switches = self.get_switches()
        old_switch_vals = []
        if len(switches) > 0:
            log.debug("Turning off %s noise switches, resetting them after run!", str(len(switches)))
            old_switch_vals = [switch.get_value() for switch in switches]
            for switch in switches:
                switch.set_value(0.)

        try:
            # make sure the input is raised to a list - we are going to splat it!
            args = raise_to_list(input)
            # compile f_run only if we have not already made it
            if not hasattr(self, 'f_run'):
                inputs = raise_to_list(self.get_inputs())
                outputs = raise_to_list(self.get_outputs())
                # unwrap a single output so the compiled function returns it directly
                if outputs is not None and len(outputs) == 1:
                    outputs = outputs[0]
                updates = self.get_updates()
                t = time.time()
                log.info("Compiling f_run...")
                self.f_run = function(inputs=inputs, outputs=outputs, updates=updates, name="f_run")
                log.info("Compilation done! Took %s", make_time_units_string(time.time() - t))
            output = self.f_run(*args)
        finally:
            # reset any switches to how they were, even when the run failed, so a failure here
            # does not leave the model with its switches turned off
            for switch, val in zip(switches, old_switch_vals):
                switch.set_value(val)

        return output
Exemple #10
0
    def add(self, layer, **kwargs):
        """
        This adds a :class:`Model` (or list of models) to the sequence that the :class:`Prototype` holds.
        Also handles :class:`ModifyLayer`s.

        By default, we want single models added sequentially to use the outputs of the previous model as its
        `inputs` (if no `inputs` was defined by the user).

        Examples
        --------
        Here is the sequential creation of an MLP (no `inputs` have to be defined, `add()` takes care
        of it automatically)::

            from theano.tensor import matrix
            from opendeep.models.container import Prototype
            from opendeep.models.single_layer.basic import Dense, Softmax
            mlp = Prototype()
            mlp.add(Dense(inputs=(28*28, matrix('x')), outputs=1000, activation='relu'))
            mlp.add(Dense, outputs=512, activation='relu')
            mlp.add(Softmax, outputs=10)

        Parameters
        ----------
        layer : :class:`Model` or list(:class:`Model`) or :class:`ModifyLayer` or list(:class:`ModifyLayer`) or
        type(:class:`Model`) or type(:class:`ModifyLayer`)
            The model (or list of models) to add to the Prototype as the next layer(s). If you want the inputs to
            be automatically updated with the outputs of the previous layer, specify the uninstantiated class type
            as the `layer` argument, and then the respective parameters as `kwargs` to instantiate the
            class (except for the `inputs` parameter). It will automatically route the `inputs` parameter when
            initializing the class to the `get_outputs()` method from the previous layer.
            A class given while the Prototype is still empty is instantiated from `kwargs` as-is.

        Raises
        ------
        AssertionError
            If any resulting layer is not a :class:`Model` or :class:`ModifyLayer`.
        """
        if isclass(layer):
            # only route in the previous outputs when there is a previous layer and the user gave no inputs
            if len(self.models) > 0 and not kwargs.get('inputs', False):
                # get the previous layer output size and expression
                previous_out_sizes = raise_to_list(self.models[-1].output_size)
                previous_outs = raise_to_list(self.models[-1].get_outputs())
                # materialize the pairs: zip() is a one-shot iterator on Python 3
                kwargs['inputs'] = list(zip(previous_out_sizes, previous_outs))
            # instantiate even when this is the first layer; otherwise the bare class would reach the
            # type check below and always fail it
            layer = layer(**kwargs)

        # we want to be able to add multiple layers at a time (in a list), so using extend.
        # make sure the model or modifylayer is in a list
        layers = raise_to_list(layer)
        for l in layers:
            assert isinstance(l, Model) or isinstance(l, ModifyLayer), \
                "Expected layer input to be Model or ModifyLayer, found %s" % str(type(l))
        self.models.extend(layers)
Exemple #11
0
    def get_gradient(self, starting_gradient=None, cost=None, additional_cost=None):
        """
        This method allows you to define the gradient for this model manually. It should either work with a provided
        starting gradient (from upstream layers/models), or grab the training cost if no start gradient is provided.

        Theano's subgraph gradient function specified here:
        http://deeplearning.net/software/theano/library/gradient.html#theano.gradient.subgraph_grad

        .. warning::
            If the gradients of cost with respect to any of the start variables is already part of the
            start dictionary, then it may be counted twice with respect
            to wrt (`get_params()`) and end (`get_inputs()`).

        You should only implement this method if you want to manually define your gradients for the model.

        Parameters
        ----------
        starting_gradient : dictionary of {variable: known_gradient}, optional
            The starting, known gradients for parameters.
        cost : theano expression, optional
            The cost expression to use when calculating the gradients. Defaults to `get_train_cost()`.
            Ignored when `starting_gradient` is given.
        additional_cost : theano expression, optional
            Any additional cost to add to the gradient.

        Returns
        -------
        tuple
            (Gradient with respect to params, gradient with respect to inputs), each as an OrderedDict
            keyed by the corresponding variable.
        """
        params = self.get_params()
        inputs = raise_to_list(self.get_inputs())

        # if there are known gradients to start, use those instead of the cost for this model
        if starting_gradient is not None:
            params_grad, next_starting_grad = theano.subgraph_grad(wrt=params,
                                                                   end=inputs,
                                                                   start=starting_gradient,
                                                                   cost=additional_cost,
                                                                   details=False)
        # otherwise, just use this model's cost to determine gradient
        else:
            # Compare against None explicitly: `cost or ...` would truth-test a symbolic expression,
            # which is not a meaningful (or supported) operation.
            if cost is None:
                cost = self.get_train_cost()
            if additional_cost is not None:
                # Add the two costs. T.sum(cost, additional_cost) would have treated the additional
                # cost as the `axis` argument of a reduction.
                cost = cost + additional_cost
            params_grad, next_starting_grad = theano.subgraph_grad(wrt=params,
                                                                   end=inputs,
                                                                   cost=cost,
                                                                   details=False)
        return (OrderedDict(zip(params, params_grad)),
                OrderedDict(zip(inputs, next_starting_grad)))
Exemple #12
0
    def compile_run_fn(self):
        """
        Build (once) and return the f_run function that `run()` uses to compute outputs from inputs.

        When `self.f_run` is missing or falsy it is compiled and stored on `self.f_run`; otherwise the
        existing function is reused.

        .. note::
            The run function defaults like so::

                self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                      outputs = self.get_outputs(),
                                      updates = self.get_updates(),
                                      name    = 'f_run')

        Returns
        -------
        Theano function
            The compiled theano function for running the model.
        """
        if getattr(self, 'f_run', None):
            # already compiled - reuse it
            log.debug('f_run already exists!')
            return self.f_run

        log.debug("Compiling f_run...")
        started = time.time()
        self.f_run = function(
            inputs=raise_to_list(self.get_inputs()),
            outputs=self.get_outputs(),
            updates=self.get_updates(),
            name='f_run',
        )
        elapsed = make_time_units_string(time.time() - started)
        log.debug("Compilation done. Took %s", elapsed)
        return self.f_run
def get_stats(input, stat=None):
    """
    Build a dictionary mapping statistic names to their expressions over the input.
    Available statistics: mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.
    stat : str or list(str), optional
        Name(s) of the statistics to return. When it raises to None, every statistic is returned.
        Entries that are not strings, or not one of the available names, are skipped silently.

    Returns
    -------
    dict
        Dictionary of the selected statistics expressions {string_name: theano expression}
    """
    available = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
        #'num_nonzero': T.sum(T.nonzero(input)),
    }
    requested = raise_to_list(stat)
    if requested is None:
        # no selection given: hand back everything
        return available

    return {
        key: available[key]
        for key in requested
        if isinstance(key, string_types) and key in available
    }
Exemple #14
0
def _check_type_and_return_as_list(iterables, name="Unknown"):
    """
    Validate one data source (or a list of them) and return it.

    After raising `iterables` to a list, every element must be an `Iterable` and must not be a
    `Generator`; a warning is logged for each element that is itself a list.

    Parameters
    ----------
    iterables : object
        A single data source or a list of data sources. May be None.
    name : str
        Label for this group of sources (such as train_inputs), used in the assertion and warning messages.

    Returns
    -------
    object
        None when raising to a list gives None. The single element itself when there is exactly one and
        the caller did not pass a list. Otherwise the list of sources.
    """
    was_list = isinstance(iterables, list)
    sources = raise_to_list(iterables)
    if sources is None:
        return sources

    # every source has to be re-iterable: iterable, and not a one-shot generator
    for position, source in enumerate(sources):
        assert isinstance(source, Iterable), "%s (raised to a list) parameter index %d is not iterable! Found %s" % \
                                             (name, position, str(type(source)))
        assert not isinstance(source, Generator), "%s (raised to a list) parameter index %d is a generator! " \
                                                  "Because we loop through the data multiple times, the generator " \
                                                  "will run out after the first iteration. Please consider using " \
                                                  "one of the stream types in opendeep.data.stream instead, " \
                                                  "or define your own class that performs the generator function " \
                                                  "in an __iter__(self) method!" % (name, position)
        if not isinstance(source, list):
            continue
        log.warning("%s (raised to a list) parameter index %d has type: list. Because we raise iterables to "
                    "a list internally, this is generally bad practice. Please use something else like "
                    "Tuples, Iterators, Numpy Arrays, etc. instead of Lists for the data source." %
                    (name, position))

    # a lone stream that we wrapped ourselves is handed back without the list wrapper
    if was_list or len(sources) != 1:
        return sources
    return sources[0]
Exemple #15
0
    def run(self, input):
        """
        Return the Prototype's output for the given input by calling the `f_run` function.

        `f_run` is compiled from `get_inputs()`, `get_outputs()` and `get_updates()` the first time it is
        needed and cached on `self.f_run`, so later calls reuse it instead of re-compiling.

        Parameters
        ----------
        input: array_like
            Theano/numpy tensor-like object that is the input into the model's computation graph.

        Returns
        -------
        array_like
            Theano/numpy tensor-like object that is the output of the model's computation graph.
        """
        # the input gets splatted into f_run, so it has to be a list
        args = raise_to_list(input)

        # compile f_run only when it has not been made yet
        if not hasattr(self, 'f_run'):
            graph_inputs = self.get_inputs()
            graph_outputs = self.get_outputs()
            graph_updates = self.get_updates()
            started = time.time()
            log.info("Compiling f_run...")
            self.f_run = function(inputs=graph_inputs, outputs=graph_outputs, updates=graph_updates, name="f_run")
            log.info("Compilation done! Took %s", make_time_units_string(time.time() - started))

        return self.f_run(*args)
Exemple #16
0
    def get_inputs(self):
        """
        This should return the input(s) to the Prototype's computation graph as a list.
        This is called by the :class:`Optimizer` when creating the theano train function on the cost expressions
        returned by get_train_cost(). Therefore, these are the training function inputs! (Which is different
        from f_run inputs if you include the supervised labels)

        This gets a list of all unique inputs by going through each model in the Prototype and keeping the
        inputs that are tensors with no owner (a starting point in the computation graph). An input shared by
        several models is returned once, at the position of its first occurrence.

        Returns
        -------
        List(tensor)
            Theano variables representing the input(s) to the computation graph.
        """
        inputs = []
        for model in self.models:
            # grab the inputs list from the model
            for candidate in model.get_inputs():
                # keep only tensors in their basic input form (i.e. ones that don't have an owner)
                if not (isinstance(candidate, T.TensorVariable) and hasattr(candidate, 'owner')):
                    continue
                if candidate.owner is not None:
                    continue
                # Skip variables we already collected. Compare by identity so no symbolic
                # comparison operators get involved.
                if any(candidate is seen for seen in inputs):
                    continue
                inputs.append(candidate)
        return inputs
Exemple #17
0
    def __init__(self, name, monitors=None):
        """
        Create a channel called `name`, optionally seeded with one or more :class:`Monitor` objects.

        Names of channels have to be unique from each other.

        Parameters
        ----------
        name : str
            The unique name to give this channel.
        monitors : Monitor, list(Monitor), optional
            The starting monitor(s) to use for this channel. The channel starts empty when omitted.

        Raises
        ------
        AssertionError
            If any starting monitor is not a :class:`Monitor`, or if `name` is not a string.
        """
        starting = raise_to_list(monitors)
        if starting is None:
            # nothing supplied - start empty
            starting = []
        # make sure the input monitors are actually Monitors
        for candidate in starting:
            assert isinstance(candidate, Monitor), \
                "Input monitors need to all be type Monitor. Found %s" % str(type(candidate))
        self.monitors = starting

        # make sure the channel name is a string
        assert isinstance(name, string_types), "name needs to be a string. found %s" % str(type(name))
        self.name = name
 def __iter__(self):
     """
     Iterate over the results of `find_files(self.path, self.filter)`.

     When `self.preprocess` is set and callable it is applied to each result first. The (possibly
     preprocessed) value is raised to a list and each of its elements is yielded in turn.
     """
     for found in find_files(self.path, self.filter):
         use_preprocess = self.preprocess is not None and callable(self.preprocess)
         item = self.preprocess(found) if use_preprocess else found
         for entry in raise_to_list(item):
             yield entry
Exemple #19
0
    def add(self, monitor):
        """
        Adds a :class:`Monitor` (or list of monitors) to the channel.

        Each monitor whose expression is not already in the channel is appended to `self.monitors`.
        A monitor whose name clashes with one already in the channel is renamed by appending the first free
        numeric suffix: if 'a' already exists it becomes 'a_0', then 'a_1', and so on.
        The list passed in is never modified.

        Parameters
        ----------
        monitor : Monitor or list(Monitor)
            The monitor(s) to add. None is accepted and adds nothing.

        Raises
        ------
        AssertionError
            If any of the given monitors is not a :class:`Monitor`.
        """
        monitors = raise_to_list(monitor)
        if monitors is None:
            return

        for candidate in monitors:
            # make sure the input monitors are actually Monitors
            assert isinstance(candidate, Monitor), \
                "Input monitors need to all be type Monitor. Found %s" % str(type(candidate))
            # Check if the monitor is already in the channel - if it is, skip it. The input list is left
            # alone: removing from it while iterating would skip the following element and would also
            # mutate a list owned by the caller.
            if candidate.expression in self.get_monitor_expressions():
                continue

            names = self.get_monitor_names()
            # check if the monitor has the same name as one in the channel; if so, rename it with a number added.
            # for example, if a monitor with name 'a' already exists in the channel, it will be renamed to 'a_0'.
            if candidate.name in names:
                i = 0
                potential_name = '_'.join([candidate.name, str(i)])
                # `names` is finite, so this search always terminates at the first free suffix
                while potential_name in names:
                    i += 1
                    potential_name = '_'.join([candidate.name, str(i)])
                # found the next open name, so rename the monitor.
                log.info("Renaming monitor %s (from Channel %s) to %s.", candidate.name, self.name, potential_name)
                candidate.name = potential_name
            # add the monitor to the list!
            self.monitors.append(candidate)
Exemple #20
0
def _check_type_and_return_as_list(iterables, name="Unknown"):
    """
    Validate a data source (or list of data sources) and return it.

    The input is raised to a list, and every element is asserted to be an `Iterable` that is not a
    `Generator`. Elements that are plain lists only trigger a warning.

    Parameters
    ----------
    iterables : object
        A single data source or a list of them.
    name : str
        Label used in the assertion/warning messages for this group (such as train_inputs).

    Returns
    -------
    object
        None if the raised list is None; the single element itself when the input was not already a list
        and raised to exactly one element; otherwise the raised list.
    """
    was_list = isinstance(iterables, list)
    iterables = raise_to_list(iterables)
    if iterables is None:
        return iterables

    not_iterable_msg = "%s (raised to a list) parameter index %d is not iterable! Found %s"
    generator_msg = ("%s (raised to a list) parameter index %d is a generator! "
                     "Because we loop through the data multiple times, the generator "
                     "will run out after the first iteration. Please consider using "
                     "one of the stream types in opendeep.data.stream instead, "
                     "or define your own class that performs the generator function "
                     "in an __iter__(self) method!")
    list_msg = ("%s (raised to a list) parameter index %d has type: list. Because we raise iterables to "
                "a list internally, this is generally bad practice. Please use something else like "
                "Tuples, Iterators, Numpy Arrays, etc. instead of Lists for the data source.")

    for position, source in enumerate(iterables):
        # everything has to be iterable...
        assert isinstance(source, Iterable), not_iterable_msg % (name, position, str(type(source)))
        # ...but generators are rejected
        assert not isinstance(source, Generator), generator_msg % (name, position)
        if isinstance(source, list):
            log.warning(list_msg % (name, position))

    # a single stream that we wrapped ourselves is handed back unwrapped
    if len(iterables) == 1 and not was_list:
        return iterables[0]
    return iterables
Exemple #21
0
    def __init__(self, config=None, layers=None, outdir='outputs/prototype/'):
        """
        Set up the container, optionally seeding it with starting models.

        .. todo:: Add the ability to create models list from the input config. Right now, it does nothing.

        Parameters
        ----------
        config : dict or JSON/YAML filename, optional
            Configuration forwarded to the superclass initializer.
        layers : list(:class:`Model`)
            A model or list of models to start with. It is raised to a list and stored as `self.models`.
        outdir : str
            Output location forwarded to the superclass initializer.
        """
        # the superclass receives the config and output directory
        super(Prototype, self).__init__(config=config, outdir=outdir)

        # TODO: add ability to create the models list from the input config.

        # start empty unless layers were given (in which case make sure they are a list)
        self.models = [] if layers is None else raise_to_list(layers)
Exemple #22
0
    def _get_batch_indices(self, data_lengths):
        """
        Computes the tuples of (start_index, end_index) that represent the appropriate slices of the concatenated
        dataset with regards to the given data_lengths. Batches never span two entries of `data_lengths`, so
        a batch does not overstep the start of a new sequence.

        For each length, as many full batches of `self.batch_size` as fit are emitted. The leftover examples
        form one extra, smaller batch only if there is at least one and their count is at least
        `self.minimum_batch_size`; otherwise they are skipped (the running start index still advances past them).

        Parameters
        ----------
        data_lengths : list(int) or int
            List of num_examples for each dataset (the length of the datasets - this is a list in the case of
            sequences).

        Returns
        -------
        list((int, int))
            List of tuples (start, end) representing the batch slices for the total dataset if it were concatenated.
        """
        # use one integer batch size everywhere so the indices stay integral
        batch_size = int(self.batch_size)
        batch_indices = []
        start_idx = 0
        for length in raise_to_list(data_lengths):
            # floor division: true division (`/`) yields a float on Python 3, which range() rejects
            n_batches, remainder = divmod(int(length), batch_size)
            # add the (start_idx, end_idx) tuple for every whole batch
            for _ in range(n_batches):
                end_idx = start_idx + batch_size
                batch_indices.append((start_idx, end_idx))
                start_idx = end_idx
            # leftover examples for this length
            end_idx = start_idx + remainder
            # keep the leftover batch only if it is non-empty and big enough
            if remainder > 0 and remainder >= self.minimum_batch_size:
                batch_indices.append((start_idx, end_idx))
            start_idx = end_idx
        return batch_indices
Exemple #23
0
    def set_switches(self, values):
        """
        Assign a value to every switch returned by `get_switches()`.

        Each entry of `values` is converted to 1. when truthy and 0. otherwise before being set.
        Afterwards `self.switches_on` is reset to None.

        Parameters
        ----------
        values : list(boolean)
            One entry per switch, in the same order as `get_switches()`.
        """
        switch_vars = raise_to_list(self.get_switches())
        float_values = [1. if flag else 0. for flag in raise_to_list(values)]
        n_switches = len(switch_vars)
        n_values = len(float_values)
        assert n_switches == n_values, \
            "Switches (len %d) needs to be same length as values (len %d)!" % (n_switches, n_values)
        log.debug("Setting specified values for %d switches!" % n_switches)
        for switch_var, new_value in zip(switch_vars, float_values):
            switch_var.set_value(new_value)
        # the switches are no longer known to be uniformly on or off
        self.switches_on = None
Exemple #24
0
    def run(self, input):
        """
        Compute the model's output for the given input using `self.f_run`.

        .. note::
            If the Model doesn't have an f_run attribute,
            `compile_run_fn()` is called first.

        Parameters
        ----------
        input : tensor
            The input(s) for the run function. Raised to a list and splatted into `self.f_run`.

        Returns
        -------
        array_like
            Whatever `self.f_run` returns for the given input.
        """
        # compile lazily on first use
        if not hasattr(self, 'f_run'):
            self.compile_run_fn()

        # f_run takes its inputs as positional arguments, hence the list + splat
        return self.f_run(*raise_to_list(input))
Exemple #25
0
    def set_switches(self, values):
        """
        Set each switch from `get_switches()` to the corresponding entry of `values`.

        Truthy entries become 1. and falsy entries become 0.; `self.switches_on` is set to None at the end.

        Parameters
        ----------
        values : list(boolean)
            The desired state for each switch (same length and order as the switches).
        """
        targets = raise_to_list(self.get_switches())
        settings = raise_to_list(values)
        settings = [1. if setting else 0. for setting in settings]
        assert len(targets) == len(settings), \
            "Switches (len %d) needs to be same length as values (len %d)!" % (len(targets), len(settings))
        log.debug("Setting specified values for %d switches!" % len(targets))
        for target, setting in zip(targets, settings):
            target.set_value(setting)
        self.switches_on = None
Exemple #26
0
def get_stats(input, stat=None):
    """
    Build statistic expressions for `input`, keyed by statistic name.

    The available statistics are mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.
    stat : str or list(str), optional
        Name(s) of the statistics to return. Entries that are not strings or not an available
        statistic name are ignored. When the raised list is None, all statistics are returned.

    Returns
    -------
    dict
        Dictionary of the selected statistics expressions {string_name: theano expression}
    """
    # all expressions are built up front, in this order ('num_nonzero' is currently disabled)
    available = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
    }
    requested = raise_to_list(stat)
    if requested is None:
        return available

    # keep only the requested names that we actually know about
    return {
        wanted: available[wanted]
        for wanted in requested
        if isinstance(wanted, six.string_types) and wanted in available
    }
Exemple #27
0
    def get_inputs(self):
        """
        Collect the unique base input variables across all models in `self.models`.

        A model input is kept when its `owner` attribute is None, it has no `data` attribute, and its
        `type` is a `TensorType`. Each kept variable appears once, in the order first encountered.

        Returns
        -------
        List(tensor)
            The collected input variables, ordered by model and then by each model's input order.
        """
        unique_inputs = []
        for sub_model in self.models:
            for candidate in raise_to_list(sub_model.get_inputs()):
                # a missing `owner` attribute defaults to False, which is rejected just like a real owner
                if getattr(candidate, 'owner', False) is not None:
                    continue
                # anything carrying `data` is skipped
                if hasattr(candidate, 'data'):
                    continue
                # only tensor-typed variables qualify
                if not isinstance(getattr(candidate, 'type', None), TensorType):
                    continue
                # no duplicates in the running list
                if candidate not in unique_inputs:
                    unique_inputs.append(candidate)
        return unique_inputs
Exemple #28
0
    def __init__(self, layers=None, config=None, outdir='outputs/prototype/'):
        """
        Set up the container, optionally seeding it with starting models.

        .. todo:: Add the ability to create models list from the input config. Right now, it does nothing.

        Parameters
        ----------
        layers : list(:class:`Model`), optional
            A model or list of models to start with. It is forwarded to the superclass initializer and
            also raised to a list and stored as `self.models`.
        config : dict or JSON/YAML filename, optional
            Configuration forwarded to the superclass initializer.
        outdir : str, optional
            Output location forwarded to the superclass initializer.
        """
        # the superclass receives the config, output directory and the raw layers argument
        super(Prototype, self).__init__(config=config, outdir=outdir, layers=layers)

        # TODO: add ability to create the models list from the input config.

        # start empty unless layers were given (in which case make sure they are a list)
        self.models = [] if layers is None else raise_to_list(layers)
Exemple #29
0
    def __init__(self, name, monitors=None):
        """
        Create a channel called `name`, optionally starting with some :class:`Monitor` objects.

        Names of channels have to be unique from each other.

        Parameters
        ----------
        name : str
            The unique name to give this channel.
        monitors : Monitor, list(Monitor), optional
            The starting monitor(s) to use for this channel.

        Raises
        ------
        AssertionError
            If a starting monitor is not a :class:`Monitor`, or if `name` is not a string.
        """
        starting = raise_to_list(monitors)
        if starting is None:
            # nothing given: start empty
            self.monitors = []
        else:
            # every starting element has to be a Monitor
            for candidate in starting:
                assert isinstance(candidate, Monitor), \
                    "Input monitors need to all be type Monitor. Found %s" % str(type(candidate))
            self.monitors = starting
        # the channel name must be a string
        assert isinstance(name, string_types), "name needs to be a string. found %s" % str(type(name))
        self.name = name
Exemple #30
0
    def add(self, monitor):
        """
        Adds a :class:`Monitor` (or list of monitors) to the channel.

        Every accepted monitor is appended to `self.monitors`. A monitor whose expression is already in
        the channel is skipped. A monitor whose name collides with an existing name in the channel is
        renamed with the first free numeric suffix (for example 'a' becomes 'a_0', then 'a_1', ...).

        The input list (if a list was given) is never modified by this method.

        Parameters
        ----------
        monitor : Monitor or list(Monitor)
            The monitor(s) to add. If `raise_to_list` yields None for the input, nothing is added.

        Raises
        ------
        AssertionError
            If any of the given elements is not a :class:`Monitor`.
        """
        candidates = raise_to_list(monitor)
        if candidates is None:
            # nothing to add
            return
        for candidate in candidates:
            # make sure the input monitors are actually Monitors
            assert isinstance(candidate, Monitor), \
                "Input monitors need to all be type Monitor. Found %s" % str(type(candidate))
            # already in the channel? then skip it. Skipping (rather than removing from the list we are
            # iterating) avoids silently jumping over the next element and mutating the caller's list.
            if candidate.expression in self.get_monitor_expressions():
                continue
            names = self.get_monitor_names()
            # on a name clash, rename with a number added ('a' -> 'a_0', 'a_1', ...)
            if candidate.name in names:
                i = 0
                potential_name = '_'.join([candidate.name, str(i)])
                # `names` is finite, so this always terminates; no iteration cap is needed.
                while potential_name in names:
                    i += 1
                    potential_name = '_'.join([candidate.name, str(i)])
                # found the next open name, so rename the monitor.
                log.info("Renaming monitor %s (from Channel %s) to %s.",
                         candidate.name, self.name, potential_name)
                candidate.name = potential_name
            # add the monitor to the list!
            self.monitors.append(candidate)
Exemple #31
0
    def get_inputs(self):
        """
        Collect the base input variables across all models in `self.models`.

        A model input is kept when it is a `T.TensorVariable` with an `owner` attribute that is None.
        Kept inputs are appended in the order encountered (no de-duplication is performed here).

        Returns
        -------
        List(tensor)
            Theano variables representing the input(s) to the computation graph.
        """
        collected = []
        for sub_model in self.models:
            for candidate in sub_model.get_inputs():
                # only tensor variables that expose `owner` are considered
                if not (isinstance(candidate, T.TensorVariable) and hasattr(candidate, 'owner')):
                    continue
                # no owner: add it to the running inputs list
                if candidate.owner is None:
                    collected.extend(raise_to_list(candidate))
        return collected
Exemple #32
0
    def run(self, input):
        """
        Compute the model's output for the given input using `self.f_run`.

        .. note::
            If the Model doesn't have an f_run attribute,
            `compile_run_fn()` is called first.

        Parameters
        ----------
        input : tensor
            The input(s) for the run function. Raised to a list and splatted into `self.f_run`.

        Returns
        -------
        array_like
            Whatever `self.f_run` returns for the given input.
        """
        needs_compile = not hasattr(self, 'f_run')
        if needs_compile:
            self.compile_run_fn()

        # the run function takes positional inputs, so splat the list of inputs into it
        arguments = raise_to_list(input)
        return self.f_run(*arguments)
Exemple #33
0
    def compile_run_fn(self):
        """
        Compile (if needed) and return the `f_run` function used by `run()`.

        When `self.f_run` is missing or falsy, it is created and stored on the instance; otherwise the
        existing one is returned unchanged.

        .. note::
            The run function defaults like so::

                self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                      outputs = self.get_outputs(),
                                      updates = self.get_updates(),
                                      name    = 'f_run')

        Returns
        -------
        Theano function
            The compiled theano function for running the model.
        """
        # nothing to do when a run function is already there
        if getattr(self, 'f_run', None):
            log.debug('f_run already exists!')
            return self.f_run

        log.debug("Compiling f_run...")
        started = time.time()
        self.f_run = function(
            inputs=raise_to_list(self.get_inputs()),
            outputs=self.get_outputs(),
            updates=self.get_updates(),
            name='f_run'
        )
        elapsed = time.time() - started
        log.debug("Compilation done. Took %s", make_time_units_string(elapsed))
        return self.f_run
Exemple #34
0
    def get_inputs(self):
        """
        Collect the unique base input variables across all models in `self.models`.

        A model input is kept when its `owner` attribute is None, it has no `data` attribute, and its
        `type` is a `TensorType`. Each kept variable appears once, in the order first encountered.

        Returns
        -------
        List(tensor)
            The collected input variables, ordered by model and then by each model's input order.
        """
        found = []
        for member in self.models:
            member_inputs = raise_to_list(member.get_inputs())
            for variable in member_inputs:
                # a missing `owner` attribute defaults to False, so such objects are never treated as base inputs
                no_owner = getattr(variable, 'owner', False) is None
                carries_data = hasattr(variable, 'data')
                tensor_typed = isinstance(getattr(variable, 'type', None), TensorType)
                is_base_input = no_owner and tensor_typed and not carries_data
                # only add base inputs we have not seen yet
                if is_base_input and variable not in found:
                    found.append(variable)
        return found
Exemple #35
0
 def __iter__(self):
     """
     Iterate over the entries returned by `files.find_files(self.path, self.filter)`.

     If `self.preprocess` is a callable, it is applied to each entry first. The (possibly preprocessed)
     entry is raised to a list and its elements are yielded one at a time.
     """
     for found in files.find_files(self.path, self.filter):
         hook = self.preprocess
         if hook is not None and callable(hook):
             found = hook(found)
         # a single entry may expand to several items
         for item in raise_to_list(found):
             yield item
 def run(self, input):
     """
     Call `self.f_run` on the given input, if the instance has an `f_run` attribute.

     Returns the result of `self.f_run`; when there is no `f_run`, a warning is logged and None is returned.
     """
     if not hasattr(self, 'f_run'):
         log.warning("No f_run for the GSN (this is most likely the case when a hiddens_hook was provided.")
         return None
     # f_run takes positional inputs, so raise the input to a list and splat it
     return self.f_run(*raise_to_list(input))
Exemple #37
0
 def turn_on_switches(self):
     """
     Set every switch from `get_switches()` to 1. and record `self.switches_on = True`.
     """
     switch_vars = raise_to_list(self.get_switches())
     log.debug("Turning on %d switches for %s!" % (len(switch_vars), self._classname))
     for switch_var in switch_vars:
         switch_var.set_value(1.)
     self.switches_on = True
Exemple #38
0
 def __init__(self, model):
     """
     Store the model to repeat and build `self.input` from the model's first input.

     `self.input` is created with `T.TensorType`, using the dtype of the model's first input and one
     more dimension than that input has.
     """
     # the model to repeat has to be a Model instance
     assert isinstance(model, Model), "The initial model provided was type %s, not a Model." % str(type(model))
     self.model = model
     # only the first of the model's inputs is used as the template
     first_input = raise_to_list(self.model.get_inputs())[0]
     # one extra (non-broadcastable) dimension, since we are repeating over the first dimension
     pattern = (False,) * (first_input.ndim + 1)
     self.input = T.TensorType(first_input.dtype, pattern)
Exemple #39
0
 def turn_off_switches(self):
     """
     Set every switch from `get_switches()` to 0. and record `self.switches_on = False`.

     Does nothing when there are no switches.
     """
     switch_vars = raise_to_list(self.get_switches())
     if len(switch_vars) == 0:
         return
     log.debug("Turning off %d switches for %s!" % (len(switch_vars), self._classname))
     for switch_var in switch_vars:
         switch_var.set_value(0.)
     self.switches_on = False
 def turn_on_switches(self):
     """
     Set every switch from `get_switches()` to 1. and record `self.switches_on = True`.

     Does nothing when there are no switches.
     """
     switch_vars = raise_to_list(self.get_switches())
     if len(switch_vars) == 0:
         return
     log.debug("Turning on %d switches for %s!" % (len(switch_vars), self._classname))
     for switch_var in switch_vars:
         switch_var.set_value(1.)
     self.switches_on = True
Exemple #41
0
 def run(self, input):
     """
     Call `self.f_run` on the given input, if the instance has an `f_run` attribute.

     Returns the result of `self.f_run`; when there is no `f_run`, a warning is logged and None is returned.
     """
     has_run_fn = hasattr(self, 'f_run')
     if has_run_fn:
         # the run function takes positional inputs, so make sure input is a list before the splat
         arguments = raise_to_list(input)
         return self.f_run(*arguments)
     log.warning("No f_run for the GSN (this is most likely the case when a hiddens_hook was provided.")
     return None
Exemple #42
0
 def flip_switches(self):
     """
     Replace the value `v` of every switch from `get_switches()` with `1. - v`.

     `self.switches_on` is negated as well, unless it is None.
     """
     switch_vars = raise_to_list(self.get_switches())
     log.debug("Flipping %d switches for %s!" % (len(switch_vars), self._classname))
     for switch_var in switch_vars:
         switch_var.set_value(1. - switch_var.get_value())
     # None stays None
     if self.switches_on is not None:
         self.switches_on = not self.switches_on
Exemple #43
0
    def run(self, input):
        """
        This method will return the model's output (run through the function), given an input.

        Unless `self.switches_on` is already False, the current switch values are saved and the switches
        are turned off before running. The saved values are restored afterwards, even if compiling or
        running the function raises.

        .. note::
            If the Model doesn't have an `f_run` attribute (or it is falsy),
            it will run `compile_run_fn()` to compile the appropriate function.

        Parameters
        ----------
        input : tensor or list(tensor)
            Theano/numpy tensor-like object(s) that is the input(s) into the model's computation graph.

        Returns
        -------
        array_like or list(array_like)
            Array_like object that is the output(s) of the model's computation graph run on the given input(s).
        """
        # remember the switch values and turn the switches off for running
        old_switch_vals = []
        if self.switches_on is not False:
            old_switch_vals = [
                switch.get_value()
                for switch in raise_to_list(self.get_switches())
            ]
            self.turn_off_switches()

        try:
            # check if the run function is already compiled, otherwise compile it!
            if not getattr(self, 'f_run', None):
                self.compile_run_fn()

            # because we use the splat to account for multiple inputs to the function, make sure input is a list.
            output = self.f_run(*raise_to_list(input))
        finally:
            # reset any switches to how they were, so a failed run does not leave them turned off
            if len(old_switch_vals) > 0:
                self.set_switches(old_switch_vals)

        return output
Exemple #44
0
    def add(self, model):
        """
        Append a model (or list of models) to `self.models`.

        When a single :class:`Model` is added to a non-empty Prototype and the model has neither an
        `inputs_hook` nor a `hiddens_hook`, a new instance of the same class is created from a copy of
        `model.args` with `inputs_hook` set to the previous model's `(output_size, get_outputs())`, and
        that new instance is added instead.

        Examples
        --------
        Here is the sequential creation of an MLP (no `inputs_hook` have to be defined, `add()` takes care
        of it automatically::

            from opendeep.models.container import Prototype
            from opendeep.models.single_layer.basic import BasicLayer, SoftmaxLayer
            mlp = Prototype()
            mlp.add(BasicLayer(input_size=28*28, output_size=1000, activation='relu', noise='dropout', noise_level=0.5))
            mlp.add(BasicLayer(output_size=512, activation='relu', noise='dropout', noise_level=0.5))
            mlp.add(SoftmaxLayer(output_size=10))

        Parameters
        ----------
        model : :class:`Model` or list(:class:`Model`)
            The model (or list of models) to add to the Prototype.
        """
        # hooking only applies to a single, un-hooked Model being added after an existing one
        should_hook = (
            isinstance(model, Model)
            and len(self.models) > 0
            and model.inputs_hook is None
            and model.hiddens_hook is None
        )
        if should_hook:
            log.info('Overriding model %s with new inputs_hook!',
                     str(type(model)))
            # the hook is the previous layer's output size and output expression
            previous = self.models[-1]
            hook = (previous.output_size, previous.get_outputs())
            # same class, same arguments, except for the new inputs_hook
            constructor = type(model)
            arguments = model.args.copy()
            arguments['inputs_hook'] = hook
            rebuilt = constructor(**arguments)
            # NOTE(review): `del` here only unbinds the local loop name on each iteration
            for param in model.get_params():
                del param
            # from here on, the rebuilt instance stands in for the original
            model = rebuilt

        # extend (rather than append) so that several models can be added at once
        self.models.extend(raise_to_list(model))
 def flip_switches(self):
     """
     Replace the value `v` of every switch from `get_switches()` with `1. - v`.

     `self.switches_on` is negated as well, unless it is None. Does nothing when there are no switches.
     """
     switch_vars = raise_to_list(self.get_switches())
     if len(switch_vars) == 0:
         return
     log.debug("Flipping %d switches for %s!" % (len(switch_vars), self._classname))
     for switch_var in switch_vars:
         switch_var.set_value(1. - switch_var.get_value())
     # None stays None
     if self.switches_on is not None:
         self.switches_on = not self.switches_on
Exemple #46
0
 def __iter__(self):
     """
     Iterate over the files from `files.find_files(self.path, self.filter)`, opened as images.

     Each file is opened with `Image.open` and converted with `numpy.array`. If `self.preprocess` is a
     callable it is applied to the array. The result is raised to a list and its elements are yielded.
     Any `Exception` raised while handling a file is logged and iteration moves on to the next file.
     """
     for fname in files.find_files(self.path, self.filter):
         try:
             with Image.open(fname) as im:
                 pixels = numpy.array(im)
                 hook = self.preprocess
                 if hook is not None and callable(hook):
                     pixels = hook(pixels)
                 # one image may expand to several items
                 for item in raise_to_list(pixels):
                     yield item
         except Exception as err:
             # log the failure and continue with the next file
             log.exception(str(err))
Exemple #47
0
    def add(self, model):
        """
        Append a model (or list of models) to `self.models`.

        When a single :class:`Model` is added to a non-empty Prototype and the model has neither an
        `inputs_hook` nor a `hiddens_hook`, a new instance of the same class is created from a copy of
        `model.args` with `inputs_hook` set to the previous model's `(output_size, get_outputs())`, and
        that new instance is added instead.

        Examples
        --------
        Here is the sequential creation of an MLP (no `inputs_hook` have to be defined, `add()` takes care
        of it automatically::

            from opendeep.models.container import Prototype
            from opendeep.models.single_layer.basic import BasicLayer, SoftmaxLayer
            mlp = Prototype()
            mlp.add(BasicLayer(input_size=28*28, output_size=1000, activation='relu', noise='dropout', noise_level=0.5))
            mlp.add(BasicLayer(output_size=512, activation='relu', noise='dropout', noise_level=0.5))
            mlp.add(SoftmaxLayer(output_size=10))

        Parameters
        ----------
        model : :class:`Model` or list(:class:`Model`)
            The model (or list of models) to add to the Prototype.
        """
        single_model = isinstance(model, Model)
        # a lone Model following an existing layer may need to be hooked onto that layer
        if single_model and len(self.models) > 0:
            blank_slate = model.inputs_hook is None and model.hiddens_hook is None
            if blank_slate:
                log.info('Overriding model %s with new inputs_hook!', str(type(model)))
                # the previous layer's output size and expression make up the new inputs_hook
                last_model = self.models[-1]
                out_size = last_model.output_size
                out_expr = last_model.get_outputs()
                # re-create the model with the same arguments plus the new inputs_hook
                new_args = model.args.copy()
                new_args['inputs_hook'] = (out_size, out_expr)
                replacement = type(model)(**new_args)
                # NOTE(review): `del` here only unbinds the local loop name on each iteration
                for param in model.get_params():
                    del param
                # the replacement takes the place of the original model
                model = replacement

        # extend (not append), so a list of several models can be added at once
        to_add = raise_to_list(model)
        self.models.extend(to_add)
Exemple #48
0
    def run(self, input):
        """
        Return the model's output for the given input.

        Any noise switches reported by `get_noise_switch()` are set to zero while the run function executes
        and are restored to their previous values afterwards, even if compiling or running `f_run` raises.

        The theano run function is compiled on the first call and cached as `self.f_run`, so later calls
        reuse it instead of recompiling.

        :param input: Theano/numpy tensor-like object (or list of them) that is the input into the model's
            computation graph.
        :type input: tensor

        :return: Theano/numpy tensor-like object that is the output of the model's computation graph.
        :rtype: tensor
        """
        # remember the current switch values, then set every noise switch to zero
        switches = self.get_noise_switch()
        old_vals = [switch.get_value() for switch in switches]
        for switch in switches:
            switch.set_value(0.)

        try:
            # check if the run function is already compiled, otherwise compile it!
            if not hasattr(self, 'f_run'):
                log.debug("Compiling f_run...")
                t = time.time()
                self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                      outputs = self.get_outputs(),
                                      updates = self.get_updates())
                log.debug("Compilation done. Took %s", make_time_units_string(time.time() - t))

            # because we use the splat to account for multiple inputs to the function, make sure input is a list.
            input = raise_to_list(input)
            # compute the results of the run function!
            output = self.f_run(*input)
        finally:
            # reset the noise switches no matter how the run ended, so a failure cannot leave noise disabled
            for switch, val in zip(switches, old_vals):
                switch.set_value(val)

        return output
 def __iter__(self):
     """
     Iterate over the image files found for `self.path` / `self.filter` and yield their data.

     Each image is opened, converted to a numpy array and, if `self.preprocess` is a callable, passed
     through it. The result is raised to a list and its elements are yielded one by one.
     Any exception raised while handling a file is logged and iteration moves on to the next file.
     """
     for image_path in find_files(self.path, self.filter):
         try:
             with Image.open(image_path) as image:
                 pixels = numpy.array(image)
                 transform = self.preprocess
                 if transform is not None and callable(transform):
                     pixels = transform(pixels)
                 for item in raise_to_list(pixels):
                     yield item
         except Exception as err:
             # best effort: report the problem and continue with the next file
             _log.exception(err.__str__())
Exemple #50
0
    def run(self, input):
        """
        This method will return the model's output (run through the function), given an input. In the case that
        input_hooks or hidden_hooks are used, the function should use them appropriately and assume they are the input.

        Unless `self.switches_on` is already False, the current switch values are recorded and the switches
        are turned off for the duration of the run. The recorded values are restored afterwards, even if
        compiling or running `f_run` raises.

        .. note::
            If the Model doesn't have an `f_run` attribute (or it is falsy),
            it will run `compile_run_fn()` to compile the appropriate function.

        Parameters
        ----------
        input : tensor or list(tensor)
            Theano/numpy tensor-like object(s) that is the input(s) into the model's computation graph.

        Returns
        -------
        array_like or list(array_like)
            Array_like object that is the output(s) of the model's computation graph run on the given input(s).
        """
        # turn the switches off for running, remembering how they were set
        old_switch_vals = []
        if self.switches_on is not False:
            old_switch_vals = [switch.get_value() for switch in raise_to_list(self.get_switches())]
            self.turn_off_switches()

        try:
            # check if the run function is already compiled, otherwise compile it!
            if not getattr(self, 'f_run', None):
                self.compile_run_fn()

            # because we use the splat to account for multiple inputs to the function, make sure input is a list.
            input = raise_to_list(input)
            # compute the results of the run function!
            output = self.f_run(*input)
        finally:
            # reset any switches to how they were, whether or not the run succeeded
            if len(old_switch_vals) > 0:
                self.set_switches(old_switch_vals)

        return output
Exemple #51
0
 def __iter__(self):
     """
     Yield tokens read line by line from the files found for `self.path` / `self.filter`.

     Every line is passed through `self.preprocess` when one is set, then raised to a list of tokens.
     The first `self.n_future` tokens (counted across all files) are skipped; every later token is yielded.
     """
     skipped = 0
     for filename in files.find_files(self.path, self.filter):
         with open(filename, 'r') as handle:
             for raw_line in handle:
                 tokens = raw_line if self.preprocess is None else self.preprocess(raw_line)
                 for token in raise_to_list(tokens):
                     if skipped < self.n_future:
                         # still inside the leading tokens that must be dropped
                         skipped += 1
                         continue
                     yield token
 def __init__(self, model):
     """
     Placeholder constructor: it always raises ``NotImplementedError`` on its first statement.

     Parameters
     ----------
     model : Model
         The model intended to be repeated. It is never used at present because the constructor
         raises before reaching any of the code below.

     Raises
     ------
     NotImplementedError
         Always.
     """
     raise NotImplementedError("Repeating class not implemented yet!")
     # NOTE: everything below this point is unreachable until the raise above is removed.
     # make sure the input model to repeat is a Model instance
     assert isinstance(
         model, Model
     ), "The initial model provided was type %s, not a Model." % str(
         type(model))
     self.model = model
     # make this input one dimension more than the provided Model's input (since we are repeating over the
     # first dimension)
     # only the first of the model's inputs is considered
     model_input = raise_to_list(self.model.get_inputs())[0]
     # NOTE(review): the tuple of (ndim + 1) False values is presumably the broadcastable pattern
     # expected by TensorType - confirm against its signature.
     self.input = TensorType(model_input.dtype,
                             (False, ) * (model_input.ndim + 1))
Exemple #53
0
 def __iter__(self):
     """
     Stream tokens from every file matched by `self.path` and `self.filter`.

     Lines are optionally transformed by `self.preprocess`, raised to a list, and their items emitted in
     order, except for the first `self.n_future` items overall, which are consumed without being yielded.
     """
     n_skipped = 0
     for text_path in files.find_files(self.path, self.filter):
         with open(text_path, 'r') as text_file:
             for text_line in text_file:
                 if self.preprocess is not None:
                     text_line = self.preprocess(text_line)
                 for item in raise_to_list(text_line):
                     if n_skipped >= self.n_future:
                         yield item
                         continue
                     # not past the skipped prefix yet: count this item and drop it
                     n_skipped += 1
Exemple #54
0
    def __init__(self, inputs, targets=None, func=None, **kwargs):
        """
        Initializes the :class:`Loss` function.

        Parameters
        ----------
        inputs : list(theano symbolic expression)
            The input(s) necessary for the loss function. Any input exposing a `get_outputs` method
            (e.g. a Model or ModifyLayer) is replaced by the outputs it returns.
        targets : list(theano symbolic variable), optional
            The target(s) variables for the loss function. Stored as an empty list when not given.
        func : function, optional
            A python function for computing the loss given the inputs list and targets list (in order).
            The function `func` will be called with parameters: func(*(list(inputs)+list(targets))).
        **kwargs
            Extra keyword arguments; they are copied into `self.args` alongside `inputs`, `targets` and `func`.
        """
        self._classname = self.__class__.__name__
        log.debug("Creating a new instance of %s", self._classname)

        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            # flatten: objects with `get_outputs` contribute their outputs, everything else is kept as-is
            flattened = []
            for item in self.inputs:
                if hasattr(item, 'get_outputs'):
                    flattened.extend(raise_to_list(item.get_outputs()))
                else:
                    flattened.append(item)
            self.inputs = flattened

        listed_targets = raise_to_list(targets)
        self.targets = [] if listed_targets is None else listed_targets
        self.func = func

        # keep a record of the (processed) constructor arguments
        self.args = kwargs.copy()
        self.args.update(inputs=self.inputs, targets=self.targets, func=self.func)
Exemple #55
0
    def __init__(self, inputs, targets=None, func=None, **kwargs):
        """
        Initializes the :class:`Loss` function.

        Parameters
        ----------
        inputs : list(theano symbolic expression)
            The input(s) necessary for the loss function. Entries that have a `get_outputs` method are
            expanded into the outputs that method returns.
        targets : list(theano symbolic variable), optional
            The target(s) variables for the loss function. Defaults to an empty list.
        func : function, optional
            A python function for computing the loss given the inputs list and targets list (in order).
            The function `func` will be called with parameters: func(*(list(inputs)+list(targets))).
        **kwargs
            Additional keyword arguments, recorded in `self.args` together with the values above.
        """
        self._classname = self.__class__.__name__
        log.debug("Creating a new instance of %s", self._classname)

        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            expanded = []
            for entry in self.inputs:
                # Models or ModifyLayers passed as an input stand in for their outputs
                if hasattr(entry, 'get_outputs'):
                    expanded += raise_to_list(entry.get_outputs())
                else:
                    expanded += [entry]
            self.inputs = expanded

        self.targets = raise_to_list(targets)
        if self.targets is None:
            self.targets = []

        self.func = func
        # snapshot of the constructor arguments after processing
        self.args = dict(kwargs, inputs=self.inputs, targets=self.targets, func=self.func)
Exemple #56
0
def find_files(path, path_filter=None):
    """
    Recursively walks directories in ``path`` (if it is a directory) to find the files that have names
    matching ``path_filter``.

    This is a generator: nothing is resolved or walked until it is iterated.

    Parameters
    ----------
    path : str or iterable(str)
        The path to the directory to walk or file to find, or an iterable of filepaths.
    path_filter : regular expression string or compiled regular expression object, optional
        The regular expression to match (with ``match``, i.e. anchored at the start) against each full
        file path. When None, every file found is yielded.

    Yields
    ------
    str
        The path of each file found that passes the filter. Paths that are neither an existing directory
        nor an existing file are silently ignored.

    Raises
    ------
    TypeError
        If matching the filter against a file path raises a TypeError (it is logged and re-raised).
    Exception
        Whatever ``os.path.realpath`` raised, when ``path`` is neither a single path nor an iterable.
    """
    if path_filter is None:
        reg = None
    elif isinstance(path_filter, string_types):
        reg = re.compile(path_filter)
    else:
        # assume an already-compiled pattern (anything exposing ``match``)
        reg = path_filter

    try:
        # a single path resolves to a single string, so wrap it in a list directly
        paths = [os.path.realpath(path)]
    except Exception:
        # realpath rejects non-path objects; fall back to treating `path` as a collection of paths
        if isinstance(path, Iterable):
            paths = [os.path.realpath(p) for p in path]
        else:
            raise

    for current in paths:
        if os.path.isdir(current):
            for root, _dirs, filenames in os.walk(current):
                for basename in filenames:
                    filepath = os.path.join(root, basename)
                    if _find_files_matches(reg, filepath):
                        yield filepath
        elif os.path.isfile(current):
            if _find_files_matches(reg, current):
                yield current


def _find_files_matches(reg, filepath):
    """Return True if ``reg`` is None or matches ``filepath``; log and re-raise a TypeError from matching."""
    try:
        return reg is None or reg.match(filepath) is not None
    except TypeError as te:
        # format the exception itself: the `message` attribute does not exist on Python 3 exceptions
        log.exception(
            'TypeError exception when finding files. {!s}'.format(te))
        raise
Exemple #57
0
    def _get_givens_subset(self, subset, batch_slice):
        """
        This translates a batch slice of start and end indices into the actual data from the given subset.

        Parameters
        ----------
        subset : int
            The subset to use - determined in opendeep.data.datasets as TRAIN, VALID, or TEST attributes.
        batch_slice : symbolic slice
            The symbolic slice to grab from the data.

        Returns
        -------
        OrderedDict or None
            The givens to provide to a function where it sets the input variable to the actual batch representation
            of data from the dataset: (input_variable: data[batch]). None (with a logged warning) when the
            dataset has no data for `subset`.

        Raises
        ------
        AssertionError
            If the model has targets but the dataset subset has no labels.
        """
        # first get the lists of input variables the model requires - inputs and targets
        model_inputs = raise_to_list(self.model.get_inputs())
        model_targets = raise_to_list(self.model.get_targets())

        # fetch the subset once and reuse it for both the existence check and the unpacking
        subset_data = self.dataset.getSubset(subset)
        if subset_data[0] is None:
            log.warning("Dataset doesn't have subset %s", get_subset_strings(subset))
            return None

        # grab the data and labels
        data, labels = subset_data
        # create the givens for the input function as pairs of (input_variable: sliced_data)
        # NOTE: zip stops at the shorter sequence, so only the first model input is paired with the data
        givens = OrderedDict(zip(model_inputs, [data[batch_slice]]))
        # include labels as well if they are required by the model
        if model_targets is not None and len(model_targets) > 0:
            if labels is None:
                log.error("No labels in the dataset!")
                # call syntax is valid on both Python 2 and 3 (the `raise E, msg` form is Python-2 only)
                raise AssertionError("No labels in the dataset!")
            givens.update(OrderedDict(zip(model_targets, [labels[batch_slice]])))

        return givens
Exemple #58
0
def L2(parameters):
    """
    L2 loss is also known as ridge regularization (for ridge regression). It is most commonly used in practice.

    The result is the sum, over all given parameters, of the sum of each parameter's squared elements.

    :param parameters: parameters to apply the regularization
    :type parameters: theano variables

    :return: L2 applied to the theano variables, or None (after logging a warning) when `parameters` is None
    :rtype: theano tensor
    """
    # normalize to a list so a single parameter and a list of parameters are handled alike
    parameters = raise_to_list(parameters)
    if parameters is None:
        log.warning("None parameters passed to L2 regularizer!")
        return None

    squared_sums = [T.sum(parameter ** 2) for parameter in parameters]
    return T.sum(squared_sums)
Exemple #59
0
    def get_noise_switch(self):
        """
        Collect the noise switches of every model held in `self.models`.

        `get_noise_switch()` is called on each model in order; each result is raised to a list and the
        lists are concatenated.

        Returns
        -------
        list
            List of the shared variables representing switches to be turned on during training and off during f_run.
        """
        # flatten the per-model switch lists into a single list, preserving model order
        return [
            switch
            for model in self.models
            for switch in raise_to_list(model.get_noise_switch())
        ]