Example #1
        def _get_batches(self, tensors, formats, batch_indices):
            '''
            Extracts batches from self.dataset.tensors.

            Overrides the superclass' _get_batches, because h5py.Dataset can't
            handle duplicate or out-of-order elements in batch_indices.
            '''
            if isinstance(batch_indices, slice):
                return super(H5Iterator, self)._get_batches(tensors,
                                                            formats,
                                                            batch_indices)

            assert_is_instance(batch_indices, numpy.ndarray)
            assert_all_integer(batch_indices)

            # pylint: disable=unbalanced-tuple-unpacking
            (unique_batch_indices,
             unique_to_batch_indices) = numpy.unique(batch_indices,
                                                     return_inverse=True)

            super_self = super(H5Iterator, self)

            unique_batches = super_self._get_batches(tensors,
                                                     formats,
                                                     unique_batch_indices)

            return super_self._get_batches(unique_batches,
                                           formats,
                                           unique_to_batch_indices)
Example #2
    def _next(self):

        batch_indices = self._next_batch_indices()
        # pdb.set_trace()

        # sanity-check output of _next_batch_indices()
        if not isinstance(batch_indices, slice):
            assert_all_integer(batch_indices)

            if isinstance(batch_indices, numpy.ndarray):
                # Workaround to a bug in h5py.Dataset where indexing by a
                # length-1 ndarray is treated like indexing with the integer it
                # contains.
                if len(batch_indices) == 1:
                    batch_indices = tuple(batch_indices)
            else:
                assert_is_instance(batch_indices, collections.Sequence)

        result = tuple(self._get_batches(self.dataset.tensors,
                                         self.dataset.formats,
                                         batch_indices))

        # sanity-check size of batches
        for batch, fmt in safe_izip(result, self.dataset.formats):
            assert_equal(batch.shape[fmt.axes.index('b')], self.batch_size)

        return result
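
A self-contained illustration of the batch-size check at the end: fmt.axes.index('b') just locates the batch axis, whatever position it occupies. The DummyFormat namedtuple is a hypothetical stand-in for the dataset's format objects:

import collections
import numpy

DummyFormat = collections.namedtuple('DummyFormat', ['axes'])

fmt = DummyFormat(axes=('0', 'b', '1'))        # batch axis in the middle
batch = numpy.zeros((28, 32, 28), dtype='uint8')
batch_size = 32

assert batch.shape[fmt.axes.index('b')] == batch_size
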
Example #3
    def __init__(self, all_norb_labels):
        assert_true(numpy.issubdtype(all_norb_labels.dtype, numpy.integer))
        assert_equal(len(all_norb_labels.shape), 2)
        assert_in(all_norb_labels.shape[1], (5, 11))

        classes = all_norb_labels[:, 0]
        instances = all_norb_labels[:, 1]
        assert_all_integer(classes)
        assert_all_integer(instances)
        assert_greater_equal(classes.min(), 0)
        assert_greater_equal(instances.min(), 0)

        max_instance = int(instances.max())

        sparse_ids = classes * (max_instance + 1) + instances
        assert_true(numpy.all(sparse_ids >= instances), "integer overflow")

        sparse_id_to_dense_id = numpy.empty(sparse_ids.max() + 1,
                                            dtype='int32')
        sparse_id_to_dense_id[:] = -1

        unique_sparse_ids = numpy.asarray(list(frozenset(sparse_ids)))
        unique_sparse_ids.sort()
        sparse_id_to_dense_id[unique_sparse_ids] = \
            numpy.arange(len(unique_sparse_ids))

        self.__max_instance = max_instance
        self.sparse_id_to_dense_id = sparse_id_to_dense_id
        self.num_unique_ids = len(unique_sparse_ids)
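
A worked example of the id packing above: each (class, instance) pair is folded into one 'sparse' integer, and a lookup table maps every sparse id that actually occurs to a dense id in [0, num_unique_ids). numpy.unique is used here in place of the frozenset-plus-sort above; the result is the same:

import numpy

labels = numpy.array([[0, 4],                  # (class, instance) pairs
                      [0, 9],
                      [2, 4],
                      [0, 4]])                 # duplicate of the first pair

classes, instances = labels[:, 0], labels[:, 1]
max_instance = int(instances.max())            # 9

sparse_ids = classes * (max_instance + 1) + instances       # [4, 9, 24, 4]

sparse_id_to_dense_id = numpy.full(sparse_ids.max() + 1, -1, dtype='int32')
unique_sparse_ids = numpy.unique(sparse_ids)                # [4, 9, 24]
sparse_id_to_dense_id[unique_sparse_ids] = numpy.arange(len(unique_sparse_ids))

assert (sparse_id_to_dense_id[sparse_ids] == [0, 1, 2, 0]).all()
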
Example #4
def limit_param_norms(parameter_updater, param, max_norm, input_axes):
    '''
    Modifies the update of an SgdParameterUpdater to limit param L2 norms.

    Parameter norms are computed by summing over the provided input_axes.
    These are so named because you typically want to sum over the axes
    that get dotted with the input to the node (e.g. input_axes=[0] for Linear,
    input_axes=[1, 2, 3] for Conv2D).

    Parameters
    ----------

    parameter_updater: simplelearn.training.ParameterUpdater
      The parameter updater whose updates this will modify.

    param: theano shared variable

      The parameter being updated by parameter_updater.

      (No way to get this from SgdParameterUpdater at present; it updates the
      parameter and its velocity, and there's no way to safely distinguish them
      in parameter_updates.update_pairs)

    max_norm: floating-point scalar
      The maximum L2 norm to be permitted for the parameters.

    input_axes: Sequence
      A Sequence of ints. The indices to sum over when computing the
      L2 norm of the updated params.
    '''

    assert_is_instance(parameter_updater, ParameterUpdater)
    assert_in(param, parameter_updater.update_pairs)

    assert_floating(max_norm)
    assert_greater(max_norm, 0.0)

    assert_greater(len(input_axes), 0)
    assert_all_integer(input_axes)
    assert_all_greater_equal(input_axes, 0)
    assert_all_less(input_axes, param.ndim)

    input_axes = numpy.asarray(input_axes)
    updated_param = parameter_updater.update_pairs[param]

    norms = T.sqrt(T.sum(T.sqr(updated_param),
                         axis=input_axes,
                         keepdims=True))
    desired_norms = T.clip(norms, 0, max_norm)

    broadcast_mask = numpy.zeros(param.ndim, dtype=bool)
    broadcast_mask[input_axes] = True
    scales = T.patternbroadcast(desired_norms / (1e-7 + norms),
                                broadcast_mask)

    parameter_updater.update_pairs[param] = updated_param * scales
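
The symbolic update above is a standard max-norm constraint: columns (or filters) whose L2 norm exceeds max_norm are rescaled down, and the rest are left essentially unchanged. The same arithmetic in plain numpy, for a Linear-style weight matrix with input_axes=[0]:

import numpy

max_norm = 1.9
updated_param = numpy.random.RandomState(0).randn(784, 500)   # (input, output)

# L2 norm of each column: sum of squares over the input axis, keepdims so the
# result broadcasts back against updated_param.
norms = numpy.sqrt(numpy.sum(numpy.square(updated_param), axis=0, keepdims=True))
desired_norms = numpy.clip(norms, 0, max_norm)

rescaled = updated_param * (desired_norms / (1e-7 + norms))
assert numpy.all(numpy.sqrt(numpy.sum(numpy.square(rescaled), axis=0))
                 <= max_norm + 1e-6)
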
Example #5
    def elevation_label_to_radians(labels):
        '''
        Converts NORB elevation labels to radians.
        '''
        assert_equal(labels.ndim, 1)
        assert_all_integer(labels)

        result = (labels * 5.0 + 30.) / 180. * numpy.pi
        assert_true(numpy.all(result >= 0.0))
        assert_true(numpy.all(result <= (numpy.pi / 2.0)))

        return result
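
A quick numeric check of the formula: NORB elevation labels 0..8 correspond to 30..70 degrees in 5-degree steps, so label 0 maps to pi/6 and label 8 to 7*pi/18, both inside the asserted [0, pi/2] range:

import numpy

labels = numpy.arange(9)                       # elevation labels 0..8
radians = (labels * 5.0 + 30.) / 180. * numpy.pi

assert numpy.isclose(radians[0], numpy.pi / 6.0)            # 30 degrees
assert numpy.isclose(radians[-1], 7.0 * numpy.pi / 18.0)    # 70 degrees
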
Example #6
    def azimuth_label_to_radians(labels):
        '''
        Converts NORB azimuth labels to radians.

        Parameters
        ----------
        labels: numpy.ndarray
          A 1-D array of integer NORB azimuth labels.
        '''
        assert_equal(labels.ndim, 1)
        assert_all_integer(labels)

        result = (labels * 10.) / 180. * numpy.pi
        return result
Example #7
def _norb_label_to_camera_direction(labels):
    '''
    Computes camera direction from NORB labels.

    This operates on numeric arrays, unlike the pylearn2 version which operates
    on Theano symbols.
    '''
    assert_false(isinstance(labels, theano.gof.Variable))
    assert_all_integer(labels)
    assert_equal(labels.ndim, 2)
    assert_in(labels.shape[1], (5, 11))

    def elevation_label_to_radians(labels):
        '''
        Converts NORB elevation labels to radians.
        '''
        assert_equal(labels.ndim, 1)
        assert_all_integer(labels)

        result = (labels * 5.0 + 30.) / 180. * numpy.pi
        assert_true(numpy.all(result >= 0.0))
        assert_true(numpy.all(result <= (numpy.pi / 2.0)))

        return result

    def azimuth_label_to_radians(labels):
        '''
        Converts NORB azimuth labels to radians.

        Parameters
        ----------
        labels: numpy.ndarray
          A 1-D array of integer NORB azimuth labels.
        '''
        assert_equal(labels.ndim, 1)
        assert_all_integer(labels)

        result = (labels * 10.) / 180. * numpy.pi
        return result

    elevations = elevation_label_to_radians(labels[:, 2])
    azimuths = azimuth_label_to_radians(labels[:, 3])
    rotated_vectors = rotate_unit_x_vector(elevations, azimuths)

    return numpy.cast[floatX](rotated_vectors)
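
rotate_unit_x_vector is defined elsewhere; assuming the usual convention (tilt the unit x-vector up by the elevation, then rotate it about the vertical axis by the azimuth), the camera direction reduces to the spherical-coordinate formula below. This is a hypothetical sketch of that assumed convention, not the library's implementation:

import numpy

def rotate_unit_x_vector_sketch(elevations, azimuths):
    # Assumed convention: elevate the unit x-vector by <elevations>, then
    # rotate about the z-axis by <azimuths>; both angles in radians.
    return numpy.stack([numpy.cos(elevations) * numpy.cos(azimuths),
                        numpy.cos(elevations) * numpy.sin(azimuths),
                        numpy.sin(elevations)],
                       axis=1)

directions = rotate_unit_x_vector_sketch(numpy.array([0.0, numpy.pi / 2.0]),
                                         numpy.array([0.0, 0.0]))

# Zero elevation/azimuth gives the unit x-vector; a 90-degree elevation gives
# the unit z-vector.
assert numpy.allclose(directions, [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
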
Example #8
def build_fc_classifier(input_node, sizes, sparse_init_counts, dropout_include_probabilities, rng, theano_rng):
    """
    Builds a stack of fully-connected layers followed by a Softmax.

    Each hidden layer will be preceded by a ReLU.

    Initialization:

    Weights are initialized in the same way as in Pylearn2's MLP tutorial:
    pylearn2/scripts/tutorials/multilayer_perceptron/mlp_tutorial_part_3.yaml

    This means the following:

    Of the N affine layers, the weights of the first N-1 are all set to 0.0,
    except for k randomly-chosen elements, which are set to some random number
    drawn from the normal distribution with stddev=1.0.

    The biases are all initialized to 0.0.
    The last layer's weights and biases are both set to 0.0.

    Parameters
    ----------
    input_node: Node
      The node to build the stack on.

    sizes: Sequence
      A sequence of ints, indicating the output sizes of each layer.
      The last int is the number of classes.

    sparse_init_counts:
      A sequence of N-1 ints, where N = len(sizes).
      Used to initialize the weights of the first N-1 layers.
      If the n'th element is x, this means that the n'th layer
      will have x nonzeros, with the rest initialized to zeros.

    dropout_include_probabilities: Sequence
      A Sequence of N floats, where N = len(sizes).
      The dropout include probabilities applied to the input of each layer.
      If any of these probabilities is 1.0, the corresponding Dropout node
      will be omitted.

    rng: numpy.random.RandomState
      The RandomState to draw initial weights from.

    theano_rng: theano.tensor.shared_randomstreams.RandomStreams
      The RandomStreams to draw dropout masks from.

    Returns
    -------
    rval: tuple
      (affine_nodes, output_node), where affine_nodes is a list of the
      AffineNodes, in order, and output_node is the final node, a Softmax.
    """
    assert_is_instance(input_node, Node)

    # pylint: disable=no-member
    assert_equal(input_node.output_format.dtype, numpy.dtype(theano.config.floatX))

    assert_greater(len(sizes), 0)
    assert_all_greater(sizes, 0)

    assert_equal(len(sparse_init_counts), len(sizes) - 1)
    assert_all_integer(sparse_init_counts)
    assert_all_greater(sparse_init_counts, 0)
    assert_all_less_equal(sparse_init_counts, sizes[:-1])

    assert_equal(len(dropout_include_probabilities), len(sizes))

    affine_nodes = []

    last_node = input_node

    for layer_index, layer_output_size in enumerate(sizes):
        # Add dropout, if asked for
        include_probability = dropout_include_probabilities[layer_index]
        if include_probability != 1.0:
            last_node = Dropout(last_node, include_probability, theano_rng)

        output_format = DenseFormat(axes=("b", "f"), shape=(-1, layer_output_size), dtype=None)

        if layer_index < (len(sizes) - 1):
            last_node = AffineLayer(last_node, output_format)
        else:
            last_node = SoftmaxLayer(last_node, output_format)

        affine_nodes.append(last_node.affine_node)

    # Not used in this demo, but keeping it in in case we want to start using
    # it again.
    def init_sparse_bias(shared_variable, num_nonzeros, rng):
        """
        Mimics the sparse initialization in
        pylearn2.models.mlp.Linear.set_input_space()
        """

        params = shared_variable.get_value()
        assert_equal(params.shape[0], 1)

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[1])

        params[...] = 0.0

        indices = rng.choice(params.size, size=num_nonzeros, replace=False)

        # normal dist with stddev=1.0
        params[0, indices] = rng.randn(num_nonzeros)

        # Found that for biases, this didn't help (it increased the
        # final misclassification rate by .001)
        # if num_nonzeros > 0:
        #     params /= float(num_nonzeros)

        shared_variable.set_value(params)

    def init_sparse_linear(shared_variable, num_nonzeros, rng):
        params = shared_variable.get_value()
        params[...] = 0.0

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[0])

        for c in xrange(params.shape[1]):
            indices = rng.choice(params.shape[0], size=num_nonzeros, replace=False)

            # normal dist with stddev=1.0, divided by 255.0
            #
            # We need to divide by 255 for convergence. This is because
            # we're using unnormalized (i.e. 0 to 255) pixel values, unlike the
            # 0.0-to-1.0 pixels in
            # pylearn2.scripts.tutorials.multilayer_perceptron/
            #
            # We could just do as the above tutorial does and normalize the
            # pixels to [0.0, 1.0], and not rescale the weights. However,
            # experiments show that this converges to a higher error, and also
            # makes mnist_visualizer.py's results look very "staticky", without
            # any recognizable digit hallucinations.
            params[indices, c] = rng.randn(num_nonzeros) / 255.0

        shared_variable.set_value(params)

    # Initialize the affine layer weights (not the biases, and not the softmax
    # weights)
    for sparse_init_count, affine_node in safe_izip(sparse_init_counts, affine_nodes[:-1]):
        # pylearn2 doesn't sparse_init the biases. I also found that
        # doing so slightly increases the final misclassification rate.
        init_sparse_linear(affine_node.linear_node.params, sparse_init_count, rng)

    return affine_nodes, last_node
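
A numpy-only illustration of what init_sparse_linear does to a weight matrix: each output column receives exactly num_nonzeros randomly placed values drawn from N(0, 1) (scaled by 1/255 here, as above), and every other weight stays zero:

import numpy

rng = numpy.random.RandomState(0)
params = numpy.zeros((784, 500))               # (input, output) weights
num_nonzeros = 15

for c in range(params.shape[1]):
    indices = rng.choice(params.shape[0], size=num_nonzeros, replace=False)
    params[indices, c] = rng.randn(num_nonzeros) / 255.0

# Every column ends up with exactly num_nonzeros nonzero weights.
assert (numpy.count_nonzero(params, axis=0) == num_nonzeros).all()
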
def build_conv_classifier(input_node,
                          filter_shapes,
                          filter_counts,
                          filter_init_uniform_ranges,
                          pool_shapes,
                          pool_strides,
                          affine_output_sizes,
                          affine_init_stddevs,
                          dropout_include_rates,
                          conv_pads,
                          rng,
                          theano_rng):
    '''
    Builds a classification convnet on top of input_node.

    Returns
    -------
    rval: tuple
      (conv_nodes, affine_nodes, output_node), where:
         conv_nodes is a list of the Conv2d nodes.
         affine_nodes is a list of the AffineNodes.
         output_node is the final node, a Softmax.
    '''

    assert_is_instance(input_node, Lcn)

    conv_shape_args = (filter_shapes,
                       pool_shapes,
                       pool_strides)

    for conv_shapes in conv_shape_args:
        for conv_shape in conv_shapes:
            assert_all_integer(conv_shape)
            assert_all_greater(conv_shape, 0)

    conv_args = conv_shape_args + (filter_counts, filter_init_uniform_ranges)
    assert_all_equal([len(c) for c in conv_args])

    assert_equal(len(affine_output_sizes), len(affine_init_stddevs))

    assert_equal(len(dropout_include_rates),
                 len(filter_shapes) + len(affine_output_sizes))

    assert_equal(affine_output_sizes[-1], 10)  # for MNIST

    #assert_equal(input_node.output_format.axes, ('b', '0', '1'))

    #
    # Done sanity-checking args.
    #

    input_shape = input_node.output_format.shape

    # Converts from MNIST's ('b', '0', '1') to ('b', 'c', '0', '1')
    last_node = input_node

    conv_dropout_include_rates = \
        dropout_include_rates[:len(filter_shapes)]

    # Adds a dropout-conv-bias-relu-maxpool stack for each element in
    # filter_XXXX

    conv_layers = []

    def uniform_init(rng, params, init_range):
        '''
        Fills params with values uniformly sampled from
        [-init_range, init_range]
        '''

        assert_floating(init_range)
        assert_greater_equal(init_range, 0)

        values = params.get_value()
        values[...] = rng.uniform(low=-init_range,
                                  high=init_range,
                                  size=values.shape)
        params.set_value(values)

    for (filter_shape,
         filter_count,
         filter_init_range,
         pool_shape,
         pool_stride,
         conv_dropout_include_rate,
         conv_pad)                  in safe_izip(filter_shapes,
                                                 filter_counts,
                                                 filter_init_uniform_ranges,
                                                 pool_shapes,
                                                 pool_strides,
                                                 conv_dropout_include_rates,
                                                 conv_pads):
        if conv_dropout_include_rate != 1.0:
            last_node = Dropout(last_node,
                                conv_dropout_include_rate,
                                theano_rng)

        last_node = Conv2dLayer(last_node,
                                filter_shape,
                                filter_count,
                                conv_pads=conv_pad,
                                pool_window_shape=pool_shape,
                                pool_strides=pool_stride,
                                pool_pads='pylearn2')
        conv_layers.append(last_node)

        uniform_init(rng, last_node.conv2d_node.filters, filter_init_range)

    affine_dropout_include_rates = dropout_include_rates[len(filter_shapes):]

    affine_layers = []

    def normal_distribution_init(rng, params, stddev):
        '''
        Fills params with values sampled from a zero-mean normal distribution
        with the given stddev.
        '''

        assert_floating(stddev)
        assert_greater_equal(stddev, 0)

        values = params.get_value()
        values[...] = rng.standard_normal(values.shape) * stddev
        params.set_value(values)

    #
    # Adds a dropout-affine-relu stack for each element in affine_XXXX,
    # except for the last one, where it omits the dropout.
    #

    # Add a fully connected layer here:
    output_format = DenseFormat(axes=('b', 'f'),
                                    shape=(-1, 500),
                                    dtype=None)

    if affine_dropout_include_rates[0] < 1.0:
        last_node = Dropout(last_node,
                            affine_dropout_include_rates[0],
                            theano_rng)

    last_node = AffineLayer(last_node, output_format, input_to_bf_map={('0', '1', 'c'): 'f'})
    affine_layers.append(last_node)
    normal_distribution_init(rng,
                             last_node.affine_node.linear_node.params,
                             0.05)

    for (affine_size,
         affine_init_stddev,
         affine_dropout_include_rate) in \
        safe_izip(affine_output_sizes,
                  affine_init_stddevs,
                  affine_dropout_include_rates):

        '''
        if affine_dropout_include_rate < 1.0:
            last_node = Dropout(last_node,
                                affine_dropout_include_rate,
                                theano_rng)
        '''
        # No need to supply an axis map for the first affine transform.
        # By default, it collapses all non-'b' axes into a feature vector,
        # which is what we want.

        # remap from bc01 to b01c before flattening to bf, as pylearn2 does,
        # just so that they do identical things.
        last_node = SoftmaxLayer(last_node,
                                 DenseFormat(axes=('b', 'f'),
                                             shape=(-1, affine_size),
                                             dtype=None))
                                 #input_to_bf_map={('0', '1', 'c'): 'f'})
        normal_distribution_init(rng,
                                 last_node.affine_node.linear_node.params,
                                 affine_init_stddev)
        # stddev_init(rng, last_node.bias_node.params, affine_init_stddev)
        affine_layers.append(last_node)

    return conv_layers, affine_layers, last_node
def build_conv_classifier(input_node,
                          filter_shapes,
                          filter_counts,
                          filter_init_uniform_ranges,
                          pool_shapes,
                          pool_strides,
                          affine_output_sizes,
                          affine_init_stddevs,
                          dropout_include_rates,
                          conv_pads,
                          rng,
                          theano_rng):
    '''
    Builds a classification convnet on top of input_node.

    Returns
    -------
    rval: tuple
      (conv_nodes, affine_nodes, output_node), where:
         conv_nodes is a list of the Conv2d nodes.
         affine_nodes is a list of the AffineNodes.
         output_node is the final node, a Softmax.
    '''

    assert_is_instance(input_node, Lcn)

    conv_shape_args = (filter_shapes,
                       pool_shapes,
                       pool_strides)

    for conv_shapes in conv_shape_args:
        for conv_shape in conv_shapes:
            assert_all_integer(conv_shape)
            assert_all_greater(conv_shape, 0)

    conv_args = conv_shape_args + (filter_counts, filter_init_uniform_ranges)
    assert_all_equal([len(c) for c in conv_args])

    assert_equal(len(affine_output_sizes), len(affine_init_stddevs))

    assert_equal(len(dropout_include_rates),
                 len(filter_shapes) + len(affine_output_sizes))

    assert_equal(affine_output_sizes[-1], 10)  # for MNIST

    #assert_equal(input_node.output_format.axes, ('b', '0', '1'))

    #
    # Done sanity-checking args.
    #

    input_shape = input_node.output_format.shape

    # Converts from MNIST's ('b', '0', '1') to ('b', 'c', '0', '1')
    last_node = input_node

    conv_dropout_include_rates = \
        dropout_include_rates[:len(filter_shapes)]

    # Adds a dropout-conv-bias-relu-maxpool stack for each element in
    # filter_XXXX

    conv_layers = []

    def uniform_init(rng, params, init_range):
        '''
        Fills params with values uniformly sampled from
        [-init_range, init_range]
        '''

        assert_floating(init_range)
        assert_greater_equal(init_range, 0)

        values = params.get_value()
        values[...] = rng.uniform(low=-init_range,
                                  high=init_range,
                                  size=values.shape)
        params.set_value(values)

    for (filter_shape,
         filter_count,
         filter_init_range,
         pool_shape,
         pool_stride,
         conv_dropout_include_rate,
         conv_pad)                 in safe_izip(filter_shapes,
                                                 filter_counts,
                                                 filter_init_uniform_ranges,
                                                 pool_shapes,
                                                 pool_strides,
                                                 conv_dropout_include_rates,
                                                 conv_pads):
        if conv_dropout_include_rate != 1.0:
            last_node = Dropout(last_node,
                                conv_dropout_include_rate,
                                theano_rng)

        last_node = Conv2dLayer(last_node,
                                filter_shape,
                                filter_count,
                                conv_pads=conv_pad,
                                pool_window_shape=pool_shape,
                                pool_strides=pool_stride,
                                pool_pads='pylearn2')
        conv_layers.append(last_node)

        uniform_init(rng, last_node.conv2d_node.filters, filter_init_range)

    affine_dropout_include_rates = dropout_include_rates[len(filter_shapes):]

    affine_layers = []

    def normal_distribution_init(rng, params, stddev):
        '''
        Fills params with values sampled from a zero-mean normal distribution
        with the given stddev.
        '''

        assert_floating(stddev)
        assert_greater_equal(stddev, 0)

        values = params.get_value()
        values[...] = rng.standard_normal(values.shape) * stddev
        params.set_value(values)

    #
    # Adds a dropout-affine-relu stack for each element in affine_XXXX,
    # except for the last one, where it omits the dropout.
    #

    # Add a fully connected layer here:
    output_format = DenseFormat(axes=('b', 'f'),
                                    shape=(-1, 500),
                                    dtype=None)

    if affine_dropout_include_rates[0] < 1.0:
        last_node = Dropout(last_node,
                            affine_dropout_include_rates[0],
                            theano_rng)

    last_node = AffineLayer(last_node, output_format, input_to_bf_map={('0', '1', 'c'): 'f'})
    affine_layers.append(last_node)
    normal_distribution_init(rng,
                             last_node.affine_node.linear_node.params,
                             0.05)


    for (affine_size,
         affine_init_stddev,
         affine_dropout_include_rate) in \
        safe_izip(affine_output_sizes,
                  affine_init_stddevs,
                  affine_dropout_include_rates):

        '''
        if affine_dropout_include_rate < 1.0:
            last_node = Dropout(last_node,
                                affine_dropout_include_rate,
                                theano_rng)
        '''
        # No need to supply an axis map for the first affine transform.
        # By default, it collapses all non-'b' axes into a feature vector,
        # which is what we want.

        # remap from bc01 to b01c before flattening to bf, as pylearn2 does,
        # just so that they do identical things.
        last_node = SoftmaxLayer(last_node,
                                 DenseFormat(axes=('b', 'f'),
                                             shape=(-1, affine_size),
                                             dtype=None))
                                 #input_to_bf_map={('0', '1', 'c'): 'f'})
        normal_distribution_init(rng,
                                 last_node.affine_node.linear_node.params,
                                 affine_init_stddev)
        # stddev_init(rng, last_node.bias_node.params, affine_init_stddev)
        affine_layers.append(last_node)

    #################################################################################################
    ### BUILD THE SECOND NETWORK WITH FLAT PARAMETERS (given the dimensions of the first) ###########
    #################################################################################################

    rng = numpy.random.RandomState(281934)
    std_deviation = .05

    # Fetch all parameters and shapes
    parameters = []
    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)

        bias = conv_layer.bias_node.params
        parameters.append(bias)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)

    '''
    print(len(parameters))
    for parameter in parameters:
        print(parameter.get_value().shape)

    shapes = []
    params_flat_values = numpy.asarray([], dtype=theano.config.floatX)
    counter = 0
    for parameter in parameters:
        shape = parameter.get_value().shape
        if counter%2 == 0 and len(shape)==4:
            vector_param = numpy.asarray(numpy.ndarray.flatten(parameter.get_value()), dtype=theano.config.floatX)
            vector_param[...] = rng.standard_normal(vector_param.shape) * std_deviation
            col_length = shape[2]
            index_from = 0

            ###
            #for _ in range(shape[0]*shape[1]*shape[3]):
            #    index_to = index_from + col_length
            #    vector_param[index_from:index_to] = vector_param[index_from:index_to]/numpy.linalg.norm(vector_param[index_from:index_to])
            #    index_from = index_to
            ####

        elif counter%2==0:
            vector_param = numpy.asarray(numpy.ndarray.flatten(parameter.get_value()), dtype=theano.config.floatX)
            vector_param[...] = rng.standard_normal(vector_param.shape) * std_deviation
        else:
            vector_param = numpy.asarray(numpy.ndarray.flatten(parameter.get_value()), dtype=theano.config.floatX)

        params_flat_values = numpy.append(params_flat_values, vector_param)
        shapes.append(shape)
    '''
    params_flat_values = numpy.asarray([], dtype=theano.config.floatX)
    shapes = []

    for parameter in parameters:
        parameter_value = parameter.get_value()
        shapes.append(parameter_value.shape)
        vector_param = numpy.asarray(numpy.ndarray.flatten(parameter_value))
        params_flat_values = numpy.append(params_flat_values, vector_param)
        print(parameter.get_value().shape)

    print(params_flat_values)
    print(params_flat_values.shape)

    params_flat = theano.shared(params_flat_values)
    params_old_flat = theano.shared(params_flat_values)

    assert_is_instance(input_node, Lcn)

    conv_shape_args = (filter_shapes,
                       pool_shapes,
                       pool_strides)

    for conv_shapes in conv_shape_args:
        for conv_shape in conv_shapes:
            assert_all_integer(conv_shape)
            assert_all_greater(conv_shape, 0)

    conv_args = conv_shape_args + (filter_counts, filter_init_uniform_ranges)
    assert_all_equal([len(c) for c in conv_args])

    assert_equal(len(affine_output_sizes), len(affine_init_stddevs))

    assert_equal(len(dropout_include_rates),
                 len(filter_shapes) + len(affine_output_sizes))

    assert_equal(affine_output_sizes[-1], 10)  # for MNIST

    #assert_equal(input_node.output_format.axes, ('b', '0', '1'))

    #
    # Done sanity-checking args.
    #

    input_shape = input_node.output_format.shape

    # Converts from MNIST's ('b', '0', '1') to ('b', 'c', '0', '1')
    last_node = input_node

    conv_dropout_include_rates = \
        dropout_include_rates[:len(filter_shapes)]

    # Adds a dropout-conv-bias-relu-maxpool stack for each element in
    # filter_XXXX

    conv_layers = []
    counter = 0
    index_from = 0

    for (filter_shape,
         filter_count,
         filter_init_range,
         pool_shape,
         pool_stride,
         conv_dropout_include_rate,
         conv_pad)                 in safe_izip(filter_shapes,
                                                 filter_counts,
                                                 filter_init_uniform_ranges,
                                                 pool_shapes,
                                                 pool_strides,
                                                 conv_dropout_include_rates,
                                                 conv_pads):
        if conv_dropout_include_rate != 1.0:
            last_node = Dropout(last_node,
                                conv_dropout_include_rate,
                                theano_rng)

        print(shapes)

        shape1 = shapes[counter]
        shape2 = shapes[counter+1]
        size1= numpy.prod(numpy.asarray(shape1))
        size2= numpy.prod(numpy.asarray(shape2))
        index_to = index_from + size1
        #filters_ = theano.tensor.transpose(params_flat[index_from:index_to].reshape(shape1), axes=[0,1,3,2])
        filters_ = params_flat[index_from:index_to].reshape(shape1)
        index_from = index_to
        index_to = index_from + size2
        bias_ = params_flat[index_from:index_to].reshape(shape2)
        index_from = index_to

        last_node = Conv2dLayer(last_node,
                                filter_shape,
                                filter_count,
                                conv_pads=conv_pad,
                                pool_window_shape=pool_shape,
                                pool_strides=pool_stride,
                                pool_pads='pylearn2',
                                filters=filters_,
                                bias=bias_)
        conv_layers.append(last_node)

        counter = counter + 2


    affine_dropout_include_rates = dropout_include_rates[len(filter_shapes):]

    affine_layers = []

    #
    # Adds a dropout-affine-relu stack for each element in affine_XXXX,
    # except for the last one, where it omits the dropout.
    #

    # Add a fully connected layer here:

    shape1 = shapes[counter]
    #shape1 = (shape1[1], shape1[0])
    shape2 = shapes[counter+1]
    size1= numpy.prod(numpy.asarray(shape1))
    size2= numpy.prod(numpy.asarray(shape2))
    index_to = index_from + size1
    weights_ = params_flat[index_from:index_to].reshape(shape1)
    index_from = index_to
    index_to = index_from + size2
    bias_ = params_flat[index_from:index_to].reshape(shape2)
    index_from = index_to

    output_format = DenseFormat(axes=('b', 'f'),
                                    shape=(-1, 500),
                                    dtype=None)

    if affine_dropout_include_rates[0] < 1.0:
        last_node = Dropout(last_node,
                            affine_dropout_include_rates[0],
                            theano_rng)

    last_node = AffineLayer(last_node, output_format, weights=weights_, bias=bias_, input_to_bf_map={('0', '1', 'c'): 'f'})
    affine_layers.append(last_node)

    counter += 2

    for (affine_size,
         affine_init_stddev,
         affine_dropout_include_rate) in \
        safe_izip(affine_output_sizes,
                  affine_init_stddevs,
                  affine_dropout_include_rates):

        if affine_dropout_include_rate < 1.0:
            last_node = Dropout(last_node,
                                affine_dropout_include_rate,
                                theano_rng)

        # No need to supply an axis map for the first affine transform.
        # By default, it collapses all non-'b' axes into a feature vector,
        # which is what we want.

        shape1 = shapes[counter]
        #shape1 = (shape1[1], shape1[0])
        shape2 = shapes[counter+1]
        size1= numpy.prod(numpy.asarray(shape1))
        size2= numpy.prod(numpy.asarray(shape2))
        index_to = index_from + size1
        weights_ = params_flat[index_from:index_to].reshape(shape1)
        index_from = index_to
        index_to = index_from + size2
        bias_ = params_flat[index_from:index_to].reshape(shape2)
        index_from = index_to
        # remap from bc01 to b01c before flattening to bf, as pylearn2 does,
        # just so that they do identical things.
        last_node = SoftmaxLayer(last_node,
                                 DenseFormat(axes=('b', 'f'),
                                             shape=(-1, affine_size),
                                             dtype=None),
                                 weights=weights_,
                                 bias=bias_)
                                 #input_to_bf_map={('0', '1', 'c'): 'f'})

        # stddev_init(rng, last_node.bias_node.params, affine_init_stddev)
        affine_layers.append(last_node)

        counter += 2


    return conv_layers, affine_layers, last_node, params_flat, params_old_flat, shapes
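
The flat-parameter rebuild above boils down to simple bookkeeping: concatenate every parameter tensor into one vector, remember the shapes, and recover per-parameter views by slicing and reshaping in the same order. A numpy-only sketch of that round trip, with made-up shapes:

import numpy

rng = numpy.random.RandomState(0)
param_values = [rng.randn(*shape)
                for shape in [(64, 1, 5, 5), (64,), (3136, 500), (500,)]]

# Pack: remember the shapes, flatten everything into one vector.
shapes = [value.shape for value in param_values]
params_flat = numpy.concatenate([value.flatten() for value in param_values])

# Unpack: slice the flat vector back out, shape by shape, in the same order.
index_from = 0
for value, shape in zip(param_values, shapes):
    size = int(numpy.prod(shape))
    view = params_flat[index_from:index_from + size].reshape(shape)
    assert numpy.array_equal(view, value)
    index_from += size
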
Example #11
def build_fc_classifier(input_node,
                        sizes,
                        sparse_init_counts,
                        dropout_include_probabilities,
                        rng,
                        theano_rng):
    '''
    Builds a stack of fully-connected layers followed by a Softmax.

    Each hidden layer will be preceded by a ReLU.

    Initialization:

    Weights are initialized in the same way as in Pylearn2's MLP tutorial:
    pylearn2/scripts/tutorials/multilayer_perceptron/mlp_tutorial_part_3.yaml

    This means the following:

    Of the N affine layers, the weights of the first N-1 are all set to 0.0,
    except for k randomly-chosen elements, which are set to some random number
    drawn from the normal distribution with stddev=1.0.

    The biases are all initialized to 0.0.
    The last layer's weights and biases are both set to 0.0.

    Parameters
    ----------
    input_node: Node
      The node to build the stack on.

    sizes: Sequence
      A sequence of ints, indicating the output sizes of each layer.
      The last int is the number of classes.

    sparse_init_counts:
      A sequence of N-1 ints, where N = len(sizes).
      Used to initialize the weights of the first N-1 layers.
      If the n'th element is x, this means that the n'th layer
      will have x nonzeros, with the rest initialized to zeros.

    dropout_include_probabilities: Sequence
      A Sequence of N floats, where N = len(sizes).
      The dropout include probabilities applied to the input of each layer.
      If any of these probabilities is 1.0, the corresponding Dropout node
      will be omitted.

    rng: numpy.random.RandomState
      The RandomState to draw initial weights from.

    theano_rng: theano.tensor.shared_randomstreams.RandomStreams
      The RandomStreams to draw dropout masks from.

    Returns
    -------
    rval: tuple
      (affine_nodes, output_node), where affine_nodes is a list of the
      AffineNodes, in order, and output_node is the final node, a Softmax.
    '''
    assert_is_instance(input_node, Node)

    # pylint: disable=no-member
    assert_equal(input_node.output_format.dtype,
                 numpy.dtype(theano.config.floatX))

    assert_greater(len(sizes), 0)
    assert_all_greater(sizes, 0)

    assert_equal(len(sparse_init_counts), len(sizes) - 1)
    assert_all_integer(sparse_init_counts)
    assert_all_greater(sparse_init_counts, 0)
    assert_all_less_equal(sparse_init_counts, sizes[:-1])

    assert_equal(len(dropout_include_probabilities), len(sizes))

    '''
    affine_nodes = []

    last_node = input_node

    for layer_index, layer_output_size in enumerate(sizes):
        # Add dropout, if asked for
        include_probability = dropout_include_probabilities[layer_index]
        if include_probability != 1.0:
            last_node = Dropout(last_node, include_probability, theano_rng)

        output_format = DenseFormat(axes=('b', 'f'),
                                    shape=(-1, layer_output_size),
                                    dtype=None)

        if layer_index < (len(sizes) - 1):
            last_node = AffineLayer(last_node, output_format)
        else:
            last_node = SoftmaxLayer(last_node, output_format)

        affine_nodes.append(last_node.affine_node)


    # Not used in this demo, but keeping it in in case we want to start using
    # it again.
    def init_sparse_bias(shared_variable, num_nonzeros, rng):

        #Mimics the sparse initialization in
        #pylearn2.models.mlp.Linear.set_input_space()


        params = shared_variable.get_value()
        assert_equal(params.shape[0], 1)

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[1])

        params[...] = 0.0

        indices = rng.choice(params.size,
                             size=num_nonzeros,
                             replace=False)

        # normal dist with stddev=1.0
        params[0, indices] = rng.randn(num_nonzeros)

        # Found that for biases, this didn't help (it increased the
        # final misclassification rate by .001)
        # if num_nonzeros > 0:
        #     params /= float(num_nonzeros)

        shared_variable.set_value(params)

    def init_sparse_linear(shared_variable, num_nonzeros, rng):
        params = shared_variable.get_value()
        params[...] = 0.0

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[0])

        for c in xrange(params.shape[1]):
            indices = rng.choice(params.shape[0],
                                 size=num_nonzeros,
                                 replace=False)

            # normal dist with stddev=1.0, divided by 255.0
            #
            # We need to divide by 255 for convergence. This is because
            # we're using unnormalized (i.e. 0 to 255) pixel values, unlike the
            # 0.0-to-1.0 pixels in
            # pylearn2.scripts.tutorials.multilayer_perceptron/
            #
            # We could just do as the above tutorial does and normalize the
            # pixels to [0.0, 1.0], and not rescale the weights. However,
            # experiments show that this converges to a higher error, and also
            # makes mnist_visualizer.py's results look very "staticky", without
            # any recognizable digit hallucinations.
            params[indices, c] = rng.randn(num_nonzeros) / 255.0

        shared_variable.set_value(params)

    # Initialize the affine layer weights (not the biases, and not the softmax
    # weights)
    for sparse_init_count, affine_node in safe_izip(sparse_init_counts,
                                                    affine_nodes[:-1]):
        # pylearn2 doesn't sparse_init the biases. I also found that
        # doing so slightly increases the final misclassification rate.
        init_sparse_linear(affine_node.linear_node.params,
                           sparse_init_count,
                           rng)

    #################################################################################################
    ### BUILD THE SECOND NETWORK WITH FLAT PARAMETERS (given the dimensions of the first) ###########
    #################################################################################################

    parameters = []
    shapes = []
    for affine_node in affine_nodes:
        weights = affine_node.linear_node.params
        bias = affine_node.bias_node.params
        parameters.append(weights)
        parameters.append(bias)
        shapes.append(weights.get_value().shape)
        shapes.append(bias.get_value().shape)

    params_flat_values = numpy.asarray([], dtype=theano.config.floatX)
    for parameter in parameters:
        vector_param = numpy.asarray(numpy.ndarray.flatten(parameter.get_value()), dtype=theano.config.floatX)
        params_flat_values = numpy.append(params_flat_values, vector_param)

    params_flat = theano.shared(params_flat_values)
    params_old_flat = theano.shared(params_flat_values)

    affine_nodes = []
    last_node = input_node
    counter = 0
    index_from = 0
    for layer_index, layer_output_size in enumerate(sizes):

        shape1 = shapes[counter]
        shape2 = shapes[counter+1]
        size1= numpy.prod(numpy.asarray(shape1))
        size2= numpy.prod(numpy.asarray(shape2))
        index_to = index_from + size1
        weights_ = params_flat[index_from:index_to].reshape(shape1)
        index_from = index_to
        index_to = index_from + size2
        bias_ = params_flat[index_from:index_to].reshape(shape2)
        index_from = index_to
        counter = counter + 2

        # Add dropout, if asked for
        include_probability = dropout_include_probabilities[layer_index]
        if include_probability != 1.0:
            last_node = Dropout(last_node, include_probability, theano_rng)

        output_format = DenseFormat(axes=('b', 'f'),
                                    shape=(-1, layer_output_size),
                                    dtype=None)

        if layer_index < (len(sizes) - 1):
            last_node = AffineLayer(last_node, output_format, weights=weights_, bias=bias_)
        else:
            last_node = SoftmaxLayer(last_node, output_format, weights=weights_, bias=bias_)

        affine_nodes.append(last_node.affine_node)

    return affine_nodes, last_node, params_flat, params_old_flat
    '''

    std_deviation = .05

    input_size = 784
    params_temp1 = [rng.standard_normal( (sizes[0]* input_size) ).astype(theano.config.floatX)*std_deviation,
                    numpy.zeros(sizes[0], dtype=theano.config.floatX) ]

    params_temp2 = sum([ [rng.standard_normal( sizes[i] * sizes[i+1] ).astype(theano.config.floatX)*std_deviation,
                          numpy.zeros(sizes[i+1], dtype=theano.config.floatX)] for i in range(len(sizes)-1) ],[] )

    params_flat_values = numpy.concatenate( params_temp1 + params_temp2 )

    params_flat = theano.shared(params_flat_values)
    params_old_flat = theano.shared(params_flat_values)

    shapes = []
    param_arrays = []
    index_to = input_size * sizes[0]
    param_arrays.append(params_flat[:index_to].reshape((sizes[0], input_size))) # Add weights
    shapes.append((input_size, sizes[0]))
    index_from = index_to
    index_to += sizes[0]
    param_arrays.append(params_flat[index_from:index_to]) # Add bias
    shapes.append((index_to-index_from, ))

    for i in range(len(sizes)-1):

        index_from = index_to
        index_to += sizes[i]*sizes[i+1]
        param_arrays.append(params_flat[index_from:index_to].reshape((sizes[i+1],sizes[i]))) # Add weight
        shapes.append((sizes[i], sizes[i+1]))
        #print(index_from, index_to)
        #print 'reshaped to'
        #print(sizes[i], sizes[i+1])
        index_from = index_to
        index_to += sizes[i+1]
        param_arrays.append(params_flat[index_from:index_to]) # Add bias
        shapes.append((index_to-index_from, ))

    layers = [input_node]

    for i in range(len(sizes)-1):  # one affine layer per hidden size
        layers.append(AffineLayer(input_node=layers[-1],  # last element of <layers>
                                  output_format=DenseFormat(axes=('b', 'f'),  # axis order: (batch, feature)
                                                            shape=(-1, sizes[i]),  # output shape: (variable batch size, sizes[i] features)
                                                            dtype=None),  # don't change the input data type
                                  weights=theano.tensor.transpose(param_arrays[i*2]),
                                  bias=param_arrays[i*2+1]))

    layers.append(SoftmaxLayer(input_node=layers[-1],
                               output_format=DenseFormat(axes=('b', 'f'),  # axis order: (batch, feature)
                                                         shape=(-1, sizes[i+1]),   # output shape: (variable batch size, 10 classes)
                                                         dtype=None),      # don't change the input data type
                               weights = theano.tensor.transpose(param_arrays[(i+1)*2]),
                               bias = param_arrays[(i+1)*2+1]
                               ))  # collapse the channel, row, and column axes to a single feature axis

    softmax_layer = layers[-1]

    last_node = softmax_layer
    affine_nodes = []
    for i in range(1,len(layers)):
        affine_nodes.append(layers[i].affine_node)

    print shapes

    return affine_nodes, last_node, params_flat, params_old_flat, shapes
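
The index arithmetic in the live branch above can be sanity-checked against the layer dimensions: one (input x output) weight block plus one bias block per layer, in order. A small worked check of that total, assuming the 784-pixel MNIST input used above:

input_size = 784
sizes = [500, 500, 10]                         # two hidden layers plus the softmax

total = input_size * sizes[0] + sizes[0]       # first weight matrix and bias
for n_in, n_out in zip(sizes[:-1], sizes[1:]):
    total += n_in * n_out + n_out              # remaining weights and biases

assert total == 784*500 + 500 + 500*500 + 500 + 500*10 + 10
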
Example #12
def make_h5_file(path,
                 partition_names,
                 partition_sizes,
                 tensor_names,
                 tensor_formats):
    '''
    Creates a h5py.File with groups that can be wrapped by H5Dataset.

    Usage
    -----

    h5_file = make_h5_file(file_path, p_names, p_sizes, t_names, t_formats)
      1: Call this function to create a h5py.File object
      2: Fill the h5py.File's data tensors with appropriate data.
      3: Close the h5py.File, then re-open it using H5Dataset,
         a read-only dataset interface.

    Parameters
    ----------
    path: str
      The path of the .h5 file to create. Must end in '.h5' and lie under
      simplelearn.data.data_path.

    partition_names: Sequence
      Names of the sub-datasets, e.g. ['train', 'test'].
      May only contain alphanumeric characters and underscores, as
      load_h5_dataset() uses these names as NamedTuple names.

    partition_sizes: Sequence
      Number of examples in each sub-dataset, e.g. [50000, 10000] for
      MNIST.

    tensor_names: Sequence
      Names of the data tensors, e.g. ['images', 'labels']. Each
      sub-tensor uses the same tensor_names.

    tensor_formats: Sequence
      The DenseFormats of the data tensors, e.g. (for MNIST):
      [DenseFormat(axes=['b', '0', '1'], shape=[-1, 28, 28], dtype='uint8'),
       DenseFormat(axes=['b'], shape=[-1], dtype='uint8')]

    The example parameter values above would create an h5py.File
    with the following hierarchical structure:

    h5py.File/
      'partition_names': an h5py.Dataset of strings, ['train', 'test']
      'tensor_names': an h5py.Dataset of strings, ['images', 'labels']
      'partitions': an h5py.Group with the following members:
        'train': an h5py.Group, with the following members:
          'images': an h5py.Dataset tensor, with shape given by
                    partition_sizes[0] and tensor_formats[0].
          'labels': an h5py.Dataset tensor, with shape given by
                    partition_sizes[0] and tensor_formats[1].
        'test': an h5py.Group, with the following members:
          'images': an h5py.Dataset tensor, with shape given by
                    partition_sizes[1] and tensor_formats[0].
          'labels': an h5py.Dataset tensor, with shape given by
                    partition_sizes[1] and tensor_formats[1].
    '''

    assert_is_instance(path, basestring)
    assert_equal(os.path.splitext(path)[1], '.h5')
    absolute_path = os.path.abspath(path)
    assert_true(absolute_path.startswith(simplelearn.data.data_path),
                ("{} is not a subdirectory of simplelearn.data.data_path "
                 "{}").format(absolute_path, simplelearn.data.data_path))

    assert_all_is_instance(partition_names, basestring)
    assert_equal(len(frozenset(partition_names)), len(partition_names))
    for partition_name in partition_names:
        for char in partition_name:
            if not (char.isalnum() or char == "_"):
                raise ValueError("Partition name {} must contain only "
                                 "alphanumeric characters or "
                                 "underscores.".format(partition_name))

    assert_all_integer(partition_sizes)
    assert_all_greater_equal(partition_sizes, 0)

    assert_all_is_instance(tensor_names, basestring)
    assert_equal(len(frozenset(tensor_names)), len(tensor_names))

    assert_all_is_instance(tensor_formats, DenseFormat)
    for tensor_format in tensor_formats:
        assert_in('b', tensor_format.axes)

    # Done sanity-checking args

    h5_file = h5py.File(absolute_path, mode='w')

    # Add ordered lists of tensor/partition names, since h5py.Group.keys()
    # can't be trusted to list group members in the order that they were
    # added in.

    def add_ordered_names(list_name, names, group):
        '''
        Adds a list of names to a group, as a h5py.Dataset of strings.
        '''
        max_name_length = max([len(n) for n in names])
        string_dtype = 'S{}'.format(max_name_length)
        result = group.create_dataset(list_name,
                                      (len(names), ),
                                      dtype=string_dtype)
        for n, name in enumerate(names):
            result[n] = name

    # Not sure if storing partition order is necessary, but why not.
    add_ordered_names('partition_names', partition_names, h5_file)

    # Storing tensor order is definitely necessary.
    add_ordered_names('tensor_names', tensor_names, h5_file)

    partitions = h5_file.create_group('partitions')

    for partition_name, partition_size in safe_izip(partition_names,
                                                    partition_sizes):
        partition = partitions.create_group(partition_name)

        for tensor_name, tensor_format in safe_izip(tensor_names,
                                                    tensor_formats):
            tensor_shape = list(tensor_format.shape)
            tensor_shape[tensor_format.axes.index('b')] = partition_size

            # fletcher32: checksum against data corruption with tiny overhead.
            # http://docs.h5py.org/en/latest/high/dataset.html#fletcher32-filter
            tensor = partition.create_dataset(tensor_name,
                                              tensor_shape,
                                              tensor_format.dtype,
                                              fletcher32=True)

            # Label the tensor axes by their axis names in fmt.
            for index, axis in enumerate(tensor_format.axes):
                tensor.dims[index].label = axis

    return h5_file
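
Following the three-step usage described in the docstring, a hedged sketch of creating and filling an MNIST-shaped file. The file name is made up, the DenseFormat import path is assumed, and simplelearn.data.data_path must point somewhere writable:

import os
import numpy
import simplelearn.data
from simplelearn.formats import DenseFormat   # assumed import path

path = os.path.join(simplelearn.data.data_path, 'mnist_copy.h5')  # hypothetical

# 1: Create the file with empty, pre-shaped tensors.
h5_file = make_h5_file(path,
                       partition_names=['train', 'test'],
                       partition_sizes=[50000, 10000],
                       tensor_names=['images', 'labels'],
                       tensor_formats=[DenseFormat(axes=('b', '0', '1'),
                                                   shape=(-1, 28, 28),
                                                   dtype='uint8'),
                                       DenseFormat(axes=('b',),
                                                   shape=(-1,),
                                                   dtype='uint8')])

# 2: Fill the data tensors (zeros here; real data in practice).
train_images = h5_file['partitions']['train']['images']
train_images[...] = numpy.zeros(train_images.shape, dtype='uint8')

# 3: Close the file, then re-open it read-only through H5Dataset.
h5_file.close()
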
def build_fc_classifier(input_node,
                        sizes,
                        sparse_init_counts,
                        dropout_include_probabilities,
                        rng,
                        theano_rng):
    '''
    Builds a stack of fully-connected layers followed by a Softmax.

    Each hidden layer will be preceded by a ReLU.

    Initialization:

    Weights are initialized in the same way as in Pylearn2's MLP tutorial:
    pylearn2/scripts/tutorials/multilayer_perceptron/mlp_tutorial_part_3.yaml

    This means the following:

    Of the N affine layers, the weights of the first N-1 are all set to 0.0,
    except for k randomly-chosen elements, which are set to some random number
    drawn from the normal distribution with stddev=1.0.

    The biases are all initialized to 0.0.
    The last layer's weights and biases are both set to 0.0.

    Parameters
    ----------
    input_node: Node
      The node to build the stack on.

    sizes: Sequence
      A sequence of ints, indicating the output sizes of each layer.
      The last int is the number of classes.

    sparse_init_counts:
      A sequence of N-1 ints, where N = len(sizes).
      Used to initialize the weights of the first N-1 layers.
      If the n'th element is x, this means that the n'th layer
      will have x nonzeros, with the rest initialized to zeros.

    dropout_include_probabilities: Sequence
      A Sequence of N floats, where N = len(sizes).
      The dropout include probabilities applied to the input of each layer.
      If any of these probabilities is 1.0, the corresponding Dropout node
      will be omitted.

    rng: numpy.random.RandomState
      The RandomState to draw initial weights from.

    theano_rng: theano.tensor.shared_randomstreams.RandomStreams
      The RandomStreams to draw dropout masks from.

    Returns
    -------
    rval: tuple
      (affine_nodes, output_node), where affine_nodes is a list of the
      AffineNodes, in order, and output_node is the final node, a Softmax.
    '''
    assert_is_instance(input_node, Node)
    assert_equal(input_node.output_format.dtype,
                 numpy.dtype(theano.config.floatX))

    assert_greater(len(sizes), 0)
    assert_all_greater(sizes, 0)

    assert_equal(len(sparse_init_counts), len(sizes) - 1)
    assert_all_integer(sparse_init_counts)
    assert_all_greater(sparse_init_counts, 0)
    assert_all_less_equal(sparse_init_counts, sizes[:-1])

    assert_equal(len(dropout_include_probabilities), len(sizes))

    affine_nodes = []

    last_node = input_node

    for layer_index, layer_output_size in enumerate(sizes):
        # Add dropout, if asked for
        include_probability = dropout_include_probabilities[layer_index]
        if include_probability != 1.0:
            last_node = Dropout(last_node, include_probability, theano_rng)

        output_format = DenseFormat(axes=('b', 'f'),
                                    shape=(-1, layer_output_size),
                                    dtype=None)

        if layer_index < (len(sizes) - 1):
            last_node = AffineLayer(last_node, output_format)
        else:
            last_node = SoftmaxLayer(last_node, output_format)

        affine_nodes.append(last_node.affine_node)

    def init_sparse_bias(shared_variable, num_nonzeros, rng):
        '''
        Mimics the sparse initialization in
        pylearn2.models.mlp.Linear.set_input_space()
        '''

        params = shared_variable.get_value()
        assert_equal(params.shape[0], 1)

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[1])

        params[...] = 0.0

        indices = rng.choice(params.size,
                             size=num_nonzeros,
                             replace=False)

        # normal dist with stddev=1.0
        params[0, indices] = rng.randn(num_nonzeros)

        # Found that for biases, this didn't help (it increased the
        # final misclassification rate by .001)
        # if num_nonzeros > 0:
        #     params /= float(num_nonzeros)

        shared_variable.set_value(params)

    def init_sparse_linear(shared_variable, num_nonzeros, rng):
        params = shared_variable.get_value()
        params[...] = 0.0

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[0])

        for c in xrange(params.shape[1]):
            indices = rng.choice(params.shape[0],
                                 size=num_nonzeros,
                                 replace=False)

            # normal dist with stddev=1.0
            params[indices, c] = rng.randn(num_nonzeros)

        # TODO: it's somewhat worrisome that the tutorial in
        # pylearn2.scripts.tutorials.multilayer_perceptron/
        #   multilayer_perceptron.ipynb
        # seems to do fine without scaling the weights like this
        if num_nonzeros > 0:
            params /= float(num_nonzeros)
            # Interestingly, while this seems more correct (normalize
            # columns to norm=1), it prevents the NN from converging.
            # params /= numpy.sqrt(float(num_nonzeros))

        shared_variable.set_value(params)

    # Initialize the affine layer weights (not the biases, and not the softmax
    # weights)
    for sparse_init_count, affine_node in safe_izip(sparse_init_counts,
                                                    affine_nodes[:-1]):
        # pylearn2 doesn't sparse_init the biases. I also found that
        # doing so slightly increases the final misclassification rate.
        init_sparse_linear(affine_node.linear_node.params,
                           sparse_init_count,
                           rng)

    return affine_nodes, last_node
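

# Usage sketch (not part of the original source). It assumes the builder
# defined above is named build_fc_classifier (the actual name is not shown
# here) and that `image_node` is a Node whose output dtype is
# theano.config.floatX. It stacks two 500-unit hidden layers and a 10-way
# softmax, sparse-initializing 15 nonzeros per weight column and using no
# dropout:
#
#     from theano.tensor.shared_randomstreams import RandomStreams
#
#     rng = numpy.random.RandomState(3447)
#     theano_rng = RandomStreams(rng.randint(2 ** 15))
#     affine_nodes, softmax_node = build_fc_classifier(
#         image_node,
#         sizes=[500, 500, 10],
#         sparse_init_counts=[15, 15],
#         dropout_include_probabilities=[1.0, 1.0, 1.0],
#         rng=rng,
#         theano_rng=theano_rng)

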
def make_instance_dataset(norb_name,
                          a_norb,
                          b_norb,
                          test_elevation_stride,
                          test_azimuth_stride,
                          objects=None):
    '''
    Creates instance recognition datasets from category recognition datasets.

    Merges two category recognition datasets (with disjoint object instances),
    and re-partitions them into instance recognition datasets (with disjoint
    camera views).

    The instance recognition dataset consists of a train and test set.

    All objects not selected by <objects> are ignored.

    Of the remaining images, the test set consists of all images that satisfy
    both the test_elevation_stride and the test_azimuth_stride criteria. The
    other images are used for the training set.

    If the category dataset is in stereo, only the left stereo images are used.

    Parameters
    ----------
    norb_name: str
      The name of the category recognition dataset (e.g. 'big_norb'). Used to
      build the name of the instance recognition dataset. Alphanumeric
      characters and '_' only.

    a_norb: NORB Dataset
      One of the category recognition datasets (i.e. training set).

    b_norb: NORB Dataset
      The other category recognition dataset (i.e. testing set).

    test_elevation_stride: int
      Use every test_elevation_stride'th elevation for the test images.

    test_azimuth_stride: int
      Use every test_azimuth_stride'th azimuth for the test images.

    objects: Sequence
      [(c0, i0), (c1, i1), ..., (cN, iN)]
      Each (cx, ix) pair specifies an object to include, by its
      class and instance labels cx and ix.

    Returns
    -------
    rval: str
      The path to the newly created .h5 file.
    '''

    assert_is_instance(norb_name, basestring)
    assert_all_true(c.isalnum() or c == '_' for c in norb_name)

    assert_is_instance(a_norb, Dataset)
    assert_is_instance(b_norb, Dataset)
    assert_all_equal(a_norb.names, b_norb.names)
    assert_all_equal(a_norb.formats, b_norb.formats)

    assert_integer(test_elevation_stride)
    assert_greater(test_elevation_stride, 0)

    assert_integer(test_azimuth_stride)
    assert_greater(test_azimuth_stride, 0)

    if objects is not None:
        assert_is_instance(objects, Sequence)
        for id_pair in objects:
            assert_equal(len(id_pair), 2)
            assert_all_integer(id_pair)
            assert_all_greater_equal(id_pair, 0)

    #
    # Done sanity-checking args
    #

    (category_index,
     instance_index,
     azimuth_index,
     elevation_index) = range(4)  # no need for lighting_index (= 4)

    def get_row_indices(labels,
                        test_elevation_stride,
                        test_azimuth_stride,
                        objects):
        '''
        Returns the row indices of the training and testing sets.
        '''

        logical_and = numpy.logical_and

        if objects is not None:
            objects = numpy.asarray(objects)
            obj_cols = (category_index, instance_index)

            # Select the rows whose (category, instance) pair matches any of
            # the requested objects.
            object_mask = (labels[:, obj_cols][:, numpy.newaxis, :] ==
                           objects[numpy.newaxis, :, :])
            object_mask = object_mask.all(axis=2).any(axis=1)
        else:
            object_mask = numpy.ones(labels.shape[0], dtype=bool)

        test_mask = logical_and(
            object_mask,
            (labels[:, elevation_index] % test_elevation_stride) == 0)

        test_mask = logical_and(
            test_mask,
            (labels[:, azimuth_index] % (test_azimuth_stride * 2)) == 0)

        train_mask = logical_and(object_mask, numpy.logical_not(test_mask))

        return tuple(numpy.nonzero(m)[0] for m in (train_mask, test_mask))
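
    # Worked example (assuming the standard NORB label ranges: elevations
    # 0..8 in steps of 1, azimuths 0, 2, ..., 34 in steps of 2): with
    # test_elevation_stride=2 and test_azimuth_stride=2, the test set gets
    # the views with elevation in {0, 2, 4, 6, 8} and azimuth in
    # {0, 4, ..., 32} (hence the "* 2" above); every other selected view
    # goes to the training set.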

    a_train_indices, a_test_indices = get_row_indices(a_norb.tensors[1],
                                                      test_elevation_stride,
                                                      test_azimuth_stride,
                                                      objects)

    b_train_indices, b_test_indices = get_row_indices(b_norb.tensors[1],
                                                      test_elevation_stride,
                                                      test_azimuth_stride,
                                                      objects)

    def create_h5_filepath(norb_name,
                           test_elevation_stride,
                           test_azimuth_stride,
                           objects):
        '''
        Creates an hdf filepath based on the args.

        For norb_name: "big_norb", test_elevation_stride: 2,
        test_azimuth_stride: 1:
          <data_dir>/big_norb_instance/e02_a01_o_all.h5

        For the same as above, but with objects: [[1, 2], [3, 7], [4, 1]]:
          <data_dir>/big_norb_instance/e02_a01_o_1-2_3-7_4-1.h5
        '''
        output_dir = os.path.join(simplelearn.data.data_path,
                                  '{}_instance'.format(norb_name))

        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        filename = "e{:02d}_a{:02d}_o_".format(test_elevation_stride,
                                               test_azimuth_stride)

        if objects is None:
            filename = filename + 'all'
        else:
            # Join the (class, instance) pairs with '_' so the filename is
            # unambiguous (e.g. '1-2_3-7_4-1' rather than '1-23-74-1').
            filename = filename + '_'.join("%d-%d" % tuple(id_pair)
                                           for id_pair in objects)

        filename = filename + '.h5'

        return os.path.join(output_dir, filename)

    h5_path = create_h5_filepath(norb_name,
                                 test_elevation_stride,
                                 test_azimuth_stride,
                                 objects)

    def get_mono_format(input_image_format):
        '''
        Returns a copy of input_image_format with the stereo ('s') axis
        removed, if present.
        '''
        axes = input_image_format.axes
        shape = input_image_format.shape

        if 's' in axes:
            s_index = axes.index('s')
            axes = list(axes)
            del axes[s_index]

            shape = list(shape)
            del shape[s_index]

        return DenseFormat(axes=axes,
                           shape=shape,
                           dtype=input_image_format.dtype)
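
    # For example (assuming a small-NORB-style stereo format): an input
    # format with axes ('b', 's', '0', '1') and shape (-1, 2, 96, 96) becomes
    # a mono format with axes ('b', '0', '1') and shape (-1, 96, 96).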

    mono_image_format = get_mono_format(a_norb.formats[0])
    label_format = a_norb.formats[1]
    partition_names = ['train', 'test']
    partition_sizes = [len(a_train_indices) + len(b_train_indices),
                       len(a_test_indices) + len(b_test_indices)]
    train_indices = (a_train_indices, b_train_indices)
    test_indices = (a_test_indices, b_test_indices)

    # Creates a .h5 file and copies repartitioned data into it.
    with make_h5_file(h5_path,
                      partition_names,
                      partition_sizes,
                      a_norb.names,
                      [mono_image_format, label_format]) as h5_file:
        partitions = h5_file['partitions']

        for partition_name, (a_indices, b_indices) \
            in safe_izip(partition_names, [train_indices, test_indices]):

            partition = partitions[partition_name]

            a_images = a_norb.tensors[0]
            b_images = b_norb.tensors[0]
            out_images = partition['images']

            print("Copying {} partition.".format(partition_name))

            if 's' in a_norb.formats[0].axes:
                assert_equal(a_norb.formats[0].axes.index('s'), 1)

                out_images[:len(a_indices), ...] = a_images[a_indices, 0, ...]
                out_images[len(a_indices):, ...] = b_images[b_indices, 0, ...]
            else:
                out_images[:len(a_indices), ...] = a_images[a_indices, ...]
                out_images[len(a_indices):, ...] = b_images[b_indices, ...]

            a_labels = a_norb.tensors[1]
            b_labels = b_norb.tensors[1]
            out_labels = partition['labels']

            out_labels[:len(a_indices), :] = a_labels[a_indices, :]
            out_labels[len(a_indices):, :] = b_labels[b_indices, :]

            # Don't shuffle the data; that's the iterator's job.

    return h5_path
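

# Usage sketch (not part of the original source): given two already-loaded
# NORB Datasets with disjoint object instances (how they are loaded is not
# shown here), an instance-recognition split that holds out every 2nd
# elevation and every 2nd azimuth of two chosen objects could be built with:
#
#     h5_path = make_instance_dataset('small_norb',
#                                     a_norb=training_norb,
#                                     b_norb=testing_norb,
#                                     test_elevation_stride=2,
#                                     test_azimuth_stride=2,
#                                     objects=[(0, 4), (1, 6)])
#     # -> <data_dir>/small_norb_instance/e02_a02_o_0-4_1-6.h5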