Example #1
def test_np_format_as_vector2conv2D():
    vector_space = VectorSpace(dim=8*8*3, sparse=False)
    conv2d_space = Conv2DSpace(shape=(8,8), num_channels=3,
                               axes=('b','c',0,1))
    data = np.arange(5*8*8*3).reshape(5, 8*8*3)
    rval = vector_space.np_format_as(data, conv2d_space)
    assert np.all(rval == data.reshape((5,3,8,8)))
    def get_weights_topo(self):
        """
        Returns a topological view of the weights; the first half
        corresponds to wxf and the second half to wyf.

        Returns
        -------
        weights : ndarray
            Same as the return value of `get_weights` but formatted as a 4D
            tensor with the axes being (hidden/factor units, rows, columns,
            channels). The number of channels is either 1 or 3
            (because the filters will be visualized as grayscale or RGB
            images). At the moment the function only supports a number
            of factors that is a perfect square.
        """
        if (not isinstance(self.input_space.components[0], Conv2DSpace) or
                not isinstance(self.input_space.components[1], Conv2DSpace)):
            raise NotImplementedError()
        wxf = self.wxf.get_value(borrow=False).T
        wyf = self.wyf.get_value(borrow=False).T
        convx = self.input_space.components[0]
        convy = self.input_space.components[1]
        vecx = VectorSpace(self.nvisx)
        vecy = VectorSpace(self.nvisy)
        wxf_view = vecx.np_format_as(
            wxf,
            Conv2DSpace(convx.shape,
                        num_channels=convx.num_channels,
                        axes=('b', 0, 1, 'c')))
        wyf_view = vecy.np_format_as(
            wyf,
            Conv2DSpace(convy.shape,
                        num_channels=convy.num_channels,
                        axes=('b', 0, 1, 'c')))
        h = int(numpy.ceil(numpy.sqrt(self.nfac)))
        new_weights = numpy.zeros((wxf_view.shape[0] * 2, wxf_view.shape[1],
                                   wxf_view.shape[2], wxf_view.shape[3]),
                                  dtype=wxf_view.dtype)
        t = 0
        while t < (self.nfac // h):
            filter_pair = numpy.concatenate(
                (wxf_view[h * t:h * (t + 1), ...], wyf_view[h * t:h *
                                                            (t + 1), ...]), 0)
            new_weights[h * 2 * t:h * 2 * (t + 1), ...] = filter_pair
            t += 1
        return new_weights
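A minimal NumPy sketch of the interleaving loop in get_weights_topo above
(toy filter sizes, made up for illustration): blocks of h filters from
wxf_view and wyf_view alternate along the first axis.

import numpy as np

h, nfac = 2, 4
wxf_view = np.zeros((nfac, 3, 3, 1))   # stand-in "x" filters
wyf_view = np.ones((nfac, 3, 3, 1))    # stand-in "y" filters
new_weights = np.zeros((2 * nfac, 3, 3, 1))
for t in range(nfac // h):
    pair = np.concatenate((wxf_view[h * t:h * (t + 1)],
                           wyf_view[h * t:h * (t + 1)]), 0)
    new_weights[h * 2 * t:h * 2 * (t + 1)] = pair
# Result alternates [x, x, y, y, x, x, y, y] along axis 0.
assert new_weights[:h].sum() == 0 and new_weights[h:2 * h].sum() == h * 9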
Example #4
def test_np_format_as_conv2d_vector_conv2d():
    conv2d_space1 = Conv2DSpace(shape=(8, 8), num_channels=3,
                                axes=('c', 'b', 1, 0))
    vector_space = VectorSpace(dim=8*8*3, sparse=False)
    conv2d_space0 = Conv2DSpace(shape=(8, 8), num_channels=3,
                                axes=('b', 'c', 0, 1))
    data = np.arange(5*8*8*3).reshape(5, 3, 8, 8)

    vecval = conv2d_space0.np_format_as(data, vector_space)
    rval1 = vector_space.np_format_as(vecval, conv2d_space1)
    rval2 = conv2d_space0.np_format_as(data, conv2d_space1)
    assert np.allclose(rval1, rval2)

    nval = data.transpose(1, 0, 3, 2)
    assert np.allclose(nval, rval1)
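The direct conv-to-conv conversion checked above is just an axis
transpose. A small NumPy sketch of the same mapping, with the test's
shapes:

import numpy as np

src_axes, dst_axes = ('b', 'c', 0, 1), ('c', 'b', 1, 0)
data = np.arange(5 * 3 * 8 * 8).reshape(5, 3, 8, 8)  # laid out as src_axes
perm = [src_axes.index(ax) for ax in dst_axes]       # [1, 0, 3, 2]
assert np.allclose(data.transpose(*perm), data.transpose(1, 0, 3, 2))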
Example #5
def test_np_format_as_vector2conv2D():
    vector_space = VectorSpace(dim=8*8*3, sparse=False)
    conv2d_space = Conv2DSpace(shape=(8, 8), num_channels=3,
                               axes=('b', 'c', 0, 1))
    data = np.arange(5*8*8*3).reshape(5, 8*8*3)
    rval = vector_space.np_format_as(data, conv2d_space)

    # Get data in a Conv2DSpace with default axes
    new_axes = conv2d_space.default_axes
    axis_to_shape = {'b': 5, 'c': 3, 0: 8, 1: 8}
    new_shape = tuple([axis_to_shape[ax] for ax in new_axes])
    nval = data.reshape(new_shape)
    # Then transpose
    nval = nval.transpose(*[new_axes.index(ax) for ax in conv2d_space.axes])
    assert np.all(rval == nval)
Example #8
def test_np_format_as_vector2vector():
    vector_space_initial = VectorSpace(dim=8*8*3, sparse=False)
    vector_space_final = VectorSpace(dim=8*8*3, sparse=False)
    data = np.arange(5*8*8*3).reshape(5, 8*8*3)
    rval = vector_space_initial.np_format_as(data, vector_space_final)
    assert np.all(rval == data)
Example #10
class StereoViewConverter(object):

    """
    Converts stereo image data between two formats:
      A) A dense design matrix, one stereo pair per row (VectorSpace)
      B) An image pair (CompositeSpace of two Conv2DSpaces)

    Parameters
    ----------
    shape : tuple
    See doc for __init__'s <shape> parameter.
    """

    def __init__(self, shape, axes=None):
        """
        The arguments describe how the data is laid out in the design matrix.

        Parameters
        ----------

        shape : tuple
          A tuple of 4 ints, describing the shape of each datum.
          This is the size of each axis in <axes>, excluding the 'b' axis.

        axes : tuple
          A tuple of the following elements in any order:
            'b'  batch axis
            's'  stereo axis
             0   image axis 0 (row)
             1   image axis 1 (column)
            'c'  channel axis
        """
        shape = tuple(shape)

        if not all(isinstance(s, int) for s in shape):
            raise TypeError("Shape must be a tuple/list of ints")

        if len(shape) != 4:
            raise ValueError("Shape array needs to be of length 4, got %s." %
                             shape)

        datum_axes = list(axes)
        datum_axes.remove('b')
        if shape[datum_axes.index('s')] != 2:
            raise ValueError("Expected 's' axis to have size 2, got %d.\n"
                             "  axes:       %s\n"
                             "  shape:      %s" %
                             (shape[datum_axes.index('s')],
                              axes,
                              shape))
        self.shape = shape
        self.set_axes(axes)

        def make_conv2d_space(shape, axes):
            shape_axes = list(axes)
            shape_axes.remove('b')
            image_shape = tuple(shape[shape_axes.index(axis)]
                                for axis in (0, 1))
            conv2d_axes = list(axes)
            conv2d_axes.remove('s')
            return Conv2DSpace(shape=image_shape,
                               num_channels=shape[shape_axes.index('c')],
                               axes=conv2d_axes,
                               dtype=None)

        conv2d_space = make_conv2d_space(shape, axes)
        self.topo_space = CompositeSpace((conv2d_space, conv2d_space))
        self.storage_space = VectorSpace(dim=numpy.prod(shape))

    def get_formatted_batch(self, batch, space):
        """
        Returns a batch formatted to a space.

        Parameters
        ----------

        batch : ndarray
        The batch to format

        space : a pylearn2.space.Space
        The target space to format to.
        """
        return self.storage_space.np_format_as(batch, space)

    def design_mat_to_topo_view(self, design_mat):
        """
        Called by DenseDesignMatrix.get_formatted_view(), get_batch_topo()

        Parameters
        ----------

        design_mat : ndarray
        """
        return self.storage_space.np_format_as(design_mat, self.topo_space)

    def design_mat_to_weights_view(self, design_mat):
        """
        Called by DenseDesignMatrix.get_weights_view()

        Parameters
        ----------

        design_mat : ndarray
        """
        return self.design_mat_to_topo_view(design_mat)

    def topo_view_to_design_mat(self, topo_batch):
        """
        Used by DenseDesignMatrix.set_topological_view(), .get_design_mat()

        Parameters
        ----------

        topo_batch : ndarray
        """
        return self.topo_space.np_format_as(topo_batch, self.storage_space)

    def view_shape(self):
        """
        TODO: write documentation.
        """
        return self.shape

    def weights_view_shape(self):
        """
        TODO: write documentation.
        """
        return self.view_shape()

    def set_axes(self, axes):
        """
        Change the order of the axes.

        Parameters
        ----------

        axes : tuple
        Must have length 5, must contain 'b', 's', 0, 1, 'c'.
        """
        axes = tuple(axes)

        if len(axes) != 5:
            raise ValueError("Axes must have 5 elements; got %s" % str(axes))

        for required_axis in ('b', 's', 0, 1, 'c'):
            if required_axis not in axes:
                raise ValueError("Axes must contain 'b', 's', 0, 1, and 'c'. "
                                 "Got %s." % str(axes))

        if axes.index('b') != 0:
            raise ValueError("The 'b' axis must come first (axes = %s)." %
                             str(axes))

        def remove_b_axis(axes):
            axes = list(axes)
            axes.remove('b')
            return tuple(axes)

        if hasattr(self, 'axes'):
            # Reorders the shape vector to match the new axis ordering.
            assert hasattr(self, 'shape')
            old_axes = remove_b_axis(self.axes)  # pylint: disable-msg=E0203
            new_axes = remove_b_axis(axes)
            new_shape = tuple(self.shape[old_axes.index(a)] for a in new_axes)
            self.shape = new_shape

        self.axes = axes
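A short sketch of the shape bookkeeping set_axes performs when the axis
order changes (toy sizes, made up): each entry of the datum shape simply
follows its axis to the new position.

old_axes, old_shape = ('s', 0, 1, 'c'), (2, 6, 4, 3)  # 'b' already removed
new_axes = ('c', 's', 0, 1)
new_shape = tuple(old_shape[old_axes.index(a)] for a in new_axes)
assert new_shape == (3, 2, 6, 4)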
Example #11
class StereoViewConverter(object):
    """
    Converts stereo image data between two formats:

    #. A dense design matrix, one stereo pair per row (`VectorSpace`)
    #. An image pair (`CompositeSpace` of two `Conv2DSpace`)

    The arguments describe how the data is laid out in the design matrix.

    Parameters
    ----------
    shape : tuple
        A tuple of 4 ints, describing the shape of each datum. This is the size
        of each axis in `<axes>`, excluding the `b` axis.
    axes : tuple
        Tuple of the following elements in any order:

        * 'b' : batch axis
        * 's' : stereo axis
        *  0  : image axis 0 (row)
        *  1  : image axis 1 (column)
        * 'c' : channel axis
    """
    def __init__(self, shape, axes=None):
        shape = tuple(shape)

        if not all(isinstance(s, int) for s in shape):
            raise TypeError("Shape must be a tuple/list of ints")

        if len(shape) != 4:
            raise ValueError("Shape array needs to be of length 4, got %s." %
                             shape)

        datum_axes = list(axes)
        datum_axes.remove('b')
        if shape[datum_axes.index('s')] != 2:
            raise ValueError("Expected 's' axis to have size 2, got %d.\n"
                             "  axes:       %s\n"
                             "  shape:      %s" %
                             (shape[datum_axes.index('s')], axes, shape))
        self.shape = shape
        self.set_axes(axes)

        def make_conv2d_space(shape, axes):
            shape_axes = list(axes)
            shape_axes.remove('b')
            image_shape = tuple(shape[shape_axes.index(axis)]
                                for axis in (0, 1))
            conv2d_axes = list(axes)
            conv2d_axes.remove('s')
            return Conv2DSpace(shape=image_shape,
                               num_channels=shape[shape_axes.index('c')],
                               axes=conv2d_axes)

        conv2d_space = make_conv2d_space(shape, axes)
        self.topo_space = CompositeSpace((conv2d_space, conv2d_space))
        self.storage_space = VectorSpace(dim=numpy.prod(shape))

    def get_formatted_batch(self, batch, space):
        return self.storage_space.np_format_as(batch, space)

    def design_mat_to_topo_view(self, design_mat):
        """
        Called by DenseDesignMatrix.get_formatted_view(), get_batch_topo()
        """
        return self.storage_space.np_format_as(design_mat, self.topo_space)

    def design_mat_to_weights_view(self, design_mat):
        """
        Called by DenseDesignMatrix.get_weights_view()
        """
        return self.design_mat_to_topo_view(design_mat)

    def topo_view_to_design_mat(self, topo_batch):
        """
        Used by `DenseDesignMatrix.set_topological_view()` and
        `DenseDesignMatrix.get_design_mat()`.
        """
        return self.topo_space.np_format_as(topo_batch, self.storage_space)

    def view_shape(self):
        return self.shape

    def weights_view_shape(self):
        return self.view_shape()

    def set_axes(self, axes):
        axes = tuple(axes)

        if len(axes) != 5:
            raise ValueError("Axes must have 5 elements; got %s" % str(axes))

        for required_axis in ('b', 's', 0, 1, 'c'):
            if required_axis not in axes:
                raise ValueError("Axes must contain 'b', 's', 0, 1, and 'c'. "
                                 "Got %s." % str(axes))

        if axes.index('b') != 0:
            raise ValueError("The 'b' axis must come first (axes = %s)." %
                             str(axes))

        def get_batchless_axes(axes):
            axes = list(axes)
            axes.remove('b')
            return tuple(axes)

        if hasattr(self, 'axes'):
            # Reorders the shape vector to match the new axis ordering.
            assert hasattr(self, 'shape')
            old_axes = get_batchless_axes(self.axes)
            new_axes = get_batchless_axes(axes)
            new_shape = tuple(self.shape[old_axes.index(a)] for a in new_axes)
            self.shape = new_shape

        self.axes = axes
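A hedged usage sketch of the converter above (the 6x4 image size and the
batch of zeros are made up; it relies on VectorSpace.np_format_as
splitting each row across the two Conv2DSpace components, which is
exactly what design_mat_to_topo_view does):

import numpy

converter = StereoViewConverter(shape=(2, 6, 4, 3),
                                axes=('b', 's', 0, 1, 'c'))
batch = numpy.zeros((10, 2 * 6 * 4 * 3))  # one stereo pair per row
left, right = converter.design_mat_to_topo_view(batch)
assert left.shape == (10, 6, 4, 3)        # axes ('b', 0, 1, 'c')
assert numpy.allclose(converter.topo_view_to_design_mat((left, right)),
                      batch)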
Example #12
class HingeLoss(Layer):
    def __init__(self,
                 n_classes,
                 layer_name,
                 irange=None,
                 istdev=None,
                 no_affine=False,
                 sparse_init=None):

        super(HingeLoss, self).__init__()

        self.__dict__.update(locals())
        del self.self

        self.output_space = VectorSpace(n_classes)

        if not self.no_affine:
            self.b = sharedX(np.zeros((n_classes, )), name='hingeloss_b')

    def get_monitoring_channels(self):

        if self.no_affine:
            return OrderedDict()

        W = self.W

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

    @wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self,
                                      state_below=None,
                                      state=None,
                                      targets=None):

        # channels that do not require state information
        #         if self.no_affine:
        #             rval = OrderedDict()
        #
        #         W = self.W
        #
        #         assert W.ndim == 2
        #
        #         sq_W = T.sqr(W)
        #
        #         row_norms = T.sqrt(sq_W.sum(axis=1))
        #         col_norms = T.sqrt(sq_W.sum(axis=0))
        #
        #         rval = OrderedDict([('row_norms_min',  row_norms.min()),
        #                             ('row_norms_mean', row_norms.mean()),
        #                             ('row_norms_max',  row_norms.max()),
        #                             ('col_norms_min',  col_norms.min()),
        #                             ('col_norms_mean', col_norms.mean()),
        #                             ('col_norms_max',  col_norms.max()), ])

        rval = OrderedDict()
        if (state_below is not None) or (state is not None):
            if state is None:
                state = self.fprop(state_below)

            mx = state.max(axis=1)

            rval.update(
                OrderedDict([('mean_max_class', mx.mean()),
                             ('max_max_class', mx.max()),
                             ('min_max_class', mx.min())]))

            if targets is not None:
                y_hat = self.target_convert(T.argmax(state, axis=1))
                # Assume targets are in {0, 1} as binary one-hot vectors
                y = self.target_convert(T.argmax(targets, axis=1))
                misclass = T.neq(y, y_hat).mean()
                misclass = T.cast(misclass, config.floatX)
                rval['misclass'] = misclass
                rval['nll'] = self.cost(Y_hat=state, Y=targets)

        return rval

    def get_monitoring_channels_from_state(self, state, target=None):
        warnings.warn("Layer.get_monitoring_channels_from_state is " + \
                    "deprecated. Use get_layer_monitoring_channels " + \
                    "instead. Layer.get_monitoring_channels_from_state " + \
                    "will be removed on or after september 24th 2014",
                    stacklevel=2)

        mx = state.max(axis=1)

        rval = OrderedDict([('mean_max_class', mx.mean()),
                            ('max_max_class', mx.max()),
                            ('min_max_class', mx.min())])

        if target is not None:
            y_hat = self.target_convert(T.argmax(state, axis=1))
            # Assume targets are in {0, 1} as binary one-hot vectors
            y = self.target_convert(T.argmax(target, axis=1))
            misclass = T.neq(y, y_hat).mean()
            misclass = T.cast(misclass, config.floatX)
            rval['misclass'] = misclass
            rval['nll'] = self.cost(Y_hat=state, Y=target)

        return rval

    def set_input_space(self, space):
        self.input_space = space

        if not isinstance(space, Space):
            raise TypeError("Expected Space, got " + str(space) + " of type " +
                            str(type(space)))

        self.input_dim = space.get_total_dimension()
        self.needs_reformat = not isinstance(space, VectorSpace)

        desired_dim = self.input_dim
        self.desired_space = VectorSpace(desired_dim)

        if not self.needs_reformat:
            assert self.desired_space == self.input_space

        rng = self.mlp.rng

        if self.no_affine:
            self._params = []
        else:
            if self.irange is not None:
                assert self.istdev is None
                assert self.sparse_init is None
                W = rng.uniform(-self.irange, self.irange,
                                (self.input_dim, self.n_classes))
            elif self.istdev is not None:
                assert self.sparse_init is None
                W = rng.randn(self.input_dim, self.n_classes) * self.istdev
            else:
                assert self.sparse_init is not None
                W = np.zeros((self.input_dim, self.n_classes))
                for i in xrange(self.n_classes):
                    for j in xrange(self.sparse_init):
                        idx = rng.randint(0, self.input_dim)
                        while W[idx, i] != 0.:
                            idx = rng.randint(0, self.input_dim)
                        W[idx, i] = rng.randn()

            self.W = sharedX(W, 'hingeloss_W')

            self._params = [self.b, self.W]

    def get_weights_topo(self):
        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()
        desired = self.W.get_value().T
        ipt = self.desired_space.np_format_as(desired, self.input_space)
        rval = Conv2DSpace.convert_numpy(ipt, self.input_space.axes,
                                         ('b', 0, 1, 'c'))
        return rval

    def get_weights(self):
        if not isinstance(self.input_space, VectorSpace):
            raise NotImplementedError()

        return self.W.get_value()

    def set_weights(self, weights):
        self.W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def fprop(self, state_below):
        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[
                    0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size " +
                                 str(self.mlp.batch_size) + " but has " +
                                 str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        if not hasattr(self, 'no_affine'):
            self.no_affine = False

        if self.no_affine:
            rval = state_below
        else:
            assert self.W.ndim == 2
            b = self.b
            W = self.W

            rval = T.dot(state_below, W) + b

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return rval

    def target_convert(self, Y):
        '''
        Converts targets from {0, 1} to {-1, 1}.
        '''
        Y_t = 2. * Y - 1.
        return Y_t

    # def hinge_cost(self, W, Y, Y_hat, C=1.):
    def hinge_cost(self, Y, Y_hat):
        #prob = .5 * T.dot(self.W.T, self.W) + C * (T.maximum(1 - Y * Y_hat, 0) ** 2.).sum(axis=1)
        prob = (T.maximum(1 - Y * Y_hat, 0)**2.).sum(axis=1)
        return prob

    def cost(self, Y, Y_hat):
        """
        Y must be one-hot binary. Y_hat is a hinge-loss estimate of Y.
        """

        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op
        assert Y_hat.ndim == 2
        Y_t = self.target_convert(Y)
        # prob = self.hinge_cost(self.W, Y_t, Y_hat)
        prob = self.hinge_cost(Y_t, Y_hat)
        assert prob.ndim == 1
        rval = prob.mean()

        return rval

    def cost_matrix(self, Y, Y_hat):
        """
        Y must be one-hot binary. Y_hat is a hinge-loss estimate of Y.
        """

        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op

        assert Y_hat.ndim == 2
        Y_t = self.target_convert(Y)
        # prob = self.hinge_cost(self.W, Y_t, Y_hat)
        prob = self.hinge_cost(Y_t, Y_hat)
        return prob

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        return coeff * T.sqr(self.W).sum()

    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W = self.W
        return coeff * abs(W).sum()

    @wraps(Layer._modify_updates)
    def _modify_updates(self, updates):

        if self.no_affine:
            return
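A toy NumPy check of the squared hinge cost used above (numbers made up):
targets are converted from {0, 1} one-hot to {-1, 1}, and each example
contributes sum_j max(1 - y_j * yhat_j, 0)**2.

import numpy as np

Y = np.array([[0., 1.], [1., 0.]])            # one-hot targets in {0, 1}
Y_hat = np.array([[0.3, 0.8], [-0.2, 0.5]])   # raw scores from the layer
Y_t = 2. * Y - 1.                             # target_convert: {0,1} -> {-1,1}
per_example = (np.maximum(1. - Y_t * Y_hat, 0.) ** 2.).sum(axis=1)
assert np.allclose(per_example, [1.73, 3.69])
assert np.isclose(per_example.mean(), 2.71)   # what cost() returns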
Example #13
def train(d=None):
    train_X = np.array(d.train_X)
    train_y = np.array(d.train_Y)
    valid_X = np.array(d.valid_X)
    valid_y = np.array(d.valid_Y)
    test_X = np.array(d.test_X)
    test_y = np.array(d.test_Y)
    nb_classes = len(np.unique(train_y))
    train_y = convert_one_hot(train_y)
    valid_y = convert_one_hot(valid_y)
    # train_set = RotationalDDM(X=train_X, y=train_y)
    train_set = DenseDesignMatrix(X=train_X, y=train_y)
    valid_set = DenseDesignMatrix(X=valid_X, y=valid_y)
    print 'Setting up'
    batch_size = 100
    c0 = mlp.ConvRectifiedLinear(
        layer_name='c0',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    c1 = mlp.ConvRectifiedLinear(
        layer_name='c1',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    c2 = mlp.ConvRectifiedLinear(
        layer_name='c2',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[5, 4],
        W_lr_scale=0.25,
        # max_kernel_norm=1.9365
    )
    sp0 = mlp.SoftmaxPool(
        detector_layer_dim=16,
        layer_name='sp0',
        pool_size=4,
        sparse_init=512,
    )
    sp1 = mlp.SoftmaxPool(
        detector_layer_dim=16,
        layer_name='sp1',
        pool_size=4,
        sparse_init=512,
    )
    r0 = mlp.RectifiedLinear(
        layer_name='r0',
        dim=512,
        sparse_init=512,
    )
    r1 = mlp.RectifiedLinear(
        layer_name='r1',
        dim=512,
        sparse_init=512,
    )
    s0 = mlp.Sigmoid(
        layer_name='s0',
        dim=500,
        # max_col_norm=1.9365,
        sparse_init=15,
    )
    out = mlp.Softmax(
        n_classes=nb_classes,
        layer_name='output',
        irange=.0,
        # max_col_norm=1.9365,
        # sparse_init=nb_classes,
    )
    epochs = EpochCounter(100)
    layers = [s0, out]
    decay_coeffs = [.00005, .00005, .00005]
    in_space = Conv2DSpace(
        shape=[d.size, d.size],
        num_channels=1,
    )
    vec_space = VectorSpace(d.size**2)
    nn = mlp.MLP(
        layers=layers,
        # input_space=in_space,
        nvis=d.size**2,
        # batch_size=batch_size,
    )
    trainer = sgd.SGD(
        learning_rate=0.01,
        # cost=SumOfCosts(costs=[
        # dropout.Dropout(),
        #     MethodCost(method='cost_from_X'),
        # WeightDecay(decay_coeffs),
        # ]),
        # cost=MethodCost(method='cost_from_X'),
        batch_size=batch_size,
        # train_iteration_mode='even_shuffled_sequential',
        termination_criterion=epochs,
        # learning_rule=learning_rule.Momentum(init_momentum=0.5),
    )
    trainer = bgd.BGD(
        batch_size=10000,
        line_search_mode='exhaustive',
        conjugate=1,
        updates_per_batch=10,
        termination_criterion=epochs,
    )
    lr_adjustor = LinearDecayOverEpoch(
        start=1,
        saturate=10,
        decay_factor=.1,
    )
    momentum_adjustor = learning_rule.MomentumAdjustor(
        final_momentum=.99,
        start=1,
        saturate=10,
    )
    trainer.setup(nn, train_set)
    print 'Learning'
    test_X = vec_space.np_format_as(test_X, nn.get_input_space())
    train_X = vec_space.np_format_as(train_X, nn.get_input_space())
    i = 0
    X = nn.get_input_space().make_theano_batch()
    Y = nn.fprop(X)
    predict = theano.function([X], Y)
    best = -40
    best_iter = -1
    while trainer.continue_learning(nn):
        print '--------------'
        print 'Training Epoch ' + str(i)
        trainer.train(dataset=train_set)
        nn.monitor()
        print 'Evaluating...'
        predictions = convert_categorical(predict(train_X[:2000]))
        score = accuracy_score(convert_categorical(train_y[:2000]),
                               predictions)
        print 'Score on train: ' + str(score)
        predictions = convert_categorical(predict(test_X))
        score = accuracy_score(test_y, predictions)
        print 'Score on test: ' + str(score)
        best, best_iter = (best, best_iter) if best > score else (score, i)
        print 'Current best: ' + str(best) + ' at iter ' + str(best_iter)
        print classification_report(test_y, predictions)
        print 'Adjusting parameters...'
        # momentum_adjustor.on_monitor(nn, valid_set, trainer)
        # lr_adjustor.on_monitor(nn, valid_set, trainer)
        i += 1
        print ' '
Example #17
class L2SquareHinge(Layer):
    """
    A layer that can apply an affine transformation
    and use a l2 regularized square hinge loss.

    Parameters
    ----------
    n_classes : int
        Number of classes for the targets.
    layer_name : string
        Name of the layer.
    irange : float
        If specified, initialized each weight randomly in
        U(-irange, irange).
    istdev : float
        If specified, initialize each weight randomly from
        N(0,istdev).
    sparse_init : int
        If specified, initial sparse_init number of weights
        for each unit from N(0,1).
    W_lr_scale : float
        Scale for weight learning rate.
    b_lr_scale : float
        Scale for bias learning rate.
    max_row_norm : float
        Maximum norm for a row of the weight matrix.
    no_affine : boolean
        If True, no affine transformation is applied; the loss is
        computed directly on the inputs.
    max_col_norm : float
        Maximum norm for a column of the weight matrix.
    init_bias_target_marginals : dataset
        Take the probability distribution of the targets into account to
        intelligently initialize biases.
    binary_target_dim : int, optional
        If your targets are class labels (i.e. a binary vector) then set the
        number of targets here so that an IndexSpace of the proper dimension
        can be used as the target space. This allows the layer to compute
        the cost much more quickly than if it needs to convert the targets
        into a VectorSpace.
    """
    def __init__(self,
                 n_classes,
                 layer_name,
                 C=0.1,
                 irange=None,
                 istdev=None,
                 sparse_init=None,
                 W_lr_scale=None,
                 b_lr_scale=None,
                 max_row_norm=None,
                 no_affine=False,
                 max_col_norm=None,
                 init_bias_target_marginals=None,
                 binary_target_dim=None):

        super(L2SquareHinge, self).__init__()

        if isinstance(W_lr_scale, str):
            W_lr_scale = float(W_lr_scale)

        self.__dict__.update(locals())
        del self.self
        del self.init_bias_target_marginals

        assert isinstance(n_classes, py_integer_types)

        if binary_target_dim is not None:
            assert isinstance(binary_target_dim, py_integer_types)
            self._has_binary_target = True
            self._target_space = IndexSpace(dim=binary_target_dim,
                                            max_labels=n_classes)
        else:
            self._has_binary_target = False

        self.output_space = VectorSpace(n_classes)

        self.b = sharedX(np.zeros((n_classes, )), name='hinge_b')
        if init_bias_target_marginals:
            y = init_bias_target_marginals.y
            if init_bias_target_marginals.y_labels is None:
                marginals = y.mean(axis=0)
            else:
                # compute class frequencies
                if np.max(y.shape) != np.prod(y.shape):
                    raise AssertionError("Use of "
                                         "`init_bias_target_marginals` "
                                         "requires that each example has "
                                         "a single label.")
                marginals = np.bincount(y.flat) / float(y.shape[0])

            assert marginals.ndim == 1
            b = pseudoinverse_softmax_numpy(marginals).astype(self.b.dtype)
            assert b.ndim == 1
            assert b.dtype == self.b.dtype
            self.b.set_value(b)
        else:
            assert init_bias_target_marginals is None

    @wraps(Layer.get_lr_scalers)
    def get_lr_scalers(self):

        rval = OrderedDict()
        if self.W_lr_scale is not None:
            assert isinstance(self.W_lr_scale, float)
            rval[self.W] = self.W_lr_scale

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        if self.b_lr_scale is not None:
            assert isinstance(self.b_lr_scale, float)
            rval[self.b] = self.b_lr_scale

        return rval

    @wraps(Layer.get_monitoring_channels)
    def get_monitoring_channels(self):
        warnings.warn("Layer.get_monitoring_channels is " + \
                    "deprecated. Use get_layer_monitoring_channels " + \
                    "instead. Layer.get_monitoring_channels " + \
                    "will be removed on or after september 24th 2014",
                    stacklevel=2)

        W = self.W
        assert W.ndim == 2
        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))
        return OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

    @wraps(Layer.get_monitoring_channels_from_state)
    def get_monitoring_channels_from_state(self, state, target=None):
        warnings.warn("Layer.get_monitoring_channels_from_state is " + \
                    "deprecated. Use get_layer_monitoring_channels " + \
                    "instead. Layer.get_monitoring_channels_from_state " + \
                    "will be removed on or after september 24th 2014",
                    stacklevel=2)
        # channels that do not require state information
        W = self.W
        assert W.ndim == 2
        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))
        rval = OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

        mx = state.max(axis=1)
        rval.update(
            OrderedDict([('mean_max_class', mx.mean()),
                         ('max_max_class', mx.max()),
                         ('min_max_class', mx.min())]))
        if target is not None:
            y_hat = T.argmax(state, axis=1)
            y = T.argmax(target, axis=1)
            misclass = T.neq(y, y_hat).mean()
            misclass = T.cast(misclass, config.floatX)
            rval['misclass'] = misclass
            rval['nll'] = self.cost(Y_hat=state, Y=target)

        return rval

    @wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self,
                                      state_below=None,
                                      state=None,
                                      targets=None):

        # channels that do not require state information
        W = self.W
        assert W.ndim == 2
        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))
        rval = OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

        if (state_below is not None) or (state is not None):
            if state is None:
                state = self.fprop(state_below)
            mx = state.max(axis=1)
            rval.update(
                OrderedDict([('mean_max_class', mx.mean()),
                             ('max_max_class', mx.max()),
                             ('min_max_class', mx.min())]))

            if targets is not None:
                y_hat = T.argmax(state, axis=1)
                y = T.argmax(targets, axis=1)
                misclass = T.neq(y, y_hat).mean()
                misclass = T.cast(misclass, config.floatX)
                rval['misclass'] = misclass
                rval['nll'] = self.cost(Y_hat=state, Y=targets)
        return rval
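
    # Worked example of the misclassification channel above: for a batch
    # with predicted scores [[2., 1.], [0., 3.]] and one-hot targets
    # [[1., 0.], [0., 1.]], T.argmax gives y_hat = [0, 1] and y = [0, 1],
    # so T.neq(y, y_hat).mean() evaluates to 0.0 (no errors).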

    @wraps(Layer.set_input_space)
    def set_input_space(self, space):
        self.input_space = space

        if not isinstance(space, Space):
            raise TypeError("Expected Space, got " + str(space) + " of type " +
                            str(type(space)))
        self.input_dim = space.get_total_dimension()
        self.needs_reformat = not isinstance(space, VectorSpace)
        desired_dim = self.input_dim
        self.desired_space = VectorSpace(desired_dim)

        if not self.needs_reformat:
            assert self.desired_space == self.input_space

        rng = self.mlp.rng
        if self.no_affine:
            self._params = []
        else:
            if self.irange is not None:
                assert self.istdev is None
                assert self.sparse_init is None
                W = rng.uniform(-self.irange, self.irange,
                                (self.input_dim, self.n_classes))
            elif self.istdev is not None:
                assert self.sparse_init is None
                W = rng.randn(self.input_dim, self.n_classes) * self.istdev
            else:
                assert self.sparse_init is not None
                W = np.zeros((self.input_dim, self.n_classes))
                for i in xrange(self.n_classes):
                    for j in xrange(self.sparse_init):
                        idx = rng.randint(0, self.input_dim)
                        while W[idx, i] != 0.:
                            idx = rng.randint(0, self.input_dim)
                        W[idx, i] = rng.randn()

            self.W = sharedX(W, 'hinge_W')

            self._params = [self.b, self.W]
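
    # A small sketch of the sparse initialisation branch above: with
    # sparse_init=3, each of the n_classes columns of W receives exactly
    # three nonzero entries drawn from rng.randn(), at distinct row
    # indices; every other weight starts at zero. irange and istdev
    # instead fill W densely (uniform and Gaussian respectively), and
    # the three options are mutually exclusive.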

    @wraps(Layer.get_weights_topo)
    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()
        desired = self.W.get_value().T
        ipt = self.desired_space.np_format_as(desired, self.input_space)
        rval = Conv2DSpace.convert_numpy(ipt, self.input_space.axes,
                                         ('b', 0, 1, 'c'))
        return rval

    @wraps(Layer.get_weights)
    def get_weights(self):
        if not isinstance(self.input_space, VectorSpace):
            raise NotImplementedError()
        return self.W.get_value()

    @wraps(Layer.set_weights)
    def set_weights(self, weights):
        self.W.set_value(weights)

    @wraps(Layer.set_biases)
    def set_biases(self, biases):
        self.b.set_value(biases)

    @wraps(Layer.get_biases)
    def get_biases(self):
        return self.b.get_value()

    @wraps(Layer.get_weights_format)
    def get_weights_format(self):
        return ('v', 'h')

    @wraps(Layer.fprop)
    def fprop(self, state_below):
        # Precondition: validate (and if needed reformat) the input.
        self.input_space.validate(state_below)
        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)
        self.desired_space.validate(state_below)
        assert state_below.ndim == 2
        assert self.W.ndim == 2

        # Linear prediction
        rval = T.dot(state_below, self.W) + self.b
        return rval
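
    # Shape sketch for fprop (hypothetical sizes): with a batch of 128
    # examples, input_dim=500 and n_classes=10, state_below is
    # (128, 500), W is (500, 10) and b is (10,), so
    # T.dot(state_below, self.W) + self.b broadcasts b across the batch
    # and returns a (128, 10) matrix of per-class scores.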

    def hinge_cost(self, Y, Y_hat):
        """
        Per-example one-vs-all squared hinge loss, plus an L2 penalty
        on the weights scaled by `self.C`. `Y` is assumed to be one-hot
        and is mapped to {-1, +1} targets before the margins are taken.
        """
        targets = 2. * Y - 1.
        hinge = (T.maximum(0., 1. - targets * Y_hat) ** 2.).sum(axis=1)
        return hinge + self.C * self.W.norm(2)
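
    # Worked example of the squared hinge above (toy numbers): with
    # one-hot Y = [1, 0] the {-1, +1} targets are [1, -1]; for scores
    # Y_hat = [0.2, -2.0] the margins 1 - t * s are [0.8, -1.0], so the
    # per-class losses are [0.64, 0.0] and the example contributes 0.64
    # plus the C-weighted weight-norm penalty.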

    @wraps(Layer.cost)
    def cost(self, Y, Y_hat):
        return self.hinge_cost(Y, Y_hat).mean()

    @wraps(Layer.get_weight_decay)
    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        return coeff * T.sqr(self.W).sum()

    @wraps(Layer.get_l1_weight_decay)
    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W = self.W
        return coeff * abs(W).sum()
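
    # For instance, with coeff=1e-4 and a 2x2 weight matrix
    # [[1., -2.], [0., 3.]], the L2 term above is 1e-4 * 14.0 (sum of
    # squares) and the L1 term is 1e-4 * 6.0 (sum of absolute values).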

    @wraps(Layer._modify_updates)
    def _modify_updates(self, updates):
        if self.no_affine:
            return
        if self.max_row_norm is not None:
            W = self.W
            if W in updates:
                updated_W = updates[W]
                row_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=1))
                desired_norms = T.clip(row_norms, 0, self.max_row_norm)
                scales = desired_norms / (1e-7 + row_norms)
                updates[W] = updated_W * scales.dimshuffle(0, 'x')
        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W = self.W
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))
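
    # Numeric sketch of the max-norm constraint above: with
    # max_col_norm=1.9 and an updated column whose entries are
    # [1.2, 1.6] (norm 2.0), the column is rescaled by 1.9 / 2.0,
    # giving [1.14, 1.52] with norm 1.9; columns already inside the
    # norm ball are left (almost) unchanged thanks to the clip.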