Exemple #1
def test_isfinite():
    """
    Tests that pylearn2.utils.isfinite correctly
    identifies `np.nan` and `np.inf` values in an array.
    """
    arr = np.random.random(100)
    # The freshly drawn sample must be reported as finite.
    assert isfinite(arr)
    # Each non-finite value, placed in the array on its own, must be detected.
    for bad_value in (np.nan, np.inf, -np.inf):
        arr[0] = bad_value
        assert not isfinite(arr), bad_value
 def set_H(self, H):
     """
     Replace the stored H matrix with `H`.

     `H` must have shape (self._data_size, self._num_latent_topics); both
     dimensions are asserted before anything is stored.

     Raises Exception("NaN or Inf in H") if the stored value is not finite
     after the update.
     """
     data_size, num_latent_topics = H.shape
     assert data_size == self._data_size
     assert num_latent_topics == self._num_latent_topics
     # Store the new value. Without this step the check below would only
     # ever inspect the previous contents of self.H.
     self.H.set_value(H.astype(self.H.dtype))
     if not isfinite(self.H.get_value(borrow=True)):
         raise Exception("NaN or Inf in H")
 def set_W(self, W):
     """
     Replace the stored W matrix with `W`.

     `W` must have shape (num_latent_topics, self._data_size). The number
     of latent topics is taken from `W`, and the output space is rebuilt to
     match it.

     Raises Exception("NaN or Inf in W") if the stored value is not finite
     after the update.
     """
     num_latent_topics, data_size = W.shape
     # Validate before touching any attribute so a failed assertion leaves
     # the object unchanged.
     assert data_size == self._data_size
     # Store the new value. Without this step the check below would only
     # ever inspect the previous contents of self.W.
     self.W.set_value(W.astype(self.W.dtype))
     self._num_latent_topics = num_latent_topics
     self.output_space = VectorSpace(self._num_latent_topics)
     if not isfinite(self.W.get_value(borrow=True)):
         raise Exception("NaN or Inf in W")
Exemple #4
    def test_mean_keep_dimensions(self):
        """
        Apply RemoveMean(axis=1) to the CIFAR10 "train" set and check that
        the resulting design matrix contains only finite values.
        """
        train_set = cifar10.CIFAR10(which_set="train")
        train_set.apply_preprocessor(RemoveMean(axis=1), can_fit=True)

        assert isfinite(train_set.get_design_matrix())
    def train_all(self, dataset):
        """
            Train model

            dataset: Pylearn dataset object.

            Raises Exception if H or W contains NaN or Inf.
        """
        # NOTE(review): only the finiteness checks are present in this body;
        # `dataset` is not used -- confirm that no update step is missing.
        if not isfinite(self.H.get_value(borrow=True)):
            raise Exception("NaN or Inf in H")

        if not isfinite(self.W.get_value(borrow=True)):
            raise Exception("NaN or Inf in W")


def entropy_binary_vector(P):
    """
    Entropy of each row of a matrix of Bernoulli probabilities.

    If P[i,j] represents the probability of some binary random variable X[i,j]
    being 1, then rval[i] gives the entropy of the random vector X[i,:]

    The result is built as the sum over axis 1 of -xlogx(P) plus the sum
    over axis 1 of -xlogx(1 - P).

    Parameters
    ----------
    P : symbolic matrix of probabilities
        Any debug values attached to it are asserted to lie in [0, 1].

    Returns
    -------
    rval : symbolic vector
        One entry per row of P.
    """

    for Pv in get_debug_values(P):
        assert Pv.min() >= 0.0
        assert Pv.max() <= 1.0

    oneMinusP = 1. - P

    PlogP = xlogx(P)
    omPlogOmP = xlogx(oneMinusP)

    # Summing over axis 1 must leave exactly one dimension.
    term1 = - T.sum(PlogP, axis=1)
    assert len(term1.type.broadcastable) == 1

    term2 = - T.sum(omPlogOmP, axis=1)
    assert len(term2.type.broadcastable) == 1

    rval = term1 + term2

    debug_vals = get_debug_values(PlogP, omPlogOmP, term1, term2, rval)
    # NOTE(review): plp and olo are unpacked but never checked -- confirm
    # whether assertions on them were intended here.
    for plp, olo, t1, t2, rv in debug_vals:

        debug_assert(not contains_nan(t1))
        debug_assert(not contains_nan(t2))
        debug_assert(not contains_nan(rv))

    return rval
    def test_zero_image(self):
        """
        Test on an all-zero image that LeCunLCN does not produce NaN or Inf
        values (e.g. through a division by zero).
        """

        X = as_floatX(np.zeros((5, 32 * 32 * 3)))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32])
        # Apply the preprocessor; otherwise the assertion below would only
        # inspect the untouched input matrix.
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Exemple #9
    def test_channel(self):
        """
        Test that LeCunLCN works when given an explicit list of channels
        as argument.
        """

        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(5, 32 * 32 * 3))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32], channels=[1, 2])
        # Apply the preprocessor; otherwise the assertion below would only
        # inspect the untouched input matrix.
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Exemple #11
Exemple #12
    def test_zero_vector(self):
        """ Test that passing in the zero vector does not result in
            a divide by 0 """

        dataset = DenseDesignMatrix(X=as_floatX(np.zeros((1, 1))))

        # the settings of subtract_mean and use_norm are not relevant to
        # the test
        # std_bias = 0.0 is the only value for which there should be a risk
        # of failure occurring
        # NOTE(review): the constructor call below is cut off after its first
        # keyword argument, and the preprocessor is never applied to
        # `dataset`. The remaining arguments and the apply step must be
        # restored before this test can run.
        preprocessor = GlobalContrastNormalization(subtract_mean=True,


        result = dataset.get_design_matrix()

        assert isfinite(result)
Exemple #14
def test_rgb_yuv():
    """
    Test on a random image that the RGB_YUV preprocessor loads and works
    without any error and doesn't result in any nan or inf values
    """

    rng = np.random.RandomState([1, 2, 3])
    X = as_floatX(rng.randn(5, 32 * 32 * 3))

    axes = ['b', 0, 1, 'c']
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
    dataset.axes = axes
    preprocessor = RGB_YUV()
    # Apply the preprocessor; otherwise the assertion below would only
    # inspect the untouched input matrix.
    dataset.apply_preprocessor(preprocessor)
    result = dataset.get_design_matrix()

    assert isfinite(result)
    #variableType = "%d"
    #if outputType != "int":
    #    variableType = "%f"

    #np.savetxt(output_path, y, fmt=variableType)
    #return True

# Evaluation script (Python 2): load a test set, run the saved model on it
# and print per-column agreement between labels and predictions.

# load the testing set to get the labels
nParticles = 1000
test_data, test_labels = getTestSet(nParticles, 95, 100)
path = os.path.join(pylearn2.__path__[0], 'myStuff', 'nano_particle_1_best.pkl' )
pred_mat = predict(path, test_data)

# Only a warning is printed; the script carries on with non-finite values.
if not isfinite(pred_mat):
    print 'Not Finite!'

#print test_data
# Print label / prediction pairs for rows 0-4 of column i*3.
# NOTE(review): this indexes columns up to 3*(nParticles-1), while the loop
# further down indexes columns 0..nParticles-1 -- confirm the intended layout.
for i in xrange(nParticles):
    for j in xrange(5):
        print test_labels[j,i*3], pred_mat[j,i*3]

from scipy.stats import pearsonr
from sklearn.metrics import r2_score

# Per-column R^2 and Pearson correlation; the p-value (pvalue2) is not used.
for i in xrange(nParticles):
    R2 = r2_score(test_labels[:,i], pred_mat[:,i])
    pearR, pvalue2 = pearsonr(test_labels[:,i], pred_mat[:,i])
    print 'Particle %d:\tR^2:%.3f\tPearson R:%.3f'%(i,R2, pearR)
Exemple #17
    def train(self, dataset):
        # NOTE(review): the descriptive text below is not enclosed in docstring
        # quotes, and several statements further down are missing (they are
        # marked individually), so this method does not parse as written.
        Runs one epoch of SGD training on the specified dataset.

        dataset : Dataset
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if not isfinite(value):
                raise Exception("NaN in " + param.name)

        self.first = False
        # A random generator is only handed to the iterator when the
        # iteration mode is stochastic.
        rng = self.rng
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(self.model)

        # The iterator should be built from flat data specs, so it returns
        # flat, non-redundent tuples of data.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)

        # print 'space tuple', type(space_tuple), space_tuple
        from pylearn2.space import VectorSpace


        # we have 3 classes in dataset (active, inactive, middle), but only two softmax neurons
        # therefore VectorSpace has dim = 2 and an error will be raised when trying to convert
        # label to a vector of length 2. So we change the vector length for a while and convert
        # things manually.
        space_tuple = (space_tuple[0], VectorSpace(dim=3))

        # # #  END OF CHANGINGS # # #

        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if len(space_tuple) == 0:
            # No data will be returned by the iterator, and it is impossible
            # to know the size of the actual batch.
            # It is not decided yet what the right thing to do should be.
            raise NotImplementedError(
                "Unable to train with SGD, because "
                "the cost does not actually use data from the data set. "
                "data_specs: %s" % str(data_specs))
        flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

        # NOTE(review): this call is never closed; its remaining keyword
        # arguments are missing.
        iterator = dataset.iterator(mode=self.train_iteration_mode,
                                    return_tuple=True, rng=rng,

        # print 'flat data specs', type(flat_data_specs), flat_data_specs
        # flat data specs <type 'tuple'>
        # (CompositeSpace(Conv2DSpace(shape=(18, 3492), num_channels=1, axes=('c', 0, 1, 'b'), dtype=float64),
        #                             VectorSpace(dim=2, dtype=float64)),
        #                 'features', 'targets'))

        on_load_batch = self.on_load_batch
        for batch in iterator:
            # batch is a list with two numpy arrays: [sample, label]
            # self.params is a list with theano.tensor.sharedvar.TensorSharedVariables
            # theano.tensor.sharedvar.TensorSharedVariable.get_value() returns numpy.array
            # you can set value with theano.tensor.sharedvar.TensorSharedVariable.set_value(np.array_object)

            # this being here might cause troubles as batch is a nasty thing right now
            # NOTE(review): this loop has no body; the callback invocation is
            # missing.
            for callback in on_load_batch:


            self.print_params("on entering iteration", t.cyan)

            # GOOD ADVICE: if something is very wrong check it the following map is valid
            # TODO: check this
            # active     1    [[ 0. 1. 0. ]]    [[ 0. 1. ]]
            # nonactive  0    [[ 1. 0. 0. ]]    [[ 1. 0. ]]
            # middle    -1    [[ 0. 0. 1. ]]

            # Keep the original 3-wide label so it can be restored at the end
            # of the iteration.
            batch_1_on_load = batch[1].copy()

            # if label was '0'
            if (batch[1] == np.array((1, 0, 0))).all():
                # print "example: nonactive"
                batch = (batch[0], np.reshape(np.array((1, 0)), (1, 2)))
            # if label was '1'
            elif (batch[1] == np.array((0, 1, 0))).all():
                # print "example: active"
                batch = (batch[0], np.reshape(np.array((0, 1)), (1, 2)))
            # else we have to deal with unlabeled example
            # NOTE(review): the `else:` line introducing this branch is
            # missing, as are the statements that update the model and restore
            # the parameters (only their describing comments remain).
                # print "example: middle"
                parameters_on_load = self.get_parameters()

                # # # RUNNING AS INACTIVE SAMPLE # # #
                # print 'running as inactive'
                # setting label as inactive
                batch = (batch[0], np.reshape(np.array((1, 0)), (1, 2)))
                self.print_params("on entering inactive", t.blue)
                # updating the model
                self.print_params("after update inactive", t.green)
                # remember changing in parameters
                params_after_inactive = self.get_parameters()
                diff_inactive = self.get_difference(params_after_inactive, parameters_on_load)
                self.print_dict_of_params(diff_inactive, "difference")
                # bring back on load parameters
                self.print_params('after restore', t.yellow)
                # # # RUNNING AS ACTIVE SAMPLE # # #
                # print 'running as active'
                # setting label as active
                batch = (batch[0], np.reshape(np.array((0, 1)), (1, 2)))
                self.print_params('on entering active', t.blue)
                # updating the model
                self.print_params('after update active', t.green)
                # remember changing in parameters
                params_after_active = self.get_parameters()
                diff_active = self.get_difference(params_after_active, parameters_on_load)
                self.print_dict_of_params(diff_active, "difference")
                # bring back on load parameters
                self.print_params('after restore', t.yellow)
                # # # UPDATING THE MODEL # # #
                update_vector = self.calculate_update(diff_active, diff_inactive)
                self.print_dict_of_params(update_vector, "update vector")

            # end of if

            self.print_params('on leaving', t.red)

            # iterator might return a smaller batch if dataset size
            # isn't divisible by batch_size
            # Note: if data_specs[0] is a NullSpace, there is no way to know
            # how many examples would actually have been in the batch,
            # since it was empty, so actual_batch_size would be reported as 0.

            # OK, now lines below need batch in the previous size. So I just set the batch to what is used to be
            # before my wicked transformations.
            batch = (batch[0], batch_1_on_load)


            # # #  END OF CHANGINGS # # #
            actual_batch_size = flat_data_specs[0].np_batch_size(batch)
            # NOTE(review): actual_batch_size is not used afterwards, and the
            # loop below has no body.
            for callback in self.update_callbacks:

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if not isfinite(value):
                raise Exception("NaN in " + param.name)
        self.second = True
Exemple #18
def test_wiskott():
    """Load the Wiskott dataset and check that its X holds finite values."""
    wiskott = Wiskott()
    features = wiskott.X
    assert isfinite(features)
Exemple #19
# Convert every image listed in `paths` (relative to `base`) into a 3-channel
# float32 array processed channel-by-channel with `f`, and save it under
# `outdir` with an 'npy' extension.
for path in paths:
    # Progress indicator: print the counter every 100 files.
    if j % 100 == 0:
        print(j)
    try:
        raw_path = path
        path = base + '/' + path
        img = image.load(path)

        # A 4-channel image keeps only its first three channels.
        if len(img.shape) == 3 and img.shape[2] == 4:
            img = img[:, :, 0:3]
        # Add a leading axis of length 1 so `f` receives a 4D array.
        img = img.reshape(*([1] + list(img.shape))).astype('float32')
        channels = [f(img[:, :, :, i:i + 1]) for i in xrange(img.shape[3])]
        # A single-channel image is replicated into three channels.
        if len(channels) != 3:
            assert len(channels) == 1
            channels = [channels[0]] * 3
        img = np.concatenate(channels, axis=3)
        img = img[0, :, :, :]

        assert isfinite(img)

        # Swap the three-character extension for 'npy'.
        path = outdir + '/' + raw_path
        path = path[0:-3]
        assert path.endswith('.')
        path = path + 'npy'
        np.save(path, img)
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(e)
    j += 1
Exemple #20
    def train(self, dataset):
        # NOTE(review): the descriptive text below is not enclosed in docstring
        # quotes, and the two callback loops further down have no body, so
        # this method does not parse as written.
        Runs one epoch of SGD training on the specified dataset.

        dataset : Dataset
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if not isfinite(value):
                raise Exception("NaN in " + param.name)

        self.first = False
        # A random generator is only handed to the iterator when the
        # iteration mode is stochastic.
        rng = self.rng
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(self.model)

        # The iterator should be built from flat data specs, so it returns
        # flat, non-redundent tuples of data.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if len(space_tuple) == 0:
            # No data will be returned by the iterator, and it is impossible
            # to know the size of the actual batch.
            # It is not decided yet what the right thing to do should be.
            raise NotImplementedError("Unable to train with SGD, because "
                    "the cost does not actually use data from the data set. "
                    "data_specs: %s" % str(data_specs))
        flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

        iterator = dataset.iterator(mode=self.train_iteration_mode,
                data_specs=flat_data_specs, return_tuple=True,
                rng = rng, num_batches = self.batches_per_iter)

        on_load_batch = self.on_load_batch
        for batch in iterator:
            # NOTE(review): this loop has no body, and no parameter update on
            # `batch` is visible in this iteration -- statements are missing.
            for callback in on_load_batch:
            # iterator might return a smaller batch if dataset size
            # isn't divisible by batch_size
            # Note: if data_specs[0] is a NullSpace, there is no way to know
            # how many examples would actually have been in the batch,
            # since it was empty, so actual_batch_size would be reported as 0.
            actual_batch_size = flat_data_specs[0].np_batch_size(batch)
            # NOTE(review): actual_batch_size is not used afterwards, and the
            # loop below has no body.
            for callback in self.update_callbacks:

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if not isfinite(value):
                raise Exception("NaN in " + param.name)
Exemple #21
    def add_patch(self, patch, rescale=True, recenter=True, activation=None,
                  warn_blank_patch = True):
        """
        Adds an image patch to the `PatchViewer`.

        Patches are added left to right, top to bottom. If this method is
        called when the `PatchViewer` is already full, it will clear the
        viewer and start adding patches at the upper left again.

        Parameters
        ----------
        patch : ndarray
            If this `PatchViewer` is in color (controlled by the `is_color`
            parameter of the constructor) `patch` should be a 3D ndarray, with
            the first axis being the rows of the image, the second axis
            being the columns of the image, and the third being RGB color
            If this `PatchViewer` is grayscale, `patch` should be either a
            3D ndarray with the third axis having length 1, or a 2D ndarray.

            The values of the ndarray should be floating point. 0 is displayed
            as gray. Negative numbers are displayed as blacker. Positive
            numbers are displayed as whiter. See the `rescale` parameter for
            more detail. This color convention was chosen because it is useful
            for displaying weight matrices.
        rescale : bool
            If True, the maximum absolute value of a pixel in `patch` sets the
            scale, so that abs(patch).max() is absolute white and
            -abs(patch).max() is absolute black.
            If False, `patch` should lie in [-1, 1].
        recenter : bool
            If True (default), if `patch` has smaller dimensions than were
            specified to the constructor's `patch_shape` argument, we will
            display the patch in the center of the area allocated to it in
            the display grid.
            If False, we will raise an exception if `patch` is not exactly
            the specified shape.
        activation : scalar, tuple or list, optional
            A single value is treated as a one-element tuple. For each entry
            `amt` at index `shell` with `amt >= 0`, a one-pixel frame is
            drawn `shell + 1` pixels outside the patch in the color
            `amt * self.colors[shell]`.
        warn_blank_patch : bool
            If True, warn when every pixel of `patch` has the same value and
            either `rescale` is set or that value is 0.

        Raises
        ------
        ValueError
            If the patch has the wrong number of channels, does not fit the
            configured patch shape, or (with `rescale` False) has values
            outside [-1, 1].
        """
        if warn_blank_patch and \
               (patch.min() == patch.max()) and \
               (rescale or patch.min() == 0.0):
            warnings.warn("displaying totally blank patch")

        if self.is_color:
            assert patch.ndim == 3
            if not (patch.shape[-1] == 3):
                raise ValueError("Expected color image to have shape[-1]=3, "
                                 "but shape[-1] is " + str(patch.shape[-1]))
        else:
            assert patch.ndim in [2, 3]
            if patch.ndim == 3:
                if patch.shape[-1] != 1:
                    raise ValueError("Expected 2D patch or 3D patch with 1 "
                                     "channel, but got patch with shape " +
                                     str(patch.shape))

        if recenter:
            assert patch.shape[0] <= self.patch_shape[0]
            if patch.shape[1] > self.patch_shape[1]:
                raise ValueError("Given patch of width %d but only patches up"
                                 " to width %d fit" \
                                 % (patch.shape[1], self.patch_shape[1]))
            # Split the spare rows/columns between both sides. Floor division
            # keeps the paddings integral so they can be used as slice bounds.
            rs_pad = (self.patch_shape[0] - patch.shape[0]) // 2
            re_pad = self.patch_shape[0] - rs_pad - patch.shape[0]
            cs_pad = (self.patch_shape[1] - patch.shape[1]) // 2
            ce_pad = self.patch_shape[1] - cs_pad - patch.shape[1]
        else:
            if patch.shape[0:2] != self.patch_shape:
                raise ValueError('Expected patch with shape %s, got %s' %
                                 (str(self.patch_shape), str(patch.shape)))
            rs_pad = 0
            re_pad = 0
            cs_pad = 0
            ce_pad = 0

        # Work on a copy so the caller's array is not modified.
        temp = patch.copy()

        assert isfinite(temp)

        if rescale:
            scale = np.abs(temp).max()
            if scale > 0:
                temp /= scale
        else:
            if temp.min() < -1.0 or temp.max() > 1.0:
                raise ValueError('When rescale is set to False, pixel values '
                                 'must lie in [-1,1]. Got [%f, %f].'
                                 % (temp.min(), temp.max()))
        # Map [-1, 1] onto [0, 1].
        temp *= 0.5
        temp += 0.5

        assert temp.min() >= 0.0
        assert temp.max() <= 1.0

        if self.cur_pos == (0, 0):
            # Starting again at the upper left: wipe the previous contents.
            # NOTE(review): assumes the viewer exposes a `clear()` method --
            # confirm against the class definition.
            self.clear()

        # Pixel bounds of the grid cell at the current position.
        rs = self.pad[0] + (self.cur_pos[0] *
                            (self.patch_shape[0] + self.pad[0]))
        re = rs + self.patch_shape[0]

        assert self.cur_pos[1] <= self.grid_shape[1]
        cs = self.pad[1] + (self.cur_pos[1] *
                            (self.patch_shape[1] + self.pad[1]))
        ce = cs + self.patch_shape[1]

        assert ce <= self.image.shape[1], (ce, self.image.shape[1])

        # Zero out any entry that is not strictly positive.
        temp *= (temp > 0)

        if len(temp.shape) == 2:
            temp = temp[:, :, np.newaxis]

        assert ce-ce_pad <= self.image.shape[1]
        self.image[rs + rs_pad:re - re_pad, cs + cs_pad:ce - ce_pad, :] = temp

        if activation is not None:
            if (not isinstance(activation, tuple) and
               not isinstance(activation, list)):
                activation = (activation,)

            for shell, amt in enumerate(activation):
                # Each frame needs room inside the padding around the patch.
                assert 2 * shell + 2 < self.pad[0]
                assert 2 * shell + 2 < self.pad[1]
                if amt >= 0:
                    act = amt * np.asarray(self.colors[shell])
                    # Top edge.
                    self.image[rs + rs_pad - shell - 1,
                               cs + cs_pad - shell - 1:
                               ce - ce_pad + 1 + shell,
                               :] = act
                    # Bottom edge.
                    self.image[re - re_pad + shell,
                               cs + cs_pad - 1 - shell:
                               ce - ce_pad + 1 + shell,
                               :] = act
                    # Left edge.
                    self.image[rs + rs_pad - 1 - shell:
                               re - re_pad + 1 + shell,
                               cs + cs_pad - 1 - shell,
                               :] = act
                    # Right edge.
                    self.image[rs + rs_pad - shell - 1:
                               re - re_pad + shell + 1,
                               ce - ce_pad + shell,
                               :] = act

        # Advance left to right, then top to bottom, wrapping back to the
        # upper left once the grid is full.
        self.cur_pos = (self.cur_pos[0], self.cur_pos[1] + 1)
        if self.cur_pos[1] == self.grid_shape[1]:
            self.cur_pos = (self.cur_pos[0] + 1, 0)
            if self.cur_pos[0] == self.grid_shape[0]:
                self.cur_pos = (0, 0)
    def __call__(self, X, termination_criterion, initial_H=None):
        """
            Compute for each sample its representation.

            Parameters
            ----------
            X : Sample matrix. numpy.ndarray
            termination_criterion: pylearn TerminationCriterion object
            initial_H: Numpy matrix. Used as the starting point only when its
                shape is (X.shape[0], number of latent topics); otherwise it
                is ignored and H is initialised from cluster assignments.

            Returns
            -------
            H: H matrix with the representation.

            Raises
            ------
            Exception
                If H contains NaN or Inf before or after an update.
        """
        dataset_size = X.shape[0]

        H = None
        if initial_H is not None:
            # The shape test has to look at initial_H: H is still None here.
            if (initial_H.shape[0] == dataset_size and
                    initial_H.shape[1] == self._num_latent_topics):
                H = initial_H

        if H is None:
            # Compile the cluster-assignment function once and cache it on
            # the instance.
            if not hasattr(self, "predict_clusters"):
                x = tensor.matrix(name="x")
                kxb = self._kernel(x, self._budget)
                a = (self.W * tensor.dot(self.W, self._kernel_matrix)).sum(axis=1) \
                    - 2.0 * tensor.dot(kxb, self.W.T)
                b = tensor.argmin(a, axis=1)
                self.predict_clusters = function([x], b)

            # One row per sample of X: a constant 0.2 everywhere, plus 1.0 in
            # the column of the cluster assigned to that sample.
            H = .2 * numpy.ones((dataset_size, self._num_latent_topics)).astype(self.W.dtype)
            clusters = self.predict_clusters(X)
            for i, cluster in enumerate(clusters):
                H[i, cluster] += 1.0

        # Compile the update function for H once and cache it on the instance.
        if not hasattr(self, "predict_representation"):
            h = tensor.matrix(name="h")
            x = tensor.matrix(name="x")
            kxb = self._kernel(x, self._budget)
            # Positive and negative parts of the kernel values.
            kxbp = 0.5 * (numpy.abs(kxb) + kxb)
            kxbn = 0.5 * (numpy.abs(kxb) - kxb)
            a = tensor.dot(h, tensor.dot(self.W, self.kbn))
            b = tensor.dot(kxbp + a, self.W.T)
            c = tensor.dot(h, tensor.dot(self.W, self.kbp))
            d = tensor.dot(kxbn + c, self.W.T)
            e = h * tensor.sqrt(b / (d + self.lambda_vals))
            # Clamp the result from below at eps.
            f = tensor.maximum(e, eps)
            self.predict_representation = function([x, h], f)

        keep_training = True
        if not isfinite(H):
            raise Exception("NaN or Inf in H")

        # At least one update is always performed; the termination criterion
        # is consulted after each one.
        while keep_training:
            H = self.predict_representation(X, H)
            if not isfinite(H):
                raise Exception("NaN or Inf in H")
            keep_training = termination_criterion.continue_learning(self)

        return H
    def __init__(self, kernel, data, W,
                 lambda_vals=.0, H=None,
                 termination_criterion=None, kernel_matrix=None):
            # NOTE(review): the descriptive text below is not enclosed in
            # docstring quotes, and parts of the body are missing or out of
            # order (marked individually), so this method does not parse as
            # written.
            Convex non-negative matrix factorization.
            This model compute the CNMF factorization of a dataset.

            kernel: Object that is going to compute the kernel between vectors.
                The object must follow the interface in kernel_two_kay_MF.kernels.
            data: Numpy matrix.
            W: Numpy matrix.
            lambda_vals: Regularization to avoid division by zero.
            H: Numpy matrix.
            termination_criterion: instance of \
                pylearn2.termination_criteria.TerminationCriterion, optional
            kernel_matrix: Numpy matrix. Represents dot product in the feature space of the data.
                If this matrix is not provided, it is going to be computed.


        self._kernel = kernel

        self._data = data
        if not isfinite(self._data):
            raise Exception("NaN or Inf in data")

        # NOTE(review): the text above says the kernel matrix is computed when
        # it is not provided, but no such branch is present here, so
        # self._kernel_matrix is only set when the argument is given.
        if kernel_matrix is not None:
            assert kernel_matrix.shape[0] == self._data.shape[0]
            self._kernel_matrix = kernel_matrix
            if not isfinite(self._kernel_matrix):
                raise Exception("NaN or Inf in kernel_matrix")

        self.W = W
        if not isfinite(self.W):
            raise Exception("NaN or Inf in W")

        assert self.W.shape[1] == self._data.shape[0]

        self._data_size, self._num_features = self._data.shape
        self._num_latent_topics, _ = self.W.shape

        # From here on self.W is a shared variable rather than the raw array.
        self.W = sharedX(self.W, name="W", borrow=True)

        # NOTE(review): the H initialisation below is garbled -- an unclosed
        # sharedX( call, an assignment placed after a raise, and no visible
        # `else:` before the random initialisation. The intended branching
        # must be restored from the original source.
        if H is not None:
            if H.shape[1] != self._num_latent_topics or H.shape[0] != self._data_size:
                self.H = sharedX(
                if not isfinite(H):
                    raise Exception("NaN or Inf in H")
                    self.H = sharedX(H, name="H", borrow=True)
            self.H = sharedX(
                numpy.random.rand(self._data_size, self._num_latent_topics).astype(self.W.dtype),
                name="H", borrow=True)

        self._params = [self.W, self.H]

        self.input_space = VectorSpace(self._num_features)
        self.output_space = VectorSpace(self._num_latent_topics)

        self.lambda_vals = lambda_vals


        self.termination_criterion = termination_criterion
Exemple #24
