Code Example #1
File: convRBM.py Project: colinsongf/convRBM
    def score_samples(self, v):
        """Compute the pseudo-likelihood of v.

        Parameters
        ----------
        v : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer.

        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy to likelihood).
        """
        rng = check_random_state(self.random_state)
        fe = self._free_energy(v)

        if issparse(v):
            v_ = v.toarray()
        else:
            v_ = v.copy()
        i_ = rng.randint(0, v.shape[1], v.shape[0])
        v_[np.arange(v.shape[0]), i_] = 1 - v_[np.arange(v.shape[0]), i_]
        fe_ = self._free_energy(v_)
        return v.shape[1] * logistic_sigmoid(fe_ - fe, log=True)
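This snippet tracks scikit-learn's BernoulliRBM.score_samples, so the stock estimator illustrates the intended call pattern. A minimal usage sketch, assuming scikit-learn is installed (the data and hyperparameters are illustrative):

import numpy as np
from sklearn.neural_network import BernoulliRBM

# Binary visible units, as the flip-one-bit pseudo-likelihood proxy assumes
X = (np.random.RandomState(0).rand(100, 20) > 0.5).astype(np.float64)
rbm = BernoulliRBM(n_components=16, n_iter=5, random_state=0).fit(X)
pll = rbm.score_samples(X)  # shape (100,): one pseudo-log-likelihood per row
print(pll.mean())           # less negative means a better fit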
Code Example #2
File: rbm.py Project: Mittens2/PT-for-RBMs
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.
        Parameters
        ----------
        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).
        Returns
        -------
        pseudo_likelihood : array-like, shape (n_temperatures, n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).
        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "components_")

        v = check_array(X, accept_sparse='csr')
        rng = check_random_state(self.random_state)

        # Randomly corrupt one feature in each sample in v.
        ind = (np.arange(v.shape[0]), rng.randint(0, v.shape[1], v.shape[0]))
        if issparse(v):
            data = -2 * v[ind] + 1
            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
        else:
            v_ = v.copy()
            v_[ind] = 1 - v_[ind]

        fe = self._free_energy(v, 0)
        fe_ = self._free_energy(v_, 0)
        return v.shape[1] * log_logistic(fe_ - fe)
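log_logistic above is the numerically stable log-sigmoid helper (in older scikit-learn releases, sklearn.utils.extmath.log_logistic). A minimal NumPy equivalent, shown only to make the math explicit (a sketch, not sklearn's implementation):

import numpy as np

def log_logistic_sketch(x):
    # log sigma(x) = -log(1 + exp(-x)); branch on sign to avoid overflow
    out = np.empty_like(x, dtype=np.float64)
    pos = x >= 0
    out[pos] = -np.log1p(np.exp(-x[pos]))
    out[~pos] = x[~pos] - np.log1p(np.exp(x[~pos]))
    return out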
Code Example #3
    def init_weights(self, X):
        """If the user specifies the training dataset, it can be useful to
        initialize the visible biases according to the empirical expected
        feature values of the training data.

        TODO: Generalize this biasing. Currently, the biasing is only
        written for the case of binary RBMs.
        """
        eps = self.thresh

        # Mean across samples
        if issparse(X):
            probVis = sp.csr_matrix.mean(X, axis=0)
        else:
            probVis = np.mean(X, axis=0)

        # Safe for CSR / sparse matrices? Do we need it if we use softmax?
        probVis[probVis < eps] = eps  # Some regularization (avoid Inf/NaN)
        # probVis[probVis > (1.0 - eps)] = (1.0 - eps)
        self.v_bias = np.log(probVis /
                             (1.0 - probVis))  # Biasing as the log-proportion

        # (does not work)
        # self.v_bias = softmax(probVis)

        # Initialize weights with Gaussian noise (Fortran order); the momentum
        # buffer below starts at zero.
        self.W = np.asarray(self.random_state.normal(
            0, self.sigma, (self.n_components, X.shape[1])),
                            order='F')

        self.dW_prev = np.zeros_like(self.W)
        self.W2 = self.W * self.W
        return 0
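The bias formula is the logit of the empirical feature means, so sigmoid(v_bias) reproduces the training marginals; this is the standard initialization for binary RBMs. A standalone check (note it clamps both tails, whereas the snippet above only clamps the lower one; its commented-out line gestures at the upper clamp):

import numpy as np
from scipy.special import expit  # the logistic function

X = np.array([[1, 0, 1], [1, 1, 0], [1, 0, 0], [1, 0, 1]], dtype=float)
p = np.clip(X.mean(axis=0), 1e-3, 1 - 1e-3)  # avoid log(0) and division by 0
v_bias = np.log(p / (1.0 - p))
print(np.allclose(expit(v_bias), p))  # True: logit and sigmoid are inverses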
Code Example #4
File: convRBM.py Project: jiajunshen/convRBM
    def score_samples(self, v):
        """Compute the pseudo-likelihood of v.

        Parameters
        ----------
        v : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer.

        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy to likelihood).
        """
        rng = check_random_state(self.random_state)
        fe = self._free_energy(v)

        if issparse(v):
            v_ = v.toarray()
        else:
            v_ = v.copy()
        i_ = rng.randint(0, v.shape[1], v.shape[0])
        v_[np.arange(v.shape[0]), i_] = 1 - v_[np.arange(v.shape[0]), i_]
        fe_ = self._free_energy(v_)
        return v.shape[1] * logistic_sigmoid(fe_ - fe, log=True)
Code Example #5
 def decode(self, vec, pretty=False, strict=True):
     # TODO: Whether we should use 'strict' mode depends on whether the model
     # we got this vector from does softmax sampling of visibles. Anywhere this
     # is called on fantasy samples, we should use the model to set this param.
     if issparse(vec):
         vec = vec.toarray().reshape(-1)
     assert vec.shape == (self.nchars * self.maxlen, )
     chars = []
     for position_index in range(self.maxlen):
         # Hack - insert a tab between name parts in binomial mode
         if (isinstance(self, BinomialShortTextCodec) and pretty
                 and position_index == self.maxlen // 2):
             chars.append('\t')
         subarr = vec[position_index * self.nchars:(position_index + 1) *
                      self.nchars]
         if np.count_nonzero(subarr) != 1 and strict:
             char = self.MYSTERY
         else:
             char_index = np.argmax(subarr)
             char = self.alphabet[char_index]
             if pretty and char == self.FILLER:
                 # Hack
                 char = ' ' if isinstance(self, BinomialShortTextCodec) else ''
         chars.append(char)
     return ''.join(chars)
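decode assumes a flat one-hot layout: maxlen positions, each a contiguous block of nchars indicators, with argmax recovering the character of each block. A hypothetical round-trip sketch (alphabet, encode, and the filler character are illustrative stand-ins, not the codec's API):

import numpy as np

alphabet = list("abc_")  # '_' standing in for the FILLER character
nchars, maxlen = len(alphabet), 3

def encode(s):
    vec = np.zeros(nchars * maxlen)
    for i, ch in enumerate(s):
        vec[i * nchars + alphabet.index(ch)] = 1.0
    return vec

blocks = encode("ab_").reshape(maxlen, nchars)  # one one-hot row per position
print(''.join(alphabet[j] for j in blocks.argmax(axis=1)))  # prints: ab_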
Code Example #6
File: RBM.py Project: CalculatedContent/char-rbm
 def corrupt(self, v):
     # Randomly corrupt one feature in each sample in v.
     ind = (np.arange(v.shape[0]),
            self.rng_.randint(0, v.shape[1], v.shape[0]))
     if issparse(v):
         data = -2 * v[ind] + 1
         v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
     else:
         v_ = v.copy()
         v_[ind] = 1 - v_[ind]
     return v_, None
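The sparse branch flips bits without densifying: at each chosen (row, column) it adds 1 - 2*v, which sends 0 to 1 and 1 to 0. A quick check of that identity (assumes scipy; shapes are illustrative):

import numpy as np
import scipy.sparse as sp

rng = np.random.RandomState(0)
v = sp.csr_matrix((rng.rand(5, 8) > 0.5).astype(float))
ind = (np.arange(v.shape[0]), rng.randint(0, v.shape[1], v.shape[0]))
data = -2 * v[ind] + 1  # +1 where v is 0, -1 where v is 1
v_ = v + sp.csr_matrix((np.asarray(data).ravel(), ind), shape=v.shape)
assert np.array_equal(v_.toarray()[ind], 1 - v.toarray()[ind])  # bits flipped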
Code Example #7
 def _corrupt_data(self, v):
     """Randomly corrupt one feature in each sample in v."""
     self.random_state = check_random_state(self.random_state)
     ind = (np.arange(v.shape[0]),
            self.random_state.randint(0, v.shape[1], v.shape[0]))
     if issparse(v):
         data = -2 * v[ind] + 1
         v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
     else:
         v_ = v.copy()
         v_[ind] = 1 - v_[ind]
     return v, v_
Code Example #8
    def fit(self, X, y=None, sample_weight=None):
        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)
        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        rnd = check_random_state(self.random_state)
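        # The targets are pure noise: the trees can only find structure in X,
        # which is what makes the resulting embedding unsupervised.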
        y = rnd.uniform(size=X.shape[0])
        super(RandomTreesEmbeddingUnsupervised,
              self).fit(X, y, sample_weight=sample_weight)
        return self
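The same construction ships in scikit-learn as ensemble.RandomTreesEmbedding, which this class presumably extends. Its public API shows what the fit above enables; a usage sketch with illustrative sizes:

import numpy as np
from sklearn.ensemble import RandomTreesEmbedding

X = np.random.RandomState(0).rand(50, 4)
embedder = RandomTreesEmbedding(n_estimators=10, random_state=0).fit(X)
coding = embedder.transform(X)  # sparse one-hot over the forest's leaves
print(coding.shape)             # (50, total number of leaves)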
Code Example #9
    def fit_transform(self, X, y=None, sample_weight=None):
        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)
        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        self.fit(X, y, sample_weight=sample_weight)
        coding = self.apply(X)

        if self.use_one_hot:
            self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output)
            coding = self.one_hot_encoder_.fit_transform(coding)

        return coding
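The coding step is generic: apply(X) yields one leaf index per (sample, tree), and one-hot encoding turns that integer matrix into a sparse binary code. A sketch using a plain RandomForestRegressor (an assumption for illustration; any fitted forest exposing .apply behaves the same way):

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder

rng = np.random.RandomState(0)
X, y = rng.rand(30, 3), rng.rand(30)
forest = RandomForestRegressor(n_estimators=5, random_state=0).fit(X, y)
leaves = forest.apply(X)                        # shape (30, 5), leaf indices
coding = OneHotEncoder().fit_transform(leaves)  # sparse, one 1 per tree block
print(coding.nnz == 30 * 5)                     # True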
Code Example #10
    def mh_update(self, v, h):
        """update TAP hidden magnetizations, to second order"""
        a = safe_sparse_dot(v, self.W.T) + self.h_bias

        v_fluc = (v - (np.multiply(v, v)))
        #a += (v-v*v).dot((self.W2).T)*(0.5-h)

        if issparse(h):
            h_half = (0.5 - h.todense())
        else:
            h_half = (0.5 - h)

        a += np.multiply(safe_sparse_dot(v_fluc, self.W2.T), h_half)

        return expit(a, out=a)
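Written densely, the update above computes a = v W^T + h_bias + [(v - v*v)(W*W)^T] * (0.5 - h) and then h = sigmoid(a), with * elementwise and W2 = W*W (as set in Code Example #3). A standalone restatement with illustrative shapes:

import numpy as np
from scipy.special import expit  # logistic sigmoid

rng = np.random.RandomState(0)
W, h_bias = rng.randn(4, 6), rng.randn(4)
v, h = rng.rand(3, 6), rng.rand(3, 4)  # magnetizations in (0, 1)
a = v @ W.T + h_bias + ((v - v * v) @ (W * W).T) * (0.5 - h)
h_new = expit(a)  # second-order TAP step for the hidden magnetizations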
Code Example #11
    def mv_update(self, v, h):
        """update TAP visbile magnetizations, to second order"""

        # a = np.dot(h, self.W) + self.v_bias
        a = safe_sparse_dot(h, self.W) + self.v_bias

        h_fluc = h - np.multiply(h, h)
        #a += h_fluc.dot(self.W2)*(0.5-v)

        # 0.5-v is elementwise => dense
        if issparse(v):
            v_half = (0.5 - v.todense())
        else:
            v_half = (0.5 - v)

        a += np.multiply(safe_sparse_dot(h_fluc, self.W2), v_half)
        return expit(a, out=a)
Code Example #12
def expanded_X_y_sample_weights(X,
                                y_proba,
                                expand_factor=10,
                                sample_weight=None,
                                shuffle=True,
                                random_state=None):
    """
    scikit-learn can't optimize cross-entropy directly if target
    probability values are not indicator vectors.
    As a workaround this function expands the dataset according to
    target probabilities. ``expand_factor=None`` means no dataset
    expansion.
    """
    rng = check_random_state(random_state)
    if expand_factor:
        if sample_weight is not None:
            X, y, sample_weight = zip(
                *expand_dataset(X,
                                y_proba,
                                factor=expand_factor,
                                random_state=rng,
                                extra_arrays=[sample_weight]))
        else:
            X, y = zip(*expand_dataset(
                X, y_proba, factor=expand_factor, random_state=rng))
    else:
        y = y_proba.argmax(axis=1)

    if isinstance(X, (list, tuple)) and len(X) and issparse(X[0]):
        X = vstack(X)

    if shuffle:
        if sample_weight is not None:
            X, y, sample_weight = _shuffle(X,
                                           y,
                                           sample_weight,
                                           random_state=rng)
        else:
            X, y = _shuffle(X, y, random_state=rng)
    return X, y, sample_weight
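The expansion trick replaces a row whose soft target is, say, [0.2, 0.8] with expand_factor copies carrying hard labels drawn in those proportions, so an ordinary classifier approximately optimizes the intended cross-entropy. A hypothetical miniature of the idea (expand_rows is illustrative, not the library's expand_dataset):

import numpy as np

def expand_rows(X, y_proba, factor, rng):
    for x, p in zip(X, y_proba):
        for label in rng.choice(len(p), size=factor, p=p):
            yield x, label  # `factor` hard labels per original row

rng = np.random.RandomState(0)
X = np.array([[0.0], [1.0]])
y_proba = np.array([[0.2, 0.8], [1.0, 0.0]])
Xe, ye = zip(*expand_rows(X, y_proba, factor=10, rng=rng))
print(np.bincount(np.asarray(ye)[:10], minlength=2))  # roughly [2 8]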
Code Example #13
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.

        Parameters
        ----------
        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).

        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).

        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "components_")

        v = check_array(X, accept_sparse='csr')
        rng = check_random_state(self.random_state)

        # Randomly corrupt one feature in each sample in v.
        ind = (np.arange(v.shape[0]),
               rng.randint(0, v.shape[1], v.shape[0]))
        if issparse(v):
            data = -2 * v[ind] + 1
            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
        else:
            v_ = v.copy()
            v_[ind] = 1 - v_[ind]

        fe = self._free_energy(v)
        fe_ = self._free_energy(v_)
        return v.shape[1] * log_logistic(fe_ - fe)
Code Example #14
    def fit(self, X, y, sample_weight=None, sample_var=None):
        """
        Fit the forest.

        Parameters
        ----------
        X : ndarray or scipy.sparse matrix, (n_samples, n_features)
            Input data.

        y : array, shape (n_samples, n_outputs)
            Target. Will be cast to X's dtype if necessary

        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample. Weights will not be normalized. The weighted square loss
            will be minimized by the forest.

        sample_var : numpy array of shape [n_samples, n_outputs]
            Variance of composite samples (not used here; exists for API
            compatibility).

        Returns
        -------
        self
        """

        # Validate or convert input data
        X = check_array(X, accept_sparse="csc", dtype=DTYPE)
        y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None)
        if sample_weight is not None:
            sample_weight = check_array(sample_weight, ensure_2d=False)
        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        # Remap output
        self.n_features_ = X.shape[1]

        y = np.atleast_1d(y)
        if y.ndim == 2 and y.shape[1] == 1:
            warn(
                "A column-vector y was passed when a 1d array was"
                " expected. Please change the shape of y to "
                "(n_samples,), for example using ravel().",
                DataConversionWarning,
                stacklevel=2)

        if y.ndim == 1:
            # reshape is necessary to preserve data contiguity, which
            # [:, np.newaxis] would not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        y, expanded_class_weight = self._validate_y_class_weight(y)

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        if expanded_class_weight is not None:
            if sample_weight is not None:
                sample_weight = sample_weight * expanded_class_weight
            else:
                sample_weight = expanded_class_weight

        if self.subsample_fr == 'auto':
            self.subsample_fr_ = (X.shape[0] /
                                  2)**(1 - 1 /
                                       (2 * X.shape[1] + 2)) / (X.shape[0] / 2)
        else:
            self.subsample_fr_ = self.subsample_fr

        # Check parameters
        self._validate_estimator()

        random_state = check_random_state(self.random_state)

        if not self.warm_start or not hasattr(self, "estimators_"):
            # Free allocated memory, if any
            self.estimators_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'len(estimators_)=%d when warm_start==True' %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
        else:
            if self.warm_start and len(self.estimators_) > 0:
                # We draw from the random state to get the random state we
                # would have got if we hadn't used a warm_start.
                random_state.randint(MAX_INT, size=len(self.estimators_))

            trees = [
                self._make_estimator(append=False, random_state=random_state)
                for i in range(n_more_estimators)
            ]

            # Parallel loop: we prefer the threading backend as the Cython code
            # for fitting the trees is internally releasing the Python GIL
            # making threading more efficient than multiprocessing in
            # that case. However, for joblib 0.12+ we respect any
            # parallel_backend contexts set at a higher level,
            # since correctness does not rely on using threads.
            self.n_slices = int(np.ceil((self.n_estimators)**(1 / 2)))
            self.slice_len = int(np.ceil(self.n_estimators / self.n_slices))
            s_inds = []
            # TODO. This slicing should ultimately be done inside the parallel function
            # so that we don't need to create a matrix of size roughly n_samples * n_estimators
            for it in range(self.n_slices):
                # Draw from the seeded random_state (not np.random) so results
                # are reproducible under self.random_state.
                half_sample_inds = random_state.choice(X.shape[0],
                                                       X.shape[0] // 2,
                                                       replace=False)
                for _ in np.arange(
                        it * self.slice_len,
                        min((it + 1) * self.slice_len, self.n_estimators)):
                    s_inds.append(half_sample_inds[random_state.choice(
                        X.shape[0] // 2,
                        int(np.ceil(self.subsample_fr_ * (X.shape[0] // 2))),
                        replace=False)])
            trees = Parallel(
                n_jobs=self.n_jobs,
                verbose=self.verbose,
                **_joblib_parallel_args(prefer='threads'))(
                    delayed(_parallel_add_trees)(t,
                                                 self,
                                                 X,
                                                 y,
                                                 sample_weight,
                                                 s_inds[i],
                                                 i,
                                                 len(trees),
                                                 verbose=self.verbose)
                    for i, t in enumerate(trees))
            # Collect newly grown trees
            self.estimators_.extend(trees)

        return self
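The 'auto' branch above chooses the subsample fraction so that each tree sees s = (n/2)**(1 - 1/(2d + 2)) of the n/2 points in its half-sample, a slowly shrinking rate of the kind used in analyses of subsampled (honest) forests. A quick numeric reading of the formula (values are illustrative):

n_samples, n_features = 10000, 5
half = n_samples // 2
subsample_fr = half ** (1 - 1 / (2 * n_features + 2)) / half
print(subsample_fr, int(subsample_fr * half))  # fraction, per-tree sample size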