Example 1
def test_np_matrix():
    """
    Confirm that input validation code does not return np.matrix
    """
    X = np.arange(12).reshape(3, 4)

    assert_false(isinstance(as_float_array(X), np.matrix))
    assert_false(isinstance(as_float_array(np.matrix(X)), np.matrix))
    assert_false(isinstance(as_float_array(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csr(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csc(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(sp.csr_matrix(X)), np.matrix))

    assert_false(isinstance(safe_asarray(X), np.matrix))
    assert_false(isinstance(safe_asarray(np.matrix(X)), np.matrix))
    assert_false(isinstance(safe_asarray(sp.lil_matrix(X)), np.matrix))

    assert_true(atleast2d_or_csr(X, copy=False) is X)
    assert_false(atleast2d_or_csr(X, copy=True) is X)
    assert_true(atleast2d_or_csc(X, copy=False) is X)
    assert_false(atleast2d_or_csc(X, copy=True) is X)
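For context, a minimal sketch of the behaviour this test pins down, assuming the historical `sklearn.utils` import path of that era:

import numpy as np
from sklearn.utils import safe_asarray  # historical API, removed in later sklearn releases

M = np.matrix(np.arange(12).reshape(3, 4))
A = safe_asarray(M)
print(type(A))   # numpy.ndarray, never np.matrix
print(A.shape)   # (3, 4), the shape is preserved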
Example 3
def entropy(*args):
    """Empirical Shannon entropy (in bits) of the joint distribution of args."""
    xy = list(zip(*args))
    # empirical probability of each distinct observation
    proba = [float(xy.count(c)) / len(xy) for c in dict.fromkeys(xy)]
    safe_asarray(xy)  # input validation only; the return value is unused
    # p * log2(p) is taken as 0 when p == 0, which avoids nan/inf terms
    entropy = -np.sum([p * np.log2(p) if p > 0 else 0. for p in proba])
    assert_all_finite(entropy)
    return entropy
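As a quick sanity check of the entropy above (illustrative values, not from the original): a fair binary source carries one bit, a constant source zero.

print(entropy([0, 1, 0, 1]))  # 1.0 bit for two equiprobable symbols
print(entropy([7, 7, 7, 7]))  # -0.0, i.e. zero bits for a constant source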
Example 4
def test_safe_asarray():
    """Test that array dtype conversion works."""
    # Test with sparse arrays
    X = sp.csc_matrix(np.arange(4, dtype=np.float))
    Y = safe_asarray(X)
    assert_true(Y.dtype == np.float)
    # Check that no copy has been performed
    Y.data[0] = 7  # value not in original array
    assert_equal(X.data[0], Y.data[0])

    Y = safe_asarray(X, dtype=np.int)
    assert_equal(Y.data.dtype, np.int)

    # Test with dense arrays
    X = np.arange(4, dtype=np.float)
    Y = safe_asarray(X)
    assert_true(Y.dtype == np.float)
    # Check that no copy has been performed
    Y[0] = 7
    assert_equal(X[0], Y[0])

    Y = safe_asarray(X, dtype=np.int)
    assert_equal(Y.dtype, np.int)

    # Non-regression: LIL and DOK used to fail for lack of a .data attribute
    X = np.ones([2, 3])
    safe_asarray(sp.dok_matrix(X))
    safe_asarray(sp.lil_matrix(X), dtype=X.dtype)
Example 6
    def fit(self, X, y):
        """
        Fit LogisticRegressor model.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        y : numpy array of shape [n_samples, n_targets]
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        self.X = safe_asarray(X)
        self.y = np.asarray(y)
        initial_theta = np.zeros(self.X.shape[1])
        cost = lambda theta: compute_cost(
            theta, self.X, self.y, C=self.C_, normf=self.norm_)
        grad = lambda theta: compute_grad(
            theta, self.X, self.y, C=self.C_)

        self.coef_ = fmin_bfgs(self.decorated_cost,
                               initial_theta,
                               fprime=self.decorated_grad)
        print 'check1:', check_grad(cost, grad, initial_theta)
        print 'check2:', check_grad(cost, grad, self.coef_)

        # self.coef_ = fmin_bfgs(self.decorated_cost, initial_theta)
        return self
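The two `check_grad` calls compare an analytic gradient against finite differences; the pattern in isolation, on a toy quadratic (hypothetical cost, not this regressor's):

import numpy as np
from scipy.optimize import check_grad, fmin_bfgs

cost = lambda w: 0.5 * np.dot(w, w)   # f(w) = ||w||^2 / 2
grad = lambda w: w                    # its analytic gradient

w0 = np.array([3.0, -1.0])
print(check_grad(cost, grad, w0))     # ~1e-7: gradient agrees with finite differences
print(fmin_bfgs(cost, w0, fprime=grad, disp=False))  # converges to [0. 0.]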
Example 7
def _transform_selected(X, transform, selected="all", copy=True):
    """Apply a transform function to portion of selected features

    Parameters
    ----------
    X : array-like or sparse matrix, shape=(n_samples, n_features)
        Dense array or sparse matrix.

    transform : callable
        A callable transform(X) -> X_transformed

    selected : "all" or array of indices or mask
        Specify which features to apply the transform to.

    copy : boolean, optional
        Copy X even if it could be avoided.

    Returns
    -------
    X : array or sparse matrix, shape=(n_samples, n_features_new)
    """
    if selected == "all":
        X = safe_asarray(X, copy=copy, force_all_finite=False)
        return transform(X)

    X = atleast2d_or_csc(X, copy=copy, force_all_finite=False)

    if len(selected) == 0:
        return X

    n_features = X.shape[1]
    ind = np.arange(n_features)
    sel = np.zeros(n_features, dtype=bool)
    sel[np.asarray(selected)] = True
    not_sel = np.logical_not(sel)
    n_selected = np.sum(sel)

    if n_selected == 0:
        # No features selected.
        return X
    elif n_selected == n_features:
        # All features selected.
        return transform(X)
    else:
        X_sel = transform(X[:, ind[sel]])
        X_not_sel = X[:, ind[not_sel]]

        if sparse.issparse(X_sel) or sparse.issparse(X_not_sel):
            return sparse.hstack((X_sel, X_not_sel)).tocsr()
        else:
            return np.hstack((X_sel, X_not_sel))
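A minimal sketch of how `_transform_selected` composes (the doubling transform is hypothetical, chosen only for illustration):

import numpy as np

X = np.array([[1., 2., 3.],
              [4., 5., 6.]])
# Double only column 0; the untouched columns are hstacked after the
# transformed ones, so here the original column order is preserved.
X_new = _transform_selected(X, lambda Z: 2 * Z, selected=[0])
print(X_new)   # [[ 2.  2.  3.]
               #  [ 8.  5.  6.]]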
Example 9
    def fit(self, X, y, sample_weight=1.0, solver=None):
        X = safe_asarray(X, dtype=np.float)
        y = np.asarray(y, dtype=np.float)

        X, y, X_mean, y_mean, X_std = \
           self._center_data(X, y, self.fit_intercept,
                   self.normalize, self.copy_X)

        self.coef_ = ridge_regression(X, y,
                                      alpha=self.alpha,
                                      sample_weight=sample_weight,
                                      solver=solver,
                                      max_iter=self.max_iter,
                                      tol=self.tol)
        self._set_intercept(X_mean, y_mean, X_std)
        return self
Example 10
    def fit_transform(self, X, y=None):
        """Fit estimator and transform dataset.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Input data used to build forests.

        Returns
        -------
        X_transformed: sparse matrix, shape=(n_samples, n_out)
            Transformed dataset.
        """
        X = safe_asarray(X)
        rnd = check_random_state(self.random_state)
        y = rnd.uniform(size=X.shape[0])
        super(RandomTreesEmbedding, self).fit(X, y)
        self.one_hot_encoder_ = OneHotEncoder()
        return self.one_hot_encoder_.fit_transform(self.apply(X))
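For reference, the public usage this method supports looks roughly like this (parameter values are illustrative):

import numpy as np
from sklearn.ensemble import RandomTreesEmbedding

X = np.random.RandomState(42).randn(100, 2)
hasher = RandomTreesEmbedding(n_estimators=10, random_state=0)
X_sparse = hasher.fit_transform(X)  # one-hot encoding of the leaf each sample reaches
print(X_sparse.shape)               # (100, total number of leaves across the forest)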
Example 11
def balance_weights(y):
    """Compute sample weights such that the class distribution of y becomes
    balanced.

    Parameters
    ----------
    y : array-like
        Labels for the samples.

    Returns
    -------
    weights : array-like
        The sample weights.
    """
    y = safe_asarray(y)
    y = np.searchsorted(np.unique(y), y)
    bins = np.bincount(y)

    weights = 1. / bins.take(y)
    weights *= bins.min()

    return weights
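A worked example of the arithmetic: with three samples of class 0 and one of class 1, the majority class is scaled down so the minority class keeps weight 1.

import numpy as np

y = np.array([0, 0, 0, 1])
# np.unique(y) -> [0, 1]; np.bincount -> bins = [3, 1]
# weights = 1. / bins.take(y) = [1/3, 1/3, 1/3, 1]
# weights *= bins.min() = 1, leaving [0.333..., 0.333..., 0.333..., 1.0]
print(balance_weights(y))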
Example 13
    def fit(self, X, y, sample_weight=1.0, solver='auto'):
        """Fit Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_responses]
            Target values

        sample_weight : float or numpy array of shape [n_samples]
            Individual weights for each sample

        solver : {'auto', 'dense_cholesky', 'sparse_cg'}
            Solver to use in the computational
            routines. 'dense_cholesky' will use the standard
            scipy.linalg.solve function, 'sparse_cg' will use the
            conjugate gradient solver as found in
            scipy.sparse.linalg.cg while 'auto' will choose the most
            appropriate depending on the matrix X.

        Returns
        -------
        self : returns an instance of self.
        """
        X = safe_asarray(X, dtype=np.float)
        y = np.asarray(y, dtype=np.float)

        X, y, X_mean, y_mean, X_std = \
           self._center_data(X, y, self.fit_intercept,
                   self.normalize, self.copy_X)

        self.coef_ = ridge_regression(X, y, self.alpha, sample_weight,
                                      solver, self.tol)
        self._set_intercept(X_mean, y_mean, X_std)
        return self
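For intuition, the 'dense_cholesky' path described in the docstring amounts to solving the regularized normal equations; a standalone sketch on toy data (not the estimator's internals):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = X.dot([1., -2., 0.5]) + 0.01 * rng.randn(20)
alpha = 1.0

# Solve (X^T X + alpha * I) w = X^T y
A = X.T.dot(X) + alpha * np.eye(X.shape[1])
w = linalg.solve(A, X.T.dot(y))
print(w)   # close to [1, -2, 0.5]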
Example 15
def ordinal_logistic_fit(X, y, max_iter=10000, verbose=False, solver='TNC'):
    """
    Ordinal logistic regression or proportional odds model.
    Uses scipy.optimize.minimize (or pytron when solver='TRON').

    Parameters
    ----------
    X : {array, sparse matrix}, shape (n_samples, n_features)
        Input data
    y : array-like
        Target values
    max_iter : int
        Maximum number of iterations
    verbose : bool
        Print convergence information

    Returns
    -------
    w : array, shape (n_features,)
        coefficients of the linear model
    theta : array, shape (k,), where k is the number of distinct values of y
        vector of thresholds
    """

    X = utils.safe_asarray(X)
    y = np.asarray(y)

    # .. order input ..
    idx = np.argsort(y)
    idx_inv = np.zeros_like(idx)
    idx_inv[idx] = np.arange(idx.size)
    X = X[idx]
    y = y[idx].astype(np.int)
    # make them continuous and start at zero
    unique_y = np.unique(y)
    for i, u in enumerate(unique_y):
        y[y == u] = i
    unique_y = np.unique(y)

    # .. utility arrays used in f_grad ..
    alpha = 0.
    k1 = np.sum(y == unique_y[0])
    E0 = (y[:, np.newaxis] == np.unique(y)).astype(np.int)
    E1 = np.roll(E0, -1, axis=-1)
    E1[:, -1] = 0.
    E0, E1 = map(sparse.csr_matrix, (E0.T, E1.T))

    def f_obj(x0, X, y):
        """
        Objective function
        """
        w, theta_0 = np.split(x0, [X.shape[1]])
        theta_1 = np.roll(theta_0, 1)
        t0 = theta_0[y]
        z = np.diff(theta_0)

        Xw = X.dot(w)
        a = t0 - Xw
        b = t0[k1:] - X[k1:].dot(w)
        c = (theta_1 - theta_0)[y][k1:]

        if np.any(c > 0):
            return BIG

        #loss = -(c[idx] + np.log(np.exp(-c[idx]) - 1)).sum()
        loss = -np.log(1 - np.exp(c)).sum()

        loss += b.sum() + log_logistic(b).sum() \
            + log_logistic(a).sum() \
            + .5 * alpha * w.dot(w) - np.log(z).sum()  # penalty
        return loss

    def f_grad(x0, X, y):
        """
        Gradient of the objective function
        """
        w, theta_0 = np.split(x0, [X.shape[1]])
        theta_1 = np.roll(theta_0, 1)
        t0 = theta_0[y]
        t1 = theta_1[y]
        z = np.diff(theta_0)

        Xw = X.dot(w)
        a = t0 - Xw
        b = t0[k1:] - X[k1:].dot(w)
        c = (theta_1 - theta_0)[y][k1:]

        # gradient for w
        phi_a = phi(a)
        phi_b = phi(b)
        grad_w = -X[k1:].T.dot(phi_b) + X.T.dot(1 - phi_a) + alpha * w

        # gradient for theta
        idx = c > 0
        tmp = np.empty_like(c)
        tmp[idx] = 1. / (np.exp(-c[idx]) - 1)
        tmp[~idx] = np.exp(c[~idx]) / (1 - np.exp(c[~idx]))  # should not be needed
        grad_theta = (E1 - E0)[:, k1:].dot(tmp) \
            + E0[:, k1:].dot(phi_b) - E0.dot(1 - phi_a)

        grad_theta[:-1] += 1. / np.diff(theta_0)
        grad_theta[1:] -= 1. / np.diff(theta_0)
        out = np.concatenate((grad_w, grad_theta))
        return out

    def f_hess(x0, s, X, y):
        x0 = np.asarray(x0)
        w, theta_0 = np.split(x0, [X.shape[1]])
        theta_1 = np.roll(theta_0, 1)
        t0 = theta_0[y]
        t1 = theta_1[y]
        z = np.diff(theta_0)

        Xw = X.dot(w)
        a = t0 - Xw
        b = t0[k1:] - X[k1:].dot(w)
        c = (theta_1 - theta_0)[y][k1:]

        D = np.diag(phi(a) * (1 - phi(a)))
        D_ = np.diag(phi(b) * (1 - phi(b)))
        D1 = np.diag(np.exp(-c) / (np.exp(-c) - 1) ** 2)
        Ex = (E1 - E0)[:, k1:].toarray()
        Ex0 = E0.toarray()
        H_A = X[k1:].T.dot(D_).dot(X[k1:]) + X.T.dot(D).dot(X)
        H_C = - X[k1:].T.dot(D_).dot(E0[:, k1:].T.toarray()) \
            - X.T.dot(D).dot(E0.T.toarray())
        H_B = Ex.dot(D1).dot(Ex.T) + Ex0[:, k1:].dot(D_).dot(Ex0[:, k1:].T) \
            - Ex0.dot(D).dot(Ex0.T)

        p_w = H_A.shape[0]
        tmp0 = H_A.dot(s[:p_w]) + H_C.dot(s[p_w:])
        tmp1 = H_C.T.dot(s[:p_w]) + H_B.dot(s[p_w:])
        return np.concatenate((tmp0, tmp1))

    def grad_hess(x0, X, y):
        grad = f_grad(x0, X, y)
        hess = lambda x: f_hess(x0, x, X, y)
        return grad, hess

    x0 = np.random.randn(X.shape[1] + unique_y.size) / X.shape[1]
    x0[X.shape[1]:] = np.sort(unique_y.size * np.random.rand(unique_y.size))

    def callback(x0):
        x0 = np.asarray(x0)
        # check that the gradient is correctly computed
        print('Check grad: %s' % optimize.check_grad(f_obj, f_grad, x0, X, y))
        if verbose:
            print('OBJ: %s' % f_obj(x0, X, y))

    if solver == 'TRON':
        import pytron
        out = pytron.minimize(f_obj, grad_hess, x0, args=(X, y))
    else:
        options = {'maxiter': max_iter, 'disp': 0, 'maxfun': 10000}
        out = optimize.minimize(f_obj, x0, args=(X, y), method=solver,
            jac=f_grad, hessp=f_hess, options=options, callback=callback)

    if not out.success:
        warnings.warn(out.message)
    w, theta = np.split(out.x, [X.shape[1]])
    return w, theta
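The objective, gradient, and Hessian above all unpack one flat parameter vector with `np.split`; the convention in isolation:

import numpy as np

n_features = 3
x0 = np.array([0.1, 0.2, 0.3, -1.0, 0.0, 1.0])
# Entries before index n_features are the weights w, the rest the thresholds
w, theta = np.split(x0, [n_features])
print(w)      # [ 0.1  0.2  0.3]
print(theta)  # [-1.  0.  1.]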
Example 16
    def fit(self, X, y, sample_weight=1.0):
        """Fit Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_responses]
            Target values

        sample_weight : float or array-like of shape [n_samples]
            Sample weight

        Returns
        -------
        self : Returns self.
        """
        X = safe_asarray(X, dtype=np.float)
        y = np.asarray(y, dtype=np.float)

        n_samples, n_features = X.shape

        X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y,
                self.fit_intercept, self.normalize, self.copy_X)

        gcv_mode = self.gcv_mode
        with_sw = len(np.shape(sample_weight))

        if gcv_mode is None or gcv_mode == 'auto':
            if n_features > n_samples or with_sw:
                gcv_mode = 'eigen'
            else:
                gcv_mode = 'svd'
        elif gcv_mode == "svd" and with_sw:
            # FIXME non-uniform sample weights not yet supported
            warnings.warn("non-uniform sample weights unsupported for svd, "
                "forcing usage of eigen")
            gcv_mode = 'eigen'

        if gcv_mode == 'eigen':
            _pre_compute = self._pre_compute
            _errors = self._errors
            _values = self._values
        elif gcv_mode == 'svd':
            # assert n_samples >= n_features
            _pre_compute = self._pre_compute_svd
            _errors = self._errors_svd
            _values = self._values_svd
        else:
            raise ValueError('bad gcv_mode "%s"' % gcv_mode)

        v, Q, QT_y = _pre_compute(X, y)
        n_y = 1 if len(y.shape) == 1 else y.shape[1]
        cv_values = np.zeros((n_samples * n_y, len(self.alphas)))
        C = []

        error = self.score_func is None and self.loss_func is None

        for i, alpha in enumerate(self.alphas):
            if error:
                out, c = _errors(sample_weight * alpha, y, v, Q, QT_y)
            else:
                out, c = _values(sample_weight * alpha, y, v, Q, QT_y)
            cv_values[:, i] = out.ravel()
            C.append(c)

        if error:
            best = cv_values.mean(axis=0).argmin()
        else:
            func = self.score_func if self.score_func else self.loss_func
            out = [func(y.ravel(), cv_values[:, i])
                    for i in range(len(self.alphas))]
            best = np.argmax(out) if self.score_func else np.argmin(out)

        self.alpha_ = self.alphas[best]
        self.dual_coef_ = C[best]
        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)

        self._set_intercept(X_mean, y_mean, X_std)

        if self.store_cv_values:
            if len(y.shape) == 1:
                cv_values_shape = n_samples, len(self.alphas)
            else:
                cv_values_shape = n_samples, n_y, len(self.alphas)
            self.cv_values_ = cv_values.reshape(cv_values_shape)

        return self
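The alpha selection at the end reduces to an argmin over mean per-sample errors; in isolation, with made-up numbers:

import numpy as np

alphas = np.array([0.1, 1.0, 10.0])
# rows = samples, columns = candidate alphas (hypothetical LOO errors)
cv_values = np.array([[0.9, 0.4, 0.7],
                      [1.1, 0.5, 0.8]])
best = cv_values.mean(axis=0).argmin()
print(alphas[best])   # 1.0, the alpha with the lowest mean error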
Example 17
yKrn = np.zeros(shape=(1, 20000))
print "yKrn type", type(yKrn)

# Entropy of consecutive windows of width clmns taken from xKrn.
for i in xrange(0, len(xKrn), clmns):
    print i
    yKrn[0, i] = entropy(xKrn[i:i + clmns])
    print xKrn[i]
print "##yKrn##", np.shape(yKrn)
print yKrn

# Validation passes; the return values are intentionally discarded here.
np.ravel(yKrn)
safe_asarray(yKrn)
np.asarray_chkfinite(yKrn)
print "l885: yKrn", type(yKrn)

np.array(yKrn, float)
as_float_array(XKrn)
as_float_array(yKrn)

np.float64(yKrn)
np.asarray(yKrn)

# Drop NaN entries before the finiteness checks.
XKrn = XKrn[np.logical_not(np.isnan(XKrn))]
yKrn = yKrn[np.logical_not(np.isnan(yKrn))]
assert_all_finite(XKrn)
assert_all_finite(yKrn)
XKrn, yKrn = XKrn.ravel(), yKrn.ravel()
print "912: XKrn row, yKrn row", XKrn.shape, yKrn.shape
Example 18
    def fit(self, X, y, sample_weight=1.0):
        """Fit Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_responses]
            Target values

        sample_weight : float or array-like of shape [n_samples]
            Sample weight

        Returns
        -------
        self : Returns self.
        """
        X = safe_asarray(X, dtype=np.float)
        y = np.asarray(y, dtype=np.float)

        n_samples, n_features = X.shape

        X, y, X_mean, y_mean, X_std = LinearModel._center_data(
            X, y, self.fit_intercept, self.normalize, self.copy_X)

        gcv_mode = self.gcv_mode
        with_sw = len(np.shape(sample_weight))

        if gcv_mode is None or gcv_mode == 'auto':
            if n_features > n_samples or with_sw:
                gcv_mode = 'eigen'
            else:
                gcv_mode = 'svd'
        elif gcv_mode == "svd" and with_sw:
            # FIXME non-uniform sample weights not yet supported
            warnings.warn("non-uniform sample weights unsupported for svd, "
                          "forcing usage of eigen")
            gcv_mode = 'eigen'

        if gcv_mode == 'eigen':
            _pre_compute = self._pre_compute
            _errors = self._errors
            _values = self._values
        elif gcv_mode == 'svd':
            # assert n_samples >= n_features
            _pre_compute = self._pre_compute_svd
            _errors = self._errors_svd
            _values = self._values_svd
        else:
            raise ValueError('bad gcv_mode "%s"' % gcv_mode)

        v, Q, QT_y = _pre_compute(X, y)
        n_y = 1 if len(y.shape) == 1 else y.shape[1]
        cv_values = np.zeros((n_samples * n_y, len(self.alphas)))
        C = []

        error = self.score_func is None and self.loss_func is None

        for i, alpha in enumerate(self.alphas):
            if error:
                out, c = _errors(sample_weight * alpha, y, v, Q, QT_y)
            else:
                out, c = _values(sample_weight * alpha, y, v, Q, QT_y)
            cv_values[:, i] = out.ravel()
            C.append(c)

        if error:
            best = cv_values.mean(axis=0).argmin()
        else:
            func = self.score_func if self.score_func else self.loss_func
            out = [
                func(y.ravel(), cv_values[:, i])
                for i in range(len(self.alphas))
            ]
            best = np.argmax(out) if self.score_func else np.argmin(out)

        self.best_alpha = self.alphas[best]
        self.dual_coef_ = C[best]
        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)

        self._set_intercept(X_mean, y_mean, X_std)

        if self.store_cv_values:
            if len(y.shape) == 1:
                cv_values_shape = n_samples, len(self.alphas)
            else:
                cv_values_shape = n_samples, n_y, len(self.alphas)
            self.cv_values_ = cv_values.reshape(cv_values_shape)

        return self
Example 19
X = [[ 0.26551972, -0.52748602],
 [-0.71613885, -1.67219803],
 [-1.1508754,  -0.84463093],
 [ 0.57204618, -0.11031915],
 [ 2.32416009, -1.88625748],
 [ 1.88126616,  1.54628757],
 [ 0.71425139,  2.07374798],
 [-0.52405951, -1.02460704],
 [ 1.00767888,  0.93742334],
 [-1.41500546, -1.68304715],
 [ 0.23472175,  0.75076249],
 [ 0.38353232,  0.52369016],
 [-1.08196752, -1.00545824],
 [-0.0435215,  -0.9002513 ]]



X = safe_asarray(X)
#bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=10)
bandwidth = 0.830430037949
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
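After `fit`, the clustering result is available on the estimator's standard attributes:

labels = ms.labels_
cluster_centers = ms.cluster_centers_
print("number of estimated clusters: %d" % len(np.unique(labels)))
print(cluster_centers)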