Example 1
def lasso_classification(table, alpha=0.3):
    '''Train a Lasso model treating the first column of `table` as a 0/1
    label and the remaining columns as features; return the fraction of
    correctly classified samples on a random half/half test split.
    '''
    import numpy as np
    from scikits.learn.linear_model import Lasso
    X = table[:, 1:]
    Y = table[:, 0]
    #    n_samples, n_features = 50, 200
    #    X = np.random.randn(n_samples, n_features)
    #    coef = 3*np.random.randn(n_features)
    #    coef[10:] = 0 # sparsify coef
    #    Y = np.dot(X, coef)
    #
    #    # add noise
    #    Y += 0.01 * np.random.normal(size=(n_samples,))

    # Split data in train set and test set
    n_samples = X.shape[0]
    items = np.random.permutation(n_samples)
    training_items = items[:n_samples // 2]
    testing_items = items[n_samples // 2:]
    X_train, y_train = X[training_items], Y[training_items]
    X_test, y_test = X[testing_items], Y[testing_items]

    lasso = Lasso(alpha=alpha, fit_intercept=True)
    lasso_fit = lasso.fit(X_train, y_train)
    print lasso_fit.coef_

    y_pred_lasso = lasso_fit.predict(X_test)
    y_collapsed = np.zeros_like(y_pred_lasso)
    collapsed_1 = y_pred_lasso >= 0.5
    y_collapsed[collapsed_1] = 1
    test = y_collapsed == y_test
    return float(test.sum()) / test.shape[0]
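A minimal usage sketch; the synthetic table and the alpha value are
illustrative assumptions, not part of the original:

import numpy as np

# Layout expected by lasso_classification: column 0 holds the 0/1 labels,
# the remaining columns hold the features.
rng = np.random.RandomState(0)
features = rng.randn(100, 20)
labels = (features[:, 0] > 0).astype(np.float64)
table = np.hstack([labels[:, np.newaxis], features])

print lasso_classification(table, alpha=0.1)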
Example 2
import gc
import numpy as np
from time import time
from scikits.learn.linear_model import Lasso, LassoLARS

# make_data is a helper from the original benchmark script; its definition
# is not shown here.
def compute_bench(alpha, n_samples, n_features):

    lasso_results = []
    larslasso_results = []
    larslasso_gram_results = []

    n_tests = 1000
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, len(n_samples) * len(n_features))
            print '=================='
            k = nf // 10
            X, Y, X_test, Y_test, coef_ = make_data(
                n_samples=ns, n_tests=n_tests, n_features=nf,
                noise=0.1, k=k)

            X /= np.sqrt(np.sum(X**2, axis=0))  # normalize each column to unit L2 norm

            gc.collect()
            print "benching Lasso: "
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "benching LassoLARS: "
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=False)
            larslasso_results.append(time() - tstart)

            gc.collect()
            print "benching LassoLARS (precomp. Gram): "
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=True)
            larslasso_gram_results.append(time() - tstart)

    return lasso_results, larslasso_results, larslasso_gram_results
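A sketch of how this benchmark might be driven; the size grids below are
illustrative, and make_data must be defined as in the original script:

list_n_samples = [500, 1000, 2000]
list_n_features = [1000]
lasso_t, lars_t, lars_gram_t = compute_bench(
    alpha=0.01, n_samples=list_n_samples, n_features=list_n_features)
print 'Lasso times:          ', lasso_t
print 'LassoLARS times:      ', lars_t
print 'LassoLARS+Gram times: ', lars_gram_t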
Example 3
import gc
import numpy as np
from time import time
from scikits.learn.linear_model import Lasso, LassoLARS

# make_regression_dataset is a helper from the original benchmark script;
# its definition is not shown here.
def compute_bench(alpha, n_samples, n_features, precompute):

    lasso_results = []
    larslasso_results = []

    n_test_samples = 0
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it,
                                          len(n_samples) * len(n_features))
            print '=================='
            n_informative = nf // 10
            X, Y, _, _, coef = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative=n_informative)

            X /= np.sqrt(np.sum(X**2, axis=0))  # normalize each column to unit L2 norm

            gc.collect()
            print "- benching Lasso"
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "- benching LassoLARS"
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            larslasso_results.append(time() - tstart)

    return lasso_results, larslasso_results
Example 4
import gc
import numpy as np
from time import time
# Assumed imports for the scikit-learn version this snippet targets:
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso, LassoLars

def compute_bench(alpha, n_samples, n_features, precompute):

    lasso_results = []
    lars_lasso_results = []

    n_test_samples = 0
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it,
                                          len(n_samples) * len(n_features))
            print '=================='
            n_informative = nf // 10
            X, Y, coef_ = make_regression(n_samples=ns, n_features=nf, 
                                          n_informative=n_informative, 
                                          noise=0.1, coef=True)

            X /= np.sqrt(np.sum(X**2, axis=0))  # normalize each column to unit L2 norm

            gc.collect()
            print "- benching Lasso"
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "- benching LassoLars"
            clf = LassoLars(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            lars_lasso_results.append(time() - tstart)

    return lasso_results, lars_lasso_results
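Examples 3 and 4 share the same driver pattern; a hedged sketch for this
variant (sizes illustrative):

dense_t, lars_t = compute_bench(alpha=0.01, n_samples=[500],
                                n_features=[1000, 2500], precompute=True)

Precomputing the Gram matrix X^T X trades memory for speed and generally
pays off when n_samples is large relative to n_features.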
Example 5
# X, coef and n_samples are defined in the example's (elided) data-generation
# step; compare the commented-out block in Example 1.
y = np.dot(X, coef)

# add noise
y += 0.01 * np.random.normal(size=(n_samples,))

# Split data in train set and test set
n_samples = X.shape[0]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

################################################################################
# Lasso
from scikits.learn.linear_model import Lasso

alpha = 0.1
lasso = Lasso(alpha=alpha)

y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
print lasso
print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_lasso)**2
                                      / np.linalg.norm(y_test)**2)

################################################################################
# ElasticNet
from scikits.learn.linear_model import ElasticNet

enet = ElasticNet(alpha=alpha, rho=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
print enet
print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_enet)**2
Example 6
from time import time

import numpy as np
from scipy import linalg

from scikits.learn.linear_model.sparse import Lasso as SparseLasso
from scikits.learn.linear_model import Lasso as DenseLasso

###############################################################################
# The two Lasso implementations on Dense data
print "--- Dense matrices"

n_samples, n_features = 200, 10000
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)

alpha = 1
sparse_lasso = SparseLasso(alpha=alpha, fit_intercept=False)
dense_lasso = DenseLasso(alpha=alpha, fit_intercept=False)

t0 = time()
sparse_lasso.fit(X, y, max_iter=1000)
print "Sparse Lasso done in %fs" % (time() - t0)

t0 = time()
dense_lasso.fit(X, y, max_iter=1000)
print "Dense Lasso done in %fs" % (time() - t0)

print "Distance between coefficients : %s" % linalg.norm(sparse_lasso.coef_ -
                                                         dense_lasso.coef_)

###############################################################################
# The two Lasso implementations on Sparse data
print "--- Sparse matrices"
Example 10
import numpy as np
from scipy import linalg

def fitRateSpectrum(Times, Data, Rates, w, Lnorm='ridge', standardizeData=True, CalcNdof=False, rho=0.5):
    """Using pseudo-inverse, with Tikhonov regularization (w parameter) to solve the inverse lapace tranform.
    Returns coefficients A_k, residual sum of squares (rss), and number of degrees of freedom, for each relaxation rate.
    """

    
    if Lnorm == 'lasso':
        # Use L1-norm Lasso regression
        try:
            from scikits.learn.linear_model import Lasso 
        except ImportError:
            print 'Error: could NOT import Lasso from scikits.learn.linear_model.  Using L2 norm (ridge).'
            Lnorm = 'ridge'

    if Lnorm == 'enet':
        # Use L1-L2-mixture norm Lasso regression
        try:
            from scikits.learn.linear_model import ElasticNet
        except ImportError:
            print 'Error: could NOT import ElasticNet from scikits.learn.linear_model.  Using L2 norm (ridge).'
            Lnorm = 'ridge'


    if Lnorm == 'lasso':

        lasso = Lasso(alpha=w, fit_intercept=False) # assume the data is already "centered" -- i.e. no zero rate
        X, Xmean = Xsubmatrix(Rates, Times, standardizeData=standardizeData)
        #print 'X.shape', X.shape, 'Data.shape', Data.shape
        lasso.fit(X, Data, max_iter=1e6, tol=1e-7)
        A = lasso.coef_

        # Compute "residual sum of squares" (note loss function is different for L1-norm)
        y_pred_lasso = lasso.predict(X)
        diff = y_pred_lasso - Data


    elif Lnorm == 'enet':

        # NOTE: scikits.learn's convention for rho is reversed relative to ours, so we pass (1. - rho)
        enet = ElasticNet(alpha=w, rho=(1. - rho), fit_intercept=False) # assume the data is already "centered" -- i.e. no zero rate
        X, Xmean = Xsubmatrix(Rates, Times, standardizeData=standardizeData)
        #print 'X.shape', X.shape, 'Data.shape', Data.shape
        #enet.fit(X, Data, max_iter=1e6, tol=1e-7)
        enet.fit(X, Data, max_iter=1e6, tol=1e-3)  # for testing
        A = enet.coef_

        # Compute "residual sum of squares" (note loss function is different for L1-norm)
        y_pred_enet = enet.predict(X)
        diff = y_pred_enet - Data


    elif Lnorm == 'ridge':
        X, Xmean = Xmatrix(Rates, Times, w, standardizeData=standardizeData )
        Xinv = linalg.pinv(X)

        y = np.array(Data.tolist() + [0. for k in Rates])
        if standardizeData:
            y -= y.mean()
        A = np.dot(Xinv, y)

        # Compute "residual sum of squares" (note loss function is different for L1-norm)
        diff = SumSpectra(A, Rates, Times) - Data

    rss = np.dot(diff,diff)  # Residual sum of squares

    if CalcNdof:
        Xsub, Xmean = Xsubmatrix(Rates, Times, standardizeData=standardizeData)
        XT = np.transpose(Xsub)
        I_XT = np.eye(XT.shape[0])
        I_X = np.eye(Xsub.shape[0])
        Xtemp = np.dot(Xsub, np.linalg.inv(np.dot(XT,Xsub) + w*I_XT))
        ndof = np.trace(I_X - np.dot(Xtemp,XT))
    else:
        ndof = None

    return A, rss, ndof
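A hedged usage sketch; the Times/Rates/Data values are illustrative, and the
function still relies on Xmatrix, Xsubmatrix and SumSpectra being defined in
the surrounding module:

Times = np.linspace(0.01, 10.0, 200)   # sampling times
Rates = np.logspace(-2, 1, 50)         # candidate relaxation rates
Data = np.exp(-0.5 * Times)            # single-exponential decay, rate 0.5

A, rss, ndof = fitRateSpectrum(Times, Data, Rates, w=0.01, Lnorm='lasso')
print 'nonzero rates:', Rates[np.abs(A) > 1e-6]
print 'rss:', rss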