Example #1
0
def compute_bench(alpha, n_samples, n_features, precompute):

    lasso_results = []
    larslasso_results = []

    n_test_samples = 0
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, max(len(n_samples),
                                          len(n_features)))
            print '=================='
            n_informative = nf // 10
            X, Y, _, _, coef = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative = n_informative)

            X /= np.sqrt(np.sum(X**2, axis=0)) # Normalize data

            gc.collect()
            print "- benching Lasso"
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "- benching LassoLARS"
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            larslasso_results.append(time() - tstart)

    return lasso_results, larslasso_results
def compute_bench(alpha, n_samples, n_features, precompute):

    lasso_results = []
    lars_lasso_results = []

    n_test_samples = 0
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, max(len(n_samples),
                                          len(n_features)))
            print '=================='
            n_informative = nf // 10
            X, Y, _, _, coef = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative = n_informative)

            X /= np.sqrt(np.sum(X**2, axis=0)) # Normalize data

            gc.collect()
            print "- benching Lasso"
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "- benching LassoLars"
            clf = LassoLars(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            lars_lasso_results.append(time() - tstart)

    return lasso_results, lars_lasso_results
def compute_bench(samples_range, features_range):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print '===================='
            print 'Iteration %03d of %03d' % (it, max_it)
            print '===================='
            dataset_kwargs = {
                'n_train_samples': n_samples,
                'n_test_samples': 2,
                'n_features': n_features,
                'n_informative': n_features / 10,
                'effective_rank': min(n_samples, n_features) / 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print "n_samples: %d" % n_samples
            print "n_features: %d" % n_features
            X, y, _, _, _ = make_regression_dataset(**dataset_kwargs)

            gc.collect()
            print "benching lars_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X) # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print "benching lars_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print "benching lasso_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print "benching lasso_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lasso_path (without Gram)'].append(delta)

    return results
from scikits.learn.metrics import mean_square_error
from scikits.learn.datasets.samples_generator import make_regression_dataset

if __name__ == "__main__":
    list_n_samples = np.linspace(100, 10000, 5).astype(np.int)
    list_n_features = [10, 100, 1000]
    n_test = 1000
    noise = 0.1
    alpha = 0.01
    sgd_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    elnet_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    ridge_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    for i, n_train in enumerate(list_n_samples):
        for j, n_features in enumerate(list_n_features):
            X_train, y_train, X_test, y_test, coef = make_regression_dataset(
                n_train_samples=n_train, n_test_samples=n_test,
                n_features=n_features, noise=noise)
            print "======================="
            print "Round %d %d" % (i, j)
            print "n_features:", n_features
            print "n_samples:", n_train

            # Shuffle data
            idx = np.arange(n_train)
            np.random.seed(13)
            np.random.shuffle(idx)
            X_train = X_train[idx]
            y_train = y_train[idx]

            std = X_train.std(axis=0)
            mean = X_train.mean(axis=0)
from scikits.learn.datasets.samples_generator import make_regression_dataset

if __name__ == "__main__":
    list_n_samples = np.linspace(100, 10000, 5).astype(np.int)
    list_n_features = [10, 100, 1000]
    n_test = 1000
    noise = 0.1
    alpha = 0.01
    sgd_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    elnet_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    ridge_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    for i, n_train in enumerate(list_n_samples):
        for j, n_features in enumerate(list_n_features):
            X_train, y_train, X_test, y_test, coef = make_regression_dataset(
                n_train_samples=n_train,
                n_test_samples=n_test,
                n_features=n_features,
                noise=noise)
            print "======================="
            print "Round %d %d" % (i, j)
            print "n_features:", n_features
            print "n_samples:", n_train

            # Shuffle data
            idx = np.arange(n_train)
            np.random.seed(13)
            np.random.shuffle(idx)
            X_train = X_train[idx]
            y_train = y_train[idx]

            std = X_train.std(axis=0)
Example #6
0
    import pylab as pl

    scikit_results = []
    glmnet_results = []
    n = 20
    step = 500
    n_features = 1000
    n_informative = n_features / 10
    n_test_samples = 1000
    for i in range(1, n + 1):
        print '=================='
        print 'Iteration %s of %s' % (i, n)
        print '=================='
        X, Y, X_test, Y_test, coef = make_regression_dataset(
            n_train_samples=(i * step),
            n_test_samples=n_test_samples,
            n_features=n_features,
            noise=0.1,
            n_informative=n_informative)

        print "benching scikit: "
        scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef))
        print "benching glmnet: "
        glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef))

    pl.clf()
    xx = range(0, n * step, step)
    pl.title('Lasso regression on sample dataset (%d features)' % n_features)
    pl.plot(xx, scikit_results, 'b-', label='scikit-learn')
    pl.plot(xx, glmnet_results, 'r-', label='glmnet')
    pl.legend()
    pl.xlabel('number of samples to classify')
Example #7
0
def compute_bench(samples_range, features_range):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print '===================='
            print 'Iteration %03d of %03d' % (it, max_it)
            print '===================='
            dataset_kwargs = {
                'n_train_samples': n_samples,
                'n_test_samples': 2,
                'n_features': n_features,
                'n_informative': n_features / 10,
                'effective_rank': min(n_samples, n_features) / 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print "n_samples: %d" % n_samples
            print "n_features: %d" % n_features
            X, y, _, _, _ = make_regression_dataset(**dataset_kwargs)

            gc.collect()
            print "benching lars_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print "benching lars_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print "benching lasso_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print "benching lasso_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lasso_path (without Gram)'].append(delta)

    return results
Example #8
0
    # Delayed import of pylab
    import pylab as pl

    scikit_results = []
    glmnet_results = []
    n = 20
    step = 500
    n_features = 1000
    n_informative = n_features / 10
    n_test_samples = 1000
    for i in range(1, n + 1):
        print '=================='
        print 'Iteration %s of %s' % (i, n)
        print '=================='
        X, Y, X_test, Y_test, coef = make_regression_dataset(
            n_train_samples=(i * step), n_test_samples=n_test_samples,
            n_features=n_features, noise=0.1, n_informative=n_informative)

        print "benching scikit: "
        scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef))
        print "benching glmnet: "
        glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef))

    pl.clf()
    xx = range(0, n*step, step)
    pl.title('Lasso regression on sample dataset (%d features)' % n_features)
    pl.plot(xx, scikit_results, 'b-', label='scikit-learn')
    pl.plot(xx, glmnet_results,'r-', label='glmnet')
    pl.legend()
    pl.xlabel('number of samples to classify')
    pl.ylabel('time (in seconds)')