import numpy

def SciKits(featureValues, classLabels):

    # TODO: check how non-convergence is reported

    (ns, nf) = featureValues.shape
    assert classLabels.shape == (ns, )
    # NOTE: the import path depends on the installed version
    try:
        # scikits.learn < 0.9 lived in the scikits namespace
        from scikits.learn.linear_model import LogisticRegression
    except ImportError:
        # scikit-learn >= 0.9 renamed the package to sklearn
        from sklearn.linear_model import LogisticRegression
    RegularisationParameter = 1.0e+30 # For us, higher is better (no regularisation!)
    Tolerance               = 1.0e-30 # Smaller is better

    # From the documentation page at
    #
    #     http://scikit-learn.sourceforge.net/modules/generated/scikits.learn.linear_model.LogisticRegression.html
    #
    # "The underlying C implementation uses a random number generator to select features when fitting the model.
    #  It is thus not uncommon, to have slightly different results for the same input data.
    #  If that happens, try with a smaller tol parameter."

    classifier = LogisticRegression(penalty = 'l1', C = RegularisationParameter, tol = Tolerance)
    classifier.fit(featureValues, classLabels)

    beta = -classifier.raw_coef_[0,:]
    beta = numpy.append(beta[-1], beta[:-1])

    if not all(numpy.isfinite(beta)):
        return None

    return beta
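
A minimal usage sketch for SciKits() above, on synthetic data (assumes numpy and one of the scikits.learn/sklearn imports are available). The returned vector holds the intercept first, followed by one coefficient per feature, or None if the fit produced non-finite values.

import numpy

featureValues = numpy.random.randn(200, 3)             # 200 samples, 3 features (synthetic)
classLabels   = (featureValues[:, 0] > 0).astype(int)   # labels derived from the first feature

beta = SciKits(featureValues, classLabels)
if beta is not None:
    intercept, coefficients = beta[0], beta[1:]
    print 'intercept = %f' % intercept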
Example #2
def train_readout_logit_onevsall(stimset, samples):
    
    (train_data, test_data, index_to_sym) = get_np_dataset(stimset, samples)    
    models = {}
    
    for sym_index,sym in index_to_sym.iteritems():
        
        #train a logistic regression model on just this stim class vs. the rest
        data = copy.deepcopy(train_data)
        target_mask = data[:, -1] == sym_index
        data[~target_mask, -1] = 0
        data[target_mask, -1] = 1
        print ''
        print list(data)
        print ''

        N = data.shape[1]-1

        logr = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1)
        logr.fit(data[:, :N], data[:, -1])
        
        # evaluate against test labels binarized the same way (this class vs. the rest)
        test_target = (test_data[:, -1] == sym_index).astype(float)
        test_pred = logr.predict(test_data[:, 0:N])
        pred_diff = np.abs(test_pred - test_target)
        zero_one_loss = pred_diff.sum() / test_data.shape[0]
        print 'Stim class %s loss: %0.3f' % (sym, zero_one_loss)
        
        models[sym] = logr

    return models
Example #3
def train_readout_logit(stimset, samples):
    
    (train_data, test_data, index_to_sym) = get_np_dataset(stimset, samples)
        
    N = train_data.shape[1]-1
    logr = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1)
    logr.fit(train_data[:, :N], train_data[:, -1])
        
    test_pred = logr.predict(test_data[:, 0:N])
    pred_diff = test_pred - test_data[:, -1]
    percent_correct = len((pred_diff == 0).nonzero()[0]) / float(len(test_pred))
    print 'Logit Percent correct: %0.3f' % percent_correct
    return percent_correct
def train_lrpipe(trainX, trainY, params):
    """ trains LogisiticRegression model with params
        logreg_C specified by params 
        """
    lrpipe = Pipeline([('logreg', LogisticRegression(penalty="l1", C=1))])
    lrpipe = lrpipe.fit(trainX, trainY, **params)
    return lrpipe
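
A hedged usage sketch for train_lrpipe() above, on synthetic data. It assumes Pipeline and LogisticRegression are imported as in the surrounding snippets; the fit-parameter key mentioned in the comment is hypothetical (it follows the 'stepname__parameter' convention for the 'logreg' step), and whether C can be passed at fit time depends on the scikits.learn/sklearn version.

import numpy as np

trainX = np.random.randn(100, 5)          # synthetic training data
trainY = (trainX[:, 0] > 0).astype(int)   # synthetic binary labels

params = {}   # e.g. {'logreg__C': 10.0} -- key name is an assumption, see note above
lrpipe = train_lrpipe(trainX, trainY, params)
print lrpipe.predict(trainX[:5])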
    def pairwise_regress(thetas, cats, ii, jj, reg_type, reg_param):

        en_cats = enumerate(cats)
        set0 = [idx for (idx, cat) in en_cats if cat == ii]
        en_cats = enumerate(cats)
        set1 = [idx for (idx, cat) in en_cats if cat == jj]

        X0 = thetas[set0, :]
        Y0 = np.zeros(X0.shape[0])
        X1 = thetas[set1, :]
        Y1 = np.ones(X1.shape[0])

        X = np.vstack([X0, X1])
        Y = np.concatenate([Y0, Y1])

        lr = LogisticRegression(penalty=reg_type,
                                tol=0.00000000001,
                                C=reg_param)
        lr.fit(X, Y)

        return lr
def main():
    notes, min_pitch = get_notes()
    duration, n_pitches = notes.shape
    data = np.zeros((n_pitches, duration, n_pitches))
    classes = np.zeros((n_pitches, duration))
    state = np.zeros((n_pitches))

    for t, pitches in enumerate(notes):
        for i, on in enumerate(pitches):
            data[i, t] = state
            classes[i, t] = on
        
        for i, on in enumerate(pitches):
            state[i] = activation(state[i], on)

    models = []
    for i in xrange(n_pitches):
        model = LogisticRegression('l2', tol=0.1)
        train_data, target = repeat_classes(data[i, :], classes[i, :], 10, 10)
        model.fit(train_data, target)
        models.append(model)

    duration *= 3
    predicted = np.zeros((n_pitches, duration))
    state = np.zeros((n_pitches))
    state[21] = 1
    for t in xrange(1, duration):
        sys.stdout.write('%d\r' % t)
        sys.stdout.flush()
        current_state = state.reshape((1, state.shape[0]))
        for i in xrange(n_pitches):
            on = models[i].predict(current_state)[0]
            predicted[i, t] = on
            state[i] = activation(state[i], on)

        state += (np.random.random((n_pitches)) - .5) * .1

    write(predicted, min_pitch)
def do_grid_search(X, Y, gs_params):
    """ Given data (X,Y) will perform a grid search on g_params
        for a LogisticRegression called logreg
        """
    lrpipe = Pipeline([('logreg', LogisticRegression())])
    gs = GridSearchCV(lrpipe, gs_params, n_jobs=-1)
    #print gs
    gs = gs.fit(X, Y)

    best_parameters, score = max(gs.grid_scores_, key=lambda x: x[1])
    logger.info("best_parameters: " + str(best_parameters))
    logger.info("expected score: " + str(score))

    return best_parameters
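
A hedged usage sketch for do_grid_search() above, on synthetic data. It assumes Pipeline, GridSearchCV, LogisticRegression, and a configured logger are in scope as in the function body; the grid key follows GridSearchCV's 'stepname__parameter' convention for the 'logreg' pipeline step, and the value range is purely illustrative.

import numpy as np

X = np.random.randn(200, 10)        # synthetic features
Y = (X[:, 0] > 0).astype(int)       # synthetic binary labels

# grid over the logistic regression's inverse regularisation strength C
gs_params = {'logreg__C': [0.01, 0.1, 1.0, 10.0]}
best_parameters = do_grid_search(X, Y, gs_params)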
Example #9
def test_auto_weight():
    """Test class weights for imbalanced data"""
    from scikits.learn.linear_model import LogisticRegression
    # we take as dataset the two-dimensional projection of iris so
    # that it is not separable, and remove half of the samples from
    # class 2 to make the data imbalanced
    from scikits.learn.svm.base import _get_class_weight
    X, y = iris.data[:, :2], iris.target
    unbalanced = np.delete(np.arange(y.size), np.where(y > 1)[0][::2])

    assert np.argmax(_get_class_weight('auto', y[unbalanced])[0]) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(), LogisticRegression()):
        # check that the score is better when class_weight='auto' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced],
                         class_weight={}).predict(X)
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],
                                  class_weight='auto').predict(X)
        assert metrics.f1_score(y, y_pred) <= metrics.f1_score(y, y_pred_balanced)
Example #10
def _fit_logistic_regression(X, y, C=0.1):
    classifier = LogisticRegression(C=C)
    classifier.fit(X, y)
    return classifier
Example #11
def logistic_regression(state_matrix, teacher_matrix):
    for i in range(state_matrix.shape[1]):
        clf = LogisticRegression(tol=0.05)
        clf.fit(state_matrix, teacher_matrix[:, i])
        self.output_weights[i, :] = clf.coef_
        print i
Example #12
        dstask[ds][t] = ctr
        ctr = ctr + 1

Y = N.zeros(len(copedata))
for x in range(len(copedata)):
    Y[x] = dstask[copedata[x, 0]][copedata[x, 1]]

X = melodic_mix[usedata == 1, :]

loo = LeaveOneOut(len(Y))

predclass = N.zeros(len(Y))

for train, test in loo:
    X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
    clf = LogisticRegression(C=0.1, penalty='l1')
    clf.fit(X_train, y_train)
    predclass[test] = clf.predict(X_test)

print 'Mean accuracy=%0.3f' % N.mean(predclass == Y)

# randomize labels 500 times and store accuracy
nruns = 500
randacc = N.zeros(nruns)

for r in range(nruns):
    N.random.shuffle(Y)
    for train, test in loo:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        clf = LogisticRegression(C=1, penalty='l2')
        clf.fit(X_train, y_train)
Example #13
import json

import matplotlib.pyplot as plt
import numpy as np
from scikits.learn.linear_model import LogisticRegression

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color, sigmoid


path = get_path(__file__) + '/..'
L = list(L)

X = d.view()[:,3:]
y = d.view()[:,2]

# Inverse of the regularization strength used when estimating parameters
C = 0.1

classifier = LogisticRegression(C=C, penalty='l2')

training_rows = range(int(1e5))

classifier.fit(X[training_rows,:], y[training_rows])

coef_dict = dict(zip(L[3:], list(classifier.coef_[0])))
coef_dict['intercept'] = classifier.intercept_[0]

with open('{0}/data/coefs_train_0-1e5.json'.format(path), 'w') as f:
    json.dump(coef_dict, f, indent=4, sort_keys=True)
Example #15
# Author: Alexandre Gramfort <*****@*****.**>
# License: BSD Style.

import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Set regularization parameter
C = 0.1

classifier_l1_LR = LogisticRegression(C=C, penalty='l1')
classifier_l2_LR = LogisticRegression(C=C, penalty='l2')
classifier_l1_LR.fit(X, y)
classifier_l2_LR.fit(X, y)

hyperplane_coefficients_l1_LR = classifier_l1_LR.coef_[:]
hyperplane_coefficients_l2_LR = classifier_l2_LR.coef_[:]

# hyperplane_coefficients_l1_LR contains zeros due to the
# L1 sparsity inducing norm

pct_non_zeros_l1_LR = np.mean(hyperplane_coefficients_l1_LR != 0) * 100
pct_non_zeros_l2_LR = np.mean(hyperplane_coefficients_l2_LR != 0) * 100

print "Percentage of non zeros coefficients (L1) : %f" % pct_non_zeros_l1_LR
print "Percentage of non zeros coefficients (L2) : %f" % pct_non_zeros_l2_LR
Example #17
from rpy2.robjects import FloatVector as rfloat
import numpy as np

# additional imports needed by this snippet (module paths follow the old
# scikits.learn layout; they moved in later scikit-learn releases)
from scikits.learn import datasets, neighbors, svm
from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.linear_model import LogisticRegression

iris = datasets.load_iris()

def test_algorithm(algorithm, results, train_data, train_target, test_data):
    algorithm.fit(train_data, train_target)
    y_pred = algorithm.predict(test_data)
    results.append(precision(y_pred))

def precision(y_pred):
    prec = sum(y_pred == test_target)
    return float(prec) / len(test_target)

svmclf = svm.SVC()
logisticclf = LogisticRegression()
nnclf= neighbors.Neighbors()
svmli = []
logli = []
nnli = []

cv = StratifiedKFold(iris.target, 20)
for train_index, test_index in cv:
    train_data = iris.data[train_index]
    train_target = iris.target[train_index]
    test_data = iris.data[test_index]
    test_target = iris.target[test_index]

    #svm
    test_algorithm(svmclf, svmli, train_data, train_target, test_data)
Example #18
import numpy as np
import pylab as pl

from scikits.learn.linear_model import LogisticRegression
from scikits.learn.svm import SVC
from scikits.learn import datasets

iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features for visualization
y = iris.target

n_features = X.shape[1]

C = 1.0

# Create different classifiers. The logistic regression cannot do
# multiclass out of the box.
classifiers = {
                'L1 logistic': LogisticRegression(C=C, penalty='l1'),
                'L2 logistic': LogisticRegression(C=C, penalty='l2'),
                'Linear SVC': SVC(kernel='linear', C=C, probability=True),
              }

n_classifiers = len(classifiers)

pl.figure(figsize=(3*2, n_classifiers*2))
pl.subplots_adjust(bottom=.2, top=.95)

for index, (name, classifier) in enumerate(classifiers.iteritems()):
    classifier.fit(X, y)

    y_pred = classifier.predict(X)
    classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
    print  "classif_rate for %s : %f " % (name, classif_rate)
Example #19
#          Mathieu Blondel <*****@*****.**>
# License: BSD Style.

import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

# FIXME: the iris dataset has only 4 features!
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Set regularization parameter
for C in (0.1, 1, 10):
    clf_l1_LR = LogisticRegression(C=C, penalty='l1')
    clf_l2_LR = LogisticRegression(C=C, penalty='l2')
    clf_l1_LR.fit(X, y)
    clf_l2_LR.fit(X, y)

    coef_l1_LR = clf_l1_LR.coef_[:]
    coef_l2_LR = clf_l2_LR.coef_[:]

    # coef_l1_LR contains zeros due to the
    # L1 sparsity inducing norm

    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100

    print "C=%f" % C
    print "Sparsity with L1 penalty: %f" % sparsity_l1_LR