import numpy


def SciKits(featureValues, classLabels):
    # TODO: check how non-convergence is reported
    (ns, nf) = featureValues.shape
    assert classLabels.shape == (ns, )
    # NOTE: There is a version issue
    try:
        from scikits.learn.linear_model import LogisticRegression
    except ImportError:
        # scikits version >= 0.9
        from sklearn.linear_model import LogisticRegression
    RegularisationParameter = 1.0e+30  # For us, higher is better (no regularisation!)
    Tolerance = 1.0e-30  # Smaller is better
    # From the documentation page at
    #
    #   http://scikit-learn.sourceforge.net/modules/generated/scikits.learn.linear_model.LogisticRegression.html
    #
    # "The underlying C implementation uses a random number generator to select
    #  features when fitting the model. It is thus not uncommon to have slightly
    #  different results for the same input data. If that happens, try with a
    #  smaller tol parameter."
    classifier = LogisticRegression(penalty='l1', C=RegularisationParameter, tol=Tolerance)
    classifier.fit(featureValues, classLabels)
    beta = -classifier.raw_coef_[0, :]
    beta = numpy.append(beta[-1], beta[:-1])
    if not all(numpy.isfinite(beta)):
        return None
    return beta
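# Usage sketch (illustrative only): a minimal call to SciKits() on synthetic
# data; the random features and thresholded labels below are purely
# hypothetical and not part of the original snippet's workflow.
featureValues = numpy.random.randn(100, 5)            # 100 samples, 5 features
classLabels = (featureValues[:, 0] > 0).astype(int)   # labels derived from the first feature
beta = SciKits(featureValues, classLabels)
if beta is None:
    print 'fit produced non-finite coefficients'
else:
    print beta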
def train_readout_logit_onevsall(stimset, samples):
    (train_data, test_data, index_to_sym) = get_np_dataset(stimset, samples)
    models = {}
    for sym_index, sym in index_to_sym.iteritems():
        # train a logistic regression model on just this stim class vs. the rest
        mdata = copy.deepcopy(train_data)
        mdata[mdata[:, -1] != sym_index, -1] = 0
        mdata[mdata[:, -1] == sym_index, -1] = 1
        N = mdata.shape[1] - 1
        logr = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0,
                                  fit_intercept=True, intercept_scaling=1)
        logr.fit(mdata[:, :N], mdata[:, -1])
        # binarize the held-out labels the same way before scoring
        test_labels = (test_data[:, -1] == sym_index).astype(float)
        test_pred = logr.predict(test_data[:, 0:N])
        pred_diff = np.abs(test_pred - test_labels)
        zero_one_loss = pred_diff.sum() / test_data.shape[0]
        print 'Stim class %s loss: %0.3f' % (sym, zero_one_loss)
        models[sym] = logr
    return models
def train_readout_logit(stimset, samples):
    (train_data, test_data, index_to_sym) = get_np_dataset(stimset, samples)
    N = train_data.shape[1] - 1
    logr = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0,
                              fit_intercept=True, intercept_scaling=1)
    logr.fit(train_data[:, :N], train_data[:, -1])
    test_pred = logr.predict(test_data[:, 0:N])
    pred_diff = test_pred - test_data[:, -1]
    percent_correct = len((pred_diff == 0).nonzero()[0]) / float(len(test_pred))
    print 'Logit Percent correct: %0.3f' % percent_correct
    return percent_correct
def train_lrpipe(trainX, trainY, params):
    """ trains a LogisticRegression model inside a Pipeline, with parameters
    (e.g. logreg__C) specified by params """
    lrpipe = Pipeline([('logreg', LogisticRegression(penalty="l1", C=1))])
    lrpipe = lrpipe.fit(trainX, trainY, **params)
    return lrpipe
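# Usage sketch (illustrative only): this assumes the older scikits.learn
# Pipeline.fit, which routes prefixed keyword parameters such as 'logreg__C'
# to the named step; trainX/trainY stand in for whatever feature matrix and
# labels the caller has prepared.
params = {'logreg__C': 10.0}
lrpipe = train_lrpipe(trainX, trainY, params)
predictions = lrpipe.predict(trainX)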
def pairwise_regress(thetas, cats, ii, jj, reg_type, reg_param):
    en_cats = enumerate(cats)
    set0 = [idx for (idx, cat) in en_cats if cat == ii]
    en_cats = enumerate(cats)
    set1 = [idx for (idx, cat) in en_cats if cat == jj]
    X0 = thetas[set0, :]
    Y0 = np.zeros(X0.shape[0])
    X1 = thetas[set1, :]
    Y1 = np.ones(X1.shape[0])
    X = np.vstack([X0, X1])
    Y = np.concatenate([Y0, Y1])
    lr = LogisticRegression(penalty=reg_type, tol=0.00000000001, C=reg_param)
    lr.fit(X, Y)
    return lr
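# Usage sketch (illustrative only): separate category 3 from category 7 given
# a hypothetical (n_samples, n_features) matrix `thetas` and a matching list
# of category labels `cats`.
lr = pairwise_regress(thetas, cats, ii=3, jj=7, reg_type='l2', reg_param=1.0)
print lr.coef_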
def main():
    notes, min_pitch = get_notes()
    duration, n_pitches = notes.shape

    # build, for every pitch, a dataset of (previous state -> note on/off)
    data = np.zeros((n_pitches, duration, n_pitches))
    classes = np.zeros((n_pitches, duration))
    state = np.zeros((n_pitches))
    for t, pitches in enumerate(notes):
        for i, on in enumerate(pitches):
            data[i, t] = state
            classes[i, t] = on
        for i, on in enumerate(pitches):
            state[i] = activation(state[i], on)

    # one logistic regression model per pitch
    models = []
    for i in xrange(n_pitches):
        model = LogisticRegression('l2', tol=0.1)
        train_data, target = repeat_classes(data[i, :], classes[i, :], 10, 10)
        model.fit(train_data, target)
        models.append(model)

    # generate a sequence three times as long as the training data
    duration *= 3
    predicted = np.zeros((n_pitches, duration))
    state = np.zeros((n_pitches))
    state[21] = 1
    for t in xrange(1, duration):
        sys.stdout.write('%d\r' % t)
        sys.stdout.flush()
        current_state = state.reshape((1, state.shape[0]))
        for i in xrange(n_pitches):
            on = models[i].predict(current_state)[0]
            predicted[i, t] = on
            state[i] = activation(state[i], on)
        state += (np.random.random((n_pitches)) - .5) * .1
    write(predicted, min_pitch)
def do_grid_search(X, Y, gs_params):
    """ Given data (X, Y), will perform a grid search on gs_params
    for a LogisticRegression called logreg """
    lrpipe = Pipeline([('logreg', LogisticRegression())])
    gs = GridSearchCV(lrpipe, gs_params, n_jobs=-1)
    #print gs
    gs = gs.fit(X, Y)

    best_parameters, score = max(gs.grid_scores_, key=lambda x: x[1])
    logger.info("best_parameters: " + str(best_parameters))
    logger.info("expected score: " + str(score))

    return best_parameters
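# Usage sketch (illustrative only): one plausible grid for do_grid_search.
# The 'logreg__' prefix targets the 'logreg' step of the pipeline; the values
# are arbitrary, and handing the winning parameters to train_lrpipe assumes
# the same old Pipeline.fit behaviour noted above.
gs_params = {
    'logreg__C': [0.01, 0.1, 1.0, 10.0],
    'logreg__penalty': ['l1', 'l2'],
}
best_parameters = do_grid_search(X, Y, gs_params)
lrpipe = train_lrpipe(X, Y, best_parameters)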
def test_auto_weight():
    """Test class weights for imbalanced data"""
    from scikits.learn.linear_model import LogisticRegression
    # we take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of predictors from
    # class 1
    from scikits.learn.svm.base import _get_class_weight
    X, y = iris.data[:, :2], iris.target
    unbalanced = np.delete(np.arange(y.size), np.where(y > 1)[0][::2])

    assert np.argmax(_get_class_weight('auto', y[unbalanced])[0]) == 2
    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(), LogisticRegression()):
        # check that score is better when class_weight='auto' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced],
                         class_weight={}).predict(X)
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],
                                  class_weight='auto').predict(X)
        assert metrics.f1_score(y, y_pred) <= metrics.f1_score(y, y_pred_balanced)
def _fit_logistic_regression(X, y, C=0.1):
    classifier = LogisticRegression(C=C)
    classifier.fit(X, y)
    return classifier
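# Usage sketch (illustrative only): quick sanity check of
# _fit_logistic_regression on the iris data; the import path assumes the old
# scikits.learn package used elsewhere in this collection.
import numpy as np
from scikits.learn import datasets

iris = datasets.load_iris()
clf = _fit_logistic_regression(iris.data, iris.target, C=0.5)
print 'training accuracy: %0.3f' % np.mean(clf.predict(iris.data) == iris.target)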
def logistic_regression(self, state_matrix, teacher_matrix):
    # fits one binary logistic regression per output unit and stores its
    # coefficients as that unit's readout weights (the use of
    # self.output_weights indicates this is a method of a readout class)
    for i in range(state_matrix.shape[1]):
        clf = LogisticRegression(tol=0.05)
        clf.fit(state_matrix, teacher_matrix[:, i])
        self.output_weights[i, :] = clf.coef_
        print i
# assign a class index to each dataset/task combination
dstask[ds][t] = ctr
ctr = ctr + 1

Y = N.zeros(len(copedata))
for x in range(len(copedata)):
    Y[x] = dstask[copedata[x, 0]][copedata[x, 1]]

X = melodic_mix[usedata == 1, :]

loo = LeaveOneOut(len(Y))
predclass = N.zeros(len(Y))
for train, test in loo:
    X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
    clf = LogisticRegression(C=0.1, penalty='l1')
    clf.fit(X_train, y_train)
    predclass[test] = clf.predict(X_test)

print 'Mean accuracy=%0.3f' % N.mean(predclass == Y)

# randomize labels nruns times and store accuracy
nruns = 500
randacc = N.zeros(nruns)
for r in range(nruns):
    N.random.shuffle(Y)
    for train, test in loo:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        clf = LogisticRegression(C=1, penalty='l2')
        clf.fit(X_train, y_train)
import json

import matplotlib.pyplot as plt
import numpy as np
from scikits.learn.linear_model import LogisticRegression

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color, sigmoid


path = get_path(__file__) + '/..'
L = list(L)

X = d.view()[:, 3:]
y = d.view()[:, 2]

# Inverse regularisation strength (smaller C means stronger regularisation)
C = 0.1

classifier = LogisticRegression(C=C, penalty='l2')

training_rows = range(int(1e5))
classifier.fit(X[training_rows, :], y[training_rows])

coef_dict = dict(zip(L[3:], list(classifier.coef_[0])))
coef_dict['intercept'] = classifier.intercept_[0]

with open('{0}/data/coefs_train_0-1e5.json'.format(path), 'w') as f:
    json.dump(coef_dict, f, indent=4, sort_keys=True)
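# Usage sketch (illustrative only): how the dumped coefficients might be
# applied later, assuming src.utils.sigmoid is the standard logistic function.
# A logistic regression predicts P(y=1 | x) = sigmoid(intercept + coef . x);
# the row index below (the first row outside the training range) is arbitrary.
with open('{0}/data/coefs_train_0-1e5.json'.format(path)) as f:
    coefs = json.load(f)

intercept = coefs.pop('intercept')
w = np.array([coefs[name] for name in L[3:]])
x = X[int(1e5), :]
p = sigmoid(intercept + np.dot(w, x))
print 'P(y=1) = %0.3f' % p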
# Author: Alexandre Gramfort <*****@*****.**>
# License: BSD Style.

import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Set regularization parameter
C = 0.1

classifier_l1_LR = LogisticRegression(C=C, penalty='l1')
classifier_l2_LR = LogisticRegression(C=C, penalty='l2')
classifier_l1_LR.fit(X, y)
classifier_l2_LR.fit(X, y)

hyperplane_coefficients_l1_LR = classifier_l1_LR.coef_[:]
hyperplane_coefficients_l2_LR = classifier_l2_LR.coef_[:]

# hyperplane_coefficients_l1_LR contains zeros due to the
# L1 sparsity inducing norm

pct_non_zeros_l1_LR = np.mean(hyperplane_coefficients_l1_LR != 0) * 100
pct_non_zeros_l2_LR = np.mean(hyperplane_coefficients_l2_LR != 0) * 100

print "Percentage of non zeros coefficients (L1) : %f" % pct_non_zeros_l1_LR
print "Percentage of non zeros coefficients (L2) : %f" % pct_non_zeros_l2_LR
from rpy2.robjects import FloatVector as rfloat
import numpy as np
# imports below are missing from the excerpt; they are assumed to come from
# the old scikits.learn package used throughout this collection
from scikits.learn import datasets, svm, neighbors
from scikits.learn.linear_model import LogisticRegression
from scikits.learn.cross_val import StratifiedKFold

iris = datasets.load_iris()


def test_algorithm(algorithm, results, train_data, train_target, test_data):
    algorithm.fit(train_data, train_target)
    y_pred = algorithm.predict(test_data)
    results.append(precision(y_pred))


def precision(y_pred):
    # relies on the module-level test_target set inside the CV loop below
    prec = sum(y_pred == test_target)
    return float(prec) / len(test_target)


svmclf = svm.SVC()
logisticclf = LogisticRegression()
nnclf = neighbors.Neighbors()

svmli = []
logli = []
nnli = []

cv = StratifiedKFold(iris.target, 20)
for train_index, test_index in cv:
    train_data = iris.data[train_index]
    train_target = iris.target[train_index]
    test_data = iris.data[test_index]
    test_target = iris.target[test_index]
    # svm
    test_algorithm(svmclf, svmli, train_data, train_target, test_data)
import numpy as np
import pylab as pl

from scikits.learn.linear_model import LogisticRegression
from scikits.learn.svm import SVC
from scikits.learn import datasets

iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features for visualization
y = iris.target

n_features = X.shape[1]

C = 1.0

# Create different classifiers. The logistic regression cannot do
# multiclass out of the box.
classifiers = {
    'L1 logistic': LogisticRegression(C=C, penalty='l1'),
    'L2 logistic': LogisticRegression(C=C, penalty='l2'),
    'Linear SVC': SVC(kernel='linear', C=C, probability=True),
}

n_classifiers = len(classifiers)

pl.figure(figsize=(3 * 2, n_classifiers * 2))
pl.subplots_adjust(bottom=.2, top=.95)

for index, (name, classifier) in enumerate(classifiers.iteritems()):
    classifier.fit(X, y)

    y_pred = classifier.predict(X)
    classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
    print "classif_rate for %s : %f " % (name, classif_rate)
#         Mathieu Blondel <*****@*****.**>
# License: BSD Style.

import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

# FIXME: the iris dataset has only 4 features!
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Set regularization parameter
for C in (0.1, 1, 10):
    clf_l1_LR = LogisticRegression(C=C, penalty='l1')
    clf_l2_LR = LogisticRegression(C=C, penalty='l2')
    clf_l1_LR.fit(X, y)
    clf_l2_LR.fit(X, y)

    coef_l1_LR = clf_l1_LR.coef_[:]
    coef_l2_LR = clf_l2_LR.coef_[:]

    # coef_l1_LR contains zeros due to the
    # L1 sparsity inducing norm

    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100

    print "C=%f" % C
    print "Sparsity with L1 penalty: %f" % sparsity_l1_LR