def SciKits(featureValues, classLabels):
    """Fit an effectively unregularised logistic regression with scikit-learn.

    Parameters
    ----------
    featureValues : 2-D array of shape (ns, nf)
        One row per sample, one column per feature.
    classLabels : 1-D array of shape (ns,)
        One label per sample.

    Returns
    -------
    beta : 1-D array or None
        The negated first row of the fitted model's ``raw_coef_`` with its
        last entry moved to the front (presumably the intercept term --
        TODO confirm against the scikits.learn version in use).  ``None`` is
        returned when any entry is NaN or infinite.
    """

    # TODO: check how non-convergence is reported

    # Unpacking into two names also enforces that featureValues is 2-D.
    (ns, nf) = featureValues.shape

    assert classLabels.shape == (ns, )

    # NOTE: There is a version issue.  Only a failed import should trigger
    # the fallback, so catch ImportError rather than everything.
    try:
        from scikits.learn.linear_model import LogisticRegression
    except ImportError:  # scikits version >= 0.9
        from sklearn.linear_model import LogisticRegression

    RegularisationParameter = 1.0e+30  # For us, higher is better (no regularisation!)
    Tolerance = 1.0e-30  # Smaller is better

    # From the documentation page at
    #
    #     http://scikit-learn.sourceforge.net/modules/generated/scikits.learn.linear_model.LogisticRegression.html
    #
    # "The underlying C implementation uses a random number generator to select features when fitting the model.
    #  It is thus not uncommon, to have slightly different results for the same input data.
    #  If that happens, try with a smaller tol parameter."

    classifier = LogisticRegression(penalty='l1', C=RegularisationParameter, tol=Tolerance)
    classifier.fit(featureValues, classLabels)

    # Flip the sign and rotate the last coefficient to the front.
    beta = -classifier.raw_coef_[0, :]
    beta = numpy.append(beta[-1], beta[:-1])

    if not numpy.isfinite(beta).all():
        return None

    return beta
def train_readout_logit_onevsall(stimset, samples):
    """Train one binary (one-vs-rest) logistic readout per stimulus class.

    The dataset comes from ``get_np_dataset(stimset, samples)``, which yields
    a training matrix, a test matrix and an index -> symbol mapping.  In both
    matrices the last column is used as the class index and all preceding
    columns as features.

    For every class a copy of the data is relabelled to 1 (this class) / 0
    (any other class), an L2-regularised LogisticRegression is fitted on the
    relabelled training set, and its zero-one loss on the relabelled test
    set is printed.

    Returns
    -------
    models : dict
        Maps each symbol to its fitted classifier.
    """
    (train_data, test_data, index_to_sym) = get_np_dataset(stimset, samples)

    # Number of feature columns (everything except the trailing label column).
    N = train_data.shape[1] - 1

    models = {}
    for sym_index, sym in index_to_sym.iteritems():
        # Train a logistic regression model on just this stim class vs. the
        # rest.  The mask is computed from the untouched labels in one step:
        # relabelling in two passes (others -> 0, then class -> 1) would mark
        # every row positive when sym_index is 0.
        data = copy.deepcopy(train_data)
        data[:, -1] = (train_data[:, -1] == sym_index)
        test_labels = (test_data[:, -1] == sym_index).astype(test_data.dtype)

        print('')
        print(list(data))
        print('')

        logr = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0,
                                  fit_intercept=True, intercept_scaling=1)
        logr.fit(data[:, :N], data[:, -1])

        test_pred = logr.predict(test_data[:, 0:N])
        pred_diff = np.abs(test_pred - test_labels)
        # float() guards against integer truncation under Python 2.
        zero_one_loss = pred_diff.sum() / float(test_data.shape[0])

        print('Stim class %s loss: %0.3f' % (sym, zero_one_loss))
        models[sym] = logr

    return models
def train_readout_logit(stimset, samples):
    """Fit one logistic readout on all classes and report its test accuracy.

    The dataset is obtained from ``get_np_dataset(stimset, samples)``.  The
    last column of each matrix is used as the label and the columns before
    it as features.  The fraction of test rows whose prediction equals the
    label is printed and returned as a float.
    """
    train_data, test_data, index_to_sym = get_np_dataset(stimset, samples)

    # Everything but the trailing label column is a feature.
    N = train_data.shape[1] - 1

    logr = LogisticRegression(
        penalty='l2',
        dual=False,
        tol=0.0001,
        C=1.0,
        fit_intercept=True,
        intercept_scaling=1,
    )
    train_features = train_data[:, :N]
    train_labels = train_data[:, -1]
    logr.fit(train_features, train_labels)

    test_pred = logr.predict(test_data[:, 0:N])
    test_labels = test_data[:, -1]

    # A zero difference means the prediction matched the label.
    matches = ((test_pred - test_labels) == 0).nonzero()[0]
    percent_correct = len(matches) / float(len(test_pred))

    print('Logit Percent correct: %0.3f' % percent_correct)
    return percent_correct
def pairwise_regress(thetas, cats, ii, jj, reg_type, reg_param):
    """Fit a two-class logistic regression separating category ii from jj.

    Rows of ``thetas`` whose entry in ``cats`` equals ``ii`` get target 0,
    rows whose entry equals ``jj`` get target 1; all other rows are ignored.
    ``reg_type`` is passed as the penalty and ``reg_param`` as C.

    Returns the fitted LogisticRegression instance.
    """
    def rows_in_category(wanted):
        # Positions in `cats` whose category equals `wanted`.
        return [idx for (idx, cat) in enumerate(cats) if cat == wanted]

    set0 = rows_in_category(ii)
    set1 = rows_in_category(jj)

    X0 = thetas[set0, :]
    X1 = thetas[set1, :]

    # Stack the ii rows first, then the jj rows, with matching 0/1 targets.
    X = np.vstack([X0, X1])
    Y = np.concatenate([np.zeros(X0.shape[0]), np.ones(X1.shape[0])])

    lr = LogisticRegression(penalty=reg_type, tol=1e-11, C=reg_param)
    lr.fit(X, Y)
    return lr
def pairwise_regress(thetas, cats, ii, jj, reg_type, reg_param):
    """Train a binary logistic classifier: category ``ii`` (0) vs ``jj`` (1).

    ``cats`` supplies one category per row of ``thetas``.  Only the rows
    belonging to ``ii`` or ``jj`` are used.  The classifier is created with
    ``penalty=reg_type`` and ``C=reg_param`` and returned after fitting.
    """
    # One pass over `cats` per category, ii first and jj second.
    negative_rows, positive_rows = [
        [idx for (idx, cat) in enumerate(cats) if cat == wanted]
        for wanted in (ii, jj)
    ]

    negatives = thetas[negative_rows, :]
    targets_neg = np.zeros(negatives.shape[0])

    positives = thetas[positive_rows, :]
    targets_pos = np.ones(positives.shape[0])

    design = np.vstack([negatives, positives])
    targets = np.concatenate([targets_neg, targets_pos])

    model = LogisticRegression(penalty=reg_type, tol=1e-11, C=reg_param)
    model.fit(design, targets)
    return model
def main():
    """Learn per-pitch on/off classifiers from a note matrix, then generate.

    Steps:
      1. Load ``notes`` (duration x n_pitches) and ``min_pitch`` via
         ``get_notes()``.
      2. Walk through the notes, recording for every pitch the activation
         state seen before each step together with that pitch's on/off
         value, and advance the state with ``activation``.
      3. Fit one LogisticRegression per pitch on data expanded by
         ``repeat_classes``.
      4. Generate three times the original duration by repeatedly predicting
         each pitch from the current state, adding a little uniform noise
         after every step, and hand the result to ``write``.
    """
    notes, min_pitch = get_notes()
    duration, n_pitches = notes.shape

    # data[i, t] is the state vector seen by pitch i at step t;
    # classes[i, t] is whether pitch i was on at step t.
    data = np.zeros((n_pitches, duration, n_pitches))
    classes = np.zeros((n_pitches, duration))
    state = np.zeros((n_pitches))

    for t, pitches in enumerate(notes):
        # Every pitch is given the same pre-update state for this step.
        data[:, t] = state
        classes[:, t] = pitches
        for i, on in enumerate(pitches):
            state[i] = activation(state[i], on)

    models = []
    for i in xrange(n_pitches):
        train_data, target = repeat_classes(data[i, :], classes[i, :], 10, 10)
        model = LogisticRegression('l2', tol=0.1)
        model.fit(train_data, target)
        models.append(model)

    duration *= 3
    predicted = np.zeros((n_pitches, duration))
    state = np.zeros((n_pitches))
    state[21] = 1  # seed: start with pitch index 21 active

    for t in xrange(1, duration):
        # Progress indicator, overwritten in place on the terminal.
        sys.stdout.write('%d\r' % t)
        sys.stdout.flush()

        # NOTE(review): current_state looks like a view onto `state`, so the
        # in-place updates below are visible to the models evaluated later in
        # the same step, whereas training always used the pre-update state.
        # Confirm this is intended.
        current_state = state.reshape((1, state.shape[0]))
        for i, model in enumerate(models):
            on = model.predict(current_state)[0]
            predicted[i, t] = on
            state[i] = activation(state[i], on)

        # Uniform noise in [-0.05, 0.05) on every state entry.
        state += (np.random.random((n_pitches)) - .5) * .1

    write(predicted, min_pitch)
# Build the label vector: one entry per row of copedata, looked up in dstask
# using the row's first two columns as keys/indices.
Y=N.zeros(len(copedata))
for x in range(len(copedata)):
    Y[x]=dstask[copedata[x,0]][copedata[x,1]]

# Feature matrix: the rows of melodic_mix flagged with usedata == 1.
# presumably these rows line up one-to-one with Y -- verify against the
# code that builds usedata and copedata.
X=melodic_mix[usedata==1,:]

# Leave-one-out cross-validation; loo is iterated as (train, test) index pairs.
loo = LeaveOneOut(len(Y))
predclass=N.zeros(len(Y))
for train, test in loo:
    X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
    clf=LogisticRegression(C=0.1,penalty='l1')
    clf.fit(X_train,y_train)
    # Store the prediction for the held-out sample(s).
    predclass[test]=clf.predict(X_test)
print 'Mean accuracy=%0.3f'%N.mean(predclass==Y)

# Permutation test: shuffle the labels nruns times and repeat the
# leave-one-out procedure on each shuffled labelling.
# NOTE(review): the earlier comment said 1000 randomizations, but nruns is 500.
# NOTE(review): this loop uses C=1 / 'l2' while the real-label run above uses
# C=0.1 / 'l1'; confirm the difference is intentional.
# NOTE(review): randacc is allocated but not filled in the code visible here;
# presumably the accuracy is stored after the inner loop -- verify.
nruns=500
randacc=N.zeros(nruns)
for r in range(nruns):
    # Shuffles Y in place, so the original label order is not kept.
    N.random.shuffle(Y)
    for train, test in loo:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        clf=LogisticRegression(C=1,penalty='l2')
        clf.fit(X_train,y_train)
        predclass[test]=clf.predict(X_test)
def _fit_logistic_regression(X, y, C=0.1):
    """Create a LogisticRegression with the given ``C``, fit it and return it.

    ``X`` and ``y`` are passed straight to ``fit``; every other classifier
    setting is left at the library default.
    """
    model = LogisticRegression(C=C)
    model.fit(X, y)
    return model
# Advance the running counter (its purpose is not visible in this excerpt).
ctr = ctr + 1

# Build the label vector: one entry per row of copedata, looked up in dstask
# using the row's first two columns as keys/indices.
Y = N.zeros(len(copedata))
for x in range(len(copedata)):
    Y[x] = dstask[copedata[x, 0]][copedata[x, 1]]

# Feature matrix: the rows of melodic_mix flagged with usedata == 1.
# presumably these rows line up one-to-one with Y -- verify against the
# code that builds usedata and copedata.
X = melodic_mix[usedata == 1, :]

# Leave-one-out cross-validation; loo is iterated as (train, test) index pairs.
loo = LeaveOneOut(len(Y))
predclass = N.zeros(len(Y))
for train, test in loo:
    X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
    clf = LogisticRegression(C=0.1, penalty='l1')
    clf.fit(X_train, y_train)
    # Store the prediction for the held-out sample(s).
    predclass[test] = clf.predict(X_test)
print 'Mean accuracy=%0.3f' % N.mean(predclass == Y)

# Permutation test: shuffle the labels nruns times and repeat the
# leave-one-out procedure on each shuffled labelling.
# NOTE(review): the earlier comment said 1000 randomizations, but nruns is 500.
# NOTE(review): this loop uses C=1 / 'l2' while the real-label run above uses
# C=0.1 / 'l1'; confirm the difference is intentional.
# NOTE(review): randacc is allocated but not filled in the code visible here;
# presumably the accuracy is stored after the inner loop -- verify.
nruns = 500
randacc = N.zeros(nruns)
for r in range(nruns):
    # Shuffles Y in place, so the original label order is not kept.
    N.random.shuffle(Y)
    for train, test in loo:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        clf = LogisticRegression(C=1, penalty='l2')
        clf.fit(X_train, y_train)
        predclass[test] = clf.predict(X_test)
import json

import matplotlib.pyplot as plt
import numpy as np
from scikits.learn.linear_model import LogisticRegression

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color, sigmoid

# Parent directory of the one containing this script.
path = get_path(__file__) + '/..'

L = list(L)

# Column 2 of the data view is the target; columns 3 onward are the features.
X = d.view()[:,3:]
y = d.view()[:,2]

# C sets the regularization strength of the classifier (it is not a
# learning rate): the smaller C is, the stronger the regularization.
C = 0.1
classifier = LogisticRegression(C=C, penalty='l2')

# Fit on the first 1e5 rows only.
# NOTE(review): y is indexed with two subscripts here; that only works if
# d.view()[:,2] stays two-dimensional -- confirm against the type of d.
training_rows = range(int(1e5))
training_features = X[training_rows,:]
training_targets = y[training_rows,:]
classifier.fit(training_features, training_targets)

# Pair the fitted weights with the entries of L from position 3 onward, then
# add the intercept under its own key.
feature_names = L[3:]
feature_weights = list(classifier.coef_[0])
coef_dict = dict(zip(feature_names, feature_weights))
coef_dict['intercept'] = classifier.intercept_[0]

output_file = '{0}/data/coefs_train_0-1e5.json'.format(path)
with open(output_file, 'w') as f:
    json.dump(coef_dict, f, indent=4, sort_keys=True)
# License: BSD Style.

import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

# Compare how many coefficients stay non-zero under an L1 versus an L2
# penalty when fitting logistic regression to the iris dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Set regularization parameter
C = 0.1

classifier_l1_LR = LogisticRegression(C=C, penalty='l1')
classifier_l1_LR.fit(X, y)

classifier_l2_LR = LogisticRegression(C=C, penalty='l2')
classifier_l2_LR.fit(X, y)

hyperplane_coefficients_l1_LR = classifier_l1_LR.coef_[:]
hyperplane_coefficients_l2_LR = classifier_l2_LR.coef_[:]

# hyperplane_coefficients_l1_LR contains zeros due to the
# L1 sparsity inducing norm
pct_non_zeros_l1_LR = (hyperplane_coefficients_l1_LR != 0).mean() * 100
pct_non_zeros_l2_LR = (hyperplane_coefficients_l2_LR != 0).mean() * 100

print("Percentage of non zeros coefficients (L1) : %f" % pct_non_zeros_l1_LR)
print("Percentage of non zeros coefficients (L2) : %f" % pct_non_zeros_l2_LR)
def logistic_regression(state_matrix, teacher_matrix):
    """Fit one logistic-regression readout per teacher (output) column.

    For each column ``i`` of ``teacher_matrix`` a LogisticRegression
    (tol=0.05) is fitted with ``state_matrix`` as input, and its coefficient
    vector is stored in row ``i`` of ``self.output_weights``.  The column
    index is printed after each fit as a progress indicator.

    NOTE(review): ``self`` is not a parameter of this function, so it must
    come from an enclosing scope -- confirm where this is defined.
    """
    # Iterate over the outputs (teacher columns).  Using the number of state
    # columns here would index teacher_matrix out of range, or skip outputs,
    # whenever the two widths differ.
    for i in range(teacher_matrix.shape[1]):
        clf = LogisticRegression(tol=0.05)
        clf.fit(state_matrix, teacher_matrix[:, i])
        self.output_weights[i, :] = clf.coef_
        print(i)
import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

# Report, for several values of C, the percentage of exactly-zero
# coefficients obtained with an L1 and with an L2 penalty.

# FIXME: the iris dataset has only 4 features!
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Set regularization parameter
for C in (0.1, 1, 10):
    clf_l1_LR = LogisticRegression(C=C, penalty='l1')
    clf_l1_LR.fit(X, y)
    coef_l1_LR = clf_l1_LR.coef_[:]

    clf_l2_LR = LogisticRegression(C=C, penalty='l2')
    clf_l2_LR.fit(X, y)
    coef_l2_LR = clf_l2_LR.coef_[:]

    # coef_l1_LR contains zeros due to the
    # L1 sparsity inducing norm
    sparsity_l1_LR = (coef_l1_LR == 0).mean() * 100
    sparsity_l2_LR = (coef_l2_LR == 0).mean() * 100

    print("C=%f" % C)
    print("Sparsity with L1 penalty: %f" % sparsity_l1_LR)
    print("Sparsity with L2 penalty: %f" % sparsity_l2_LR)