import matplotlib.pyplot as plt
from matplotlib import pylab
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scikits.learn.pca import PCA

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color

# Fit a 30-component PCA on the data columns, save a plot of the cumulative
# explained-variance ratio, then project the data with a three-column slice
# of the fitted components.

L = list(L)
path = get_path(__file__) + '/..'

# Drop the first three columns (trial_id, obsnum and is alert).
# From here on the data matrix is called X instead of D.
X = d.view()[:, 3:]

pca = PCA(n_components=30)
pca.fit(X)

# Running total of the per-component explained-variance ratios.
cumulative_ratio = np.cumsum(pca.explained_variance_ratio_)
plt.plot(cumulative_ratio, marker='o')

ax = plt.gca()
ax.set_xlabel('Principal component')
# NOTE(review): the plotted values are the cumulative sum of
# explained_variance_ratio_; confirm they are really percentages as the
# label states.
ax.set_ylabel('% of total variation')
plt.title('Cumulative percentage of total variation explained by principal components')

pdf_path = '{0}/plots/pca-variation-explained.pdf'.format(path)
plt.savefig(pdf_path, papertype='a4', format='pdf')

# Clear the current axes so later plotting starts from a blank canvas.
plt.cla()

# NOTE(review): this keeps columns 0-2 of components_, which are the first
# three principal axes only if components_ is laid out as
# (n_features, n_components) -- confirm for the installed library version.
W = pca.components_[:, :3]
X_p = np.dot(X, W)
import itertools as it

import matplotlib.pyplot as plt
import numpy as np

from src.data_interface import d, L
from src.utils import get_path

# Draw pairwise feature scatter plots for a random sample of 400 rows,
# colouring each point by its 'IsAlert' value.

L = list(L)
D = d.view()
path = get_path(__file__) + '/..'
savepath_template = '{0}/plots/scatterplots/{1}-{2}.pdf'

# Sample 400 row indices (with replacement) from [1, 200000].
# randint's upper bound is exclusive, so 200001 reproduces the inclusive
# range of the deprecated np.random.random_integers(1, 200000, 400).
# NOTE(review): assumes D has more than 200000 rows -- confirm.
rows = np.random.randint(1, 200001, 400)
data = D[rows, :]


def is_alert_colors(is_alert):
    """Return 'blue' when ``is_alert`` equals 1 and 'red' otherwise."""
    return 'blue' if is_alert == 1 else 'red'


# Materialise the colours as a list: on Python 3 ``map`` returns a lazy
# iterator, which cannot be used as the ``c`` argument of plt.scatter.
colors = list(map(is_alert_colors, data[:, L.index('IsAlert')]))

#features = ['P6', 'V1', 'V3', 'V6']
# NOTE(review): starts at index 4 -- confirm that the first four columns
# are all meant to be excluded from the plots.
features = L[4:]

# One scatter plot per unordered pair of features.
for f1, f2 in it.combinations(features, 2):
    idx1, idx2 = L.index(f1), L.index(f2)
    plt.title('Feature {0} vs {1}'.format(f1, f2), {'size': 20})
    plt.scatter(data[:, idx1], data[:, idx2], c=colors)
    plt.gca().set_xlabel(f1, {'size': 18})
import json

import matplotlib.pyplot as plt
import numpy as np
from scikits.learn.linear_model import LogisticRegression

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color, sigmoid

# Fit an L2-penalised logistic regression on the first 1e5 rows and write
# the fitted coefficients (keyed by column name) plus the intercept to a
# JSON file.

path = get_path(__file__) + '/..'
L = list(L)

# Column 2 is the target; columns 3 onwards are the predictors.
X = d.view()[:, 3:]
y = d.view()[:, 2]

# C is LogisticRegression's regularisation parameter: the inverse of the
# regularisation strength (smaller values regularise more strongly).
# It is not a learning rate.
C = 0.1
classifier = LogisticRegression(C=C, penalty='l2')

training_rows = range(int(1e5))
# ``y`` is a single-column slice, so it is indexed along its first axis
# only: a second index (``y[rows, :]``) raises IndexError when ``y`` is
# one-dimensional, while ``y[rows]`` selects the same rows either way.
classifier.fit(X[training_rows, :], y[training_rows])

# Pair each predictor's column name with its fitted weight.
coef_dict = dict(zip(L[3:], list(classifier.coef_[0])))
coef_dict['intercept'] = classifier.intercept_[0]

with open('{0}/data/coefs_train_0-1e5.json'.format(path), 'w') as f:
    json.dump(coef_dict, f, indent=4, sort_keys=True)