from src.data_interface import d, L
from src.utils import get_path

# Pairwise feature scatter plots: draws a random sample of rows, colours each
# point by its 'IsAlert' value and writes one PDF per pair of features.
#
# NOTE(review): `np`, `plt` and `it` are used below but are not imported in
# the visible part of this file -- presumably numpy, matplotlib.pyplot and
# itertools are imported earlier; confirm.

L = list(L)
D = d.view()

path = get_path(__file__) + '/..'
savepath_template = '{0}/plots/scatterplots/{1}-{2}.pdf'

# Sample 400 row indices (with replacement) from the inclusive range
# 1..200000. `randint` has an exclusive upper bound, hence 200001; it replaces
# the deprecated `random_integers`, whose upper bound was inclusive.
rows = np.random.randint(1, 200001, 400)
data = D[rows, :]


def is_alert_colors(is_alert):
    """Return the marker colour for an IsAlert value: 'blue' for 1, else 'red'."""
    return 'blue' if is_alert == 1 else 'red'


# Build a real list: on Python 3 `map` returns a one-shot iterator, which
# cannot be passed as `c=` to every scatter call in the loop below.
colors = [is_alert_colors(flag) for flag in data[:, L.index('IsAlert')]]

#features = ['P6', 'V1', 'V3', 'V6']
features = L[4:]

for f1, f2 in it.combinations(features, 2):
    idx1, idx2 = L.index(f1), L.index(f2)
    plt.title('Feature {0} vs {1}'.format(f1, f2), {'size': 20})
    plt.scatter(data[:, idx1], data[:, idx2], c=colors)
    plt.gca().set_xlabel(f1, {'size': 18})
    plt.gca().set_ylabel(f2, {'size': 18})
    # `papertype` is omitted: it only ever applied to PostScript output and
    # newer matplotlib releases reject it.
    plt.savefig(savepath_template.format(path, f1, f2), format='pdf')
    # Clear the axes so the next pair starts from an empty plot.
    plt.cla()
# PCA diagnostics: cumulative explained variance, a 2-D scatter of the first
# two principal components and 3-D scatters of the first three.
# `pca`, `X`, `d`, `L`, `path` and `bool_to_color` are defined outside this
# section.
pca.fit(X)

# The axis is labelled in percent, so scale the cumulative ratios (0..1) to
# 0..100 and number the components from 1 to match the other plot labels.
explained_pct = 100 * np.cumsum(pca.explained_variance_ratio_)
plt.plot(np.arange(1, len(explained_pct) + 1), explained_pct, marker='o')
ax = plt.gca()
plt.title('Cumulative percentage of total variation explained by principal components')
ax.set_xlabel('Principal component')
ax.set_ylabel('% of total variation')
# `papertype` is omitted: it only ever applied to PostScript output and newer
# matplotlib releases reject it.
plt.savefig('{0}/plots/pca-variation-explained.pdf'.format(path), format='pdf')
plt.cla()

# NOTE(review): assumes `pca` is a scikit-learn PCA, whose `components_` has
# shape (n_components, n_features) with one principal axis per ROW -- confirm.
# Under that layout the projection matrix is the first three rows transposed;
# slicing columns (`[:, 0:3]`) would pick feature coordinates instead.
W = pca.components_[0:3].T
X_p = np.dot(X, W)

# `randint` excludes the upper bound, so every index is a valid row of X
# (the inclusive `random_integers` could yield X.shape[0] itself).
rnd_rows = np.random.randint(0, X.shape[0], 120)
# Materialise the colours as a list; a Python 3 `map` object is a one-shot
# iterator and is not a valid `c=` argument.
colors = [bool_to_color(flag) for flag in d.view()[rnd_rows, L.index('IsAlert')]]
plt.scatter(X_p[rnd_rows, 0], X_p[rnd_rows, 1], c=colors)
plt.title('Scatter plot of 1. and 2. principal component')
ax = plt.gca()
ax.set_xlabel('1. Principal component')
ax.set_ylabel('2. Principal component')
plt.savefig('{0}/plots/scatter-principal-components.pdf'.format(path), format='pdf')
plt.cla()

# NOTE(review): the body of this loop may continue beyond the visible source.
for i in range(8):
    rnd_rows = np.random.randint(0, X.shape[0], 120)
    # Recompute the colours for the freshly drawn rows so that each point is
    # coloured by its own IsAlert value rather than by the previous sample's.
    colors = [bool_to_color(flag) for flag in d.view()[rnd_rows, L.index('IsAlert')]]
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X_p[rnd_rows, 0], X_p[rnd_rows, 1], X_p[rnd_rows, 2], c=colors)
    plt.title('Scatter of 1., 2. and 3. principal component')
    ax = plt.gca()