import matplotlib.pyplot as plt
from matplotlib import pylab
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scikits.learn.pca import PCA

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color


# Project root: get_path(__file__) with '/..' appended.
path = get_path(__file__) + '/..'
L = list(L)

# Remove trial_id, obsnum and is alert (the first three columns).
# The notation changes here from D to X.
X = d.view()[:,3:]

pca = PCA(n_components=30)
pca.fit(X)

# explained_variance_ratio_ holds fractions of the total variance; scale the
# running sum to percent so the values agree with the title and the
# '% of total variation' axis label.
cumulative_pct = 100.0 * np.cumsum(pca.explained_variance_ratio_)
# Number the components from 1 so the first component is not drawn at x=0.
component_numbers = range(1, len(cumulative_pct) + 1)

plt.plot(component_numbers, cumulative_pct, marker='o')
ax = plt.gca()
plt.title('Cumulative percentage of total variation explained by principal components')
ax.set_xlabel('Principal component')
ax.set_ylabel('% of total variation')
# 'papertype' only applies to PostScript output, so it is not passed when
# writing a PDF (newer Matplotlib versions reject it).
plt.savefig('{0}/plots/pca-variation-explained.pdf'.format(path), format='pdf')
plt.cla()

# Project the (uncentred) data onto the first three principal components.
# NOTE(review): this assumes components_ stores one component per column
# (n_features x n_components) -- confirm against the installed PCA version.
W = pca.components_[:,0:3]
X_p = np.dot(X,W)
import itertools as it

import matplotlib.pyplot as plt
import numpy as np

from src.data_interface import d, L
from src.utils import get_path

# Column labels as a list, so that L.index(name) can be used for lookups.
L = list(L)

# Output location: get_path(__file__) with '/..' appended, plus a file-name
# template taking (path, first label, second label).
path = get_path(__file__) + '/..'
savepath_template = '{0}/plots/scatterplots/{1}-{2}.pdf'

D = d.view()

# Draw 400 row indices uniformly from 1..200000 (both ends inclusive) and
# pull out the corresponding rows as the plotting sample.
# NOTE(review): presumably D has more than 200000 rows -- confirm.
rows = np.random.randint(1, 200000 + 1, size=400)
data = D[rows]

def is_alert_colors(is_alert):
    """Return the plot colour for an alert flag.

    'blue' is returned when ``is_alert`` compares equal to 1; every other
    value maps to 'red'.
    """
    if is_alert == 1:
        return 'blue'
    return 'red'

# One colour per sampled row, derived from the 'IsAlert' column.  This is
# built as a real list because it is passed to plt.scatter on every loop
# iteration; a lazy map object (Python 3) could only be consumed once.
colors = [is_alert_colors(flag) for flag in data[:,L.index('IsAlert')]]

# Alternative hand-picked subset, kept for reference:
#features = ['P6', 'V1', 'V3', 'V6']
# NOTE(review): this starts at column 4, while elsewhere in this file the
# feature matrix is taken from column 3 onward (d.view()[:,3:]) -- confirm
# that skipping L[3] is intentional.
features = L[4:]

# Scatter every unordered pair of features against each other, coloured by
# alert state.
# NOTE(review): as written, the loop never sets a y label, saves the figure
# or clears the axes, so each pair is drawn on top of the previous one --
# confirm whether the rest of the loop body is missing.
for f1, f2 in it.combinations(features, 2):
    idx1, idx2 = L.index(f1), L.index(f2)
    plt.title('Feature {0} vs {1}'.format(f1, f2), {'size': 20})
    plt.scatter(data[:,idx1], data[:,idx2], c=colors)
    plt.gca().set_xlabel(f1, {'size': 18})
import json

import matplotlib.pyplot as plt
import numpy as np
from scikits.learn.linear_model import LogisticRegression

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color, sigmoid


# Project root: get_path(__file__) with '/..' appended.
path = get_path(__file__) + '/..'
L = list(L)

# Features are every column from index 3 onward; the target is column 2.
X = d.view()[:,3:]
y = d.view()[:,2]

# Regularisation parameter of the logistic regression (not a learning rate);
# in scikit-learn, smaller values of C mean stronger regularisation.
C = 0.1

classifier = LogisticRegression(C=C, penalty='l2')

# Train on the first 1e5 rows only.
training_rows = range(int(1e5))

# y is indexed by row only: when d is a plain 2-D ndarray, y is 1-D and
# y[training_rows,:] would raise IndexError.  Row-only indexing selects the
# same entries for both ndarray and matrix inputs.
classifier.fit(X[training_rows,:], y[training_rows])

# Map each feature label to its fitted weight.  Values are converted to
# plain Python floats so that json can serialise them regardless of the
# NumPy scalar type.
coef_dict = dict(zip(L[3:], [float(w) for w in classifier.coef_[0]]))
coef_dict['intercept'] = float(classifier.intercept_[0])

with open('{0}/data/coefs_train_0-1e5.json'.format(path), 'w') as f:
    json.dump(coef_dict, f, indent=4, sort_keys=True)