#Script to classify Iris flowers using logistic regression

import numpy as np
import multiclass_logistic_reg as mclf
import time

#Species label as it appears in the data file -> column index in the one-hot target matrix
CLASSES = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
}
#Number of distinct species, i.e. the width of the target matrix
NCLASSES = len(CLASSES)
#Number of single-epoch training passes to run
EPOCHS = 50

#Read every row as strings (header line skipped) because the last column holds text labels
DATA = np.loadtxt('Iris.csv', delimiter=',', dtype=str, skiprows=1)

#Columns 0-4 become the numeric inputs, column 5 is the species label
#NOTE(review): if column 0 of Iris.csv is a row Id rather than a measurement,
#it is being fed to the model as a feature - confirm against the file
X = DATA[:, :5].astype(float)
C = DATA[:, 5]

#One-hot encode the labels: each row gets a single 1 in the column CLASSES assigns to its label
label_columns = np.array([CLASSES[name] for name in C], dtype=int)
T = np.zeros((X.shape[0], NCLASSES))
T[np.arange(X.shape[0]), label_columns] = 1

#Build the classifier from the number of input columns and the number of classes
#(presumably its input and output sizes - confirm in multiclass_logistic_reg)
n_features = X.shape[1]
clf = mclf.classifier(n_features, NCLASSES)

#Train with SGD one epoch at a time, EPOCHS times, evaluating on the same data after each pass
for _epoch in range(EPOCHS):
    clf.SGD(X, T, batch_size=10, epochs=1, eta=0.0001)
    #The return value is not captured, so any reporting has to come from evalData itself
    clf.evalData(X, T)
    #Pause for one second between epochs
    time.sleep(1)
#Perform PCA on the data (step currently disabled: the lines below are commented out)
#print("Performing PCA on the data...")
#Xtr = dim_reduce(Xtr, NDIM)
#Xte = dim_reduce(Xte, NDIM)
#print("Done")

#Try out logistic regression on it
#NOTE(review): Xtr and Xte are read below, but nothing earlier in this script assigns them
#(only X, C and T are built from Iris.csv), so as written the first use raises NameError -
#confirm where the train/test split is supposed to come from

#First normalize data to prevent overflows
#Both arrays are divided by the maximum of Xtr so they share one scale
MAX = np.max(Xtr)
Xtr = Xtr/MAX
Xte = Xte/MAX

print("Attempting Logistic Regression on the training data and checking error rate...")
#NOTE(review): this rebinds clf, discarding the classifier trained above, and passes a literal 40
#where the earlier call passed NCLASSES - confirm 40 is the intended value for this data
clf = mclf.classifier(Xtr.shape[1], 40)

#Rebinds the module-level EPOCHS (50 above) for the loop that follows
EPOCHS = 1500

#Arrays to store the data
#One slot per epoch; the loop below writes costs[i], and presumably the accuracy arrays as well
costs = np.zeros(EPOCHS)
train_acc = np.zeros(EPOCHS)
test_acc = np.zeros(EPOCHS)

for i in range(EPOCHS):
    clf.SGD(Xtr, Ttr, batch_size=50, epochs=1, eta=0.002)
    teacc = clf.evalData(Xte, Tte)
    tracc = clf.evalData(Xtr, Ttr)
    cost = clf.costf(Xte, Tte)

    costs[i] = cost