from preprocess import splitColumns
import numpy as np
from NBCClassifier import featureVector
from cvFunctions import getTrainData
#from lr import lr
from svm import svm
#from nbc import nbc
import math
#from plot import plot

# Defaults
# Raw string: keeps the Windows-style backslash literally instead of relying
# on '\y' being an unrecognised escape sequence (same resulting value).
trainData = r'Data\yelp_data.csv'
# NOTE(review): `percentage` is not read anywhere in the visible code; the
# getLines call below passes a literal 100 instead -- confirm the intent.
percentage = 100

# Get data as array
# NOTE(review): no visible import provides getLines -- confirm where it is
# expected to come from.
train = getLines(trainData, 100)

# The notes below were kept from the original file. Nothing at module level
# builds `words` or `wordList`; the matching steps (getWordList, dropping the
# first 100 entries) are performed per fold inside changeDepth.
#
#  words is now a list of (word, frequency) tuples, ordered by descending
#  order of frequency
#
#  wordList is a list of all unique words in the training data
#
#  The 100 most frequent words have been removed from words
#
#  wordList is now a list of the words in desc order of frequency

cv = []
#cv = [[[0 for z in range(4001)] for y in range(200)] for x in range(10)]
#cv = np.array(cv);
    def changeDepth(self, depths):
        """Cross-validate the tree-based classifiers once per entry of ``depths``.

        The rows returned by ``getLines(self.trainData, 100)`` are shuffled
        once and the first 2000 of them are cut into 10 folds of 200 rows.
        For every depth, each fold is used once as the test set while the
        other nine folds form the training pool, which is passed through
        ``getTrainData(..., 0.25)``. The per-fold results of ``decisionTree``,
        ``randomForest`` and ``bagging`` (reported as zero-one loss) are
        summarised as mean, standard deviation and standard error
        (std / sqrt(10)).

        The three summaries are printed and then appended to ``self.file``.

        Args:
            depths: iterable of values; each one is passed as the third
                argument to decisionTree / randomForest / bagging
                (presumably a maximum tree depth -- confirm against those
                helpers).

        Returns:
            None.
        """
        folds = 10       # number of cross-validation folds
        fold_size = 200  # rows per fold
        w = 1000         # number of dictionary words used as features

        # Get data as array and shuffle the rows once; every depth reuses
        # the same folds.
        train = np.array(getLines(self.trainData, 100))
        np.random.shuffle(train)
        cv = [train[i * fold_size:(i + 1) * fold_size] for i in range(folds)]

        # One list per classifier, holding one entry per depth.
        # The SVM evaluation (svm(trainfv, testfv)) is disabled, so no SVM
        # statistics are computed or reported.
        model_keys = ("dt", "rf", "bag")
        avg = dict((key, []) for key in model_keys)
        std_dev = dict((key, []) for key in model_keys)
        std_err = dict((key, []) for key in model_keys)

        # NOTE(review): the fold preparation below does not use the depth but
        # is repeated for every depth; hoisting it is only safe if
        # getTrainData is deterministic -- confirm before changing.
        for depth in depths:
            losses = dict((key, []) for key in model_keys)

            for i in range(folds):
                # Fold i is held out; the remaining folds are concatenated
                # row by row into the training pool.
                testnew = cv[i]
                trainnew = [row
                            for j, fold in enumerate(cv) if j != i
                            for row in fold]
                trainDataset = getTrainData(trainnew, 0.25)

                _, x_train, y_train = splitColumns(trainDataset)
                _, x_test, y_test = splitColumns(testnew)

                # Pre-processing data
                x_train = preprocess(x_train)
                x_test = preprocess(x_test)

                # Creating dictionary from x_train
                words, _ = getWordList(x_train)

                # Removing the first 100 entries of words (the original
                # comment calls these the most frequent words). Done in
                # place with a single slice deletion; unlike 100 pop(0)
                # calls this does not raise when fewer than 100 entries
                # exist.
                del words[:100]
                wordList = [word for word, _ in words]

                # Forming feature vectors from the first w dictionary words.
                # Each call gets its own slice, as in the original.
                trainfv, _, _ = featureVector(wordList[:w], x_train, y_train)
                testfv, _, _ = featureVector(wordList[:w], x_test, y_test)

                # Evaluation order (dt, rf, bag) is kept from the original.
                losses["dt"].append(decisionTree(trainfv, testfv, depth))
                losses["rf"].append(randomForest(trainfv, testfv, depth))
                losses["bag"].append(bagging(trainfv, testfv, depth))

            for key in model_keys:
                # np.std is called with its default ddof=0.
                spread = np.std(losses[key])
                avg[key].append(np.mean(losses[key]))
                std_dev[key].append(spread)
                std_err[key].append(spread / math.sqrt(folds))

        # Report order differs from evaluation order: dt, bagging, forest.
        report_order = (
            ("dt", "\n 1. Decision Tree"),
            ("bag", "\n 2. Bagging"),
            ("rf", "\n 3. Random forest"),
        )
        sections = (
            ("\n AVERAGE ZERO ONE LOSS", avg),
            ("\n STANDARD DEVIATION ZERO ONE LOSS", std_dev),
            ("\n STANDARD ERROR ZERO ONE LOSS", std_err),
        )

        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid on Python 3.
        for _, stats in sections:
            for key, _ in report_order:
                print(stats[key])

        # The context manager closes the file even if a write fails.
        with open(self.file, "a+") as f:
            for heading, stats in sections:
                f.write(heading)
                for key, label in report_order:
                    f.write(label)
                    f.write(str(stats[key]))