# Project-local modules: helperClass provides the Helper utility class and
# reader handles per-user data loading and output. libsvm's Python bindings
# (svmutil) supply svm_problem, svm_parameter, svm_train, svm_predict and the
# LINEAR kernel constant.
import os
import re
import sys
import json
import pickle
from pprint import pprint

from svmutil import *

import helperClass
import reader


def main():
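	# Multi-source pipeline: parse per-user data from each data source, build
	# one bag-of-words feature vector per (user, source), train one SVM per
	# (category, source) pair, and evaluate on the held-out test set.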
	extractData = False  # set True to re-parse the raw training data
	extractTestingData = False  # set True to re-parse the raw testing data
	helper = helperClass.Helper()
	path_to_training_directory = "data/Train"
	path_to_testing_directory = "data/Test"
	path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"
	path_to_testing_labels = "data/Test/GroundTruth/groundTruth.txt"
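	# Parsed user data is cached in userData.pkl / userTestingData.pkl so the
	# slow extraction step only has to run once; flip the flags above to
	# rebuild the caches.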

	if extractData:
		with open(path_to_training_labels, "r") as f:
			truths = f.read().split("\n")

		print "Extracting user training data..."

		userData = []
		for i in range(1, len(truths)):
			userData.append(reader.readData(i, helper, path_to_training_directory))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i)/(len(truths)-1)*100), i, len(truths)-1))
			sys.stdout.flush()
		print "\r"

		pickle.dump(userData, open("userData.pkl", "wb"))
	else:
		userData = pickle.load(open("userData.pkl", "rb"))


	allWords = set()
	userWords = {}
	
	print "Extracting unique words from user data..."
	for i in range(0, len(userData)):
		userWords[i] = {}
		for j in userData[i]:
			# Collect words per user and per data source, growing the global
			# vocabulary as we go.
			userWords[i][j] = helper.getUserWords(userData[i], j)
			allWords = allWords.union(userWords[i][j])
		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
		sys.stdout.flush()

	print "\n"+str(len(allWords))+" unique words found.\n"
	# print allWords
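	# The sorted vocabulary becomes the shared feature list, so every feature
	# vector indexes into the same word ordering.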
	helper.setFeatureList(sorted(allWords))
	
	with open('allWords.txt', 'w') as outfile:
		json.dump(sorted(allWords), outfile)

	# One list of feature vectors per data source, in user order.
	featureVectors = {}
	print "Generating feature vectors..."

	for j in userData[0]:
		featureVectors[j] = []

	for i in range(0, len(userData)):
		for j in userData[i]:
			featureVectors[j].append(helper.getFeatureVector(userWords[i][j]))
		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
		sys.stdout.flush()

	print "\r"
	labelVectors = helper.getLabelVectors(path_to_training_labels)

	print "Training SVM models..."
	params = svm_parameter()
	params.C = 10
	params.kernel_type = LINEAR

	models = {}


	# Create one model per (category, data source) pair. userData is a list of
	# per-user objects, each holding data from the three sources.

	for i in range(0, len(labelVectors)):
		# Loop 1-20 (Each category)
		models[i] = {}
		for j in userData[0]:
			# Loop through 1-3 (each data source)
			problem = svm_problem(labelVectors[i], featureVectors[j])
			models[i][j] = svm_train(problem, params)


	pprint(models)

	if extractTestingData:

		print "Extracting user testing data..."
		# User IDs are parsed from ground-truth filenames such as "U12gnd.txt".
		userIdPattern = re.compile(r"U(\d*?)gnd\.txt")
		userIDs = userIdPattern.findall(" ".join(os.listdir(path_to_testing_directory+"/GroundTruth")))
		userIDs = map(int, userIDs)

		userData = []
		for i in range(0, len(userIDs)):
			userData.append(reader.readData(userIDs[i], helper, path_to_testing_directory))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/(len(userIDs))*100), i+1, len(userIDs)))
			sys.stdout.flush()
		print "\r"

		pickle.dump(userData, open("userTestingData.pkl", "wb"))
	else:
		userData = pickle.load(open("userTestingData.pkl", "rb"))


	print "Generating feature vectors..."


	# Feature vectors: one array per data source, in the same user order as
	# the test set.
	featureVectors = {}
	for j in userData[0]:
		featureVectors[j] = []

	for i in range(0, len(userData)):
		for j in userData[i]:
			# Re-extract words from the *testing* data; the training-time
			# userWords belong to a different user set.
			featureVectors[j].append(helper.getFeatureVector(helper.getUserWords(userData[i], j)))
		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
		sys.stdout.flush()

	print "\r"
	labelVectors = helper.getLabelVectors(path_to_testing_labels)
	
	avgAcc = 0.0
	# Predicted labels are collected per data source, across all categories.
	labelContainer = {}
	for j in models[0]:
		labelContainer[j] = []

	print "Classifying dataset..."
	for i in range(0, len(models)):
		for j in models[i]:
			p_labels, p_accs, p_vals = svm_predict(labelVectors[i], featureVectors[j], models[i][j])
			labelContainer[j].append(p_labels)
			avgAcc = avgAcc+p_accs[0]

	avgAcc = avgAcc/(len(models)*len(models[0]))
	print "Average accuracy: "+str(avgAcc)+"%"

	for source in labelContainer:
		reader.saveOutput(labelContainer[source], 'data/outputLabels-'+str(source)+".csv")
		
	reader.getSaK()
	pickle.dump(labelContainer, open("outputLabels.pkl", "wb"))
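

# Illustrative sketch only (not called anywhere): one plausible way the
# bag-of-words feature vectors above could be built, assuming
# Helper.getFeatureVector uses a binary presence/absence scheme over the
# sorted vocabulary passed to setFeatureList. The real logic lives in
# helperClass.Helper; this function name is hypothetical.
def exampleFeatureVector(featureList, words):
	# One slot per vocabulary word: 1.0 if the user produced the word.
	wordSet = set(words)
	return [1.0 if w in wordSet else 0.0 for w in featureList]
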

# Alternative single-source pipeline for the "multi-view-online-testing" set:
# one feature-vector list per user and one SVM per category, rather than one
# per (category, source) pair. Defined under its own name so it does not
# shadow main() above.
def mainOnlineTesting():
    extractData = False
    extractTestingData = False
    helper = helperClass.Helper()
    path_to_training_directory = "data/Train"
    # path_to_testing_directory = "data/Test"
    path_to_testing_directory = "multi-view-online-testing"
    path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"
    path_to_testing_labels = "multi-view-online-testing/GroundTruth/groundTruth.txt"
    # path_to_testing_labels = "data/Test/GroundTruth/groundTruth.txt"

    if extractData:
        truths = open(path_to_training_labels, "r").read().split("\n")

        print "Extracting user training data..."

        userData = []
        for i in range(1, len(truths)):
            userData.append(reader.readData(i, helper, path_to_training_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i) / (len(truths) - 1) * 100), i, len(truths) - 1))
            sys.stdout.flush()
        print "\r"

        pickle.dump(userData, open("userData.pkl", "wb"))
    else:
        userData = pickle.load(open("userData.pkl", "rb"))

    allWords = set()
    userWords = {}

    print "Extracting unique words from user data..."
    for i in range(0, len(userData)):
        userWords[i] = helper.getUserWords(userData[i])
        allWords = allWords.union(userWords[i])
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()

    print "\n" + str(len(allWords)) + " unique words found.\n"
    # print allWords
    helper.setFeatureList(sorted(allWords))

    with open("allWords.txt", "w") as outfile:
        json.dump(sorted(allWords), outfile)

    featureVectors = []
    print "Generating feature vectors..."

    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(userWords[i]))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()

    print "\r"
    labelVectors = helper.getLabelVectors(path_to_training_labels)

    print "Training SVM models..."
    params = svm_parameter()
    params.C = 10
    params.kernel_type = LINEAR

    models = {}
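    # One SVM per category: labelVectors presumably holds one label list per
    # category (cf. the per-category loop in main above).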
    for i in range(0, len(labelVectors)):
        problem = svm_problem(labelVectors[i], featureVectors)
        models[i] = svm_train(problem, params)

    if extractTestingData:
        print "Extracting user testing data..."
        # User IDs are parsed from ground-truth filenames such as "U12gnd.txt".
        userIdPattern = re.compile(r"U(\d*?)gnd\.txt")
        userIDs = userIdPattern.findall(" ".join(os.listdir(path_to_testing_directory + "/GroundTruth")))
        userIDs = map(int, userIDs)

        userData = []
        for i in range(0, len(userIDs)):
            userData.append(reader.readData(userIDs[i], helper, path_to_testing_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / (len(userIDs)) * 100), i + 1, len(userIDs)))
            sys.stdout.flush()
        print "\r"

        pickle.dump(userData, open("userTestingData.pkl", "wb"))
    else:
        userData = pickle.load(open("userTestingData.pkl", "rb"))

    print "Generating feature vectors..."
    featureVectors = []
    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(helper.getUserWords(userData[i])))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()

    print "\r"
    labelVectors = helper.getLabelVectors(path_to_testing_labels)

    avgAcc = 0.0
    labelContainer = []
    print "Classifying dataset..."
    for i in range(0, len(models)):
        p_labels, p_accs, p_vals = svm_predict(labelVectors[i], featureVectors, models[i])
        labelContainer.append(p_labels)
        avgAcc = avgAcc + p_accs[0]

    avgAcc = avgAcc / (len(models))
    print "Average accuracy: " + str(avgAcc) + "%"

    reader.saveOutput(labelContainer, "data/outputLabels.csv")
    pickle.dump(labelContainer, open("outputLabels.pkl", "wb"))
    reader.getSaK()
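

# Run the multi-source pipeline by default; swap in mainOnlineTesting() to
# evaluate against the online test set instead.
if __name__ == "__main__":
    main()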