def findCharacterWords(author1, author2):
	"""Return mostDistinct() applied to the pickled models of two authors.

	Each model is unpickled from ``util.modelDir + <author> + '_sematic.p'``.
	The row names from ``util.loadRowNames()`` are forwarded to
	``mostDistinct`` together with the literal ``5`` and
	``dumpName='example.p'``.

	Args:
		author1: name used to build the first model's file path.
		author2: name used to build the second model's file path.

	Returns:
		Whatever ``mostDistinct`` returns for the two loaded models.

	Raises:
		IOError/OSError: if either model file cannot be opened.
	"""
	rownames = util.loadRowNames()
	print('Row name loaded')

	# Load both models through context managers so the file handles are
	# closed promptly, and read in binary mode to match how pickles are
	# written elsewhere in this file ('wb').
	# NOTE: pickle can execute arbitrary code while loading; only use model
	# files from a trusted location.
	models = []
	for author in (author1, author2):
		with open(util.modelDir + author + '_sematic.p', 'rb') as modelFile:
			models.append(pickle.load(modelFile))

	# find the most distinct words
	# NOTE(review): the 5 is presumably the number of words to report --
	# confirm against mostDistinct's signature.
	return mostDistinct(models[0], models[1], rownames, 5, dumpName = 'example.p')
def trainWeight(train, trainY, maxIter = 100, alpha = .1):
	"""Iteratively adjust per-author word weights and pickle the result.

	A weight is created for every (author, word) pair, with authors taken
	from ``util.authors`` and words from ``util.loadRowNames()``, each
	initialised with ``rand()``. For ``maxIter`` passes over the training
	texts, every text is classified; for each whitespace-separated word in
	the text, the weight of the *predicted* author is raised by ``alpha``
	when the prediction equals the expected label and lowered by ``alpha``
	otherwise. The final weights are written to ``'trainedWeight.p'`` in the
	current working directory.

	Args:
		train: sequence of training texts (strings).
		trainY: expected labels, indexed in parallel with ``train``.
		maxIter: number of passes over the training data.
		alpha: amount added to / subtracted from a weight per update.

	Returns:
		None. The weights are only persisted to disk.

	Raises:
		KeyError: if the predicted label is not in ``util.authors`` or a
			word of a text is not among the row names.
	"""
	rownames = util.loadRowNames()
	trainedWeight = {
		author: {word: rand() for word in rownames}
		for author in util.authors
	}

	for _ in range(maxIter):
		for idx, text in enumerate(train):
			# NOTE(review): the original passed ``wetight = getTrainedWeight``,
			# neither of which is defined in this function; this assumes
			# classify() takes the weights being trained via a ``weight``
			# keyword -- confirm against classify's signature.
			rlt = classify(text, weight = trainedWeight)
			# The outcome is the same for every word of this text, so decide
			# the signed step once instead of once per word.
			step = alpha if rlt == trainY[idx] else -alpha
			for word in text.split():
				trainedWeight[rlt][word] += step

	with open('trainedWeight.p', 'wb') as weightFile:
		pickle.dump(trainedWeight, weightFile)