def findCharacterWords(author1, author2):
    """Compare the pickled models of two authors via ``mostDistinct``.

    Loads the row names with ``util.loadRowNames()``, unpickles
    ``<util.modelDir><author>_sematic.p`` for each author and hands both
    models to ``mostDistinct``.

    Args:
        author1: Name used to build the path of the first model file.
        author2: Name used to build the path of the second model file.

    Returns:
        Whatever ``mostDistinct`` returns for the two models.

    Raises:
        IOError: If either model file cannot be opened.
    """
    rownames = util.loadRowNames()
    print('Row name loaded')

    # Pickles are binary data, so open them in 'rb' mode (matching the 'wb'
    # used when writing pickles) and close each handle as soon as it is read.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(util.modelDir + author1 + '_sematic.p', 'rb') as modelFile:
        model1 = pickle.load(modelFile)
    with open(util.modelDir + author2 + '_sematic.p', 'rb') as modelFile:
        model2 = pickle.load(modelFile)

    # find the most distinct words
    # NOTE(review): 5 is presumably the number of words to report -- confirm
    # against mostDistinct's signature.
    return mostDistinct(model1, model2, rownames, 5, dumpName='example.p')
def trainWeight(train, trainY, maxIter = 100, alpha = .1):
    """Train a per-author, per-word weight table and pickle it to disk.

    Every (author, word) pair, for authors in ``util.authors`` and words in
    ``util.loadRowNames()``, starts at ``rand()``. Then, for ``maxIter``
    passes over ``train``, each text is classified and the weight of every
    word of the text under the *predicted* author is raised by ``alpha``
    when the prediction equals the label, or lowered by ``alpha`` otherwise.

    The resulting table is written to ``trainedWeight.p`` (relative to the
    current working directory). Nothing is returned.

    Args:
        train: Indexable sequence of texts (each must support ``.split()``).
        trainY: Labels, indexed in parallel with ``train``.
        maxIter: Number of passes over the training data.
        alpha: Amount added to / subtracted from a weight per update.

    Raises:
        KeyError: If ``classify`` returns a label that is not in
            ``util.authors``, or a text contains a word that is not in the
            row names (no weight exists for it).
    """
    rownames = util.loadRowNames()

    # Random starting weight for every (author, word) pair.
    trainedWeight = dict()
    for author in util.authors:
        authorWeight = dict()
        for word in rownames:
            authorWeight[word] = rand()
        trainedWeight[author] = authorWeight

    for _ in range(maxIter):
        for idx, text in enumerate(train):
            # NOTE(review): the keyword `wetight` looks like a typo for
            # `weight`, and `trainedWeight` (the table being updated here) is
            # never passed to classify, so the updates may not influence the
            # prediction. Left unchanged because classify/getTrainedWeight
            # are defined elsewhere -- confirm against their definitions.
            rlt = classify(text, wetight = getTrainedWeight)

            # Reward the predicted author's words on a hit, punish on a miss.
            step = alpha if rlt == trainY[idx] else -alpha
            predictedWeight = trainedWeight[rlt]
            for word in text.split():
                predictedWeight[word] += step

    # Binary mode for pickle; `with` guarantees the file is flushed/closed.
    with open('trainedWeight.p', 'wb') as weightFile:
        pickle.dump(trainedWeight, weightFile)