def classifyData(date=None):
    """
    Retrain the learning model, optionally adding new examples first.

    The pickled training set is read from the file 'trainingset' in the
    current working directory. If that file is missing, unreadable or
    empty, an empty training set is used instead.

    When 'date' is given, the comments produced by TimesComments(date) are
    turned into LabeledData points and merged into the training set, which
    is then written back to 'trainingset'. Comments whose text already
    appears in the training set are skipped.
    NOTE(review): LabeledData presumably prompts the user for a label when
    needed -- confirm against its definition.

    Parameters:
        date -- passed straight to TimesComments; None (the default) means
                "retrain only, do not collect new examples".

    Returns the LearningModel built from the final training set.
    """
    # A preliminary model (used to pre-classify new comments) is only built
    # once the training set holds more than this many examples.
    minExamplesForModel = 100

    # load training data
    # NOTE: pickle can execute arbitrary code while loading; 'trainingset'
    # must only ever be a file written by this program.
    try:
        # pickle data is binary, so the file is opened in binary mode
        with open('trainingset', 'rb') as myFile:
            trainingSet = pickle.load(myFile)
    except (IOError, OSError, EOFError):
        # Only a missing/unreadable/empty file starts a fresh training set.
        # Any other failure propagates so that an existing file is not
        # silently replaced by an empty one further down.
        print('Creating new training set')
        trainingSet = []

    # if requested, add new examples from 'date' to the training set
    if date is not None:
        if len(trainingSet) > minExamplesForModel:
            myModel = LearningModel(trainingSet)
        else:
            myModel = None

        # Built once for constant-time duplicate checks.
        # Assumes the 'comment' attribute is hashable (e.g. a string).
        knownComments = set(point.comment for point in trainingSet)

        myComments = TimesComments(date)
        for i, comment in enumerate(myComments.iterComments(), 1):
            if comment[0] in knownComments:
                print('comment already found!')
                continue

            # progress indicator: position / total number of comments
            print(str(i) + '/' + str(len(myComments.myComments)))
            newpt = LabeledData(comment[0], comment[1], comment[2], myModel)

            # if we have a trained learning model, only add manually
            # classified points
            if newpt.predProb is None or newpt.manuallyClassified:
                trainingSet.append(newpt)

        with open('trainingset', 'wb') as myFile:
            pickle.dump(trainingSet, myFile)

    return LearningModel(trainingSet)
trainingSet = pickle.load(myFile) except: print 'Creating new training set' trainingSet = [] # if requested, add new examples from 'date' to the training set if trainNewExamples: if len(trainingSet) > 100: myModel = LearningModel(trainingSet) else: myModel = None commentList = map(lambda x: x.comment, trainingSet) myComments = TimesComments(date,restore) i = 0 for comment in myComments.iterComments(): i += 1 if comment[0] in commentList: print 'comment already found!' else: print str(i) + '/' + str(len(myComments.myComments)) newpt = LabeledData(comment[0],comment[1],comment[2],myModel) # if we have a trained learning model, only add manually classified points if newpt.predProb == None or newpt.manuallyClassified: trainingSet.append(newpt) with open('trainingset','w') as myFile: pickle.dump(trainingSet,myFile) myModel = LearningModel(trainingSet)