def findPoems(self,saveToFile=False): """ Find and optionally save poems to file using learning model """ if saveToFile: with open('poems'+self.date,'a') as f: f.write('Comment poems from %s\n\n' % self.date) for commentProperties in self.myComments.myComments: comment = commentProperties['comment'] #comment = re.sub('\n+', '\n', comment) predProb = self.myModel.predictNewPoem(TimesComments.features(comment)) # display everything with 20%+ chance of being a poem if predProb > 0.2: print 'Possible poem w/ probability=%f\n\n' % predProb print comment print '\n%s?comments#permid=%s' % (commentProperties['url'],commentProperties['id']) print '\n\n\n--------\n\n\n' if saveToFile: with open('poems'+self.date,'a') as f: f.write(comment+'\n') f.write('\nPossible poem w/ probability=%f\n' % predProb) f.write('%s?comments#permid=%s\n' % (commentProperties['url'],commentProperties['id'])) f.write('\n\n\n--------\n\n\n\n')
def classifyData(date=None): """ Retrain the learning model and save to file, optionally also categorize new comments from 'date' with user input Returns trained model """ # load training data try: with open('trainingset','r') as myFile: trainingSet = pickle.load(myFile) except: print 'Creating new training set' trainingSet = [] # if requested, add new examples from 'date' to the training set if date != None: if len(trainingSet) > 100: myModel = LearningModel(trainingSet) else: myModel = None commentList = map(lambda x: x.comment, trainingSet) myComments = TimesComments(date) i = 0 for comment in myComments.iterComments(): i += 1 if comment[0] in commentList: print 'comment already found!' else: print str(i) + '/' + str(len(myComments.myComments)) newpt = LabeledData(comment[0],comment[1],comment[2],myModel) # if we have a trained learning model, only add manually classified points if newpt.predProb == None or newpt.manuallyClassified: trainingSet.append(newpt) with open('trainingset','w') as myFile: pickle.dump(trainingSet,myFile) myModel = LearningModel(trainingSet) return myModel
def findPoems(self, saveToFile=False): """ Find and optionally save poems to file using learning model """ self.myModel = LearningModel() if saveToFile: with open("poems%s.txt" % self.date, "w") as f: f.write("Comment poems from %s\n\n" % self.date) foundPoems = [] for commentProperties in self.myComments.myComments: comment = commentProperties["comment"] # comment = re.sub('\n+', '\n', comment) predProb = self.myModel.predictNewPoem(TimesComments.features(comment)) # display everything with 20%+ chance of being a poem if predProb > 0.2: foundPoems.append( (predProb, comment, "%s?comments#permid=%s\n" % (commentProperties["url"], commentProperties["id"])) ) if self.verbose: print "Possible poem w/ probability=%f\n\n" % predProb print comment print "\n%s?comments#permid=%s" % (commentProperties["url"], commentProperties["id"]) print "\n\n\n--------\n\n\n" if saveToFile: with open("poems%s.txt" % self.date, "a") as f: f.write(comment + "\n") f.write("\nPossible poem w/ probability=%f\n" % predProb) f.write("%s?comments#permid=%s\n" % (commentProperties["url"], commentProperties["id"])) f.write("\n\n\n--------\n\n\n\n") print "Found %d poems!\n\n" % len(foundPoems) return foundPoems
# Function interior (enclosing `def` is outside this chunk):
# `trainNewExamples`, `date`, and `restore` are parameters/locals bound there.
# Load the previously pickled training set; start fresh when the file is
# missing or unreadable.
# NOTE(review): bare `except:` also swallows KeyboardInterrupt/SystemExit —
# consider narrowing to (IOError, EOFError, pickle.PickleError).
try:
    with open('trainingset','r') as myFile:
        trainingSet = pickle.load(myFile)
except:
    print 'Creating new training set'
    trainingSet = []
# if requested, add new examples from 'date' to the training set
if trainNewExamples:
    # only use a trained model once there is enough labeled data
    if len(trainingSet) > 100:
        myModel = LearningModel(trainingSet)
    else:
        myModel = None
    # comments already present in the training set (linear-scan membership test)
    commentList = map(lambda x: x.comment, trainingSet)
    myComments = TimesComments(date,restore)
    i = 0
    for comment in myComments.iterComments():
        i += 1
        if comment[0] in commentList:
            print 'comment already found!'
        else:
            # progress indicator: current index / total comment count
            print str(i) + '/' + str(len(myComments.myComments))
            newpt = LabeledData(comment[0],comment[1],comment[2],myModel)
            # if we have a trained learning model, only add manually classified points
            if newpt.predProb == None or newpt.manuallyClassified:
                trainingSet.append(newpt)
    # persist the (possibly extended) training set
    with open('trainingset','w') as myFile:
        pickle.dump(trainingSet,myFile)