Example #1
0
    def findPoems(self,saveToFile=False):
        """
        Score every stored comment with the learning model and report likely poems.

        Each entry of self.myComments.myComments is converted to features with
        TimesComments.features and scored by self.myModel.predictNewPoem.
        Entries scoring above 0.2 are printed together with their permalink.
        When saveToFile is true, a header line is appended to the file
        'poems' + self.date and every reported entry is appended after it.
        """
        poemPath = None
        if saveToFile:
            poemPath = 'poems' + self.date
            with open(poemPath, 'a') as out:
                out.write('Comment poems from %s\n\n' % self.date)

        for entry in self.myComments.myComments:
            text = entry['comment']
            score = self.myModel.predictNewPoem(TimesComments.features(text))

            # skip everything that does not have a 20%+ chance of being a poem
            if not score > 0.2:
                continue

            print('Possible poem w/ probability=%f\n\n' % score)
            print(text)
            permalink = '%s?comments#permid=%s' % (entry['url'], entry['id'])
            print('\n' + permalink)
            print('\n\n\n--------\n\n\n')

            if not saveToFile:
                continue

            # the file is reopened in append mode for each reported entry
            with open(poemPath, 'a') as out:
                out.write(text + '\n')
                out.write('\nPossible poem w/ probability=%f\n' % score)
                out.write(permalink + '\n')
                out.write('\n\n\n--------\n\n\n\n')
Example #2
0
def classifyData(date=None):
    """
    Retrain the learning model, optionally labelling new comments from 'date' first.

    The pickled training set is read from the file 'trainingset'; if it cannot
    be loaded, an empty training set is used instead. When 'date' is given,
    comments from TimesComments(date) that are not already in the training set
    are wrapped in LabeledData and added, and the training set is written back
    to 'trainingset'.

    date -- passed to TimesComments; None skips the labelling step
    Returns a LearningModel built from the final training set
    """

    # load training data
    # NOTE(review): the pickle file is opened in text mode here and below; the
    # modes are left unchanged so existing 'trainingset' files keep loading.
    try:
        with open('trainingset','r') as myFile:
            trainingSet = pickle.load(myFile)
    except Exception:
        # best-effort fallback, but Ctrl-C and SystemExit are no longer swallowed
        print('Creating new training set')
        trainingSet = []

    # if requested, add new examples from 'date' to the training set
    if date is not None:
        # a model is only handed to LabeledData once more than 100 examples exist
        if len(trainingSet) > 100:
            myModel = LearningModel(trainingSet)
        else:
            myModel = None

        # comments already in the training set when labelling starts
        commentList = [labeled.comment for labeled in trainingSet]
        myComments = TimesComments(date)

        for i, comment in enumerate(myComments.iterComments(), 1):
            if comment[0] in commentList:
                print('comment already found!')
                continue

            print(str(i) + '/' + str(len(myComments.myComments)))
            newpt = LabeledData(comment[0],comment[1],comment[2],myModel)
            # if we have a trained learning model, only add manually classified points
            if newpt.predProb is None or newpt.manuallyClassified:
                trainingSet.append(newpt)

        with open('trainingset','w') as myFile:
            pickle.dump(trainingSet,myFile)

    return LearningModel(trainingSet)
    def findPoems(self, saveToFile=False):
        """
        Score every stored comment with a new LearningModel and collect likely poems.

        A LearningModel() is created and stored on self.myModel. Each entry of
        self.myComments.myComments is converted to features with
        TimesComments.features and scored by predictNewPoem. Entries scoring
        above 0.2 are collected, printed when self.verbose is set, and, when
        saveToFile is true, appended to "poems<date>.txt" (which is first
        overwritten with a header line).

        Returns a list of (probability, comment, permalink + newline) tuples.
        """
        self.myModel = LearningModel()

        poemPath = None
        if saveToFile:
            poemPath = "poems%s.txt" % self.date
            with open(poemPath, "w") as out:
                out.write("Comment poems from %s\n\n" % self.date)

        foundPoems = []
        for entry in self.myComments.myComments:
            text = entry["comment"]
            score = self.myModel.predictNewPoem(TimesComments.features(text))

            # skip everything that does not have a 20%+ chance of being a poem
            if not score > 0.2:
                continue

            permalink = "%s?comments#permid=%s" % (entry["url"], entry["id"])
            foundPoems.append((score, text, permalink + "\n"))

            if self.verbose:
                print("Possible poem w/ probability=%f\n\n" % score)
                print(text)
                print("\n" + permalink)
                print("\n\n\n--------\n\n\n")

            if saveToFile:
                # the file is reopened in append mode for each collected entry
                with open(poemPath, "a") as out:
                    out.write(text + "\n")
                    out.write("\nPossible poem w/ probability=%f\n" % score)
                    out.write(permalink + "\n")
                    out.write("\n\n\n--------\n\n\n\n")

        print("Found %d poems!\n\n" % len(foundPoems))
        return foundPoems
Example #4
0
 # Load the pickled training set from the file 'trainingset'; if anything
 # goes wrong while opening or unpickling, start from an empty list.
 # NOTE(review): the bare except also hides errors other than a missing
 # file -- confirm that is intended.
 try:
     with open('trainingset','r') as myFile:
         trainingSet = pickle.load(myFile)
 except:
     print 'Creating new training set'
     trainingSet = []
     
 # if requested, add new examples from 'date' to the training set
 if trainNewExamples:
     # a model is only built once more than 100 examples exist; otherwise
     # None is passed to LabeledData below
     if len(trainingSet) > 100:
         myModel = LearningModel(trainingSet)
     else:
         myModel = None
         
     # comment attribute of every existing example, used for the duplicate check
     commentList = map(lambda x: x.comment, trainingSet)
     myComments = TimesComments(date,restore)
     
     # i is a 1-based counter used only for the progress printout
     i = 0
     for comment in myComments.iterComments():
         i += 1
         if comment[0] in commentList:
             print 'comment already found!'
         else:
             print str(i) + '/' + str(len(myComments.myComments))
             newpt = LabeledData(comment[0],comment[1],comment[2],myModel)
             # if we have a trained learning model, only add manually classified points
             if newpt.predProb == None or newpt.manuallyClassified:
                 trainingSet.append(newpt)
     
     # write the (possibly extended) training set back to 'trainingset'
     with open('trainingset','w') as myFile:
         pickle.dump(trainingSet,myFile)