Esempio n. 1
0
def classifyData(date=None):
    """
    Retrain the learning model from the pickled training set and return it.

    The training set is read from the file 'trainingset' (relative to the
    current working directory). If it cannot be loaded for any ordinary
    reason, an empty training set is used instead.

    date -- optional. When not None, the comments produced by
            TimesComments(date) are wrapped in LabeledData (categorized with
            user input), the retained points are appended to the training
            set, and the training set is written back to 'trainingset'.

    Returns a LearningModel built from the (possibly extended) training set.
    """
    # A helper model is only built while labelling when the existing
    # training set holds more than this many examples.
    modelThreshold = 100

    # load training data
    try:
        # Binary mode: pickle streams are bytes. This is mandatory on
        # Python 3 and avoids newline translation on Windows.
        # NOTE(review): pickle.load can execute arbitrary code, so the
        # 'trainingset' file must come from a trusted source.
        with open('trainingset', 'rb') as myFile:
            trainingSet = pickle.load(myFile)
    except Exception:
        # Deliberately best-effort (missing, empty or unreadable file), but
        # unlike a bare 'except:' this lets KeyboardInterrupt/SystemExit
        # propagate.
        print('Creating new training set')
        trainingSet = []

    # if requested, add new examples from 'date' to the training set
    if date is not None:
        if len(trainingSet) > modelThreshold:
            myModel = LearningModel(trainingSet)
        else:
            myModel = None

        # Comment texts already in the training set. A set gives constant-time
        # lookups and, unlike map(), can be tested repeatedly on Python 3.
        # Assumes the comment text is hashable (e.g. a string) -- TODO confirm.
        knownComments = set(point.comment for point in trainingSet)
        myComments = TimesComments(date)

        for i, comment in enumerate(myComments.iterComments(), 1):
            if comment[0] in knownComments:
                print('comment already found!')
                continue

            print(str(i) + '/' + str(len(myComments.myComments)))
            newpt = LabeledData(comment[0], comment[1], comment[2], myModel)
            # if we have a trained learning model, only add manually classified points
            if newpt.predProb is None or newpt.manuallyClassified:
                trainingSet.append(newpt)
                # Remember the text so a repeat later in this same batch is
                # not labelled and stored a second time.
                knownComments.add(comment[0])

        # Persist the training set; the file is rewritten even when no new
        # point was added.
        with open('trainingset', 'wb') as myFile:
            pickle.dump(trainingSet, myFile)

    return LearningModel(trainingSet)
Esempio n. 2
0
         trainingSet = pickle.load(myFile)
 except:
     print 'Creating new training set'
     trainingSet = []
     
 # if requested, add new examples from 'date' to the training set
 if trainNewExamples:
     if len(trainingSet) > 100:
         myModel = LearningModel(trainingSet)
     else:
         myModel = None
         
     commentList = map(lambda x: x.comment, trainingSet)
     myComments = TimesComments(date,restore)
     
     i = 0
     for comment in myComments.iterComments():
         i += 1
         if comment[0] in commentList:
             print 'comment already found!'
         else:
             print str(i) + '/' + str(len(myComments.myComments))
             newpt = LabeledData(comment[0],comment[1],comment[2],myModel)
             # if we have a trained learning model, only add manually classified points
             if newpt.predProb == None or newpt.manuallyClassified:
                 trainingSet.append(newpt)
     
     with open('trainingset','w') as myFile:
         pickle.dump(trainingSet,myFile)
 
 myModel = LearningModel(trainingSet)