def buildClassifier(eventName):
    """Train and save the classifier for ``eventName`` from its URL files.

    Positive URLs are read from ``<eventName>Pos.txt`` and negative URLs
    from ``<eventName>Neg.txt`` with ``eu.readFileLines``; both are handed
    to ``eu.train_SaveClassifier`` together with the target file name
    ``<eventName>_NBClassifier.p``.
    """
    # Read positives first, then negatives, as two independent inputs.
    positives = eu.readFileLines(eventName + 'Pos.txt')
    negatives = eu.readFileLines(eventName + 'Neg.txt')

    eu.train_SaveClassifier(positives, negatives, eventName + '_NBClassifier.p')
 def buildClassifierFolder(self,posFile,negFolder,classifierFileName):
     """Load a pickled classifier, or train one from a folder of negative files.

     The pickle at ``classifierFileName`` is tried first and, when it loads,
     stored in ``self.classifier``.  If loading fails for any ordinary reason
     the classifier is built with ``train_SaveClassifier`` instead, using the
     lines of ``posFile`` as positives and the leading lines of each ``.txt``
     file in ``negFolder`` as negatives.

     Args:
         posFile: path handed to ``readFileLines`` for the positive examples.
         negFolder: directory whose ``.txt`` entries are read as negatives.
         classifierFileName: pickle path to load from; it is also passed on
             to ``train_SaveClassifier`` (presumably where the trained
             classifier is saved -- confirm against that helper).

     Raises:
         ZeroDivisionError: training is needed but ``negFolder`` contains no
             ``.txt`` files.
     """
     try:
         # NOTE(review): pickle.load can execute arbitrary code; only point
         # classifierFileName at files written by this application.
         # The with-block closes the file even when unpickling fails.
         with open(classifierFileName,"rb") as classifierFile:
             self.classifier = pickle.load(classifierFile)
     except Exception:
         # Any ordinary load failure means "no usable cache", so train
         # instead.  Catching Exception rather than using a bare except lets
         # KeyboardInterrupt and SystemExit propagate.
         posURLs = readFileLines(posFile)
         negFiles = [os.path.join(negFolder,f) for f in os.listdir(negFolder) if f.endswith(".txt")]
         negFilesURLs = [readFileLines(f) for f in negFiles]

         # Per-file cap: the number of positives spread evenly over the
         # negative files.
         num = int(round(1.0* len(posURLs)/len(negFiles)))

         # One sub-list per negative file, truncated to the cap; slicing a
         # shorter list simply yields all of it.
         # NOTE(review): the result is nested (one list per file), not
         # flattened -- confirm train_SaveClassifier expects that shape.
         negURLs = [nfu[:num] for nfu in negFilesURLs]
         self.classifier = train_SaveClassifier(posURLs, negURLs, classifierFileName)
Example #3
0
 def buildClassifierFolder(self,posFile,negFolder,classifierFileName):
     """Load a pickled classifier, or train one from a folder of negative files.

     The pickle at ``classifierFileName`` is tried first and, when it loads,
     stored in ``self.classifier``.  If loading fails for any ordinary reason
     the classifier is built with ``train_SaveClassifier`` instead, using the
     lines of ``posFile`` as positives and the leading lines of each ``.txt``
     file in ``negFolder`` as negatives.

     Args:
         posFile: path handed to ``readFileLines`` for the positive examples.
         negFolder: directory whose ``.txt`` entries are read as negatives.
         classifierFileName: pickle path to load from; it is also passed on
             to ``train_SaveClassifier`` (presumably where the trained
             classifier is saved -- confirm against that helper).

     Raises:
         ZeroDivisionError: training is needed but ``negFolder`` contains no
             ``.txt`` files.
     """
     try:
         # NOTE(review): pickle.load can execute arbitrary code; only point
         # classifierFileName at files written by this application.
         # The with-block closes the file even when unpickling fails.
         with open(classifierFileName,"rb") as classifierFile:
             self.classifier = pickle.load(classifierFile)
     except Exception:
         # Any ordinary load failure means "no usable cache", so train
         # instead.  Catching Exception rather than using a bare except lets
         # KeyboardInterrupt and SystemExit propagate.
         posURLs = readFileLines(posFile)
         negFiles = [os.path.join(negFolder,f) for f in os.listdir(negFolder) if f.endswith(".txt")]
         negFilesURLs = [readFileLines(f) for f in negFiles]

         # Per-file cap: the number of positives spread evenly over the
         # negative files.
         num = int(round(1.0* len(posURLs)/len(negFiles)))

         # One sub-list per negative file, truncated to the cap; slicing a
         # shorter list simply yields all of it.
         # NOTE(review): the result is nested (one list per file), not
         # flattened -- confirm train_SaveClassifier expects that shape.
         negURLs = [nfu[:num] for nfu in negFilesURLs]
         self.classifier = train_SaveClassifier(posURLs, negURLs, classifierFileName)
 def buildClassifier(self,posFile,negFile,classifierFileName):
     """Load a pickled classifier, or train one from positive/negative files.

     The pickle at ``classifierFileName`` is tried first and, when it loads,
     stored in ``self.classifier``.  If loading fails for any ordinary reason
     the classifier is built with ``train_SaveClassifier`` from the lines of
     ``posFile`` and ``negFile`` and the result is stored instead.

     Args:
         posFile: path handed to ``readFileLines`` for the positive examples.
         negFile: path handed to ``readFileLines`` for the negative examples.
         classifierFileName: pickle path to load from; it is also passed on
             to ``train_SaveClassifier`` (presumably where the trained
             classifier is saved -- confirm against that helper).
     """
     try:
         # NOTE(review): pickle.load can execute arbitrary code; only point
         # classifierFileName at files written by this application.
         # The with-block closes the file even when unpickling fails.
         with open(classifierFileName,"rb") as classifierFile:
             self.classifier = pickle.load(classifierFile)
     except Exception:
         # Any ordinary load failure means "no usable cache", so train
         # instead.  Catching Exception rather than using a bare except lets
         # KeyboardInterrupt and SystemExit propagate.
         posURLs = readFileLines(posFile)
         negURLs = readFileLines(negFile)
         self.classifier = train_SaveClassifier(posURLs, negURLs, classifierFileName)
Example #5
0
 def buildClassifier(self,posFile,negFile,classifierFileName):
     """Load a pickled classifier, or train one from positive/negative files.

     The pickle at ``classifierFileName`` is tried first and, when it loads,
     stored in ``self.classifier``.  If loading fails for any ordinary reason
     the classifier is built with ``train_SaveClassifier`` from the lines of
     ``posFile`` and ``negFile`` and the result is stored instead.

     Args:
         posFile: path handed to ``readFileLines`` for the positive examples.
         negFile: path handed to ``readFileLines`` for the negative examples.
         classifierFileName: pickle path to load from; it is also passed on
             to ``train_SaveClassifier`` (presumably where the trained
             classifier is saved -- confirm against that helper).
     """
     try:
         # NOTE(review): pickle.load can execute arbitrary code; only point
         # classifierFileName at files written by this application.
         # The with-block closes the file even when unpickling fails.
         with open(classifierFileName,"rb") as classifierFile:
             self.classifier = pickle.load(classifierFile)
     except Exception:
         # Any ordinary load failure means "no usable cache", so train
         # instead.  Catching Exception rather than using a bare except lets
         # KeyboardInterrupt and SystemExit propagate.
         posURLs = readFileLines(posFile)
         negURLs = readFileLines(negFile)
         self.classifier = train_SaveClassifier(posURLs, negURLs, classifierFileName)