def buildClassifier(eventName):
    """Train and save a classifier for the event called *eventName*.

    The three file names are all derived from ``eventName``:

    * ``<eventName>Pos.txt`` -- read with ``eu.readFileLines`` as the
      positive URL list,
    * ``<eventName>Neg.txt`` -- read with ``eu.readFileLines`` as the
      negative URL list,
    * ``<eventName>_NBClassifier.p`` -- handed to
      ``eu.train_SaveClassifier`` as the classifier file name.

    Nothing is returned; the result of ``eu.train_SaveClassifier`` is
    discarded.
    """
    model_path = eventName + '_NBClassifier.p'
    positive_path = eventName + 'Pos.txt'
    negative_path = eventName + 'Neg.txt'

    positive_urls = eu.readFileLines(positive_path)
    negative_urls = eu.readFileLines(negative_path)

    eu.train_SaveClassifier(positive_urls, negative_urls, model_path)
def buildClassifierFolder(self, posFile, negFolder, classifierFileName):
    """Load a cached classifier, or train one from a folder of negatives.

    If ``classifierFileName`` can be opened and unpickled, the unpickled
    object is stored on ``self.classifier`` and nothing else happens.
    Otherwise the positive URLs are read from ``posFile``, the negative
    URLs are read from every ``*.txt`` file directly inside ``negFolder``,
    and both are passed to ``train_SaveClassifier`` together with
    ``classifierFileName``; its return value becomes ``self.classifier``.

    Args:
        posFile: path of the file holding the positive URLs.
        negFolder: directory whose ``.txt`` files hold negative URLs.
        classifierFileName: path of the pickled classifier cache.

    Raises:
        ValueError: if ``negFolder`` contains no ``.txt`` file (previously
            this surfaced as an unexplained ``ZeroDivisionError``).
    """
    try:
        # NOTE(security): unpickling can execute arbitrary code; only point
        # classifierFileName at files written by this application.
        # The with-block closes the handle even when pickle.load fails.
        with open(classifierFileName, "rb") as classifierFile:
            self.classifier = pickle.load(classifierFile)
        return
    except Exception:
        # Missing, unreadable or corrupt cache: fall through and retrain.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        pass

    posURLs = readFileLines(posFile)
    negFiles = [
        os.path.join(negFolder, name)
        for name in os.listdir(negFolder)
        if name.endswith(".txt")
    ]
    if not negFiles:
        raise ValueError(
            "no .txt files with negative URLs found in %r" % (negFolder,)
        )

    # Per-file cap: positives divided evenly over the negative files, so the
    # negatives kept in total are at most about as many as the positives.
    num = int(round(1.0 * len(posURLs) / len(negFiles)))

    # Slicing clamps to the list length, so short files are kept whole.
    # NOTE(review): this is a list of per-file lists (one entry per negative
    # file), not one flat list of URLs -- confirm train_SaveClassifier
    # expects that shape.
    negURLs = [readFileLines(path)[:num] for path in negFiles]

    self.classifier = train_SaveClassifier(posURLs, negURLs, classifierFileName)
def buildClassifier(self, posFile, negFile, classifierFileName):
    """Load a cached classifier, or train one from two URL files.

    If ``classifierFileName`` can be opened and unpickled, the unpickled
    object is stored on ``self.classifier`` and nothing else happens.
    Otherwise the lines of ``posFile`` and ``negFile`` are read with
    ``readFileLines`` and passed to ``train_SaveClassifier`` together with
    ``classifierFileName``; its return value becomes ``self.classifier``.

    Args:
        posFile: path of the file holding the positive URLs.
        negFile: path of the file holding the negative URLs.
        classifierFileName: path of the pickled classifier cache.
    """
    try:
        # NOTE(security): unpickling can execute arbitrary code; only point
        # classifierFileName at files written by this application.
        # The with-block closes the handle even when pickle.load fails.
        with open(classifierFileName, "rb") as classifierFile:
            self.classifier = pickle.load(classifierFile)
        return
    except Exception:
        # Missing, unreadable or corrupt cache: fall through and retrain.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        pass

    posURLs = readFileLines(posFile)
    negURLs = readFileLines(negFile)
    self.classifier = train_SaveClassifier(posURLs, negURLs, classifierFileName)