def scanDirs(self, directories):
    """Scan every directory in ``directories`` to train the classifier.

    The list is stored on ``self.directories``.  Each directory is then
    handed to ``scanDir.scanDir``, which is given a callback that forwards
    every ``(path, content)`` pair to ``self._handleFile``.

    Args:
        directories: iterable of directory path strings.
    """
    self.directories = directories
    for directory in directories:
        # Single-argument call form: prints the same text on Python 2 and
        # is also valid syntax on Python 3.
        print("Scanning directory " + directory)
        scanDir.scanDir(
            directory,
            lambda path, content: self._handleFile(path, content))
def scanDirs(self, directories):
    """Scan every directory in ``directories`` to train the classifier.

    The list is stored on ``self.directories``.  Each directory is then
    handed to ``scanDir.scanDir``, which is given a callback that forwards
    every ``(path, content)`` pair to ``self._handleFile``.

    Args:
        directories: iterable of directory path strings.
    """
    self.directories = directories
    for directory in directories:
        # Single-argument call form: prints the same text on Python 2 and
        # is also valid syntax on Python 3.
        print("Scanning directory " + directory)
        scanDir.scanDir(
            directory,
            lambda path, content: self._handleFile(path, content))
def main():
    """Record webcam video while motion is detected.

    Each frame is blurred and compared against a running-average
    background.  Any changed region whose contour area is at least
    ``moveArea`` pushes the end-of-recording deadline ``wattingMinut``
    minutes into the future.  While the deadline has not passed, frames are
    written to a video file; a new file is started every
    ``maxVideoLengthMinut`` minutes.  Press ``q`` in the preview window to
    stop.

    Returns:
        0 once the capture loop has ended.
    """
    fpss = getCameraFps(-1)
    # =========================================================
    width = 1280
    height = 720
    boxRate = 0.001  # min moving box, as a fraction of the frame area
    moveArea = math.ceil(width * height * boxRate)
    maxVideoLengthMinut = 30
    wattingMinut = 5
    sNowDaytime = datetime.datetime.now()
    # Both deadlines start in the past, so nothing is recorded until motion
    # is seen, and the first recorded frame opens a new output file.
    endrecordTime = sNowDaytime + datetime.timedelta(minutes=-5)
    maxVdoLenTime = sNowDaytime + datetime.timedelta(minutes=-5)
    cleanfile = True
    # =========================================================
    # blur ksize
    ksize = (4, 4)
    # open close kernel
    kernel = np.ones((5, 5), np.uint8)
    # =========================================================
    # Open the webcam.
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    # mp4 output ('XVID' is the alternative codec the author tried).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # Set the size of the captured frames.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    # The writer is created lazily on the first recorded frame.  Keep an
    # explicit None so the cleanup code is safe when nothing was recorded.
    outVdo = None
    try:
        # Initialise the running-average image.
        ret, frame = cap.read()
        avg = cv2.blur(frame, ksize)
        avg_float = np.float32(avg)
        # =====================================================
        while cap.isOpened():
            # Read one frame; stop when the camera delivers nothing.
            ret, frame = cap.read()
            if not ret:
                break

            # Blur the frame.
            blur = cv2.blur(frame, ksize)

            # Difference between the current frame and the average image.
            diff = cv2.absdiff(avg, blur)

            # Convert the difference to greyscale.
            gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)

            # Keep only regions whose change exceeds the threshold.
            _, thresh = cv2.threshold(gray, 16, 255, cv2.THRESH_BINARY)

            # Remove noise with morphological open / close.
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel,
                                      iterations=2)
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel,
                                      iterations=2)

            # Find the contours of the changed regions.
            cnts, _hierarchy = cv2.findContours(thresh.copy(),
                                                cv2.RETR_EXTERNAL,
                                                cv2.CHAIN_APPROX_SIMPLE)

            for c in cnts:
                # Ignore regions that are too small.
                if cv2.contourArea(c) >= moveArea:
                    endrecordTime = datetime.datetime.now() + \
                        datetime.timedelta(minutes=wattingMinut)
                    # Bounding box of the contour.
                    (x, y, w, h) = cv2.boundingRect(c)
                    # Draw the bounding box.
                    cv2.rectangle(frame, (x, y), (x + w, y + h),
                                  (0, 255, 0), 2)

            insNow = datetime.datetime.now()
            if endrecordTime > insNow:
                if maxVdoLenTime < insNow:
                    maxVdoLenTime = insNow + \
                        datetime.timedelta(minutes=maxVideoLengthMinut)
                    # Close the previous file before rotating to a new one.
                    if outVdo is not None:
                        outVdo.release()
                    outVdo = cv2.VideoWriter(creatNewFilename(), fourcc,
                                             fpss, (width, height))
                outVdo.write(frame)

            # Draw the contours (for debugging).
            cv2.drawContours(frame, cnts, -1, (0, 255, 255), 2)

            # Show the detection result.
            cv2.imshow('saveVdo', frame)

            # Call clnfilss.scanDir once per minute: it fires at second 0
            # and the flag is re-armed at second 1 so it is not repeated
            # for every frame captured within second 0.
            # NOTE(review): presumably this prunes old recordings - confirm.
            if insNow.second == 0 and cleanfile:
                clnfilss.scanDir(vdoRoot)
                cleanfile = False
            elif insNow.second == 1:
                cleanfile = True

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Update the running-average image.
            cv2.accumulateWeighted(blur, avg_float, 0.01)
            avg = cv2.convertScaleAbs(avg_float)
    finally:
        # =====================================================
        # Release the resources even when the loop exits via an exception.
        cap.release()
        if outVdo is not None:
            outVdo.release()
        cv2.destroyAllWindows()
    return 0
def benchmark(self, dataset):
    """Train and then test the classifier on the files under ``dataset``.

    The dataset is scanned once to collect the files accepted by
    ``self.matchesExtension`` and to count their tokens and identifiers.
    The shuffled file list is then fed through a sequence of phases
    ('Training', optionally 'WeightTraining', then 'Testing'), each with a
    token budget.  Timings go to the 'performance' map file and counts to
    the 'output' map file.

    Args:
        dataset: root directory of the data set; file paths passed to the
            phases are made relative to it.

    Returns:
        True if the run was interrupted with Ctrl-C, otherwise False.
    """
    # One-element lists are mutable cells that the nested functions can
    # update (this file targets Python 2, which has no ``nonlocal``).
    tokenCount = [0]
    identifierCount = [0]
    fileList = []

    def addTokens(path, content):
        # Scan callback: remember matching files and count their tokens.
        if not self.matchesExtension(path):
            return
        fileList.append(path)
        words = list(tokenizer.tokenize(path, content))
        identifierCount[0] += sum(
            1 for word in words if tokenizer.isIdentifier(word[0]))
        tokenCount[0] += len(words)

    scanDir.scanDir(dataset, addTokens)
    trainTokens = self.trainTokens if self.trainTokens \
        else int(self.trainTokensPct * tokenCount[0])
    metrics = self.metrics
    logFile = Logger.logFile('log')
    random.shuffle(fileList)

    def train(path, content, maxTokens):
        logFile.log('Training on {}'.format(path))
        return self.classifier.train(path, content, maxTokens, False, 1)

    def weightTrain(path, content, maxTokens):
        return self.classifier.train(path, content, maxTokens, True,
                                     self.sample)

    def benchmark(path, content, maxTokens):
        return self.classifier.benchmark(path, content, metrics,
                                         self.sample, maxTokens,
                                         self.filters)

    phases = [_Phase('Training', train, trainTokens)]
    if self.weightTraining > 0:
        phases.append(_Phase('WeightTraining', weightTrain,
                             self.weightTraining))
    finalPhase = _Phase('Testing', benchmark,
                        self.maxSamples if self.maxSamples
                        else self.testingFrac * tokenCount[0])
    # With ``startAt`` set, the testing phase is appended only after the
    # first pass over the files (see the try block below).
    if not self.startAt:
        phases.append(finalPhase)
    phaseStart = [datetime.datetime.now()]
    phaseIdx = [0]
    phaseTokenCounter = [0]
    logFile.start(phases[phaseIdx[0]].name)

    def handleFile(path):
        # Nothing left to run: return before touching the file so the
        # remaining files are not read from disk for no reason.
        if phaseIdx[0] == len(phases):
            return
        # FixMe: [performance] Would it be better to iterate through
        # lines to avoid loading file into memory?
        with open(path) as f:
            content = f.read()
        path = os.path.relpath(path, dataset)
        phase = phases[phaseIdx[0]]
        if phaseTokenCounter[0] < phase.tokens:
            sys.stdout.write("\r{} {:.2%}".format(
                phase.name, phaseTokenCounter[0] / float(phase.tokens)))
            sys.stdout.flush()
            # The phase function receives the remaining token budget and
            # its return value is added to the phase counter.
            phaseTokenCounter[0] += phase.func(
                path, content, phase.tokens - phaseTokenCounter[0])
        else:
            # Budget used up: record the phase statistics and advance.
            # The current file is not passed to the next phase.
            self.classifier.logParams('post' + phase.name)
            nextPhaseStart = datetime.datetime.now()
            phaseTime = nextPhaseStart - phaseStart[0]
            phaseStart[0] = nextPhaseStart
            lowerName = phase.name[:1].lower() + phase.name[1:]
            performanceMap.put(lowerName + 'Time',
                               phaseTime.total_seconds())
            outputMap.put(lowerName + 'Tokens', phaseTokenCounter[0])
            sys.stdout.write("\r{} 100.00%\n".format(phase.name))
            sys.stdout.flush()
            phaseIdx[0] += 1
            phaseTokenCounter[0] = 0
            logFile.end()
            if phaseIdx[0] < len(phases):
                logFile.start(phases[phaseIdx[0]].name)

    outputMap = Logger.mapFile('output')
    performanceMap = Logger.mapFile('performance')
    killed = False
    outputMap.put("totalTokens", tokenCount[0])
    outputMap.put("totalIdentifiers", identifierCount[0])
    startTime = datetime.datetime.now()
    try:
        for path in fileList:
            handleFile(path)
        if self.startAt:
            # Second pass: run the testing phase over the part of the file
            # list that starts at the ``startAt`` fraction.
            phases.append(finalPhase)
            logFile.start(phases[phaseIdx[0]].name)
            for path in fileList[int(self.startAt * len(fileList)):]:
                handleFile(path)
    except KeyboardInterrupt:
        # Single-argument call form: same output on Python 2, and valid
        # syntax on Python 3.
        print('^C received, stopping benchmark')
        killed = True
    endTime = datetime.datetime.now()
    totalTime = endTime - startTime
    performanceMap.put('totalTime', totalTime.total_seconds())
    performanceMap.put('memory', utils.memUsage.memory())
    performanceMap.put('stackSize', utils.memUsage.stacksize())
    performanceMap.put('resident', utils.memUsage.resident())
    outputMap.put("tokensTrained", self.classifier.tokensTrained)
    outputMap.put("uniqueTokens", len(self.classifier.words))
    outputMap.put(
        "uniqueIdentifiers",
        sum(1 for word in self.classifier.words
            if tokenizer.isIdentifier(word)))
    for metric in metrics:
        metric.output(outputMap)
    return killed
def benchmark(self, dataset):
    """Train and then test the classifier on the files under ``dataset``.

    The dataset is scanned once to collect the files accepted by
    ``self.matchesExtension`` and to count their tokens and identifiers.
    The shuffled file list is then fed through a sequence of phases
    ('Training', optionally 'WeightTraining', then 'Testing'), each with a
    token budget.  Timings go to the 'performance' map file and counts to
    the 'output' map file.

    Args:
        dataset: root directory of the data set; file paths passed to the
            phases are made relative to it.

    Returns:
        True if the run was interrupted with Ctrl-C, otherwise False.
    """
    # One-element lists are mutable cells that the nested functions can
    # update (this file targets Python 2, which has no ``nonlocal``).
    tokenCount = [0]
    identifierCount = [0]
    fileList = []

    def addTokens(path, content):
        # Scan callback: remember matching files and count their tokens.
        if not self.matchesExtension(path):
            return
        fileList.append(path)
        words = list(tokenizer.tokenize(path, content))
        identifierCount[0] += sum(
            1 for word in words if tokenizer.isIdentifier(word[0]))
        tokenCount[0] += len(words)

    scanDir.scanDir(dataset, addTokens)
    trainTokens = self.trainTokens if self.trainTokens \
        else int(self.trainTokensPct * tokenCount[0])
    metrics = self.metrics
    logFile = Logger.logFile('log')
    random.shuffle(fileList)

    def train(path, content, maxTokens):
        logFile.log('Training on {}'.format(path))
        return self.classifier.train(path, content, maxTokens, False, 1)

    def weightTrain(path, content, maxTokens):
        return self.classifier.train(path, content, maxTokens, True,
                                     self.sample)

    def benchmark(path, content, maxTokens):
        return self.classifier.benchmark(path, content, metrics,
                                         self.sample, maxTokens,
                                         self.filters)

    phases = [_Phase('Training', train, trainTokens)]
    if self.weightTraining > 0:
        phases.append(
            _Phase('WeightTraining', weightTrain, self.weightTraining))
    finalPhase = _Phase('Testing', benchmark,
                        self.maxSamples if self.maxSamples
                        else self.testingFrac * tokenCount[0])
    # With ``startAt`` set, the testing phase is appended only after the
    # first pass over the files (see the try block below).
    if not self.startAt:
        phases.append(finalPhase)
    phaseStart = [datetime.datetime.now()]
    phaseIdx = [0]
    phaseTokenCounter = [0]
    logFile.start(phases[phaseIdx[0]].name)

    def handleFile(path):
        # Nothing left to run: return before touching the file so the
        # remaining files are not read from disk for no reason.
        if phaseIdx[0] == len(phases):
            return
        # FixMe: [performance] Would it be better to iterate through
        # lines to avoid loading file into memory?
        with open(path) as f:
            content = f.read()
        path = os.path.relpath(path, dataset)
        phase = phases[phaseIdx[0]]
        if phaseTokenCounter[0] < phase.tokens:
            sys.stdout.write("\r{} {:.2%}".format(
                phase.name, phaseTokenCounter[0] / float(phase.tokens)))
            sys.stdout.flush()
            # The phase function receives the remaining token budget and
            # its return value is added to the phase counter.
            phaseTokenCounter[0] += phase.func(
                path, content, phase.tokens - phaseTokenCounter[0])
        else:
            # Budget used up: record the phase statistics and advance.
            # The current file is not passed to the next phase.
            self.classifier.logParams('post' + phase.name)
            nextPhaseStart = datetime.datetime.now()
            phaseTime = nextPhaseStart - phaseStart[0]
            phaseStart[0] = nextPhaseStart
            lowerName = phase.name[:1].lower() + phase.name[1:]
            performanceMap.put(lowerName + 'Time',
                               phaseTime.total_seconds())
            outputMap.put(lowerName + 'Tokens', phaseTokenCounter[0])
            sys.stdout.write("\r{} 100.00%\n".format(phase.name))
            sys.stdout.flush()
            phaseIdx[0] += 1
            phaseTokenCounter[0] = 0
            logFile.end()
            if phaseIdx[0] < len(phases):
                logFile.start(phases[phaseIdx[0]].name)

    outputMap = Logger.mapFile('output')
    performanceMap = Logger.mapFile('performance')
    killed = False
    outputMap.put("totalTokens", tokenCount[0])
    outputMap.put("totalIdentifiers", identifierCount[0])
    startTime = datetime.datetime.now()
    try:
        for path in fileList:
            handleFile(path)
        if self.startAt:
            # Second pass: run the testing phase over the part of the file
            # list that starts at the ``startAt`` fraction.
            phases.append(finalPhase)
            logFile.start(phases[phaseIdx[0]].name)
            for path in fileList[int(self.startAt * len(fileList)):]:
                handleFile(path)
    except KeyboardInterrupt:
        # Single-argument call form: same output on Python 2, and valid
        # syntax on Python 3.
        print('^C received, stopping benchmark')
        killed = True
    endTime = datetime.datetime.now()
    totalTime = endTime - startTime
    performanceMap.put('totalTime', totalTime.total_seconds())
    performanceMap.put('memory', utils.memUsage.memory())
    performanceMap.put('stackSize', utils.memUsage.stacksize())
    performanceMap.put('resident', utils.memUsage.resident())
    outputMap.put("tokensTrained", self.classifier.tokensTrained)
    outputMap.put("uniqueTokens", len(self.classifier.words))
    outputMap.put(
        "uniqueIdentifiers",
        sum(1 for word in self.classifier.words
            if tokenizer.isIdentifier(word)))
    for metric in metrics:
        metric.output(outputMap)
    return killed