def detectImg(self, imageName):
    cascPath1 = os.path.join(XML_PATH, "haarcascade_frontalface_default.xml")
    cascPath2 = os.path.join(XML_PATH, "haarcascade_profileface.xml")
    face_type = "front"
    # First pass: frontal face detection.
    result = self.faceDetect(imageName, cascPath1, self.imagePathPrefix)
    result.update({"face_type": face_type})
    response = {"error_code": "0", "ex_data": result}
    if result['face_num'] == 0:
        LOGGER.info("No frontal face found, running profile detection on %s", imageName)
        result2 = self.faceDetect(imageName, cascPath2, self.imagePathPrefix)
        if result2['face_num'] == 0:
            LOGGER.info("No profile face found, flipping %s horizontally and retrying profile detection", imageName)
            imageHName = flip.flipHorizontal(imageName, self.imageHPathPrefix, self.imagePathPrefix)
            result3 = self.faceDetect(imageHName, cascPath2, self.imageHPathPrefix)
            if result3['face_num'] != 0:
                face_type = "profile_h"
            result.update(result3)
        else:
            face_type = "profile"
            result.update(result2)
    result.update({
        "suggest_msg": self.getSuggestMsg(result, face_type),
        "face_type": face_type
    })
    response.update({"error_code": self.getErrorCode(result)})
    return response
def memory_guard(self):
    # psutil.virtual_memory()[2] is the system-wide memory usage in percent.
    LOGGER.info('memory usage: %.1f%%, RESPAWN_LIMIT: %.1f%%',
                psutil.virtual_memory()[2], self.respawn_memory_pct)
    if psutil.virtual_memory()[2] > self.respawn_memory_pct:
        LOGGER.warning('releasing memory now! kill iterator processes and restart!')
        self.restart()
def _sanity_check(self, all_tasks):
    total_clf = 0
    failed_clf = 0
    # Fit each candidate classifier on a tiny dummy slice (one sample per class)
    # to verify it can be constructed and trained before queueing the real job.
    Xt, Yt = mnist_reader.load_mnist(path=DATA_DIR, kind='t10k')
    Xt = preprocessing.StandardScaler().fit_transform(Xt.astype(float))
    Xs, Ys = shuffle(Xt, Yt)
    num_dummy = 10
    Xs = Xs[:num_dummy]
    Ys = list(range(10))
    valid_jobs = []
    for v in all_tasks:
        clf_name = list(v.keys())[0]
        clf_par = list(v.values())[0]
        total_clf += 1
        try:
            globals()[clf_name](**clf_par).fit(Xs, Ys)
            valid_jobs.append(PredictJob(clf_name, clf_par, self.num_repeat))
        except Exception as e:
            failed_clf += 1
            LOGGER.error('Cannot create classifier "%s" with parameter "%s". Reason: %s'
                         % (clf_name, clf_par, e))
    LOGGER.info('%d classifiers to test, %d failed to create!' % (total_clf, failed_clf))
    return valid_jobs
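# PredictJob is referenced above but not defined in this section. A minimal
# sketch of what it is assumed to look like: a plain job-description holder
# whose fields match the call sites above (the text-pipeline benchmark further
# below uses a variant with additional processor/topic fields). The body here
# is illustrative, not the original implementation.
class PredictJob:
    def __init__(self, clf_name, clf_par, num_repeat):
        self.clf_name = clf_name      # classifier class name, resolved via globals()
        self.clf_par = clf_par        # keyword arguments for the classifier constructor
        self.num_repeat = num_repeat  # how many evaluation repeats to run
        self.start_time = None        # set by the worker when the job starts
        self.done_time = None         # set by the worker when the job finishes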
def run(self) -> None:
    while True:
        cur_job = self.pending_q.get()  # type: PredictJob
        LOGGER.info('job received! repeat: %d classifier: "%s" parameter: "%s"'
                    % (cur_job.num_repeat, cur_job.clf_name, cur_job.clf_par))
        if cur_job.clf_name in globals():
            try:
                acc = []
                cur_job.start_time = now_int()
                for j in range(cur_job.num_repeat):
                    cur_score = self.get_accuracy(cur_job.clf_name, cur_job.clf_par, j)
                    acc.append(cur_score)
                    # If the first two runs give (almost) identical scores, the
                    # classifier is insensitive to data shuffling; skip the remaining repeats.
                    if len(acc) == 2 and abs(acc[0] - cur_score) < 1e-3:
                        LOGGER.info('%s is invariant to training data shuffling, will stop repeating!'
                                    % cur_job.clf_name)
                        break
                cur_job.done_time = now_int()
                test_info = {
                    'name': cur_job.clf_name,
                    'parameter': cur_job.clf_par,
                    'score': acc,
                    'start_time': cur_job.start_time,
                    'done_time': cur_job.done_time,
                    'num_repeat': len(acc),
                    'mean_accuracy': np.array(acc).mean(),
                    'std_accuracy': np.array(acc).std() * 2,
                    'time_per_repeat': int((cur_job.done_time - cur_job.start_time) / len(acc))
                }
                JSON_LOGGER.info(json.dumps(test_info, sort_keys=True))
                LOGGER.info('done! acc: %0.3f (+/- %0.3f) repeated: %d classifier: "%s" parameter: "%s"'
                            % (np.array(acc).mean(), np.array(acc).std() * 2,
                               len(acc), cur_job.clf_name, cur_job.clf_par))
            except Exception as e:
                LOGGER.error('%s with %s failed! reason: %s'
                             % (cur_job.clf_name, cur_job.clf_par, e))
        else:
            LOGGER.error('Cannot find "%s" in scikit-learn, missing import?' % cur_job.clf_name)
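# now_int() is an assumed helper used above for coarse timestamps; only its
# usage appears in the source. A minimal sketch under that assumption:
import time

def now_int():
    # Current Unix timestamp, truncated to whole seconds.
    return int(time.time())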
def get_accuracy(self, clf_name, clf_par, id):
    # time.clock() was removed in Python 3.8; perf_counter() is the replacement.
    start_time = time.perf_counter()
    clf = globals()[clf_name](**clf_par)
    # Reshuffle the training data on every repeat so repeats are not identical.
    Xs, Ys = shuffle(self.X, self.Y)
    cur_score = clf.fit(Xs, Ys).score(self.Xt, self.Yt)
    duration = time.perf_counter() - start_time
    LOGGER.info('#test: %d acc: %0.3f time: %.3fs classifier: "%s" parameter: "%s"'
                % (id, cur_score, duration, clf_name, clf_par))
    return cur_score
def get_accuracy(self, processor_name, processor_par, clf_name, clf_par, topic_name, topic_par, id):
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    Xs, Ys = shuffle(self.X, self.Y)
    # Pipeline: text vectorizer -> dense conversion -> topic model -> classifier.
    clf = make_pipeline(globals()[processor_name](**processor_par),
                        DenseTransformer(),
                        globals()[topic_name](**topic_par),
                        globals()[clf_name](**clf_par))
    # The reported score is 5-fold cross-validated macro-F1, not raw accuracy.
    scores = cross_val_score(clf, Xs, Ys, cv=5, scoring='f1_macro')
    cur_score = scores.mean()
    duration = time.perf_counter() - start_time
    LOGGER.info('#test: %d acc: %0.3f time: %.3fs classifier: "%s" parameter: "%s" '
                'processor: "%s" processor_parameter: "%s"'
                % (id, cur_score, duration, clf_name, clf_par, processor_name, processor_par))
    return cur_score
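# DenseTransformer is used in the pipelines above and below but not defined in
# this section. A minimal sketch of what such a transformer typically looks
# like (an assumption): it converts the sparse vectorizer output to a dense
# array so downstream estimators that cannot handle sparse input still work.
from sklearn.base import BaseEstimator, TransformerMixin

class DenseTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        # scipy sparse matrices expose .toarray(); dense input passes through unchanged.
        return X.toarray() if hasattr(X, 'toarray') else X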
def api_upload():
    file_dir = os.path.join(basedir, app.config['UPLOAD_FOLDER'])
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)
    face_detect = FaceDetect(os.path.join(WEB_PATH, 'upload'),
                             os.path.join(WEB_PATH, 'upload_flip'),
                             os.path.join(basedir, 'static', 'result'))
    if request.files:
        f = request.files.get('nettyFile')
        if f and allowed_file(f.filename):  # only accept whitelisted file types
            f_name = secure_filename(f.filename)
            ext = f_name.rsplit('.', 1)  # split off the file extension
            unix_time = int(time.time())
            new_filename = ext[0] + '_' + str(unix_time) + '.' + ext[1]  # timestamped file name
            f.save(os.path.join(file_dir, new_filename))  # save the file to the upload directory
            faces = face_detect.detectImg(new_filename)
            return jsonify(faces)
        else:
            return jsonify({"error_code": -2, "error_msg": "upload failed"})
    file_base64 = None
    # Collect the remaining form fields (user id, fileKey, user ip, device, file type, ...) for logging.
    log_dict = {}
    if request.form:
        for key in request.form.keys():
            if key == 'nettyFile':
                # The file arrives base64-encoded in the form field; decode it here.
                f = request.form.get(key)
                file_base64 = base64.b64decode(f)
            else:
                log_dict[key] = request.form.get(key)
        LOGGER.info(log_dict)
    if file_base64 is not None:
        file_name = log_dict['fileKey'] + '_' + str(int(time.time())) + '.' + log_dict['fileType']
        with open(os.path.join(file_dir, file_name), 'wb') as f:
            f.write(file_base64)
        faces = face_detect.detectImg(file_name)
        z = log_dict.copy()
        z.update(faces)
        LOGGER.info("result:" + json.dumps(z))
        return jsonify(faces)
    else:
        return jsonify({"error_code": -1, "error_msg": "no file received"})
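# allowed_file() is called above but not shown in this section. A sketch of the
# usual Flask upload whitelist check; the extension set is an assumption:
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp'}

def allowed_file(filename):
    # Accept only files with an extension in the whitelist (case-insensitive).
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS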
def faceDetect(self, imageName, haarxml, imagePathPrefix="image" + os.sep):
    LOGGER.info(imageName)
    exData = {"image_name": imageName}
    exData.update({"image_path_name": imagePathPrefix + imageName})
    # Create the haar cascade
    faceCascade = cv2.CascadeClassifier(haarxml)
    # Read the image
    image = cv2.imread(imagePathPrefix + imageName)
    LOGGER.info("----image size, width: %s  height: %s", str(image.shape[1]), str(image.shape[0]))
    # cv2 images are indexed as (height, width, channels).
    exData.update({
        "image_width": image.shape[1],
        "image_height": image.shape[0]
    })
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Detect faces in the image
    faces = faceCascade.detectMultiScale(gray,
                                         scaleFactor=self.scaleFactor,
                                         minNeighbors=5,
                                         minSize=(30, 30),
                                         flags=cv2.CASCADE_SCALE_IMAGE)
    LOGGER.info("----Found %s faces!", len(faces))
    resultImg = image
    detected_face = []
    faceXywh = []
    # Draw a rectangle around the faces
    for index, (x, y, w, h) in enumerate(faces):
        x, y, w, h = int(x), int(y), int(w), int(h)
        faceXywh.append([x, y, w, h])
        ul = (x, y)          # upper-left corner
        ll = (x, y + h)      # lower-left corner
        ur = (x + w, y)      # upper-right corner
        lr = (x + w, y + h)  # lower-right corner
        LOGGER.info("----face[%s] position %s, %s, %s, %s", str(index + 1), ul, ll, ur, lr)
        detected_face.append([ul, ll, ur, lr])
        resultImg = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    exData.update({"face_num": len(faces)})
    exData.update({"detected_face": detected_face})
    exData.update({"faces": faceXywh})
    if len(faces) != 0:
        cv2.imwrite(flip.reName(self.resultPathPrefix + imageName, "-result"), resultImg)
    return exData
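# The flip helpers (flip.flipHorizontal, flip.reName) used above are not shown
# in this section. A minimal sketch of what they are assumed to do; the
# signatures follow the call sites above, the bodies are illustrative only.
import os
import cv2

def reName(imagePathName, suffix):
    # Insert a suffix before the file extension, e.g. "a.jpg" -> "a-result.jpg".
    base, ext = os.path.splitext(imagePathName)
    return base + suffix + ext

def flipHorizontal(imageName, flippedPathPrefix, imagePathPrefix):
    # Mirror the image left-right, save it under the flipped-image prefix and
    # return the new file name so it can be fed back into faceDetect().
    image = cv2.imread(imagePathPrefix + imageName)
    flippedName = os.path.basename(reName(imageName, "-flip"))
    cv2.imwrite(flippedPathPrefix + flippedName, cv2.flip(image, 1))
    return flippedName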
def _sanity_check(self, all_tasks):
    total_clf = 0
    failed_clf = 0
    # Fit each candidate classifier on a tiny dummy slice (one sample per class)
    # to verify it can be constructed and trained before queueing the real job.
    Xt, Yt = mnist_reader.load_mnist(path=DATA_DIR, kind='t10k')
    Xt = preprocessing.StandardScaler().fit_transform(Xt)
    Xs, Ys = shuffle(Xt, Yt)
    num_dummy = 10
    Xs = Xs[:num_dummy]
    Ys = list(range(10))
    valid_jobs = []
    for v in all_tasks:
        clf_name = list(v.keys())[0]
        clf_par = list(v.values())[0]
        total_clf += 1
        try:
            globals()[clf_name](**clf_par).fit(Xs, Ys)
            valid_jobs.append(PredictJob(clf_name, clf_par, self.num_repeat))
        except Exception as e:
            failed_clf += 1
            LOGGER.error('Cannot create classifier "%s" with parameter "%s". Reason: %s'
                         % (clf_name, clf_par, e))
    LOGGER.info('%d classifiers to test, %d failed to create!' % (total_clf, failed_clf))
    return valid_jobs
def _sanity_check(self, all_tasks):
    total_clf = 0
    failed_clf = 0
    # Fit each candidate pipeline on a tiny slice of 20newsgroups to verify it
    # can be constructed and trained before queueing the real job.
    newsgroups = fetch_20newsgroups(subset='train', shuffle=True, random_state=2019,
                                    remove=('headers', 'footers', 'quotes'))
    Xt, Yt = newsgroups.data, newsgroups.target
    Xs, Ys = shuffle(Xt, Yt)
    num_dummy = 10
    Xs = Xs[:num_dummy]
    Ys = Ys[:num_dummy]
    valid_jobs = []
    for v in all_tasks:
        processor_name = list(v[0].keys())[0]
        processor_par = list(v[0].values())[0]
        clf_name = list(v[1].keys())[0]
        clf_par = list(v[1].values())[0]
        topic_name = list(v[2].keys())[0]
        topic_par = list(v[2].values())[0]
        total_clf += 1
        try:
            make_pipeline(globals()[processor_name](**processor_par),
                          DenseTransformer(),
                          globals()[topic_name](**topic_par),
                          globals()[clf_name](**clf_par)).fit(Xs, Ys)
            valid_jobs.append(PredictJob(processor_name, processor_par, clf_name, clf_par,
                                         topic_name, topic_par, self.num_repeat))
        except Exception as e:
            failed_clf += 1
            LOGGER.error('Cannot create classifier "%s" with parameter "%s". Reason: %s'
                         % (clf_name, clf_par, e))
    LOGGER.info('%d classifiers to test, %d failed to create!' % (total_clf, failed_clf))
    return valid_jobs
def startWithDir(self):
    imageDir = self.imagePathPrefix
    images = self.getFileList(imageDir)
    LOGGER.info("\n====== face detection ======\n\n")
    # Run detection on every image found in the image directory.
    for imageName in images:
        LOGGER.info(self.detectImg(imageName))
    LOGGER.info("\n\n")
def upload_result_s3():
    LOGGER.info("Syncing data to S3...")
    with open(LOG_PATH, 'a', 1) as logfile:
        proc = subprocess.Popen("bash %s %s" % (SYNC_SCRIPT_PATH, RESULT_PATH),
                                shell=True,
                                stdin=subprocess.PIPE,
                                stdout=logfile,
                                stderr=logfile,
                                cwd=ROOT_DIR,
                                env=os.environ)
        # Wait for the sync script to finish, but never block longer than SYNC_TIMEOUT.
        try:
            outs, errs = proc.communicate(timeout=SYNC_TIMEOUT)
            if outs:
                LOGGER.info(outs)
            if errs:
                LOGGER.error(errs)
        except subprocess.TimeoutExpired:
            proc.kill()
def result():
    query = request.args['sentence']
    res = [(0, 0, 0)]
    mdls = ['Tf-Idf', 'BM25', 'FastText', 'Elmo']
    try:
        model_id = int(request.args['model']) - 1
    except Exception:
        model_id = 0
        LOGGER.info('No model selected by the user, falling back to Tf-Idf')
    LOGGER.info(f'Query: {query}; model: {mdls[model_id]}')
    # Models are initialised lazily so that only the requested one is loaded.
    if model_id == 0:
        models.init_tfidf()
        res = models.tfidf.search(query)
    elif model_id == 1:
        models.init_bm25()
        res = models.bm25.search(query)
    elif model_id == 2:
        models.init_fasttext()
        res = models.fasttext.search(query)
    elif model_id == 3:
        models.init_elmo()
        res = models.elmosearch.search(query)
    LOGGER.info(f'Query results: {res}')
    return render_template('result.html', results=res, query=query)
import threading

from benchmark.runner import JobManager
from configs import LOGGER
from utils.argparser import get_args_cli
from utils.helper import UploadS3Thread


def start_s3_sync():
    # Periodically sync results to S3 in a background thread.
    stop_flag = threading.Event()
    upload_s3_thread = UploadS3Thread(stop_flag)
    upload_s3_thread.start()


if __name__ == "__main__":
    arg_dict = get_args_cli()
    LOGGER.info('received task with args: %s' % arg_dict)
    start_s3_sync()
    jm = JobManager(**arg_dict)
    jm.start()
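# UploadS3Thread is imported above but its implementation is not shown in this
# section. A sketch of what it is assumed to do, reusing the upload_result_s3()
# helper shown earlier; the interval and stop-event handling are assumptions.
import threading

class UploadS3Thread(threading.Thread):
    def __init__(self, event, interval=3600):
        super().__init__(daemon=True)
        self.stopped = event      # threading.Event used to request shutdown
        self.interval = interval  # seconds between sync attempts

    def run(self):
        # Re-sync results periodically until the stop event is set.
        while not self.stopped.wait(self.interval):
            upload_result_s3()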