Example #1
    def detectImg(self, imageName):
        cascPath1 = os.path.join(XML_PATH,
                                 "haarcascade_frontalface_default.xml")
        cascPath2 = os.path.join(XML_PATH, "haarcascade_profileface.xml")

        face_type = "front"
        result = self.faceDetect(imageName, cascPath1, self.imagePathPrefix)
        result.update({"face_type": face_type})
        response = {"error_code": "0", "ex_data": result}
        if result['face_num'] == 0:
            LOGGER.info("正脸检测不到人脸,对%s做侧脸检测", imageName)
            result2 = self.faceDetect(imageName, cascPath2,
                                      self.imagePathPrefix)
            if result2['face_num'] == 0:
                LOGGER.info("侧脸检测不到人脸,对%s做水平翻转后侧脸检测", imageName)
                imageHName = flip.flipHorizontal(imageName,
                                                 self.imageHPathPrefix,
                                                 self.imagePathPrefix)
                result3 = self.faceDetect(imageHName, cascPath2,
                                          self.imageHPathPrefix)
                if result3['face_num'] != 0:
                    face_type = "profile_h"
                result.update(result3)
            else:
                face_type = "profile"
                result.update(result2)
        result.update({
            "suggest_msg": self.getSuggestMsg(result, face_type),
            "face_type": face_type
        })
        response.update({"error_code": self.getErrorCode(result)})
        return response
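
A sketch of the payload this method assembles (the keys mirror the code above and faceDetect in Example #9; the values here are placeholders):

    # illustrative response shape returned by detectImg; values are made up
    response = {
        "error_code": "0",
        "ex_data": {
            "image_name": "face_1.jpg",
            "face_num": 1,
            "face_type": "front",  # "front", "profile", or "profile_h"
            "faces": [[10, 20, 100, 100]],  # [x, y, w, h] per face
            "detected_face": [[(10, 20), (10, 120), (110, 20), (110, 120)]],
            "suggest_msg": "...",
        },
    }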
Example #2
 def memory_guard(self):
     # virtual_memory().percent is the same value as virtual_memory()[2]
     LOGGER.info('memory usage: %.1f%%, RESPAWN_LIMIT: %.1f%%',
                 psutil.virtual_memory().percent, self.respawn_memory_pct)
     if psutil.virtual_memory().percent > self.respawn_memory_pct:
         LOGGER.warning(
             'releasing memory now! kill iterator processes and restart!')
         self.restart()
Example #3
 def _sanity_check(self, all_tasks):
     total_clf = 0
     failed_clf = 0
     Xt, Yt = mnist_reader.load_mnist(path=DATA_DIR, kind='t10k')
     Xt = preprocessing.StandardScaler().fit_transform(Xt.astype(float))
     Xs, Ys = shuffle(Xt, Yt)
     # ten dummy samples labelled 0..9 so fit() sees every class
     num_dummy = 10
     Xs = Xs[:num_dummy]
     Ys = list(range(num_dummy))
     valid_jobs = []
     for v in all_tasks:
         clf_name = list(v.keys())[0]
         clf_par = list(v.values())[0]
         total_clf += 1
         try:
             globals()[clf_name](**clf_par).fit(Xs, Ys)
             valid_jobs.append(
                 PredictJob(clf_name, clf_par, self.num_repeat))
         except Exception as e:
             failed_clf += 1
             LOGGER.error(
                 'Cannot create classifier "%s" with parameter "%s". Reason: %s'
                 % (clf_name, clf_par, e))
     LOGGER.info('%d classifiers to test, %d failed to create!' %
                 (total_clf, failed_clf))
     return valid_jobs
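
The loop above assumes each task is a single-key dict mapping a classifier name (resolvable through globals()) to its constructor keyword arguments; a hypothetical task list:

    # hypothetical all_tasks structure expected by _sanity_check
    all_tasks = [
        {'LogisticRegression': {'C': 1.0}},
        {'KNeighborsClassifier': {'n_neighbors': 5}},
    ]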
Example #4
    def run(self) -> None:
        while True:
            cur_job = self.pending_q.get()  # type: PredictJob

            LOGGER.info(
                'job received! repeat: %d classifier: "%s" parameter: "%s"' %
                (cur_job.num_repeat, cur_job.clf_name, cur_job.clf_par))
            if cur_job.clf_name in globals():
                try:
                    acc = []
                    cur_job.start_time = now_int()
                    for j in range(cur_job.num_repeat):
                        cur_score = self.get_accuracy(cur_job.clf_name,
                                                      cur_job.clf_par, j)
                        acc.append(cur_score)
                        if len(acc) == 2 and abs(acc[0] - cur_score) < 1e-3:
                            LOGGER.info(
                                '%s is invariant to training data shuffling, will stop repeating!'
                                % cur_job.clf_name)
                            break
                    cur_job.done_time = now_int()
                    test_info = {
                        'name': cur_job.clf_name,
                        'parameter': cur_job.clf_par,
                        'score': acc,
                        'start_time': cur_job.start_time,
                        'done_time': cur_job.done_time,
                        'num_repeat': len(acc),
                        'mean_accuracy': np.array(acc).mean(),
                        'std_accuracy': np.array(acc).std() * 2,
                        'time_per_repeat': int((cur_job.done_time -
                                                cur_job.start_time) / len(acc))
                    }

                    JSON_LOGGER.info(json.dumps(test_info, sort_keys=True))

                    LOGGER.info(
                        'done! acc: %0.3f (+/- %0.3f) repeated: %d classifier: "%s" '
                        'parameter: "%s" ' %
                        (np.array(acc).mean(), np.array(acc).std() * 2,
                         len(acc), cur_job.clf_name, cur_job.clf_par))
                except Exception as e:
                    LOGGER.error('%s with %s failed! reason: %s' %
                                 (cur_job.clf_name, cur_job.clf_par, e))
            else:
                LOGGER.error(
                    'Cannot find "%s" in scikit-learn; missing import?' %
                    cur_job.clf_name)
Example #5
 def get_accuracy(self, clf_name, clf_par, id):
     start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
     clf = globals()[clf_name](**clf_par)
     Xs, Ys = shuffle(self.X, self.Y)
     cur_score = clf.fit(Xs, Ys).score(self.Xt, self.Yt)
     duration = time.perf_counter() - start_time
     LOGGER.info(
         '#test: %d acc: %0.3f time: %.3fs classifier: "%s" parameter: "%s"'
         % (id, cur_score, duration, clf_name, clf_par))
     return cur_score
Example #6
 def get_accuracy(self, clf_name, clf_par, id):
     start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
     clf = globals()[clf_name](**clf_par)
     Xs, Ys = shuffle(self.X, self.Y)
     cur_score = clf.fit(Xs, Ys).score(self.Xt, self.Yt)
     duration = time.perf_counter() - start_time
     LOGGER.info(
         '#test: %d acc: %0.3f time: %.3fs classifier: "%s" parameter: "%s"'
         % (id, cur_score, duration, clf_name, clf_par))
     return cur_score
Example #7
 def get_accuracy(self, processor_name, processor_par, clf_name, clf_par,
                  topic_name, topic_par, id):
     start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
     Xs, Ys = shuffle(self.X, self.Y)
     clf = make_pipeline(globals()[processor_name](**processor_par),
                         DenseTransformer(),
                         globals()[topic_name](**topic_par),
                         globals()[clf_name](**clf_par))
     scores = cross_val_score(clf, Xs, Ys, cv=5, scoring='f1_macro')
     cur_score = scores.mean()
     duration = time.perf_counter() - start_time
     LOGGER.info(
         '#test: %d f1_macro: %0.3f time: %.3fs classifier: "%s" parameter: "%s" processor: "%s" processor_parameter: "%s"'
         % (id, cur_score, duration, clf_name, clf_par, processor_name,
            processor_par))
     return cur_score
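
For context, this variant assembles a four-stage pipeline whose stages are resolved by name from globals(). A minimal self-contained sketch of an equivalent pipeline (the stage choices are illustrative, and this DenseTransformer is only a stand-in for the one the benchmark imports):

    from sklearn.base import BaseEstimator, TransformerMixin
    from sklearn.decomposition import TruncatedSVD
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import make_pipeline

    class DenseTransformer(BaseEstimator, TransformerMixin):
        """Stand-in: converts a sparse matrix to a dense array."""
        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.toarray()

    # processor -> dense -> topic model -> classifier, as in get_accuracy
    clf = make_pipeline(TfidfVectorizer(),
                        DenseTransformer(),
                        TruncatedSVD(n_components=50),
                        LogisticRegression())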
Example #8
def api_upload():
    file_dir = os.path.join(basedir, app.config['UPLOAD_FOLDER'])
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)
    face_detect = FaceDetect(os.path.join(WEB_PATH, 'upload'),
                             os.path.join(WEB_PATH, 'upload_flip'),
                             os.path.join(basedir, 'static', 'result'))
    if request.files:
        f = request.files.get('nettyFile')
        if f and allowed_file(f.filename):  # only accept whitelisted file types
            f_name = secure_filename(f.filename)
            ext = f_name.rsplit('.', 1)  # split name and extension
            unix_time = int(time.time())
            # timestamp the stored file name to avoid collisions
            new_filename = ext[0] + '_' + str(unix_time) + '.' + ext[1]
            f.save(os.path.join(file_dir, new_filename))  # save into the upload dir
            faces = face_detect.detectImg(new_filename)
            return jsonify(faces)
        else:
            return jsonify({"error_code": -2, "error_msg": "upload failed"})
    file_base64 = None
    # remaining form fields (file type, userid, fileKey, user ip, device) are logged
    log_dict = {}
    if request.form:
        for key in request.form.keys():
            if key == 'nettyFile':
                # base64-decode the file payload; other fields go to the log
                f = request.form.get(key)
                file_base64 = base64.b64decode(f)
            else:
                log_dict[key] = request.form.get(key)
    LOGGER.info(log_dict)
    if file_base64 is not None:
        file_name = log_dict['fileKey'] + '_' + str(int(
            time.time())) + '.' + log_dict['fileType']
        with open(os.path.join(file_dir, file_name), 'wb') as f:
            f.write(file_base64)
        faces = face_detect.detectImg(file_name)
        z = log_dict.copy()
        z.update(faces)
        LOGGER.info("result:" + json.dumps(z))
        return jsonify(faces)
    else:
        return jsonify({"error_code": -1, "error_msg": "未获取到文件"})
Example #9
    def faceDetect(self, imageName, haarxml, imagePathPrefix="image" + os.sep):
        LOGGER.info(imageName)
        exData = {"image_name": imageName}
        exData.update({"image_path_name": imagePathPrefix + imageName})
        # Create the haar cascade
        faceCascade = cv2.CascadeClassifier(haarxml)

        # Read the image
        image = cv2.imread(imagePathPrefix + imageName)
        LOGGER.info("----图片大小,长:%s ---宽:%s", str(image.shape[1]),
                    str(image.shape[0]))
        exData.update({
            "image_width": image.shape[0],
            "image_height": image.shape[1]
        })
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Detect faces in the image
        faces = faceCascade.detectMultiScale(gray,
                                             scaleFactor=self.scaleFactor,
                                             minNeighbors=5,
                                             minSize=(30, 30),
                                             flags=cv2.CASCADE_SCALE_IMAGE)

        LOGGER.info("----Found %s faces!", len(faces))

        resultImg = image
        detected_face = []
        faceXywh = []
        # Draw a rectangle around the faces
        for index, (x, y, w, h) in enumerate(faces):
            x = int(x)
            y = int(y)
            w = int(w)
            h = int(h)

            faceXywh.append([x, y, w, h])
            ul = (x, y)
            ll = (x, y + h)
            ur = (x + w, y)
            lr = (x + w, y + h)
            LOGGER.info("----face[%s]位置%s, %s, %s, %s", str(index + 1), ul, ll,
                        ur, lr)
            detected_face.append([ul, ll, ur, lr])
            resultImg = cv2.rectangle(image, (x, y), (x + w, y + h),
                                      (0, 255, 0), 2)
        exData.update({"face_num": len(faces)})
        exData.update({"detected_face": detected_face})
        exData.update({"faces": faceXywh})
        if len(faces) != 0:
            cv2.imwrite(
                flip.reName(self.resultPathPrefix + imageName, "-result"),
                resultImg)

        return exData
Example #10
    def run(self) -> None:
        while True:
            cur_job = self.pending_q.get()  # type: PredictJob

            LOGGER.info('job received! repeat: %d classifier: "%s" parameter: "%s"' % (cur_job.num_repeat,
                                                                                       cur_job.clf_name,
                                                                                       cur_job.clf_par))
            if cur_job.clf_name in globals():
                try:
                    acc = []
                    cur_job.start_time = now_int()
                    for j in range(cur_job.num_repeat):
                        cur_score = self.get_accuracy(cur_job.clf_name, cur_job.clf_par, j)
                        acc.append(cur_score)
                        if len(acc) == 2 and abs(acc[0] - cur_score) < 1e-3:
                            LOGGER.info('%s is invariant to training data shuffling, will stop repeating!' %
                                        cur_job.clf_name)
                            break
                    cur_job.done_time = now_int()
                    test_info = {
                        'name': cur_job.clf_name,
                        'parameter': cur_job.clf_par,
                        'score': acc,
                        'start_time': cur_job.start_time,
                        'done_time': cur_job.done_time,
                        'num_repeat': len(acc),
                        'mean_accuracy': np.array(acc).mean(),
                        'std_accuracy': np.array(acc).std() * 2,
                        'time_per_repeat': int((cur_job.done_time - cur_job.start_time) / len(acc))
                    }

                    JSON_LOGGER.info(json.dumps(test_info, sort_keys=True))

                    LOGGER.info('done! acc: %0.3f (+/- %0.3f) repeated: %d classifier: "%s" '
                                'parameter: "%s" ' % (np.array(acc).mean(),
                                                      np.array(acc).std() * 2,
                                                      len(acc),
                                                      cur_job.clf_name,
                                                      cur_job.clf_par))
                except Exception as e:
                    LOGGER.error('%s with %s failed! reason: %s' % (cur_job.clf_name, cur_job.clf_par, e))
            else:
                LOGGER.error('Cannot find "%s" in scikit-learn; missing import?' % cur_job.clf_name)
Example #11
 def _sanity_check(self, all_tasks):
     total_clf = 0
     failed_clf = 0
     Xt, Yt = mnist_reader.load_mnist(path=DATA_DIR, kind='t10k')
     Xt = preprocessing.StandardScaler().fit_transform(Xt)
     Xs, Ys = shuffle(Xt, Yt)
     # ten dummy samples labelled 0..9 so fit() sees every class
     num_dummy = 10
     Xs = Xs[:num_dummy]
     Ys = list(range(num_dummy))
     valid_jobs = []
     for v in all_tasks:
         clf_name = list(v.keys())[0]
         clf_par = list(v.values())[0]
         total_clf += 1
         try:
             globals()[clf_name](**clf_par).fit(Xs, Ys)
             valid_jobs.append(PredictJob(clf_name, clf_par, self.num_repeat))
         except Exception as e:
             failed_clf += 1
             LOGGER.error('Cannot create classifier "%s" with parameter "%s". Reason: %s' % (clf_name, clf_par, e))
     LOGGER.info('%d classifiers to test, %d failed to create!' % (total_clf, failed_clf))
     return valid_jobs
Example #12
 def _sanity_check(self, all_tasks):
     total_clf = 0
     failed_clf = 0
     newsgroups = fetch_20newsgroups(subset='train',
                                     shuffle=True,
                                     random_state=2019,
                                     remove=('headers', 'footers',
                                             'quotes'))
     Xt, Yt = newsgroups.data, newsgroups.target
     Xs, Ys = shuffle(Xt, Yt)
     num_dummy = 10
     Xs = Xs[:num_dummy]
     Ys = Ys[:num_dummy]
     valid_jobs = []
     for v in all_tasks:
         processor_name = list(v[0].keys())[0]
         processor_par = list(v[0].values())[0]
         clf_name = list(v[1].keys())[0]
         clf_par = list(v[1].values())[0]
         topic_name = list(v[2].keys())[0]
         topic_par = list(v[2].values())[0]
         total_clf += 1
         try:
             make_pipeline(globals()[processor_name](**processor_par),
                           DenseTransformer(),
                           globals()[topic_name](**topic_par),
                           globals()[clf_name](**clf_par)).fit(Xs, Ys)
             valid_jobs.append(
                 PredictJob(processor_name, processor_par, clf_name,
                            clf_par, topic_name, topic_par,
                            self.num_repeat))
         except Exception as e:
             failed_clf += 1
             LOGGER.error(
                 'Cannot create classifier "%s" with parameter "%s". Reason: %s'
                 % (clf_name, clf_par, e))
     LOGGER.info('%d classifiers to test, %d failed to create!' %
                 (total_clf, failed_clf))
     return valid_jobs
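
Here each task is expected to be a triple of single-key dicts in (processor, classifier, topic model) order; a hypothetical entry:

    # hypothetical all_tasks entry for this pipeline variant
    all_tasks = [
        ({'TfidfVectorizer': {}},
         {'LogisticRegression': {'C': 1.0}},
         {'TruncatedSVD': {'n_components': 10}}),
    ]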
Example #13
    def startWithDir(self):
        imageDir = self.imagePathPrefix

        images = self.getFileList(imageDir)

        LOGGER.info("\n======人脸检测======\n\n")
        for i in range(images.__len__()):
            LOGGER.info(self.detectImg(images[i]))
            LOGGER.info("\n\n")
Example #14
def upload_result_s3():
    LOGGER.info("Syncing data to S3...")
    with open(LOG_PATH, 'a', 1) as logfile:
        proc = subprocess.Popen("bash %s %s" % (SYNC_SCRIPT_PATH, RESULT_PATH),
                                shell=True,
                                stdin=subprocess.PIPE,
                                stdout=logfile,
                                stderr=logfile,
                                cwd=ROOT_DIR,
                                env=os.environ)

        # wait for the sync to finish; stdout/stderr are redirected to the
        # log file, so communicate() returns (None, None) and only blocks here
        try:
            proc.communicate(timeout=SYNC_TIMEOUT)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.communicate()  # reap the killed process
Example #15
def result():
    query = request.args['sentence']
    res = [(0, 0, 0)]
    mdls = ['Tf-Idf', 'BM25', 'FastText', 'Elmo']
    try:
        model_id = int(request.args['model']) - 1
    except (KeyError, ValueError):
        model_id = 0
        LOGGER.info('No model selected by the user, defaulting to Tf-Idf')
    if not 0 <= model_id < len(mdls):  # guard against out-of-range model ids
        model_id = 0
    LOGGER.info(f'Query: {query}; model: {mdls[model_id]}')
    if model_id == 0:
        models.init_tfidf()
        res = models.tfidf.search(query)
    elif model_id == 1:
        models.init_bm25()
        res = models.bm25.search(query)
    elif model_id == 2:
        models.init_fasttext()
        res = models.fasttext.search(query)
    elif model_id == 3:
        models.init_elmo()
        res = models.elmosearch.search(query)
    LOGGER.info(f'Query results: {res}')
    return render_template('result.html', results=res, query=query)
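
The handler relies on a models module that lazily builds each index on first use; a minimal hypothetical sketch of that contract (TfidfSearch and its search method are stand-ins, not the project's real classes):

    # hypothetical models.py contract assumed by the handler above
    class TfidfSearch:
        """Stand-in; the real class would build and query a Tf-Idf index."""
        def search(self, query):
            return [(0, 0, 0)]

    tfidf = None

    def init_tfidf():
        # lazy initialization: construct the expensive index only once
        global tfidf
        if tfidf is None:
            tfidf = TfidfSearch()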
Example #16
 def memory_guard(self):
     # virtual_memory().percent is the same value as virtual_memory()[2]
     LOGGER.info('memory usage: %.1f%%, RESPAWN_LIMIT: %.1f%%',
                 psutil.virtual_memory().percent, self.respawn_memory_pct)
     if psutil.virtual_memory().percent > self.respawn_memory_pct:
         LOGGER.warning('releasing memory now! kill iterator processes and restart!')
         self.restart()
Example #17
import threading

from benchmark.runner import JobManager
from configs import LOGGER
from utils.argparser import get_args_cli
from utils.helper import UploadS3Thread


def start_s3_sync():
    stop_flag = threading.Event()
    upload_s3_thread = UploadS3Thread(stop_flag)
    upload_s3_thread.start()


if __name__ == "__main__":
    arg_dict = get_args_cli()
    LOGGER.info('received task with args: %s' % arg_dict)
    start_s3_sync()
    jm = JobManager(**arg_dict)
    jm.start()