Ejemplo n.º 1
0
def success():
    """Serve the upload form and run the social-distance estimator on a posted image.

    On POST, every ``*.*`` file left in ``INPUT_DIR`` and ``UPLOAD_FOLDER`` is
    removed, the uploaded file is saved into ``INPUT_DIR`` and passed to
    ``social_estimator.process_one_frame``; its result is rendered with
    ``file_upload_form.html``.

    Any other request method renders the empty form.  Previously such a
    request (including the redirect this view issues itself when the file
    part is missing) fell through and returned ``None``, which is not a valid
    view return value.

    Returns:
        The rendered upload form, or a redirect back to the same URL when the
        request carries no ``file`` part.
    """
    try:
        if request.method != 'POST':
            return render_template('file_upload_form.html',
                                   messages=None,
                                   filename=None,
                                   data=None)

        # Drop the leftovers of the previous request before storing the new file.
        for folder in (INPUT_DIR, UPLOAD_FOLDER):
            for tmp_path in glob.glob(os.path.join(folder, "*.*")):
                os.remove(tmp_path)

        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)

        upload = request.files['file']
        if upload.filename == '':
            message = 'Click on Choose File button to select picture before uploading'
            return render_template('file_upload_form.html',
                                   messages=message,
                                   filename=None,
                                   data=None)

        file_path = os.path.join(INPUT_DIR, secure_filename(upload.filename))
        upload.save(file_path)

        filename, result_info = social_estimator.process_one_frame(
            frame_path=file_path)

        message = 'Image successfully uploaded and Estimated'
        return render_template('file_upload_form.html',
                               messages=message,
                               filename=filename,
                               data=result_info)
    except Exception as e:
        # Any failure is logged and shown to the user on the form itself.
        log_print(info_str=e)
        return render_template('file_upload_form.html',
                               messages=e,
                               filename=None,
                               data=None)
Ejemplo n.º 2
0
    def perform_ocr(self):
        """Poll ``INPUT_DIR`` forever and process every PDF not handled yet.

        For each file whose extension is exactly ``pdf`` and whose name is not
        in ``self.processed_files``: extract its information, rebuild the PDF,
        upload the result through ``self.s3_manager`` and record the name.
        After every scan the list of processed names is written to
        ``PROCESSED_FILE`` (one name per line, no trailing newline).

        A failure on one file is logged and does not stop the loop.  The loop
        pauses between scans; without that pause it would spin continuously
        and rewrite ``PROCESSED_FILE`` non-stop.  This method never returns.
        """
        import time  # local import: this block cannot see the file's import section

        poll_interval = 1.0  # seconds between two scans of INPUT_DIR

        while True:
            for pdf_path in glob.glob(os.path.join(INPUT_DIR, "*.*")):
                try:
                    pdf_name = ntpath.basename(pdf_path)
                    # Text after the last dot (the whole name when there is no dot).
                    if pdf_name.rpartition(".")[2] != "pdf":
                        continue
                    if pdf_name in self.processed_files:
                        continue

                    print(f"[INFO] {pdf_name} processing...")
                    extracted_info = self.pdf_extractor.main(pdf_path=pdf_path)
                    output_pdf_path = self.pdf_creator.repopulate_pdf(
                        info=extracted_info, pdf_name=pdf_name)
                    self.s3_manager.upload_files(file_path=output_pdf_path)
                    # Recorded only after a successful upload, so failures are retried.
                    self.processed_files.append(pdf_name)
                except Exception as e:
                    log_print(e)

            save_file(content="\n".join(self.processed_files),
                      filename=PROCESSED_FILE,
                      method='w')
            time.sleep(poll_interval)
Ejemplo n.º 3
0
    def run(self):
        """Exchange data with the server until ``self.terminate_flag`` is set.

        Each cycle receives and then sends data.  Any error is logged and
        treated as a lost connection: the method blocks in ``check_server``
        until the server is reachable again.  When more than five seconds
        have passed since ``self.send_data_time``, the window is switched
        back to its initial screen and the timestamp is cleared.
        """
        self.check_server()

        while True:
            try:
                if self.terminate_flag:
                    print(self.terminate_flag)
                    break
                self.receive_data()
                self.send_data()
            except Exception as e:
                log_print(info_str=e)
                time.sleep(0.05)
                print(" ### Client Disconnected ")
                reconnected = False
                while not reconnected:
                    try:
                        self.check_server()
                        reconnected = True
                        print(" ### Client Reconnected")
                    except socket.error:
                        pass

            # Fall back to the initial screen once the last sent data is older than 5 s.
            last_sent = self.send_data_time
            if last_sent is not None and time.time() - last_sent > 5:
                self.window.show_init_screen()
                self.send_data_time = None

            time.sleep(0.01)
Ejemplo n.º 4
0
def save_recorded_video():
    """Store the uploaded recording in ``VIDEO_INPUT_DIR``.

    Reads the ``video`` file part and the ``info`` form field, which carries
    the target file name.  Because that name is supplied by the client, only
    its last path component is used, so a value such as ``../../x`` cannot
    place the file outside ``VIDEO_INPUT_DIR``.

    Returns:
        str: JSON ``{"result": "success"}`` when the file was saved,
        ``{"result": "fail"}`` on any error (the error is logged).
    """
    try:
        video = request.files['video']
        requested_name = request.form['info']
        # Treat both separator styles as path separators before taking the basename.
        file_name = os.path.basename(requested_name.replace("\\", "/"))
        if file_name in ("", ".", ".."):
            raise ValueError("invalid video file name: {!r}".format(requested_name))
        video_path = os.path.join(VIDEO_INPUT_DIR, file_name)
        video.save(video_path)
        return json.dumps({'result': "success"})

    except Exception as e:
        log_print(e)
        return json.dumps({'result': "fail"})
Ejemplo n.º 5
0
    def main(self, pdf_files):
        """Run the invoice extractor on every given PDF.

        Args:
            pdf_files: iterable of PDF paths.

        Returns:
            dict: ``self.multi_pdf_result``, mapping each file's base name to
            the extractor result.  Files whose extraction raised are logged
            and left out.
        """
        self.multi_pdf_result = {}

        for pdf_path in pdf_files:
            # Name handed to the extractor: base name with every ".pdf" removed.
            stem = ntpath.basename(pdf_path).replace(".pdf", "")
            try:
                extracted = self.invoice_extractor.run(pdf_path=pdf_path,
                                                       pdf_file_name=stem)
                self.multi_pdf_result[ntpath.basename(pdf_path)] = extracted
            except Exception as e:
                log_print(e)

        return self.multi_pdf_result
Ejemplo n.º 6
0
 def check_server(self):
     """Block until a TCP connection to ``(self.host, self.port)`` is open.

     A fresh socket is created for every attempt and stored in
     ``self.connection`` before connecting.  When an attempt fails, the error
     is logged, that socket is closed explicitly (rather than left for the
     garbage collector) and a new attempt is made after 0.2 s.
     """
     print("\n[!] Checking server")
     while True:
         sock = None
         try:
             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             self.connection = sock
             sock.connect((self.host, self.port))
             print("\n[!] Server is running!")
             break
         except Exception as e:
             print("\n[!] Server is not running!")
             log_print(info_str=e)
             if sock is not None:
                 # Release the descriptor of the failed attempt before retrying.
                 sock.close()
             time.sleep(0.2)
Ejemplo n.º 7
0
    def show_recognition_result(self, birth, age, statue):
        """Display the recognised birth date, age and status, then queue them for sending.

        Args:
            birth: birth date text; commas are replaced by dots before display.
            age: age value, shown as ``str(age)``.
            statue: status text shown in the status field and stored as
                ``card_sys.card_recog_data``.
        """
        try:
            birth = birth.replace(",", ".")
        except Exception as e:
            # A failed replace is only logged; the value is displayed unchanged.
            log_print(info_str=e)

        self.birth_value.setText(birth)
        self.age_value.setText(str(age))
        self.statue_value.setText(statue)

        # Hand the result and the card's face image over to the card system.
        card_system = self.card_sys
        card_system.send_card_recog_data_flag = True
        card_system.card_recog_data = statue
        card_system.card_face_image = self.card_face_image
Ejemplo n.º 8
0
def success():
    """Run OCR on an uploaded file and send the generated file back as a download.

    On POST the ``file`` part is saved into ``UPLOAD_DIR``, processed by
    ``ocr_extractor.process_ocr_text`` and the file with the same base name
    in ``OUTPUT_DIR`` is returned as an attachment.

    NOTE(review): any other request method falls through and returns
    ``None``; confirm the route only accepts POST.
    """
    if request.method == 'POST':
        upload = request.files['file']
        file_path = os.path.join(UPLOAD_DIR, secure_filename(upload.filename))
        upload.save(file_path)

        # Part of the name before the first dot.  partition() keeps the whole
        # name when it has no dot, whereas slicing at find(".") == -1 dropped
        # its last character.
        file_name = ntpath.basename(file_path).partition(".")[0]

        saved_path = ocr_extractor.process_ocr_text(frame_path=file_path,
                                                    file_name=file_name)
        saved_file_name = ntpath.basename(saved_path)
        log_print(info_str="Created {}".format(saved_path))

        return send_file(os.path.join(OUTPUT_DIR, saved_file_name),
                         as_attachment=True)
Ejemplo n.º 9
0
def extract_roi_info(json_result, roi_left, roi_right, roi_top, roi_bottom, pdf_type, y_top=None, label=None):
    """Return the text of the OCR annotations located inside a rectangular region.

    An annotation belongs to the region when the ``x``/``y`` of its first
    bounding-polygon vertex satisfy ``roi_left <= x < roi_right`` and
    ``roi_top <= y < roi_bottom``.  Selected annotations are grouped into
    lines (first-vertex ``y`` within ``Y_BIND_THREAD`` of the line's first
    element), each line is ordered by ``x`` and the descriptions are
    concatenated, each followed by one space.

    Args:
        json_result: iterable of annotation dicts with ``description`` and
            ``boundingPoly`` entries.
        roi_left, roi_right, roi_top, roi_bottom: region bounds.
        pdf_type: document layout name; enables a few layout-specific filters.
        y_top: fallback ``y`` used when reading an annotation raised; the
            annotation is then kept if its ``x`` is in range and
            ``roi_top < y_top <= roi_bottom``.
        label: field being extracted; enables label-specific filters.

    Returns:
        str: the concatenated text with ``".."`` collapsed to ``"."``, or
        ``""`` when no annotation falls inside the region (this case used to
        raise ``IndexError``).
    """
    roi_info = []
    for j_info in json_result:
        try:
            if label == "Lieferschein Nr" and not j_info["description"].replace("-", "").isdigit():
                continue
            if pdf_type == "type7_B" and label == "Volume" and j_info["description"].isalpha():
                continue
            if pdf_type == "type11" and label == "Lieferschein Nr" and j_info["description"] == "8404":
                continue
            if roi_left <= j_info["boundingPoly"]["vertices"][0]["x"] < roi_right and \
                    roi_top <= j_info["boundingPoly"]["vertices"][0]["y"] < roi_bottom:
                roi_info.append(j_info)
        except Exception as e:
            log_print(info_str=e)
            # Fallback: place the annotation vertically with the caller-supplied y_top.
            if y_top is not None:
                if roi_left <= j_info["boundingPoly"]["vertices"][0]["x"] < roi_right and \
                        roi_top < y_top <= roi_bottom:
                    roi_info.append(j_info)

    if not roi_info:
        # Nothing inside the region: there is no first element to seed the grouping with.
        return ""

    sorted_y_roi_info = sorted(roi_info, key=lambda k: k["boundingPoly"]["vertices"][0]["y"])

    # Group annotations whose y stays close to the first element of the current line.
    lines = []
    current_line = []
    anchor_y = sorted_y_roi_info[0]["boundingPoly"]["vertices"][0]["y"]
    for s_y_info in sorted_y_roi_info:
        item_y = s_y_info["boundingPoly"]["vertices"][0]["y"]
        if abs(anchor_y - item_y) < Y_BIND_THREAD:
            current_line.append(s_y_info)
        else:
            lines.append(current_line)
            current_line = [s_y_info]
            anchor_y = item_y
    lines.append(current_line)

    text_parts = []
    for line in lines:
        sorted_x_info = sorted(line, key=lambda k: k["boundingPoly"]["vertices"][0]["x"])
        if pdf_type == "type5" and label == "DTS_Date":
            # For this field only lines containing a ":" are kept.
            if not any(":" in candi["description"] for candi in sorted_x_info):
                continue
        text_parts.extend(candi["description"] + " " for candi in sorted_x_info)

    return "".join(text_parts).replace("..", ".")
Ejemplo n.º 10
0
    def check_face_from_db(self, face_img):
        """Compare the face in ``face_img`` with the encodings stored in the database.

        On a match the stored date and status are copied to ``self.parent``,
        its ``stop_flag`` is set and its message becomes ``"[Erlaubt!]"`` for
        the status ``"allow"`` and ``"[Nicht erlaubt!]"`` otherwise.  When the
        database is empty or no stored face matches, ``self.check_db_flag``
        is set instead.  Any error is printed and logged.

        Args:
            face_img: image passed to ``face_recognition.face_encodings``.
        """
        try:
            from src.database.manager import DatabaseManager
            person_face_encoding = face_recognition.face_encodings(face_img)[0]

            records = DatabaseManager().select_info_from_db()
            print("records", records)

            if len(records) == 0:
                print("There is no saved face encoding in db")
                self.check_db_flag = True
                return

            # Record layout used here: [1] date, [2] status, [3] space-separated encoding.
            known_encodings = [np.array(row[3].split(" "), dtype=float) for row in records]
            known_statues = [row[2] for row in records]
            known_dates = [row[1] for row in records]
            print("record success")

            matches = face_recognition.compare_faces(known_encodings,
                                                     person_face_encoding)
            print(matches)
            if True not in matches:
                print("Faces not match.")
                self.check_db_flag = True
                return

            print("Faces match.")
            hit = matches.index(True)
            saved_statue = known_statues[hit]
            saved_date = known_dates[hit]

            self.parent.db_saved_date = saved_date
            self.parent.db_saved_statue = saved_statue
            self.parent.stop_flag = True
            self.parent.message = "[Erlaubt!]" if saved_statue == "allow" else "[Nicht erlaubt!]"
            self.parent.age_guessed_time = time.time()

        except Exception as e:
            print("Failed checking database, it is face encoding problem")
            log_print(info_str=e)
Ejemplo n.º 11
0
    def get_feature_token_words(self, text):
        """Build one feature for ``text`` from the model features of its words.

        The text is split into sentences and words by ``self.text_processor``;
        each word is looked up in ``self.model``.  Words whose lookup raises
        are logged and skipped.

        Args:
            text: text to describe.

        Returns:
            The result of ``self.calculate_text_feature`` on the collected
            word features.
        """
        word_vectors = []
        for sentence in self.text_processor.tokenize_sentence(text=text):
            for token in self.text_processor.tokenize_word(sample=sentence.text):
                try:
                    word_vectors.append(self.model[token])
                except Exception as e:
                    log_print(e)

        return self.calculate_text_feature(word_features=word_vectors)
Ejemplo n.º 12
0
    def recognize_card(self, image, face_right):
        """Read the birth date from an ID image and decide whether the holder is 18 or older.

        The date text returned by ``extract_birthday`` (dots removed) is read
        as day = characters 0-1, month = characters 2-3 and year = the last
        four digits for ``ID_TYPE[1]``/``ID_TYPE[2]``, or ``1900`` plus the
        last two digits for ``ID_TYPE[0]``.  On success ``self.real_age`` and
        ``self.age_statue`` are set, the result is shown in the window and
        ``self.start_flag`` is cleared.  Errors are logged and otherwise
        ignored.

        Fixes over the previous version:
          * the two-digit year is expanded with string concatenation; the
            former ``str.replace`` call was given an int and always raised;
          * the birthday test compares ``(month, day)`` as a pair, so a
            birthday in an earlier month counts even when today's day number
            is smaller than the birth day.

        NOTE(review): two-digit years are always mapped to 19xx - confirm
        this is intended for ``ID_TYPE[0]`` documents.

        Args:
            image: card image passed to ``extract_birthday``.
            face_right: passed to ``extract_birthday`` as ``base_line``.
        """
        try:
            document_type = self.window.document_type
            date = extract_birthday(img=image,
                                    id_type=document_type,
                                    base_line=face_right)
            date = date.replace(".", "")

            if date != "":
                birth_year = 0
                if document_type == ID_TYPE[1] or document_type == ID_TYPE[2]:
                    birth_year = int(date[-4:])
                if document_type == ID_TYPE[0]:
                    birth_year = 1900 + int(date[-2:])
                    # Swap only the trailing two-digit year for the full year.
                    date = date[:-2] + str(birth_year)
                birth_month = int(date[2:4])
                birth_day = int(date[0:2])

                # Read the clock once so year, month and day belong to the same day.
                today = datetime.date.today()
                real_age = today.year - birth_year
                if (today.month, today.day) < (birth_month, birth_day):
                    # This year's birthday has not happened yet.
                    real_age -= 1

                self.real_age = real_age
                if self.real_age >= 18:
                    self.age_statue = "Allow"
                else:
                    self.age_statue = "Not Allow"
                self.window.show_recognition_result(date, self.real_age,
                                                    self.age_statue)
                self.start_flag = False

        except Exception as e:
            log_print(info_str=e)

        time.sleep(0.01)
Ejemplo n.º 13
0
    def estimate_social_distance(self, frame):
        """Draw a box around every tracked person, red when someone else is too close.

        The distance between every ordered pair of tracked persons is
        computed from their boxes; a pair whose computation raises is logged
        and counted as distance ``0``.  A person with at least one distance
        ``<= SAFE_DISTANCE`` gets a red box, everyone else a green one, and
        each box is labelled with the tracker id plus one.

        Args:
            frame: image to draw on (modified in place).

        Returns:
            The same ``frame`` object.
        """
        pair_distances = {}
        for this_id in self.person_trackers.keys():
            neighbours = {}
            pair_distances[this_id] = neighbours
            for other_id in self.person_trackers.keys():
                if other_id == this_id:
                    continue
                try:
                    gap = calculate_real_distance_two_persons(
                        self.person_attributes[this_id]["box"],
                        self.person_attributes[other_id]["box"])
                except Exception as e:
                    log_print(info_str=e)
                    gap = 0
                neighbours[other_id] = gap

        red, green = (0, 0, 255), (0, 255, 0)
        for this_id in self.person_trackers.keys():
            left, top, right, bottom = self.person_attributes[this_id]["box"]
            # Every distance is compared (list, not generator) before deciding.
            too_close = any([gap <= SAFE_DISTANCE
                             for gap in pair_distances[this_id].values()])
            cv2.rectangle(frame, (left, top), (right, bottom),
                          red if too_close else green, 2)
            cv2.putText(frame, str(this_id + 1), (left, max(top - 5, 0)),
                        cv2.FONT_HERSHEY_TRIPLEX, 1, green, 2)

        return frame
Ejemplo n.º 14
0
    def set_detections(self, detections, timestamps):
        """Merge new face detections into ``self.faces`` and drop stale faces.

        Each detection is attached to the nearest known face when
        ``find_nearest_face`` reports a distance below 100 (keeping at most
        the 7 most recent boxes per face); otherwise it starts a new face
        entry.  Afterwards every face whose latest timestamp is more than
        0.5 s old is removed.

        Stale faces are removed from the highest index downwards.  Removing
        them in ascending order shifted the remaining indices, so the wrong
        entries were deleted or the index ran past the end of the list.

        Args:
            detections: iterable of bounding boxes.
            timestamps: iterable of timestamps, paired with ``detections``.
        """
        for bbox, timestamp in zip(detections, timestamps):
            idx, dist = self.find_nearest_face(bbox)
            try:
                if dist is not None and dist < 100:
                    face = self.faces[int(idx)]
                    face['bboxes'].append(bbox)
                    face['timestamps'].append(timestamp)

                    # Keep a sliding window of the 7 latest observations.
                    if len(face['bboxes']) > 7:
                        face['bboxes'].pop(0)
                        face['timestamps'].pop(0)
                else:
                    # This is a new face not in the scene before
                    self.faces.append({
                        'timestamps': [timestamp],
                        'bboxes': [bbox]
                    })
            except Exception as e:
                log_print(info_str=e)

        # Clean old detections:
        now = time.time()
        stale_indexes = [
            i for i, face in enumerate(self.faces)
            if now - face['timestamps'][-1] > 0.5
        ]

        for i in reversed(stale_indexes):
            try:
                self.faces.pop(i)
            except Exception as e:
                # The entry is gone already (the original comment attributes
                # this to another thread having deleted the face).
                log_print(info_str=e)
Ejemplo n.º 15
0
    def face_compare(self):
        """Compare the captured person face with the face on the card and store the verdict.

        The first image in ``self.person_face_image_list`` that yields a face
        encoding is compared with the encodings found in
        ``self.card_face_image``.  The age-guess thread gets the message
        ``"[Erlaubt!]"`` on a match and ``"[Nicht erlaubt!]"`` otherwise, and
        the person's encoding is saved to the database with ``"allow"`` or
        ``"not allow"``.  The image list is cleared afterwards.  Any error is
        printed and logged.
        """
        try:
            from src.database.manager import DatabaseManager

            face_encoding1 = None
            for person_image in self.person_face_image_list:
                try:
                    face_encoding1 = face_recognition.face_encodings(person_image)[0]
                    break
                except Exception as e:
                    log_print(info_str=e)
                    print("can't find face feature from image")

            card_locations = face_recognition.face_locations(self.card_face_image)
            face_encoding2 = face_recognition.face_encodings(self.card_face_image,
                                                             card_locations)

            matches = face_recognition.compare_faces(face_encoding2, face_encoding1)
            print(matches)

            matched = True in matches
            print("Faces match." if matched else "Faces not match.")
            self.age_guess_thread.message = "[Erlaubt!]" if matched else "[Nicht erlaubt!]"
            self.age_guess_thread.age_guessed_time = time.time()
            DatabaseManager().save_face_encoding_to_db(
                face_encoding1, "allow" if matched else "not allow")

            self.person_face_image_list = []
        except Exception as e:
            print(
                "Faces not captured or can't not find features from face images!"
            )
            log_print(info_str=e)
Ejemplo n.º 16
0
    def main(self):
        """Send and receive data in a loop until ``self.terminate_flag`` is set.

        Any error during a cycle is logged and treated as a lost connection:
        after half a second ``socket_connection`` is retried until it
        succeeds.  Cycles are spaced 0.2 s apart.
        """
        self.socket_connection()

        while True:
            try:
                if self.terminate_flag:
                    break
                self.send_data()
                self.receive_data()
            except Exception as e:
                log_print(info_str=e)
                time.sleep(0.5)
                print(" Client Disconnected ")
                reconnected = False
                while not reconnected:
                    try:
                        self.socket_connection()
                        reconnected = True
                        print(" Client Reconnected")
                    except socket.error:
                        # Connection attempt failed; try again immediately.
                        pass

            time.sleep(0.2)
Ejemplo n.º 17
0
    def run(self):
        """Score every statement of the sheet against the master key and save a CSV.

        Reads sheet ``SHEET_NAME`` of ``INPUT_EXCEL_PATH``; the master key is
        the cell at row 1, column 1 and the statements are column 3 from row
        2 onwards (zero-based ``iloc`` positions).  Each statement's cosine
        similarity to the master key feature is written next to it; a
        statement whose scoring raises gets the text ``"None"`` and the error
        is logged.  Nothing is written when there are no statements.
        """
        workbook_name = ntpath.basename(INPUT_EXCEL_PATH).replace(".xlsx", "")
        output_file_path = os.path.join(
            OUTPUT_DIR, f"{workbook_name}_{SHEET_NAME}_result.csv")

        input_df = pd.read_excel(INPUT_EXCEL_PATH, sheet_name=SHEET_NAME)
        master_key = input_df.iloc[1, 1]
        master_feature = self.feature_extractor.get_feature_token_words(
            text=master_key)
        statements = input_df.iloc[2:, 3].values.tolist()

        if not statements:
            print(
                f"[INFO] There are not any statements to estimate in {SHEET_NAME}"
            )
            return

        similarity_values = []
        for statement in statements:
            try:
                statement_feature = self.feature_extractor.get_feature_token_words(
                    text=statement)
                proximity = cosine_similarity([master_feature],
                                              [statement_feature])
                similarity_values.append(proximity[0][0])
            except Exception as e:
                similarity_values.append("None")
                log_print(e)

        # Three rows transposed into three columns; the one-element master
        # key row is shorter than the other two.
        output_df = pd.DataFrame([[master_key], statements,
                                  similarity_values]).T
        output_df.to_csv(
            output_file_path,
            index=True,
            header=["Master Key", "Statements", "Proximity Score"],
            mode='w')
        print(f"[INFO] Successfully saved in {output_file_path}")

        return
Ejemplo n.º 18
0
def estimate_frame_rotation(json_result):
    """Guess the page rotation from the first "Kies<digits>" annotation.

    Scans ``json_result["textAnnotations"]`` (skipping the first entry) for a
    description that contains ``"Kies"`` and is purely digits once ``"Kies"``
    is removed.  The first two vertices of that annotation decide the result:

    * ``y`` values differ by more than ``ROTATION_Y_THREAD``:
      ``"clockwise"`` when the first vertex is lower on the y axis value
      comparison (``y0 > y1``), else ``"anti_clockwise"``;
    * otherwise ``"reflection"`` when ``x0 > x1``, else ``None``.

    Annotations that raise while being inspected are printed, logged and
    skipped.

    Returns:
        ``"clockwise"``, ``"anti_clockwise"``, ``"reflection"`` or ``None``
        (also ``None`` when no suitable annotation exists).
    """
    orientation = None

    for annotation in json_result["textAnnotations"][1:]:
        try:
            if "Kies" not in annotation["description"]:
                continue
            if not annotation["description"].replace("Kies", "").isdigit():
                continue

            corners = annotation["boundingPoly"]["vertices"]
            if abs(corners[0]["y"] - corners[1]["y"]) > ROTATION_Y_THREAD:
                orientation = ("clockwise"
                               if corners[0]["y"] > corners[1]["y"]
                               else "anti_clockwise")
            else:
                orientation = ("reflection"
                               if corners[0]["x"] > corners[1]["x"]
                               else None)
            # Only the first matching annotation is considered.
            break
        except Exception as e:
            print(e)
            log_print(info_str=e)

    return orientation
Ejemplo n.º 19
0
    def main(self, pdf_path):
        """Extract report information from every page of a PDF.

        Each page is rendered (``convert_from_path(pdf_path, 200)``), written
        to ``PDF_IMAGES_DIR`` as ``<name>_<page index>.jpg`` and passed to
        ``self.extract_page_info``, whose return value replaces
        ``self.pdf_info``.  Pages that raise are printed, logged and skipped.
        When the report section holds a ``report_number``, it is copied into
        every unit entry and into the ``unit_1`` entry of the motorist and
        occupant sections.

        Args:
            pdf_path: path of the PDF to process.

        Returns:
            dict: ``self.pdf_info``.
        """
        self.pdf_info = {"report": {}, "unit": [], "motorist": {}, "occupant": {}}
        page_images = [np.array(page) for page in convert_from_path(pdf_path, 200)]
        file_name = ntpath.basename(pdf_path).replace(".pdf", "")

        for page_index, page_image in enumerate(page_images):
            try:
                page_jpg_path = os.path.join(PDF_IMAGES_DIR, f"{file_name}_{page_index}.jpg")
                cv2.imwrite(page_jpg_path, page_image)
                self.pdf_info = self.extract_page_info(pdf_page_frame_path=page_jpg_path,
                                                       file_name=file_name,
                                                       index=page_index)
            except Exception as e:
                print(e)
                log_print(e)

        # Propagate the report number to the dependent sections.
        for section, content in self.pdf_info.items():
            report = self.pdf_info["report"]
            if "report_number" not in report.keys():
                continue
            if section == "unit":
                for unit_info in content:
                    unit_info["report_number"] = report["report_number"]
            elif section in ["motorist", "occupant"]:
                if "unit_1" in content.keys():
                    content["unit_1"]["report_number"] = report["report_number"]

        return self.pdf_info
Ejemplo n.º 20
0
    def get_feature_token_words(self, text, supported_vocab=None):
        """Build one feature for ``text``, optionally seeded with extra vocabulary.

        Args:
            text: text to describe; split into sentences and words by
                ``self.text_processor``.
            supported_vocab: optional ``";"``-separated terms.  Each term,
                with its spaces removed, is looked up in ``self.model`` and
                its feature is placed before the word features.

        Returns:
            The result of ``self.calculate_text_feature`` on all collected
            features.  Terms and words whose lookup raises are logged and
            skipped.
        """
        sentences = self.text_processor.tokenize_sentence(text=text)
        collected = []

        vocab_terms = [] if supported_vocab is None else supported_vocab.split(";")
        for term in vocab_terms:
            try:
                collected.append(self.model[term.replace(" ", "")])
            except Exception as e:
                log_print(e)

        for sentence in sentences:
            for token in self.text_processor.tokenize_word(sample=sentence.text):
                try:
                    collected.append(self.model[token])
                except Exception as e:
                    log_print(e)

        return self.calculate_text_feature(word_features=collected)
Ejemplo n.º 21
0
def run():
    """Compare the two images given by the ``img1``/``img2`` query arguments.

    Both images are fetched, preprocessed and compared; the similarity is
    mapped to ``1 - exp(EXP_CONST * similarity)``.

    The ``*.jpg`` files under ``CUR_DIR/utils`` are now removed in a
    ``finally`` clause, so they are cleaned up after a failed request as
    well; before, they were only removed when the comparison succeeded.  A
    file that cannot be removed is logged and no longer turns a computed
    score into ``0``.

    Returns:
        str: JSON ``{"score": <value>}``; the score is ``0`` when any step
        of the comparison raised (the error is logged).
    """
    try:
        frame1_url = request.args.get('img1')
        frame2_url = request.args.get('img2')

        frame1_path = get_image_from_url(https_url=frame1_url, file_id="1")
        frame2_path = get_image_from_url(https_url=frame2_url, file_id="2")
        processed_frame1_path = preprocess_image(frame_path=frame1_path, file_id="1")
        processed_frame2_path = preprocess_image(frame_path=frame2_path, file_id="2")
        _, similarity = compare_two_images(frame1_path=processed_frame1_path,
                                           frame2_path=processed_frame2_path)
        score = 1 - math.exp(EXP_CONST * similarity)
        return json.dumps({'score': score})
    except Exception as e:
        log_print(info_str=e)
        return json.dumps({'score': 0})
    finally:
        for path in glob.glob(os.path.join(CUR_DIR, 'utils', '*.jpg')):
            try:
                os.remove(path)
            except OSError as e:
                log_print(info_str=e)
Ejemplo n.º 22
0
    def process_one_frame(self, frame_path):
        """Detect people in one image, mark unsafe distances and save the annotated copy.

        People are detected with ``self.person_detector`` and filtered with
        ``non_max_suppression_slow``.  The distance between every ordered pair
        is computed (a failing computation is logged and counted as ``0``).
        A person with a neighbour at ``<= SAFE_DISTANCE`` is drawn in red and
        reported under ``"danger"`` together with the nearest neighbour;
        everyone else is drawn in green and reported under ``"safe"``.  The
        annotated image, scaled down to 800 px width when wider, is written
        to ``UPLOAD_FOLDER`` under the input's base name.

        Args:
            frame_path: path of the image to analyse.

        Returns:
            tuple: ``(file_name, social_distance_result)`` where the result
            is ``{"danger": [...], "safe": [...]}``.
        """
        social_distance_result = {"danger": [], "safe": []}

        frame = cv2.imread(frame_path)
        height, width = frame.shape[:2]
        file_name = ntpath.basename(frame_path)

        st_time = time.time()
        boxes, _confidences = self.person_detector.detect_person_yolo(
            frame=frame)
        filtered_idx, _ = non_max_suppression_slow(boxes=np.array(boxes),
                                                   keys=range(len(boxes)))
        print(time.time() - st_time)

        if len(filtered_idx) > 0:
            # Corner coordinates [x1, y1, x2, y2] of every kept detection.
            person_boxes = [[boxes[k][0], boxes[k][1], boxes[k][2], boxes[k][3]]
                            for k in filtered_idx]
            person_count = len(person_boxes)

            gaps = {}
            for i in range(person_count):
                gaps[i] = {}
                for j in range(person_count):
                    if j == i:
                        continue
                    try:
                        gap = calculate_real_distance_two_persons(
                            person_boxes[i], person_boxes[j])
                    except Exception as e:
                        log_print(info_str=e)
                        gap = 0
                    gaps[i][j] = gap

            red, green = (0, 0, 255), (0, 255, 0)
            for i, (left, top, right, bottom) in enumerate(person_boxes):
                text = "person_" + str(i + 1)
                close = [(j, gap) for j, gap in gaps[i].items()
                         if gap <= SAFE_DISTANCE]
                if close:
                    # First neighbour with the smallest distance.
                    nearest_id, min_dist = min(close, key=lambda pair: pair[1])
                    warning_str = f"{text}; person_{nearest_id + 1}:{min_dist!s}cm"
                    social_distance_result["danger"].append(warning_str)
                    box_color = red
                else:
                    social_distance_result["safe"].append(text)
                    box_color = green
                cv2.rectangle(frame, (left, top), (right, bottom), box_color, 2)
                cv2.putText(frame, str(i + 1), (left, max(top - 3, 0)),
                            cv2.FONT_HERSHEY_TRIPLEX, height / 1500,
                            green, 3)

        print(social_distance_result)

        # Shrink wide images to 800 px; smaller ones keep their size.
        fx = 800 / width if width >= 800 else 1
        cv2.imwrite(os.path.join(UPLOAD_FOLDER, file_name),
                    cv2.resize(frame, None, fx=fx, fy=fx))

        return file_name, social_distance_result
Ejemplo n.º 23
0
    def detect_one_frame(self, img, csi_port):

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        faces = self.face_cascade.detectMultiScale(img, 1.3, 5)

        if len(faces) == 0:

            self.face_manager.update_face_info(info=self.face_info)

        else:
            face_boxes = []
            ages = []
            genders = []
            for (x, y, w, h) in faces:
                if w > 90:  # ignore small faces

                    # mention detected face
                    """overlay = img.copy(); output = img.copy(); opacity = 0.6
                    cv2.rectangle(img,(x,y),(x+w,y+h),(128,128,128),cv2.FILLED) #draw rectangle to main image
                    cv2.addWeighted(overlay, opacity, img, 1 - opacity, 0, img)"""
                    cv2.rectangle(img, (x, y), (x + w, y + h), (128, 128, 128),
                                  1)  # draw rectangle to main image

                    # extract detected face
                    detected_face = img[int(y):int(y + h),
                                        int(x):int(x +
                                                   w)]  # crop detected face
                    face_box = [int(y), int(x + w), int(y + h), int(x)]

                    try:
                        # age gender data set has 40% margin around the face. expand detected face.
                        margin = 30
                        margin_x = int((w * margin) / 100)
                        margin_y = int((h * margin) / 100)
                        detected_face = img[
                            adjust_boundary_value(int(y - margin_y), 0):
                            adjust_boundary_value(int(y + h +
                                                      margin_y), img.shape[0]),
                            adjust_boundary_value(int(x - margin_x), 0):
                            adjust_boundary_value(int(x + w +
                                                      margin_x), img.shape[1])]
                        face_box = [
                            adjust_boundary_value(int(y - margin_y), 0),
                            adjust_boundary_value(int(x + w + margin_x),
                                                  img.shape[1]),
                            adjust_boundary_value(int(y + h + margin_y),
                                                  img.shape[0]),
                            adjust_boundary_value(int(x - margin_x), 0)
                        ]

                    except Exception as e:
                        log_print(e)
                        # print("detected face has no margin")
                        # print(e)

                    try:
                        # vgg-face expects inputs (224, 224, 3)
                        detected_face = cv2.resize(detected_face, (224, 224))

                        img_pixels = image.img_to_array(detected_face)
                        img_pixels = np.expand_dims(img_pixels, axis=0)
                        img_pixels /= 255

                        # find out age and gender
                        age_distributions = self.age_mdl.predict(img_pixels)
                        apparent_age = int(
                            np.floor(
                                np.sum(age_distributions * self.output_indexes,
                                       axis=1))[0])

                        gender_distribution = self.gender_mdl.predict(
                            img_pixels)[0]
                        gender_index = np.argmax(gender_distribution)
                        face_boxes.append(face_box)
                        ages.append(apparent_age)
                        if LOCAL:

                            enable_gender_icons = True
                            genders.append(gender_index)
                            if gender_index == 0:
                                gender = "F"
                            else:
                                gender = "M"

                            # background for age gender declaration
                            info_box_color = (46, 200, 255)
                            # triangle_cnt = np.array([(x+int(w/2), y+10), (x+int(w/2)-25, y-20),
                            # (x+int(w/2)+25, y-20)])
                            triangle_cnt = np.array([
                                (x + int(w / 2), y),
                                (x + int(w / 2) - 20, y - 20),
                                (x + int(w / 2) + 20, y - 20)
                            ])
                            cv2.drawContours(img, [triangle_cnt], 0,
                                             info_box_color, -1)
                            cv2.rectangle(img, (x + int(w / 2) - 50, y - 20),
                                          (x + int(w / 2) + 50, y - 90),
                                          info_box_color, cv2.FILLED)

                            # labels for age and gender
                            cv2.putText(img, apparent_age,
                                        (x + int(w / 2), y - 45),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 111, 255), 2)
                            male_icon = cv2.imread(MALE_ICON_PATH)
                            female_icon = cv2.imread(FEMALE_ICON_PATH)

                            if enable_gender_icons:
                                if gender == 'M':
                                    gender_icon = male_icon
                                else:
                                    gender_icon = female_icon

                                img[y - 75:y - 75 + male_icon.shape[0],
                                    x + int(w / 2) - 45:x + int(w / 2) - 45 +
                                    male_icon.shape[1]] = gender_icon
                            else:
                                cv2.putText(img, gender,
                                            (x + int(w / 2) - 42, y - 45),
                                            cv2.FONT_HERSHEY_SIMPLEX, 1,
                                            (0, 111, 255), 2)

                    except Exception as e:
                        log_print(e)
                        # print("exception", str(e))

            self.face_info, move_in_idx, move_out_idx = self.face_manager.recognize_face(
                face_info=self.face_info,
                face_box=face_boxes,
                rgb_frame=img_rgb,
                age=ages,
                gender=genders,
                csi_port=csi_port)

            if move_in_idx:
                if LOCAL:
                    print("id:", self.face_info["id"])
                    print("age:", self.face_info["age"])
                    print("gender:", self.face_info["gender"])
                    print("tstamp:", self.face_info["t_stamp"])
                    print("status:", self.face_info["type"])
                else:
                    send_data(data=self.face_info, send_idx=move_in_idx)

            if move_out_idx:
                if LOCAL:
                    print("id:", self.face_info["id"])
                    print("age:", self.face_info["age"])
                    print("gender:", self.face_info["gender"])
                    print("tstamp:", self.face_info["t_stamp"])
                    print("status:", self.face_info["type"])
                else:
                    send_data(data=self.face_info, send_idx=move_out_idx)

            return
Ejemplo n.º 24
0
    # NOTE(review): the enclosing ``def`` of this fragment is not part of the
    # file; ``processed_files`` and ``input_image_path`` are assumed to be
    # iterables of paths supplied by the missing header/preamble - confirm.

    # Collect the name extract_file_name() reports for every already
    # processed file so those inputs can be skipped below.
    processed_file_names = []

    for processed_f_path in processed_files:
        f_name = extract_file_name(file_path=processed_f_path)
        processed_file_names.append(f_name)

    total_lens = len(input_image_path)
    for i, path in enumerate(input_image_path):

        # Base name with every ".jpg" occurrence removed; used both for the
        # "already processed" check and for the output .txt file name.
        file_name = ntpath.basename(path).replace(".jpg", "")
        if file_name in processed_file_names:
            continue

        print("Process {}-({} / {})".format(path, i + 1, total_lens))
        # Best effort per image: any failure is logged (path first, then the
        # exception) and the loop moves on to the next input.
        try:
            frame_content = process_ocr_text(frame_path=path)
            txt_file_path = os.path.join(OUTPUT_DIR,
                                         "{}.txt".format(file_name))
            save_file(content=frame_content,
                      filename=txt_file_path,
                      method='w')
            log_print(info_str=path + "\n" + "Successfully processed")
            print("Successfully processed {}".format(path))

        except Exception as e:
            log_print(info_str=path)
            log_print(info_str=e)

    # Delete every *.jpg file found directly in OUTPUT_DIR.
    for jpg_path in glob.glob(os.path.join(OUTPUT_DIR, "*.jpg")):
        os.remove(jpg_path)
Ejemplo n.º 25
0
def get_gender_info(json_data, blog_width):
    """Extract the "Women" and "Men" value strings from an OCR annotation list.

    The routine looks for a "Gender" heading, pairs it with a "Women" label
    and a "Men" label whose vertical centres differ by at most ``LINE_DIFF``,
    and then concatenates the descriptions of the annotations that lie
    between the bottom of the heading and the top of the "Women" label,
    inside the horizontal extent of each label.

    Args:
        json_data: OCR response dict. ``json_data["textAnnotations"]`` must
            be a list; its first entry is skipped. Every other entry is
            expected to carry ``"description"`` and
            ``"boundingPoly"["vertices"]`` (four dicts with ``"x"``/``"y"``).
        blog_width: Added to the heading's left edge (plus ``RANGE_MARGIN``)
            to obtain the right edge of the search area.

    Returns:
        dict: ``{"Women": str, "Men": str}``. Both values stay empty when no
        "Gender"/"Women"/"Men" block is found. If an annotation is malformed
        the error is logged through ``log_print`` and whatever was collected
        so far is returned.
    """

    def coord(annotation, vertex_idx, axis):
        # A vertex may omit a coordinate key (observed for zero-valued
        # coordinates); treat a missing key as 0 instead of letting one
        # KeyError abort the whole extraction.
        return annotation["boundingPoly"]["vertices"][vertex_idx].get(axis, 0)

    def center(annotation, y_vertex_idx=2):
        # Integer centre of the box: x from vertices 0/1, y from vertex 0
        # and the requested lower vertex.
        center_x = int(0.5 * (coord(annotation, 0, "x") +
                              coord(annotation, 1, "x")))
        center_y = int(0.5 * (coord(annotation, 0, "y") +
                              coord(annotation, y_vertex_idx, "y")))
        return center_x, center_y

    gender_dict = {"Women": "", "Men": ""}
    gender_ranges = []
    percent_ranges = []
    gender_json = json_data["textAnnotations"][1:]

    try:
        # Step 1: one candidate block per "Gender" heading that has a
        # matching "Women"/"Men" label pair.
        for header in gender_json:
            if header["description"] != "Gender":
                continue
            left = coord(header, 0, "x") - RANGE_MARGIN
            top = coord(header, 0, "y") - RANGE_MARGIN
            right = left + blog_width + RANGE_MARGIN
            for women in gender_json:
                if women["description"] != "Women":
                    continue
                women_center_x, women_center_y = center(women)
                if not (left <= women_center_x <= right
                        and women_center_y >= top):
                    continue
                for men in gender_json:
                    if men["description"] != "Men":
                        continue
                    men_center_x, men_center_y = center(men)
                    if left <= men_center_x <= right and abs(
                            men_center_y - women_center_y) <= LINE_DIFF:
                        bottom = men_center_y + RANGE_MARGIN
                        gender_ranges.append([left, top, right, bottom])
                        # [values top, values bottom,
                        #  women left, women right, men left, men right]
                        percent_ranges.append([
                            coord(header, 2, "y"),
                            coord(women, 0, "y"),
                            coord(women, 0, "x"),
                            coord(women, 1, "x"),
                            coord(men, 0, "x"),
                            coord(men, 1, "x"),
                        ])
                        break
                # Only the first qualifying "Women" label is considered for
                # a heading, whether or not a "Men" label matched it.
                break

        # Nothing to extract: return the empty result directly instead of
        # relying on an IndexError from the lookups below.
        if not gender_ranges:
            return gender_dict

        # Step 2: with several candidates, prefer one that contains a "%"
        # annotation (the last such candidate wins).
        correct_range_num = 0
        if len(gender_ranges) > 1:
            for i, location_range in enumerate(gender_ranges):
                for annotation in gender_json:
                    if "%" not in annotation["description"]:
                        continue
                    center_x, center_y = center(annotation, y_vertex_idx=3)
                    if location_range[0] <= center_x <= location_range[2] and \
                            location_range[1] <= center_y <= location_range[3]:
                        correct_range_num = i
                        break

        left, top, right, bottom = gender_ranges[correct_range_num]
        (percent_top, percent_bottom,
         women_percent_left, women_percent_right,
         men_percent_left, men_percent_right) = percent_ranges[correct_range_num]

        # Step 3: keep the annotations centred inside the chosen block
        # (bottom edge exclusive) ...
        gender_data = []
        for annotation in gender_json:
            center_x, center_y = center(annotation)
            if left <= center_x <= right and top <= center_y < bottom:
                gender_data.append((annotation, center_x, center_y))

        # ... and assign each one to the label column(s) it falls under.
        for annotation, center_x, center_y in gender_data:
            in_value_rows = percent_top <= center_y <= percent_bottom
            if women_percent_left <= center_x <= women_percent_right and \
                    in_value_rows:
                gender_dict["Women"] += annotation["description"]
            if men_percent_left <= center_x <= men_percent_right and \
                    in_value_rows:
                gender_dict["Men"] += annotation["description"]

        return gender_dict

    except Exception as e:
        # Best effort: malformed annotations must not break the caller.
        log_print(info_str=e)

        return gender_dict
Ejemplo n.º 26
0
    def extract_motorist_occupant_page(self, json_info, frame_path):
        """Collect per-unit occupant fields from the OCR annotations of one page.

        Args:
            json_info: List of annotation dicts, each with a "description"
                string and "boundingPoly"/"vertices" (dicts with "x"/"y").
            frame_path: Path of the page image; it is read with cv2.imread and
                its size is used to restrict where "unit" labels may appear.

        Returns:
            dict: Maps "unit_<n>" to a dict that may hold "unit_number",
            "name", "birth_date", "age", "gender", "address", "phone",
            "injuries" and "seating_position". Units whose "name" and
            "birth_date" are both empty, or that lack either key, are removed
            before returning.
        """

        info = {}

        # Per-field accumulators; each one is reset to "" after it is stored.
        unit_number = ""
        owner_name = ""
        birth_date = ""
        age = ""
        gender = ""
        address = ""
        phone = ""
        injuries = ""
        seating_position = ""

        # NOTE(review): cv2.imread returns None for an unreadable path, which
        # would make the .shape access below fail - confirm callers guarantee
        # a valid image.
        frame = cv2.imread(frame_path)
        frame_height, frame_width = frame.shape[:2]

        unit_cnt = 0
        # Process annotations from top to bottom (by the y of their first vertex).
        sorted_json_info = sorted(json_info, key=lambda k: k["boundingPoly"]["vertices"][0]["y"])

        # Pass 1: find "unit" labels in the left 15% / upper half of the page
        # and create one info entry per unit number found.
        for i, _json in enumerate(sorted_json_info):
            unit_center_x = 0.5 * (_json["boundingPoly"]["vertices"][0]["x"] +
                                   _json["boundingPoly"]["vertices"][1]["x"])
            unit_center_y = 0.5 * (_json["boundingPoly"]["vertices"][0]["y"] +
                                   _json["boundingPoly"]["vertices"][3]["y"])
            if _json["description"].lower() == "unit" and unit_center_x < 0.15 * frame_width and \
                    unit_center_y < 0.5 * frame_height:
                unit_number_candidates, left, right, top, bottom = self.get_json_candidates(json_info=sorted_json_info,
                                                                                            f_json=_json,
                                                                                            s_json=_json)
                # Keep digit-only candidates whose right edge is at most 30 px
                # past the right edge of the annotation following "unit".
                # NOTE(review): sorted_json_info[i + 1] is not bounds-checked;
                # a "unit" label in last position would raise IndexError.
                for candi in unit_number_candidates:
                    if candi["boundingPoly"]["vertices"][1]["x"] < \
                            sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] + 30 \
                            and candi["description"].replace(",", "").replace(".", "").isdigit():
                        unit_number += candi["description"]
                # Fall back to extract_digit_roi on the cropped region when the
                # annotations gave nothing usable.
                if unit_number == "" or unit_number == "0":
                    unit_number = extract_digit_roi(roi_frame=frame[top:bottom, left + 15:right - 15])
                if unit_number != "":
                    unit_cnt += 1
                    # A bare "0" gets the running unit count appended.
                    if unit_number == "0":
                        unit_number += str(unit_cnt)
                    info[f"unit_{unit_cnt}"] = {}
                    info[f"unit_{unit_cnt}"]["unit_number"] = unit_number.replace(",", "").replace(".", "")
                    unit_number = ""

        # Pass 2: walk the annotations again and attach each recognised field
        # to the unit currently being filled (``cnt``).
        # NOTE(review): the i + 1 / i + 2 look-aheads in the branches below are
        # not bounds-checked; a label among the last annotations would raise
        # IndexError.
        if unit_cnt > 0:
            cnt = 1
            for i, _json in enumerate(sorted_json_info):
                if _json["description"].lower() == "name" and sorted_json_info[i + 2]["description"].lower() == "last":
                    json_width = int(0.64 * frame_width) - \
                                 sorted_json_info[i + 2]["boundingPoly"]["vertices"][1]["x"] - 5
                    owner_name_candidates, _, _, _, _ = self.get_json_candidates(json_info=sorted_json_info,
                                                                                 f_json=_json,
                                                                                 s_json=sorted_json_info[i + 1],
                                                                                 diff_bottom_height=40,
                                                                                 diff_left_width=5,
                                                                                 diff_right_width=json_width)

                    # Join the name parts in left-to-right order.
                    sorted_owner_name_candidates = sorted(owner_name_candidates,
                                                          key=lambda k: k["boundingPoly"]["vertices"][0]["x"])

                    for candi in sorted_owner_name_candidates:
                        owner_name += candi["description"] + " "
                    info[f"unit_{cnt}"]["name"] = owner_name
                    owner_name = ""

                elif _json["description"].lower() == "date" and \
                        sorted_json_info[i + 2]["description"].lower() == "birth":
                    json_width = sorted_json_info[i + 2]["boundingPoly"]["vertices"][1]["x"] - \
                                 _json["boundingPoly"]["vertices"][0]["x"]
                    birth_date_candidates = self.perform_ocr_roi_frame(f_json=_json, s_json=sorted_json_info[i + 2],
                                                                       frame=frame,
                                                                       diff_width=json_width, diff_bottom=50)
                    # if unit_cnt < 1:
                    #     break
                    # if cnt == 0 or "birth_date" in info[f"unit_{cnt}"]:
                    #     cnt += 1

                    # Drop the label words themselves, then strip punctuation.
                    for candi in birth_date_candidates:
                        if candi["description"].lower() not in ["date", "of", "birth"]:
                            birth_date += candi["description"]
                    birth_date = birth_date.replace(",", "").replace(".", "").replace("|", "")
                    # For a 10-character value the characters at index 2 and 5
                    # are replaced by "/".
                    if len(birth_date) == 10:
                        birth_date = birth_date[:2] + "/" + birth_date[3:5] + "/" + birth_date[6:]
                    info[f"unit_{cnt}"]["birth_date"] = birth_date
                    birth_date = ""
                elif _json["description"].lower() == "age":
                    json_width = _json["boundingPoly"]["vertices"][1]["x"] - _json["boundingPoly"]["vertices"][0]["x"]
                    age_candidates = self.perform_ocr_roi_frame(frame=frame, f_json=_json, s_json=_json,
                                                                diff_width=json_width)
                    for candi in age_candidates:
                        if candi["description"].lower() != "age":
                            age += candi["description"]

                    info[f"unit_{cnt}"]["age"] = age.replace(",", "").replace(".", "")
                    age = ""

                elif _json["description"].lower() == "gender":
                    gender_candidates = self.perform_ocr_roi_frame(frame=frame, f_json=_json, s_json=_json,
                                                                   diff_width=7)
                    # "gende" is also filtered out as a label word.
                    for candi in gender_candidates:
                        if candi["description"].lower() not in ["gender", "gende"]:
                            gender += candi["description"]
                    info[f"unit_{cnt}"]["gender"] = gender.replace(",", "").replace(".", "")
                    gender = ""

                elif _json["description"].lower() == "address" and \
                        sorted_json_info[i + 1]["description"].lower() == ":":
                    json_width = int(0.64 * frame_width) - \
                                 sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] - 5
                    address_candidates, _, _, _, _ = self.get_json_candidates(json_info=sorted_json_info, f_json=_json,
                                                                              s_json=sorted_json_info[i + 1],
                                                                              diff_right_width=json_width,
                                                                              diff_left_width=5)

                    for candi in address_candidates:
                        address += candi["description"] + " "
                    info[f"unit_{cnt}"]["address"] = address.replace(",", "").replace(".", "")
                    address = ""

                elif _json["description"].lower() == "contact" and \
                        sorted_json_info[i + 1]["description"].lower() == "phone":
                    phone_candidates, _, _, _, _ = self.get_json_candidates(json_info=sorted_json_info, f_json=_json,
                                                                            s_json=sorted_json_info[i + 1],
                                                                            diff_left_width=5)

                    # Ignore candidates that are 2 px wide or narrower.
                    for j, candi in enumerate(phone_candidates):
                        if candi["boundingPoly"]["vertices"][1]["x"] - candi["boundingPoly"]["vertices"][0]["x"] > 2:
                            phone += candi["description"]
                    info[f"unit_{cnt}"]["phone"] = phone.replace(",", "").replace(".", "")
                    phone = ""

                elif _json["description"].lower() == "injuries":
                    injuries_company_candidates, left, right, top, bottom = \
                        self.get_json_candidates(json_info=sorted_json_info, f_json=_json, s_json=_json,
                                                 diff_right_width=5)
                    for candi in injuries_company_candidates:
                        injuries += candi["description"] + " "
                    if injuries == "":
                        injuries = extract_digit_roi(roi_frame=frame[top:bottom, left:right])
                    info[f"unit_{cnt}"]["injuries"] = injuries.replace(",", "").replace(".", "")
                    injuries = ""
                    # Once both "injuries" and "seating_position" are stored the
                    # current unit is complete: stop after the last unit,
                    # otherwise move on to the next one.
                    if "seating_position" in list(info[f"unit_{cnt}"].keys()):
                        if cnt == unit_cnt:
                            break
                        cnt += 1

                elif _json["description"].lower() == "seating" and \
                        sorted_json_info[i + 1]["description"].lower() == "position":
                    seating_position_candidates, left, right, top, bottom = \
                        self.get_json_candidates(json_info=sorted_json_info, f_json=_json,
                                                 s_json=sorted_json_info[i + 1],
                                                 diff_left_width=5, diff_right_width=5)
                    # Clamp the horizontal window to the "seating position"
                    # label itself (3 px tolerance on each side); a right edge
                    # of 0 is replaced by the label's right edge.
                    left = max(left, _json["boundingPoly"]["vertices"][0]["x"] - 3)
                    if right == 0:
                        right = sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] + 3
                    else:
                        right = min(right, sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] + 3)
                    for candi in seating_position_candidates:
                        if left <= 0.5 * (candi["boundingPoly"]["vertices"][0]["x"] +
                                          candi["boundingPoly"]["vertices"][1]["x"]) <= right:
                            seating_position += candi["description"]
                    if seating_position == "":
                        seating_position = extract_digit_roi(roi_frame=frame[top:bottom, left:right])
                    info[f"unit_{cnt}"]["seating_position"] = seating_position.replace(",", "").replace(".", "")
                    seating_position = ""
                    # Same completion rule as in the "injuries" branch.
                    if "injuries" in list(info[f"unit_{cnt}"].keys()):
                        if cnt == unit_cnt:
                            break
                        cnt += 1

        # Drop units without usable identity data: both "name" and
        # "birth_date" empty, or either key missing (the KeyError is logged).
        del_keys = []
        for i_key in info.keys():
            try:
                if info[i_key]["name"] == "" and info[i_key]["birth_date"] == "":
                    del_keys.append(i_key)
            except Exception as e:
                del_keys.append(i_key)
                log_print(e)

        for d_key in del_keys:
            info.pop(d_key, None)

        return info
Ejemplo n.º 27
0
    def run(self):
        """Find similar control descriptions for every row and save a CSV.

        Reads ``INPUT_EXCEL_PATH`` and extracts a feature vector for every
        "Control Description". For each row it keeps the
        ``SIMILARITY_NUMBER`` other rows whose cosine similarity is at least
        ``SIMILARITY_THRESH``, and compares the matching "Risk Description"
        values the same way. Six columns are then added to the frame, each
        cell being a comma-separated list, or "NA" when the row has no match
        or no usable feature:

        * "Similar Sentences" / "Risk Sentences": matched descriptions
        * "Similar Values" / "Risk Values": their similarity scores
        * "Average Values": mean of the control and risk score
        * "Similar Rations": "high" (>= 0.75), "medium" (> 0.5) or "low"

        The result is written to ``<input name>_result.csv`` in
        ``OUTPUT_DIR``.

        Returns:
            None
        """
        file_name = ntpath.basename(INPUT_EXCEL_PATH).replace(".xlsx", "")
        output_file_path = os.path.join(OUTPUT_DIR, f"{file_name}_result.csv")

        input_df = pd.read_excel(INPUT_EXCEL_PATH)
        risk_descriptions = input_df["Risk Description"].values.tolist()
        control_descriptions = input_df["Control Description"].values.tolist()

        # A description whose feature extraction fails is logged and marked
        # with None so that it is excluded from every comparison.
        control_features = []
        for c_des in control_descriptions:
            try:
                c_des_feature = self.feature_extractor.get_feature_token_words(text=c_des)
            except Exception as e:
                c_des_feature = None
                log_print(e)
            control_features.append(c_des_feature)

        # Output column name -> one cell per input row (insertion order is
        # the column order in the CSV).
        result_columns = {
            "Similar Sentences": [],
            "Risk Sentences": [],
            "Similar Values": [],
            "Similar Rations": [],
            "Risk Values": [],
            "Average Values": [],
        }

        for i, c_i_feature in enumerate(control_features):
            i_similarity = []
            if c_i_feature is not None:
                for j, c_j_feature in enumerate(control_features):
                    if j == i or c_j_feature is None:
                        continue
                    i_j_similarity = cosine_similarity([c_i_feature], [c_j_feature])[0][0]
                    if i_j_similarity >= SIMILARITY_THRESH:
                        i_similarity.append((j, i_j_similarity))

            if not i_similarity:
                for column_values in result_columns.values():
                    column_values.append("NA")
            else:
                # Row indices stay plain Python ints, so no NumPy cast is
                # needed (the former ``np.int`` alias no longer exists in
                # current NumPy releases).
                sorted_similarity = sorted(i_similarity, key=lambda k: k[1], reverse=True)[:SIMILARITY_NUMBER]

                # The feature of the current row's risk text is the same for
                # every match, so compute it once.
                # NOTE(review): assumes get_feature_token_words returns the
                # same value for the same text - confirm.
                risk_des_feature = self.feature_extractor.get_feature_token_words(text=risk_descriptions[i])

                controls = []
                risks = []
                control_scores = []
                rations = []
                risk_scores = []
                avg_scores = []
                for s_index, control_score in sorted_similarity:
                    risk_similar_feature = \
                        self.feature_extractor.get_feature_token_words(text=risk_descriptions[s_index])
                    risk_score = cosine_similarity([risk_des_feature], [risk_similar_feature])[0][0]

                    controls.append(str(control_descriptions[s_index]))
                    risks.append(str(risk_descriptions[s_index]))
                    control_scores.append(str(control_score))
                    risk_scores.append(str(risk_score))
                    avg_scores.append(str(0.5 * (risk_score + control_score)))
                    if control_score >= 0.75:
                        rations.append("high")
                    elif 0.5 < control_score < 0.75:
                        rations.append("medium")
                    else:
                        rations.append("low")

                # Joining gives every column the same format; the risk and
                # average cells previously kept a trailing comma.
                result_columns["Similar Sentences"].append(",".join(controls))
                result_columns["Risk Sentences"].append(",".join(risks))
                result_columns["Similar Values"].append(",".join(control_scores))
                result_columns["Similar Rations"].append(",".join(rations))
                result_columns["Risk Values"].append(",".join(risk_scores))
                result_columns["Average Values"].append(",".join(avg_scores))

            print(f"Processed Control Description {i + 1} rows")

        for column_name, column_values in result_columns.items():
            input_df[column_name] = column_values

        input_df.to_csv(output_file_path, index=True, header=True, mode="w")

        print(f"[INFO] Successfully saved in {output_file_path}")

        return
Ejemplo n.º 28
0
    def extract_info_from_json(self, json_result):
        """Fill ``self.field_info`` from an OCR result using the rules for ``self.pdf_type``.

        Args:
            json_result: OCR response dict. ``json_result["textAnnotations"]``
                must be a list whose first entry provides the reference width
                and whose remaining entries are the individual text items
                ("description" plus "boundingPoly"/"vertices").

        Returns:
            dict: ``self.field_info`` after it has been updated in place.

        Side effects:
            May switch ``self.pdf_type`` to "type11_B" or "type7_B", and for
            "type11" writes missing "y" coordinates (as 0) into
            ``json_result``'s annotations.
        """
        # x of the second vertex of the first annotation, used as the
        # reference width for the "left half" test further down.
        json_width = json_result["textAnnotations"][0]["boundingPoly"][
            "vertices"][1]["x"]
        text_info = json_result["textAnnotations"][1:]
        if self.pdf_type == "type11":
            # Vertices may come without a "y" key; default it to 0 so the
            # coordinate lookups below do not fail.
            for i, t_info in enumerate(text_info):
                for k, t_coord in enumerate(
                        t_info["boundingPoly"]["vertices"]):
                    if "y" not in t_coord.keys():
                        text_info[i]["boundingPoly"]["vertices"][k]["y"] = 0
            # Switch to the "_B" variant when search_specific_info reports a
            # falsy result for these words ("Zelt" is presumably an OCR
            # misread of "Zeit" - confirm).
            if not search_specific_info(json_result=text_info,
                                        search_word=["Zeit", "Zelt"]):
                self.pdf_type = "type11_B"
        elif self.pdf_type == "type7":
            if not search_specific_info(json_result=text_info,
                                        search_word=["Anwendung"]):
                self.pdf_type = "type7_B"

        # Per-field rules (search words and ROI expressions) for this layout.
        type_model_info = TYPE_INFO[self.pdf_type]

        for i, t_info in enumerate(text_info):
            try:
                # NOTE(review): the t_* locals below are presumably referenced
                # by the expression strings passed to eval() further down, so
                # they must keep these exact names - confirm against TYPE_INFO.
                t_info_vertices = t_info["boundingPoly"]["vertices"]
                t_left = t_info_vertices[0]["x"]
                t_right = t_info_vertices[1]["x"]
                t_top = t_info_vertices[0]["y"]
                t_bottom = t_info_vertices[2]["y"]
                t_width = t_right - t_left
                t_height = t_bottom - t_top
                t_des = t_info["description"]
                # A token made of "Kies" plus digits only is stored as the
                # barcode, first match wins.
                if "Kies" in t_des:
                    if t_des.replace(
                            "Kies",
                            "").isdigit() and self.field_info["Barcode"] == "":
                        self.field_info["Barcode"] = t_des
                # Try every still-empty field whose search words contain this
                # token.
                for type_key in type_model_info.keys():
                    sub_type_info = type_model_info[type_key]
                    if t_des in sub_type_info[
                            "search_word"] and self.field_info[type_key] == "":
                        # type4: the delivery-note label must be followed by
                        # "-" and "Nr"; an IndexError from the look-ahead is
                        # handled by the except clause below.
                        if (self.pdf_type == "type4" and type_key == "Lieferschein_Nr") and \
                                (text_info[i + 1]["description"] != "-" or text_info[i + 2]["description"] != "Nr"):
                            continue
                        # type5/type11: ignore delivery-note labels that end in
                        # the left half of the reference width.
                        if (self.pdf_type == "type5" or self.pdf_type == "type11") and type_key == "Lieferschein_Nr" \
                                and t_right < 0.5 * json_width:
                            continue
                        # NOTE(review): eval() executes the ROI expressions
                        # stored in TYPE_INFO; this is only safe while
                        # TYPE_INFO is trusted, developer-controlled
                        # configuration.
                        self.field_info[type_key] = \
                            extract_roi_info(json_result=text_info[:i] + text_info[i + 1:],
                                             roi_left=eval(sub_type_info["left"]), pdf_type=self.pdf_type,
                                             roi_right=eval(sub_type_info["right"]), roi_top=eval(sub_type_info["top"]),
                                             roi_bottom=eval(sub_type_info["bottom"]), label=type_key)
                # No-op placeholder; has no effect on the result.
                if t_width > t_height:
                    pass
            except Exception as e:
                log_print(info_str=e)
                # print(e)
                # Fallback for type11 / type11_B: when processing a
                # delivery-note label failed, search a region derived from the
                # label's third and fourth vertices instead ("Lleterscheln" is
                # presumably an OCR misread of "Lieferschein" - confirm).
                if (self.pdf_type == "type11" or self.pdf_type == "type11_B") and \
                        t_info["description"] in ["Lleterscheln", "Lieferschein"]:
                    self.field_info["Lieferschein_Nr"] = \
                        extract_roi_info(json_result=text_info[i + 1:],
                                         roi_left=t_info["boundingPoly"]["vertices"][2]["x"],
                                         roi_right=2 * t_info["boundingPoly"]["vertices"][2]["x"] - t_info
                                         ["boundingPoly"]["vertices"][3]["x"],
                                         roi_top=-2, roi_bottom=1.2 * t_info["boundingPoly"]["vertices"][2]["y"],
                                         y_top=1, label="Lieferschein_Nr", pdf_type=self.pdf_type)
        # Replace every "." with "," in the two numeric fields.
        self.field_info["Volume"] = self.field_info["Volume"].replace(".", ",")
        self.field_info["Gewicht"] = self.field_info["Gewicht"].replace(
            ".", ",")

        return self.field_info
Ejemplo n.º 29
0
def get_discovery_info(json_data, blog_width):
    """Extract the Reach / Impressions date ranges of a "Discovery" block.

    Every word "Discovery" opens a candidate block that starts RANGE_MARGIN
    above/left of the word, spans ``blog_width`` to the right and ends
    RANGE_MARGIN below the first "Interactions" word found inside that
    horizontal span and not above the block top. Inside the chosen block, the
    words between the "Reach" and "Impressions" labels form the reach text and
    the words between "Impressions" and "Interactions" form the impression
    text. Dates are then cut out of those texts around their last "." and "-".

    Args:
        json_data: mapping with a "textAnnotations" list. The first entry is
            skipped; each remaining entry must provide "description" and
            "boundingPoly" -> "vertices" (four points with "x" and "y").
        blog_width: horizontal extent of a block, in the same units as the
            vertex coordinates.

    Returns:
        dict: ``{"Reach": {"Start Date", "End Date"}, "Impression": {...}}``.
        All values are empty strings when no block is found or when any error
        occurs while parsing (the error is passed to ``log_print``).
    """
    discovery_dict = {
        "Reach": {"Start Date": "", "End Date": ""},
        "Impression": {"Start Date": "", "End Date": ""},
    }
    discovery_json = json_data["textAnnotations"][1:]

    def center(annotation, axis, first, second):
        """Return the integer midpoint of two vertices along ``axis``."""
        corners = annotation["boundingPoly"]["vertices"]
        return int(0.5 * (corners[first][axis] + corners[second][axis]))

    def split_dates(text):
        """Return (start, end): text between the last "." and last "-", and text after the last "-"."""
        dash = text.rfind("-")
        return text[text.rfind(".") + 1:dash], text[dash + 1:]

    try:
        # 1. Collect one [left, top, right, bottom] box per "Discovery" header.
        discovery_ranges = []
        for header in discovery_json:
            if header["description"] != "Discovery":
                continue
            origin = header["boundingPoly"]["vertices"][0]
            left = origin["x"] - RANGE_MARGIN
            top = origin["y"] - RANGE_MARGIN
            right = left + blog_width + RANGE_MARGIN
            for candidate in discovery_json:
                if candidate["description"] != "Interactions":
                    continue
                candidate_x = center(candidate, "x", 0, 1)
                candidate_y = center(candidate, "y", 0, 2)
                if left <= candidate_x <= right and candidate_y >= top:
                    discovery_ranges.append([left, top, right, candidate_y + RANGE_MARGIN])
                    break

        if not discovery_ranges:
            # No "Discovery" block in this OCR result: nothing to extract.
            return discovery_dict

        # 2. With several candidates, prefer one that really contains a
        #    "Reach" or "Impressions" word (the last such candidate wins;
        #    candidate 0 is kept when none qualifies).
        correct_range_num = 0
        if len(discovery_ranges) > 1:
            for i, location_range in enumerate(discovery_ranges):
                for word in discovery_json:
                    description = word["description"]
                    # Skip words that are neither label. The previous `or`
                    # skipped every word, so this selection never ran.
                    if "Reach" not in description and "Impressions" not in description:
                        continue
                    word_x = center(word, "x", 0, 1)
                    word_y = center(word, "y", 0, 3)
                    if location_range[0] <= word_x <= location_range[2] and \
                            location_range[1] <= word_y <= location_range[3]:
                        correct_range_num = i
                        break

        left, top, right, bottom = discovery_ranges[correct_range_num]

        # 3. Keep only the words whose centre lies inside the chosen block.
        discovery_data = []
        for word in discovery_json:
            word_x = center(word, "x", 0, 1)
            word_y = center(word, "y", 0, 2)
            if left <= word_x <= right and top <= word_y < bottom:
                discovery_data.append(word)

        # 4. Vertical bands: below "Reach" / above "Impressions", and
        #    below "Impressions" / above "Interactions".
        reach_top = None
        reach_bottom = None
        impression_top = None
        impression_bottom = None
        for range_data in discovery_data:
            description = range_data["description"]
            corners = range_data["boundingPoly"]["vertices"]
            if description == "Reach":
                reach_top = corners[2]["y"]
            elif description == "Impressions":
                reach_bottom = corners[0]["y"]
                impression_top = corners[2]["y"]
            elif description == "Interactions":
                impression_bottom = corners[0]["y"]

        # 5. Concatenate the words of each band, each followed by a space.
        #    A missing label leaves a bound at None; the comparison then
        #    raises TypeError and the handler below returns the empty result.
        reach_data = ""
        impression_data = ""
        for range_data in discovery_data:
            center_y = center(range_data, "y", 0, 2)
            if reach_top <= center_y <= reach_bottom:
                reach_data += range_data["description"] + " "
            if impression_top <= center_y <= impression_bottom:
                # Same separator as the reach text (was an empty string).
                impression_data += range_data["description"] + " "

        reach_start, reach_end = split_dates(reach_data)
        impression_start, impression_end = split_dates(impression_data)
        discovery_dict["Reach"]["Start Date"] = reach_start
        discovery_dict["Reach"]["End Date"] = reach_end
        discovery_dict["Impression"]["Start Date"] = impression_start
        # Taken from the impression text (was copied from the reach text).
        discovery_dict["Impression"]["End Date"] = impression_end

        return discovery_dict

    except Exception as e:
        # Best-effort parser: report the problem and return what was filled in.
        log_print(info_str=e)

        return discovery_dict
Ejemplo n.º 30
0
def get_age_range_values(frame_path, json_data):
    """Read the per-age-bucket values of every "Age Range" block in an OCR result.

    A block is anchored on the consecutive words "Age" "Range". Its right edge
    comes from an "All" "Men" "Women" word sequence on the same text line
    (within RANGE_MARGIN vertically) and its bottom edge from the first "65"
    word inside the block's horizontal span. Blocks containing no word with a
    "%" are ignored. For each remaining block, the words to the right of every
    age label on that label's line (within LINE_DIFF vertically) are
    concatenated, and the resulting table is stored under the item returned by
    ``select_item``.

    Args:
        frame_path: forwarded unchanged to ``select_item``.
        json_data: mapping with a "textAnnotations" list. The first entry is
            skipped; each remaining entry must provide "description" and
            "boundingPoly" -> "vertices" (four points with "x" and "y").

    Returns:
        tuple: ``(age_range_dict, blog_width)``. ``age_range_dict`` maps
        "All", "Men" and "Women" to a table keyed "13 - 17" ... "65 + "
        (items without a block get an all-empty table). ``blog_width`` is the
        mean width of the processed blocks, or 0 when none was processed. On
        any error the error is passed to ``log_print`` and all tables are
        empty with ``blog_width`` 0.
    """
    age_items = ["All", "Men", "Women"]
    # (result key, first OCR word of the row label, True when the label reads "<from> - <to>")
    age_rows = [
        ("13 - 17", "13", True),
        ("18 - 24", "18", True),
        ("25 - 34", "25", True),
        ("35 - 44", "35", True),
        ("45 - 54", "45", True),
        ("55 - 64", "55", True),
        ("65 + ", "65", False),
    ]

    def blank_table():
        """Return a fresh table with every age bucket empty."""
        return {row_key: "" for row_key, _, _ in age_rows}

    def corners_of(annotation):
        """Return the four bounding-box vertices of a word."""
        return annotation["boundingPoly"]["vertices"]

    def center(annotation, axis, first, second):
        """Return the integer midpoint of two vertices along ``axis``."""
        corners = corners_of(annotation)
        return int(0.5 * (corners[first][axis] + corners[second][axis]))

    age_range_dict = {}
    age_range_json = json_data["textAnnotations"][1:]
    word_count = len(age_range_json)

    try:
        # 1. Locate candidate blocks as [left, top, right, bottom].
        age_ranges = []
        for i, header in enumerate(age_range_json):
            # Bounds check: "Age" may be the very last word.
            if header["description"] != "Age" or i + 1 >= word_count \
                    or age_range_json[i + 1]["description"] != "Range":
                continue
            header_center_y = center(header, "y", 0, 2)

            block_left = block_top = block_right = None
            for j, column in enumerate(age_range_json):
                # j >= 2 keeps the look-behind from wrapping to the list end.
                if j < 2 or column["description"] != "Women" \
                        or age_range_json[j - 1]["description"] != "Men" \
                        or age_range_json[j - 2]["description"] != "All":
                    continue
                if abs(header_center_y - center(column, "y", 0, 2)) < RANGE_MARGIN:
                    block_left = corners_of(header)[0]["x"] - RANGE_MARGIN
                    block_top = corners_of(header)[0]["y"] - RANGE_MARGIN
                    block_right = corners_of(column)[1]["x"] + RANGE_MARGIN
                    break
            if block_left is None:
                # No column header on this line: do not reuse the coordinates
                # of a previously found block.
                continue

            block_bottom = None
            for candidate in age_range_json:
                if candidate["description"] == "65" and \
                        block_left <= center(candidate, "x", 0, 1) <= block_right:
                    block_bottom = corners_of(candidate)[2]["y"] + LINE_DIFF
                    break
            if block_bottom is None:
                continue

            age_ranges.append([block_left, block_top, block_right, block_bottom])

        # 2. Keep the blocks that contain at least one word with a "%".
        correct_range_nums = []
        for i, age_range in enumerate(age_ranges):
            for word in age_range_json:
                if "%" not in word["description"]:
                    continue
                word_x = center(word, "x", 0, 1)
                word_y = center(word, "y", 0, 3)
                if age_range[0] <= word_x <= age_range[2] and age_range[1] <= word_y <= age_range[3]:
                    correct_range_nums.append(i)
                    break

        if not correct_range_nums:
            # Nothing to read; avoids dividing by zero further down.
            return {item: blank_table() for item in age_items}, 0

        # 3. Read every kept block.
        blog_width = 0
        for correct_range_num in correct_range_nums:
            left, top, right, bottom = age_ranges[correct_range_num]
            blog_width += right - left

            # Per-block state, so nothing leaks from a previous block.
            row_anchors = {}  # row key -> [label centre y, x of the label's right edge]
            regions = {item: None for item in age_items}
            age_range_data = []

            for i, word in enumerate(age_range_json):
                word_x = center(word, "x", 0, 1)
                word_y = center(word, "y", 0, 2)

                if left <= word_x < right and top <= word_y <= bottom:
                    for row_key, first_word, dashed in age_rows:
                        if word["description"] != first_word:
                            continue
                        # The label ends at "<to>" (two words on) for dashed
                        # rows and at "+" (one word on) for the "65" row.
                        offset = 2 if dashed else 1
                        if i + offset >= word_count:
                            continue
                        if dashed and age_range_json[i + 1]["description"] != "-":
                            continue
                        row_anchors[row_key] = [word_y,
                                                corners_of(age_range_json[i + offset])[1]["x"]]

                if left <= word_x <= right and top <= word_y < bottom:
                    age_range_data.append(word)

            tmp_dict = blank_table()
            for range_data in age_range_data:
                description = range_data["description"]
                range_data_center_y = center(range_data, "y", 0, 2)
                range_data_center_x = center(range_data, "x", 0, 1)

                if description in regions:
                    corners = corners_of(range_data)
                    regions[description] = [corners[0]["x"], corners[0]["y"],
                                            corners[2]["x"], corners[2]["y"]]

                # Rows whose label was not detected are simply left empty
                # instead of aborting the whole extraction.
                for row_key, (row_y, label_right) in row_anchors.items():
                    if abs(range_data_center_y - row_y) < LINE_DIFF and range_data_center_x > label_right:
                        tmp_dict[row_key] += description

            # select_item receives the All/Men/Women header boxes and returns
            # the key for this block (presumably the active tab; confirm in select_item).
            age_item = select_item(frame_path=frame_path,
                                   region=[regions[item] for item in age_items],
                                   items=age_items)
            age_range_dict[age_item] = tmp_dict

        blog_width /= len(correct_range_nums)

        for item in age_items:
            if item not in age_range_dict:
                age_range_dict[item] = blank_table()

        return age_range_dict, blog_width

    except Exception as e:
        # Best-effort parser: report the problem and return empty tables.
        log_print(info_str=e)
        for item in age_items:
            age_range_dict[item] = blank_table()

        blog_width = 0

        return age_range_dict, blog_width