def success():
    """Handle an image upload, run social-distance estimation on it, and render
    the result page.

    Non-POST requests fall through and return None (original behavior).
    Any failure is logged and rendered back to the form with the exception as
    the message.
    """
    try:
        if request.method == 'POST':
            # Clear leftovers from previous runs in both working directories.
            for directory in (INPUT_DIR, UPLOAD_FOLDER):
                for stale_path in glob.glob(os.path.join(directory, "*.*")):
                    os.remove(stale_path)
            if 'file' not in request.files:
                flash('No file part')
                return redirect(request.url)
            upload = request.files['file']
            if upload.filename == '':
                return render_template(
                    'file_upload_form.html',
                    messages='Click on Choose File button to select picture before uploading',
                    filename=None,
                    data=None)
            saved_path = os.path.join(INPUT_DIR, secure_filename(upload.filename))
            upload.save(saved_path)
            filename, result_info = social_estimator.process_one_frame(
                frame_path=saved_path)
            return render_template('file_upload_form.html',
                                   messages='Image successfully uploaded and Estimated',
                                   filename=filename,
                                   data=result_info)
    except Exception as e:
        log_print(info_str=e)
        return render_template('file_upload_form.html', messages=e,
                               filename=None, data=None)
def perform_ocr(self):
    """Poll INPUT_DIR forever: OCR each not-yet-processed PDF, upload the rebuilt
    PDF to S3, and rewrite the processed-file list after every sweep.

    NOTE: this method never returns; it is presumably run on a worker thread.
    Per-file failures are logged and skipped so one bad PDF cannot stall the loop.
    """
    while True:
        for pdf_path in glob.glob(os.path.join(INPUT_DIR, "*.*")):
            try:
                pdf_name = ntpath.basename(pdf_path)
                # Only PDFs are handled; anything else in the folder is ignored.
                extension = pdf_name[pdf_name.rfind(".") + 1:]
                if extension != "pdf":
                    continue
                if pdf_name not in self.processed_files:
                    print(f"[INFO] {pdf_name} processing...")
                    extracted_info = self.pdf_extractor.main(pdf_path=pdf_path)
                    output_pdf_path = self.pdf_creator.repopulate_pdf(
                        info=extracted_info, pdf_name=pdf_name)
                    self.s3_manager.upload_files(file_path=output_pdf_path)
                    self.processed_files.append(pdf_name)
            except Exception as e:
                log_print(e)
        # Persist the names of everything processed so far, one per line.
        # Fix: replaces a quadratic manual string concatenation with str.join
        # (same output: no trailing newline); dead commented-out counters removed.
        save_file(content="\n".join(self.processed_files),
                  filename=PROCESSED_FILE, method='w')
def run(self): self.check_server() # self.show_green_string('yes') # self.scroll_message('test') # signall = "turn on yellow" # win.get_event() while True: try: if self.terminate_flag: print(self.terminate_flag) break self.receive_data() self.send_data() except Exception as e: log_print(info_str=e) time.sleep(0.05) connected = False print(" ### Client Disconnected ") while not connected: try: self.check_server() connected = True print(" ### Client Reconnected") except socket.error: pass if self.send_data_time is not None: now = time.time() if now - self.send_data_time > 5: self.window.show_init_screen() self.send_data_time = None time.sleep(0.01)
def save_recorded_video():
    """Persist an uploaded video blob under VIDEO_INPUT_DIR using the name
    supplied in the 'info' form field; respond with a JSON status string."""
    try:
        uploaded = request.files['video']
        target_path = os.path.join(VIDEO_INPUT_DIR, request.form['info'])
        uploaded.save(target_path)
        return json.dumps({'result': "success"})
    except Exception as e:
        # Missing field / failed write: report failure instead of a 500.
        log_print(e)
        return json.dumps({'result': "fail"})
def main(self, pdf_files):
    """Run the invoice extractor over each PDF path in *pdf_files*.

    Returns a dict mapping the PDF's basename to its extraction result;
    files that fail are logged and omitted from the result.
    """
    self.multi_pdf_result = {}
    for pdf_path in pdf_files:
        base_name = ntpath.basename(pdf_path)
        stem = base_name.replace(".pdf", "")
        try:
            self.multi_pdf_result[base_name] = self.invoice_extractor.run(
                pdf_path=pdf_path, pdf_file_name=stem)
        except Exception as e:
            log_print(e)
    return self.multi_pdf_result
def check_server(self):
    """Block until a TCP connection to (self.host, self.port) succeeds,
    storing the live socket on self.connection. Retries every 0.2 s."""
    print("\n[!] Checking server")
    connected = False
    while not connected:
        try:
            self.connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.connection.connect((self.host, self.port))
            print("\n[!] Server is running!")
            connected = True
        except Exception as e:
            print("\n[!] Server is not running!")
            log_print(info_str=e)
            time.sleep(0.2)
def show_recognition_result(self, birth, age, statue):
    """Display the recognized birth date, age and status on the UI widgets and
    hand the decision plus the captured card face over to the card system."""
    try:
        # Normalize date separators (e.g. "01,02,1990" -> "01.02.1990").
        birth = birth.replace(",", ".")
    except Exception as e:
        # birth may not be a string; keep the raw value and just log.
        log_print(info_str=e)
    self.birth_value.setText(birth)
    self.age_value.setText(str(age))
    self.statue_value.setText(statue)
    self.card_sys.send_card_recog_data_flag = True
    self.card_sys.card_recog_data = statue
    self.card_sys.card_face_image = self.card_face_image
def success():
    """Accept a POST file upload, OCR it, and return the generated text file
    from OUTPUT_DIR as a download attachment.

    Non-POST requests fall through and return None (original behavior).
    """
    if request.method == 'POST':
        upload = request.files['file']
        file_path = os.path.join(UPLOAD_DIR, secure_filename(upload.filename))
        upload.save(file_path)
        base_name = ntpath.basename(file_path)
        # Strip everything from the first dot to get the output stem.
        stem = base_name[:base_name.find(".")]
        saved_path = ocr_extractor.process_ocr_text(frame_path=file_path,
                                                    file_name=stem)
        log_print(info_str="Created {}".format(saved_path))
        return send_file(os.path.join(OUTPUT_DIR, ntpath.basename(saved_path)),
                         as_attachment=True)
def extract_roi_info(json_result, roi_left, roi_right, roi_top, roi_bottom,
                     pdf_type, y_top=None, label=None):
    """Collect OCR tokens whose top-left vertex lies inside the given ROI,
    group them into text lines by y-proximity (Y_BIND_THREAD), and return the
    line-ordered, space-joined text.

    json_result: iterable of Vision-style annotations with "description" and
        "boundingPoly"/"vertices".
    pdf_type / label: enable document-specific token filters.
    y_top: optional row anchor; when set, tokens in the x-range are also
        accepted if y_top falls inside the ROI's vertical span.
    Returns "" when no token falls inside the ROI
    (fix: previously crashed with IndexError on roi_info[0]).
    """
    roi_info = []
    roi_info_txt = ""
    for j_info in json_result:
        try:
            # Document/label specific exclusions.
            if label == "Lieferschein Nr" and \
                    not j_info["description"].replace("-", "").isdigit():
                continue
            if pdf_type == "type7_B" and label == "Volume" and \
                    j_info["description"].isalpha():
                continue
            if pdf_type == "type11" and label == "Lieferschein Nr" and \
                    j_info["description"] == "8404":
                continue
            if roi_left <= j_info["boundingPoly"]["vertices"][0]["x"] < roi_right and \
                    roi_top <= j_info["boundingPoly"]["vertices"][0]["y"] < roi_bottom:
                roi_info.append(j_info)
        except Exception as e:
            # print(e)
            log_print(info_str=e)
        if y_top is not None:
            # Row-anchored fallback: accept any x-matching token when the
            # anchor row sits inside the ROI (may re-add a token already
            # collected above — preserved from the original logic).
            if roi_left <= j_info["boundingPoly"]["vertices"][0]["x"] < roi_right and \
                    roi_top < y_top <= roi_bottom:
                roi_info.append(j_info)
    if not roi_info:
        # Fix: an empty ROI previously raised IndexError below.
        return roi_info_txt
    sorted_y_roi_info = sorted(roi_info,
                               key=lambda k: k["boundingPoly"]["vertices"][0]["y"])
    # Bucket tokens into visual lines: a token starts a new line when its y
    # differs from the current line's reference y by >= Y_BIND_THREAD.
    bind_y_close = []
    tmp_line = []
    init_value = sorted_y_roi_info[0]["boundingPoly"]["vertices"][0]["y"]
    for s_y_info in sorted_y_roi_info:
        if abs(init_value - s_y_info["boundingPoly"]["vertices"][0]["y"]) < Y_BIND_THREAD:
            tmp_line.append(s_y_info)
        else:
            bind_y_close.append(tmp_line[:])
            tmp_line.clear()
            tmp_line.append(s_y_info)
            init_value = s_y_info["boundingPoly"]["vertices"][0]["y"]
    bind_y_close.append(tmp_line[:])
    for b_y_info in bind_y_close:
        # Left-to-right order inside each line.
        sorted_x_info = sorted(b_y_info,
                               key=lambda k: k["boundingPoly"]["vertices"][0]["x"])
        if pdf_type == "type5" and label == "DTS_Date":
            # A DTS date line must contain a ":" token; otherwise skip the line.
            ret_date = False
            for candi in sorted_x_info:
                if ":" in candi["description"]:
                    ret_date = True
                    break
            if not ret_date:
                continue
        for candi in sorted_x_info:
            roi_info_txt += candi["description"] + " "
    return roi_info_txt.replace("..", ".")
def check_face_from_db(self, face_img):
    """Compare *face_img* against the face encodings stored in the database.

    On a match, copies the saved status/date onto self.parent, sets its
    stop_flag and shows the Erlaubt / Nicht erlaubt message; otherwise sets
    self.check_db_flag so the caller continues with a fresh check.
    Errors (e.g. no face found in face_img) are logged and swallowed.
    """
    try:
        from src.database.manager import DatabaseManager
        # Raises IndexError when no face is detectable in the image; handled below.
        person_face_encoding = face_recognition.face_encodings(face_img)[0]
        records = DatabaseManager().select_info_from_db()
        print("records", records)
        db_face_encoding_list = []
        db_face_statue_list = []
        db_face_date_list = []
        if records.__len__() > 0:
            for record in records:
                # record layout: [?, date, status, space-separated encoding]
                # — inferred from the indexing below; TODO confirm against schema.
                encoding = np.array(record[3].split(" "), dtype=float)
                db_face_encoding_list.append(encoding)
                db_face_statue_list.append(record[2])
                db_face_date_list.append(record[1])
            print("record success")
            matches = face_recognition.compare_faces(
                db_face_encoding_list, person_face_encoding)
            print(matches)
            if True in matches:
                print("Faces match.")
                # Use the first matching record.
                match_index = matches.index(True)
                saved_statue = db_face_statue_list[match_index]
                saved_date = db_face_date_list[match_index]
                self.parent.db_saved_date = saved_date
                self.parent.db_saved_statue = saved_statue
                self.parent.stop_flag = True
                if saved_statue == "allow":
                    self.parent.message = "[Erlaubt!]"
                else:
                    self.parent.message = "[Nicht erlaubt!]"
                self.parent.age_guessed_time = time.time()
            else:
                print("Faces not match.")
                self.check_db_flag = True
        else:
            print("There is no saved face encoding in db")
            self.check_db_flag = True
    except Exception as e:
        print("Failed checking database, it is face encoding problem")
        log_print(info_str=e)
def get_feature_token_words(self, text):
    """Tokenize *text* into sentences and words, look each word up in the
    embedding model, and fold the collected vectors through
    calculate_text_feature. Out-of-vocabulary words are logged and skipped."""
    collected = []
    for sentence in self.text_processor.tokenize_sentence(text=text):
        for token in self.text_processor.tokenize_word(sample=sentence.text):
            try:
                collected.append(self.model[token])
            except Exception as e:
                # Token missing from the model's vocabulary.
                log_print(e)
    return self.calculate_text_feature(word_features=collected)
def recognize_card(self, image, face_right):
    """Extract the birth date from a captured ID image, derive the holder's
    age, and publish the Allow / Not Allow decision to the window.

    image: frame containing the document.
    face_right: x-boundary handed to extract_birthday as base_line.
    All errors are logged and swallowed so the capture loop keeps running.
    """
    try:
        date = extract_birthday(img=image, id_type=self.window.document_type,
                                base_line=face_right)
        date = date.replace(".", "")
        if date != "":
            birth_year = 0
            if self.window.document_type == ID_TYPE[1] or \
                    self.window.document_type == ID_TYPE[2]:
                # These document types carry a full 4-digit year already.
                birth_year = int(date[-4:])
            if self.window.document_type == ID_TYPE[0]:
                # Two-digit year on this document type; expand to a full year.
                birth_year = 1900 + int(date[-2:])
                # Bug fix: the old code did date.replace(date[-2:], birth_year),
                # which raises TypeError (int argument to str.replace) and was
                # silently logged, aborting recognition every time. Rebuild the
                # string suffix instead.
                date = date[:-2] + str(birth_year)
            birth_month = int(date[2:4])
            birth_day = int(date[0:2])
            today = datetime.date.today()
            # Bug fix: subtract one year only when this year's birthday has
            # not yet occurred (the old logic also subtracted when the month
            # had already passed but the day-of-month was smaller).
            real_age = today.year - birth_year
            if (today.month, today.day) < (birth_month, birth_day):
                real_age -= 1
            self.real_age = real_age
            self.age_statue = "Allow" if self.real_age >= 18 else "Not Allow"
            self.window.show_recognition_result(date, self.real_age,
                                                self.age_statue)
            self.start_flag = False
    except Exception as e:
        log_print(info_str=e)
    time.sleep(0.01)
def estimate_social_distance(self, frame):
    """Annotate *frame* with one box per tracked person: red when that person
    is within SAFE_DISTANCE of any other tracked person, green otherwise, plus
    a numeric person label. Returns the annotated frame."""
    # Pairwise real-world distances between all tracked persons; a failed
    # measurement is logged and treated as distance 0 (i.e. "too close").
    pairwise = {}
    for tid_a in self.person_trackers.keys():
        pairwise["person_{}".format(tid_a)] = {}
        for tid_b in self.person_trackers.keys():
            if tid_a == tid_b:
                continue
            try:
                dist_cm = calculate_real_distance_two_persons(
                    self.person_attributes[tid_a]["box"],
                    self.person_attributes[tid_b]["box"])
            except Exception as e:
                log_print(info_str=e)
                dist_cm = 0
            pairwise["person_{}".format(tid_a)][tid_b] = dist_cm
    for tid_a in self.person_trackers.keys():
        left, top, right, bottom = self.person_attributes[tid_a]["box"]
        neighbours = pairwise["person_{}".format(tid_a)]
        too_close = any(d <= SAFE_DISTANCE for d in neighbours.values())
        box_color = (0, 0, 255) if too_close else (0, 255, 0)
        cv2.rectangle(frame, (left, top), (right, bottom), box_color, 2)
        cv2.putText(frame, str(tid_a + 1), (left, max(top - 5, 0)),
                    cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 255, 0), 2)
    return frame
def set_detections(self, detections, timestamps):
    """Merge freshly detected face bboxes into the tracked-face list and drop
    faces that have not been seen for more than 0.5 s.

    detections / timestamps: parallel sequences of bbox and capture time.
    A detection within distance 100 of an existing face extends that face's
    history (capped at the 7 most recent samples); otherwise a new face entry
    is created.
    """
    # Find the location among all recent face locations where this would belong
    for bbox, timestamp in zip(detections, timestamps):
        idx, dist = self.find_nearest_face(bbox)
        try:
            if dist is not None and dist < 100:
                face = self.faces[int(idx)]
                face['bboxes'].append(bbox)
                face['timestamps'].append(timestamp)
                # Keep only the 7 most recent observations per face.
                if len(face['bboxes']) > 7:
                    face['bboxes'].pop(0)
                    face['timestamps'].pop(0)
            else:
                # This is a new face not in the scene before
                self.faces.append({
                    'timestamps': [timestamp],
                    'bboxes': [bbox]
                })
        except Exception as e:
            log_print(info_str=e)
    # Clean old detections:
    now = time.time()
    faces_to_remove = [i for i, face in enumerate(self.faces)
                       if now - face['timestamps'][-1] > 0.5]
    # Bug fix: pop in DESCENDING index order. The previous ascending loop
    # shifted the list after each pop, removing the wrong entries and
    # eventually raising IndexError (silently logged).
    for i in reversed(faces_to_remove):
        try:
            self.faces.pop(i)
        except Exception as e:
            # Face was deleted by other thread.
            log_print(info_str=e)
def face_compare(self):
    """Compare the captured person face(s) against the face cropped from the
    ID card and persist the allow / not-allow decision.

    Uses the first frame in self.person_face_image_list from which an encoding
    can be computed, matches it against every face found on
    self.card_face_image, updates the age-guess thread's message, and writes
    the encoding with its verdict to the database. Clears the captured image
    list on success; any failure is logged and swallowed.
    """
    try:
        from src.database.manager import DatabaseManager
        face_encoding1 = None
        # Take the first captured frame that yields a face encoding.
        for i in range(0, self.person_face_image_list.__len__()):
            try:
                face_encoding1 = face_recognition.face_encodings(
                    self.person_face_image_list[i])[0]
                break
            except Exception as e:
                log_print(info_str=e)
                print("can't find face feature from image")
        face_locations = face_recognition.face_locations(
            self.card_face_image)
        # Encodings for every face detected on the card image.
        face_encoding2 = face_recognition.face_encodings(
            self.card_face_image, face_locations)
        matches = face_recognition.compare_faces(face_encoding2,
                                                 face_encoding1)
        print(matches)
        if True in matches:
            print("Faces match.")
            self.age_guess_thread.message = "[Erlaubt!]"
            self.age_guess_thread.age_guessed_time = time.time()
            DatabaseManager().save_face_encoding_to_db(
                face_encoding1, "allow")
        else:
            print("Faces not match.")
            self.age_guess_thread.message = "[Nicht erlaubt!]"
            self.age_guess_thread.age_guessed_time = time.time()
            DatabaseManager().save_face_encoding_to_db(
                face_encoding1, "not allow")
        # Reset the capture buffer for the next person.
        self.person_face_image_list = []
    except Exception as e:
        print(
            "Faces not captured or can't not find features from face images!"
        )
        log_print(info_str=e)
def main(self):
    """Connect to the server, then pump send/receive forever until
    terminate_flag is set, reconnecting after any exchange failure."""
    self.socket_connection()
    while True:
        try:
            if self.terminate_flag:
                break
            self.send_data()
            self.receive_data()
        except Exception as e:
            # Treat any send/receive error as a lost connection and spin
            # until socket_connection succeeds again.
            log_print(info_str=e)
            time.sleep(0.5)
            connected = False
            print(" Client Disconnected ")
            while not connected:
                try:
                    self.socket_connection()
                    connected = True
                    print(" Client Reconnected")
                except socket.error:
                    pass
        time.sleep(0.2)
def run(self):
    """Score every statement in the configured Excel sheet against the master
    key using cosine similarity of text features, and write a CSV report to
    OUTPUT_DIR.
    """
    similarity_values = []
    file_name = ntpath.basename(INPUT_EXCEL_PATH).replace(".xlsx", "")
    output_file_path = os.path.join(
        OUTPUT_DIR, f"{file_name}_{SHEET_NAME}_result.csv")
    input_df = pd.read_excel(INPUT_EXCEL_PATH, sheet_name=SHEET_NAME)
    # Sheet layout: master key at cell (1, 1); statements in column 3 from
    # row 2 down — TODO confirm against the actual workbook.
    master_key = input_df.iloc[1, 1]
    master_feature = self.feature_extractor.get_feature_token_words(
        text=master_key)
    statements = input_df.iloc[2:, 3].values.tolist()
    if statements:
        for s_des in statements:
            try:
                s_des_feature = self.feature_extractor.get_feature_token_words(
                    text=s_des)
                proximity = cosine_similarity([master_feature],
                                              [s_des_feature])
                similarity_values.append(proximity[0][0])
            except Exception as e:
                # Keep row alignment: record "None" for failed statements.
                similarity_values.append("None")
                log_print(e)
        output_df = pd.DataFrame([[master_key], statements,
                                  similarity_values]).T
        output_df.to_csv(
            output_file_path,
            index=True,
            header=["Master Key", "Statements", "Proximity Score"],
            mode='w')
        print(f"[INFO] Successfully saved in {output_file_path}")
    else:
        print(
            f"[INFO] There are not any statements to estimate in {SHEET_NAME}"
        )
    return
def estimate_frame_rotation(json_result):
    """Infer page orientation from the first OCR token shaped like
    'Kies<number>'.

    Compares the token's first two bounding-box vertices: a vertical offset
    beyond ROTATION_Y_THREAD means 'clockwise' / 'anti_clockwise'; otherwise a
    reversed x-order means 'reflection'. Returns None when no marker token is
    found or the page is upright. Per-token errors are logged and skipped.
    """
    rotation_res = None
    for annotation in json_result["textAnnotations"][1:]:
        try:
            text = annotation["description"]
            if "Kies" not in text or not text.replace("Kies", "").isdigit():
                continue
            vertices = annotation["boundingPoly"]["vertices"]
            if abs(vertices[0]["y"] - vertices[1]["y"]) > ROTATION_Y_THREAD:
                if vertices[0]["y"] > vertices[1]["y"]:
                    rotation_res = "clockwise"
                else:
                    rotation_res = "anti_clockwise"
            elif vertices[0]["x"] > vertices[1]["x"]:
                rotation_res = "reflection"
            else:
                rotation_res = None
            break
        except Exception as e:
            print(e)
            log_print(info_str=e)
    return rotation_res
def main(self, pdf_path):
    """Render each page of the PDF to a 200-dpi image, run page extraction on
    it, then propagate the report number into the unit / motorist / occupant
    sections. Returns the accumulated self.pdf_info dict."""
    self.pdf_info = {"report": {}, "unit": [], "motorist": {}, "occupant": {}}
    pdf_images = [np.array(page) for page in convert_from_path(pdf_path, 200)]
    file_name = ntpath.basename(pdf_path).replace(".pdf", "")
    for i, pdf_image in enumerate(pdf_images):
        try:
            pdf_frame_path = os.path.join(PDF_IMAGES_DIR,
                                          f"{file_name}_{i}.jpg")
            cv2.imwrite(pdf_frame_path, pdf_image)
            # NOTE(review): each page REPLACES self.pdf_info here;
            # extract_page_info presumably merges into the existing dict —
            # confirm, otherwise only the last page's data would survive.
            self.pdf_info = self.extract_page_info(
                pdf_page_frame_path=pdf_frame_path,
                file_name=file_name, index=i)
        except Exception as e:
            print(e)
            log_print(e)
    # Stamp the report number onto every unit and the first motorist /
    # occupant entry, when it was found on the report page.
    for info_key in self.pdf_info.keys():
        if "report_number" in self.pdf_info["report"].keys():
            if info_key == "unit":
                for unit_info in self.pdf_info[info_key]:
                    unit_info["report_number"] = \
                        self.pdf_info["report"]["report_number"]
            elif info_key in ["motorist", "occupant"]:
                if "unit_1" in self.pdf_info[info_key].keys():
                    self.pdf_info[info_key]["unit_1"]["report_number"] = \
                        self.pdf_info["report"]["report_number"]
    return self.pdf_info
def get_feature_token_words(self, text, supported_vocab=None):
    """Build the feature for *text*: optionally seed with embeddings for the
    ';'-separated entries of *supported_vocab* (spaces stripped), then add the
    embedding of every tokenized word, folding everything through
    calculate_text_feature. Out-of-vocabulary entries are logged and skipped."""
    collected = []
    if supported_vocab is not None:
        for vocab_entry in supported_vocab.split(";"):
            try:
                collected.append(self.model[vocab_entry.replace(" ", "")])
            except Exception as e:
                log_print(e)
    for sentence in self.text_processor.tokenize_sentence(text=text):
        for token in self.text_processor.tokenize_word(sample=sentence.text):
            try:
                collected.append(self.model[token])
            except Exception as e:
                log_print(e)
    return self.calculate_text_feature(word_features=collected)
def run():
    """Compare two images fetched from the 'img1'/'img2' query-string URLs and
    return a JSON similarity score; on any failure return a score of 0."""
    try:
        url_a = request.args.get('img1')
        url_b = request.args.get('img2')
        path_a = get_image_from_url(https_url=url_a, file_id="1")
        path_b = get_image_from_url(https_url=url_b, file_id="2")
        prep_a = preprocess_image(frame_path=path_a, file_id="1")
        prep_b = preprocess_image(frame_path=path_b, file_id="2")
        _, similarity = compare_two_images(frame1_path=prep_a,
                                           frame2_path=prep_b)
        # Map the raw similarity onto a bounded score.
        score = 1 - math.exp(EXP_CONST * similarity)
        # Remove temporary jpg artifacts before responding.
        for leftover in glob.glob(os.path.join(CUR_DIR, 'utils', '*.jpg')):
            os.remove(leftover)
        return json.dumps({'score': score})
    except Exception as e:
        log_print(info_str=e)
        return json.dumps({'score': 0})
def process_one_frame(self, frame_path):
    """Detect people in the image at *frame_path*, estimate pairwise
    real-world distances, draw green (safe) / red (too close) boxes, and save
    the annotated image (downscaled to <=800 px wide) into UPLOAD_FOLDER.

    Returns (file_name, result) where result maps "danger" to
    "person_i; person_j:<dist>cm" warning strings and "safe" to person labels.
    """
    social_distance_result = {"danger": [], "safe": []}
    frame = cv2.imread(frame_path)
    height, width = frame.shape[:2]
    file_name = ntpath.basename(frame_path)
    st_time = time.time()
    boxes, confidences = self.person_detector.detect_person_yolo(
        frame=frame)
    # Suppress overlapping detections before measuring distances.
    filtered_idx, _ = non_max_suppression_slow(boxes=np.array(boxes),
                                               keys=range(len(boxes)))
    # filtered_idx = cv2.dnn.NMSBoxes(boxes, confidences, DETECT_CONFIDENCE, OVERLAP_THRESH)
    print(time.time() - st_time)
    if len(filtered_idx) > 0:
        # idf = filtered_idx.flatten()
        center = []
        distance = {}
        for i in filtered_idx:
            (x1, y1) = (boxes[i][0], boxes[i][1])
            (x2, y2) = (boxes[i][2], boxes[i][3])
            center.append([x1, y1, x2, y2])
        # Pairwise distances; a failed measurement is logged and treated as 0
        # (i.e. counted as "too close").
        for i in range(len(center)):
            distance["person_{}".format(i)] = {}
            for j in range(len(center)):
                if i == j:
                    continue
                try:
                    geometry = calculate_real_distance_two_persons(
                        center[i], center[j])
                except Exception as e:
                    log_print(info_str=e)
                    geometry = 0
                distance["person_{}".format(i)][j] = geometry
        for i in range(len(center)):
            left, top, right, bottom = center[i]
            text = "person_" + str(i + 1)
            inter_dist = []
            inter_person_id = []
            close_ret = False
            for j in distance["person_{}".format(i)].keys():
                if i == j:
                    continue
                if distance["person_{}".format(i)][j] <= SAFE_DISTANCE:
                    inter_dist.append(distance["person_{}".format(i)][j])
                    inter_person_id.append(j)
                    close_ret = True
            if close_ret:
                # Report the nearest offender for this person.
                min_dist = min(inter_dist)
                min_person_id = inter_person_id[inter_dist.index(min_dist)]
                warning_str = text + "; " + "person_" + str(
                    min_person_id + 1) + ":" + str(min_dist) + "cm"
                social_distance_result["danger"].append(warning_str)
                cv2.rectangle(frame, (left, top), (right, bottom),
                              (0, 0, 255), 2)
                # cv2.putText(frame, warning_str, (x, max(y - 10, 0)), cv2.FONT_HERSHEY_TRIPLEX, 1,
                #             (0, 0, 255), 2)
            else:
                social_distance_result["safe"].append(text)
                cv2.rectangle(frame, (left, top), (right, bottom),
                              (0, 255, 0), 2)
            # Person index label (drawn for every detection; font scaled to
            # the image height).
            cv2.putText(frame, str(i + 1), (left, max(top - 3, 0)),
                        cv2.FONT_HERSHEY_TRIPLEX,
                        height / 1500, (0, 255, 0), 3)
    print(social_distance_result)
    # Downscale wide frames so the saved preview is at most 800 px wide.
    if width >= 800:
        fx = 800 / width
    else:
        fx = 1
    cv2.imwrite(os.path.join(UPLOAD_FOLDER, file_name),
                cv2.resize(frame, None, fx=fx, fy=fx))
    # cv2.imshow("social distance", frame)
    # cv2.waitKey()
    return file_name, social_distance_result
def detect_one_frame(self, img, csi_port):
    """Detect faces in one frame, estimate age/gender for each sufficiently
    large face, update the face tracker, and report move-in / move-out events
    (printed when LOCAL, otherwise forwarded via send_data)."""
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    faces = self.face_cascade.detectMultiScale(img, 1.3, 5)
    if len(faces) == 0:
        # No detections: still tick the tracker so stale faces age out.
        self.face_manager.update_face_info(info=self.face_info)
    else:
        face_boxes = []
        ages = []
        genders = []
        for (x, y, w, h) in faces:
            if w > 90:  # ignore small faces
                # mention detected face
                """overlay = img.copy(); output = img.copy(); opacity = 0.6
                cv2.rectangle(img,(x,y),(x+w,y+h),(128,128,128),cv2.FILLED) #draw rectangle to main image
                cv2.addWeighted(overlay, opacity, img, 1 - opacity, 0, img)"""
                cv2.rectangle(img, (x, y), (x + w, y + h),
                              (128, 128, 128), 1)  # draw rectangle to main image
                # extract detected face
                detected_face = img[int(y):int(y + h),
                                    int(x):int(x + w)]  # crop detected face
                face_box = [int(y), int(x + w), int(y + h), int(x)]
                try:
                    # age gender data set has 40% margin around the face. expand detected face.
                    margin = 30
                    margin_x = int((w * margin) / 100)
                    margin_y = int((h * margin) / 100)
                    detected_face = img[
                        adjust_boundary_value(int(y - margin_y), 0):
                        adjust_boundary_value(int(y + h + margin_y), img.shape[0]),
                        adjust_boundary_value(int(x - margin_x), 0):
                        adjust_boundary_value(int(x + w + margin_x), img.shape[1])]
                    face_box = [
                        adjust_boundary_value(int(y - margin_y), 0),
                        adjust_boundary_value(int(x + w + margin_x), img.shape[1]),
                        adjust_boundary_value(int(y + h + margin_y), img.shape[0]),
                        adjust_boundary_value(int(x - margin_x), 0)
                    ]
                except Exception as e:
                    # Margin expansion failed; keep the unexpanded crop/box.
                    log_print(e)
                    # print("detected face has no margin")
                    # print(e)
                try:
                    # vgg-face expects inputs (224, 224, 3)
                    detected_face = cv2.resize(detected_face, (224, 224))
                    img_pixels = image.img_to_array(detected_face)
                    img_pixels = np.expand_dims(img_pixels, axis=0)
                    img_pixels /= 255
                    # find out age and gender
                    age_distributions = self.age_mdl.predict(img_pixels)
                    apparent_age = int(
                        np.floor(
                            np.sum(age_distributions * self.output_indexes,
                                   axis=1))[0])
                    gender_distribution = self.gender_mdl.predict(
                        img_pixels)[0]
                    gender_index = np.argmax(gender_distribution)
                    face_boxes.append(face_box)
                    ages.append(apparent_age)
                    if LOCAL:
                        # NOTE(review): genders is only populated in LOCAL
                        # mode, so recognize_face receives an empty gender
                        # list otherwise — confirm this is intended.
                        enable_gender_icons = True
                        genders.append(gender_index)
                        if gender_index == 0:
                            gender = "F"
                        else:
                            gender = "M"
                        # background for age gender declaration
                        info_box_color = (46, 200, 255)
                        # triangle_cnt = np.array([(x+int(w/2), y+10), (x+int(w/2)-25, y-20),
                        #                          (x+int(w/2)+25, y-20)])
                        triangle_cnt = np.array([
                            (x + int(w / 2), y),
                            (x + int(w / 2) - 20, y - 20),
                            (x + int(w / 2) + 20, y - 20)
                        ])
                        cv2.drawContours(img, [triangle_cnt], 0,
                                         info_box_color, -1)
                        cv2.rectangle(img, (x + int(w / 2) - 50, y - 20),
                                      (x + int(w / 2) + 50, y - 90),
                                      info_box_color, cv2.FILLED)
                        # labels for age and gender
                        # NOTE(review): apparent_age is an int here but
                        # cv2.putText expects a string — this call presumably
                        # raises and is swallowed by the except below; confirm.
                        cv2.putText(img, apparent_age,
                                    (x + int(w / 2), y - 45),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1,
                                    (0, 111, 255), 2)
                        male_icon = cv2.imread(MALE_ICON_PATH)
                        female_icon = cv2.imread(FEMALE_ICON_PATH)
                        if enable_gender_icons:
                            if gender == 'M':
                                gender_icon = male_icon
                            else:
                                gender_icon = female_icon
                            img[y - 75:y - 75 + male_icon.shape[0],
                                x + int(w / 2) - 45:x + int(w / 2) - 45 +
                                male_icon.shape[1]] = gender_icon
                        else:
                            cv2.putText(img, gender,
                                        (x + int(w / 2) - 42, y - 45),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 111, 255), 2)
                except Exception as e:
                    log_print(e)
                    # print("exception", str(e))
        # Feed all measured faces to the tracker; it reports which entries
        # entered or left the scene.
        self.face_info, move_in_idx, move_out_idx = self.face_manager.recognize_face(
            face_info=self.face_info, face_box=face_boxes,
            rgb_frame=img_rgb, age=ages, gender=genders,
            csi_port=csi_port)
        if move_in_idx:
            if LOCAL:
                print("id:", self.face_info["id"])
                print("age:", self.face_info["age"])
                print("gender:", self.face_info["gender"])
                print("tstamp:", self.face_info["t_stamp"])
                print("status:", self.face_info["type"])
            else:
                send_data(data=self.face_info, send_idx=move_in_idx)
        if move_out_idx:
            if LOCAL:
                print("id:", self.face_info["id"])
                print("age:", self.face_info["age"])
                print("gender:", self.face_info["gender"])
                print("tstamp:", self.face_info["t_stamp"])
                print("status:", self.face_info["type"])
            else:
                send_data(data=self.face_info, send_idx=move_out_idx)
    return
# Build the list of file stems that were already processed in an earlier run,
# so they can be skipped below.
processed_file_names = []
for processed_f_path in processed_files:
    f_name = extract_file_name(file_path=processed_f_path)
    processed_file_names.append(f_name)
total_lens = len(input_image_path)
for i, path in enumerate(input_image_path):
    file_name = ntpath.basename(path).replace(".jpg", "")
    if file_name in processed_file_names:
        # Already OCR'd in a previous run.
        continue
    print("Process {}-({} / {})".format(path, i + 1, total_lens))
    try:
        frame_content = process_ocr_text(frame_path=path)
        txt_file_path = os.path.join(OUTPUT_DIR, "{}.txt".format(file_name))
        save_file(content=frame_content, filename=txt_file_path, method='w')
        log_print(info_str=path + "\n" + "Successfully processed")
        print("Successfully processed {}".format(path))
    except Exception as e:
        # Log the failing path and the error, then continue with the next image.
        log_print(info_str=path)
        log_print(info_str=e)
# Remove intermediate jpg files left in the output directory.
for jpg_path in glob.glob(os.path.join(OUTPUT_DIR, "*.jpg")):
    os.remove(jpg_path)
def get_gender_info(json_data, blog_width):
    """Extract the Women / Men percentage strings from an OCR'd stats page.

    json_data: Vision-style OCR result with "textAnnotations".
    blog_width: pixel width of the stats block, bounding the search region.
    Returns {"Women": ..., "Men": ...}; values stay "" when nothing is found,
    and any parse failure falls back to whatever was collected so far.
    """
    gender_dict = {"Women": "", "Men": ""}
    gender_data = []
    gender_ranges = []
    percent_ranges = []
    # First annotation is the full-page text; skip it.
    gender_json = json_data["textAnnotations"][1:]
    try:
        # Pass 1: find each "Gender" block that has a "Women" row with a
        # matching "Men" row, recording the block's region and the x-ranges
        # of the two percentage columns.
        for _json in gender_json:
            if _json["description"] == "Gender":
                left = _json["boundingPoly"]["vertices"][0]["x"] - RANGE_MARGIN
                top = _json["boundingPoly"]["vertices"][0]["y"] - RANGE_MARGIN
                right = left + blog_width + RANGE_MARGIN
                for _json_1 in gender_json:
                    women_center_x = int(
                        0.5 * (_json_1["boundingPoly"]["vertices"][0]["x"] +
                               _json_1["boundingPoly"]["vertices"][1]["x"]))
                    women_center_y = int(
                        0.5 * (_json_1["boundingPoly"]["vertices"][0]["y"] +
                               _json_1["boundingPoly"]["vertices"][2]["y"]))
                    if _json_1[
                            "description"] == "Women" and left <= women_center_x <= right and women_center_y >= top:
                        for _json_2 in gender_json:
                            if _json_2["description"] == "Men":
                                men_center_x = int(0.5 * (
                                    _json_2["boundingPoly"]["vertices"][0]["x"] +
                                    _json_2["boundingPoly"]["vertices"][1]["x"]
                                ))
                                men_center_y = int(0.5 * (
                                    _json_2["boundingPoly"]["vertices"][0]["y"] +
                                    _json_2["boundingPoly"]["vertices"][2]["y"]
                                ))
                                # "Men" must sit in the same column and nearly
                                # the same row band as "Women".
                                if left <= men_center_x <= right and abs(
                                        men_center_y - women_center_y) <= LINE_DIFF:
                                    bottom = men_center_y + RANGE_MARGIN
                                    gender_ranges.append(
                                        [left, top, right, bottom])
                                    percent_ranges.append([
                                        _json["boundingPoly"]["vertices"][2]
                                        ["y"], _json_1["boundingPoly"]
                                        ["vertices"][0]["y"],
                                        _json_1["boundingPoly"]["vertices"][0]
                                        ["x"], _json_1["boundingPoly"]
                                        ["vertices"][1]["x"],
                                        _json_2["boundingPoly"]["vertices"][0]
                                        ["x"], _json_2["boundingPoly"]
                                        ["vertices"][1]["x"]
                                    ])
                                    break
                        break
        # Pass 2: when several candidate regions exist, prefer the one that
        # actually contains a "%" token.
        correct_range_num = 0
        if len(gender_ranges) > 1:
            for i, location_range in enumerate(gender_ranges):
                for _json in gender_json:
                    if "%" not in _json["description"]:
                        continue
                    center_x = int(0.5 *
                                   (_json["boundingPoly"]["vertices"][0]["x"] +
                                    _json["boundingPoly"]["vertices"][1]["x"]))
                    center_y = int(0.5 *
                                   (_json["boundingPoly"]["vertices"][0]["y"] +
                                    _json["boundingPoly"]["vertices"][3]["y"]))
                    if location_range[0] <= center_x <= location_range[2] and \
                            location_range[1] <= center_y <= location_range[3]:
                        correct_range_num = i
                        break
        # Raises IndexError when no region was found; handled by the except.
        left = gender_ranges[correct_range_num][0]
        top = gender_ranges[correct_range_num][1]
        right = gender_ranges[correct_range_num][2]
        bottom = gender_ranges[correct_range_num][3]
        percent_range = percent_ranges[correct_range_num]
        percent_top = percent_range[0]
        percent_bottom = percent_range[1]
        women_percent_left = percent_range[2]
        women_percent_right = percent_range[3]
        men_percent_left = percent_range[4]
        men_percent_right = percent_range[5]
        # Pass 3: gather every token inside the chosen region...
        for _json in gender_json:
            range_center_x = int(0.5 *
                                 (_json["boundingPoly"]["vertices"][0]["x"] +
                                  _json["boundingPoly"]["vertices"][1]["x"]))
            range_center_y = int(0.5 *
                                 (_json["boundingPoly"]["vertices"][0]["y"] +
                                  _json["boundingPoly"]["vertices"][2]["y"]))
            if left <= range_center_x <= right and top <= range_center_y < bottom:
                gender_data.append(_json)
        # ...and route each one to the Women or Men column by x-position.
        for range_data in gender_data:
            percent_center_x = int(
                0.5 * (range_data["boundingPoly"]["vertices"][0]["x"] +
                       range_data["boundingPoly"]["vertices"][1]["x"]))
            percent_center_y = int(
                0.5 * (range_data["boundingPoly"]["vertices"][0]["y"] +
                       range_data["boundingPoly"]["vertices"][2]["y"]))
            if women_percent_left <= percent_center_x <= women_percent_right and \
                    percent_top <= percent_center_y <= percent_bottom:
                gender_dict["Women"] += range_data["description"]
            if men_percent_left <= percent_center_x <= men_percent_right and \
                    percent_top <= percent_center_y <= percent_bottom:
                gender_dict["Men"] += range_data["description"]
        return gender_dict
    except Exception as e:
        log_print(info_str=e)
        return gender_dict
def extract_motorist_occupant_page(self, json_info, frame_path):
    """Extract per-unit occupant fields from one OCR'd report page.

    First counts the "Unit" headers in the left half of the page, then walks
    the y-sorted tokens assigning name / birth date / age / gender / address /
    phone / injuries / seating position to ``info["unit_<n>"]``. Units lacking
    both a name and a birth date are dropped before returning.

    Args:
        json_info: List of word-level Vision-API annotations for the page.
        frame_path: Path of the page image; re-read here for digit-ROI OCR
            fallbacks.

    Returns:
        dict: ``{"unit_1": {...}, ...}`` of extracted string fields.
    """
    info = {}
    unit_number = ""
    owner_name = ""
    birth_date = ""
    age = ""
    gender = ""
    address = ""
    phone = ""
    injuries = ""
    seating_position = ""
    frame = cv2.imread(frame_path)
    frame_height, frame_width = frame.shape[:2]
    unit_cnt = 0
    # Process tokens top-to-bottom so fields are met in page order.
    sorted_json_info = sorted(json_info,
                              key=lambda k: k["boundingPoly"]["vertices"][0]["y"])
    # Phase 1: count units via "Unit" labels in the upper-left page region.
    for i, _json in enumerate(sorted_json_info):
        unit_center_x = 0.5 * (_json["boundingPoly"]["vertices"][0]["x"] +
                               _json["boundingPoly"]["vertices"][1]["x"])
        unit_center_y = 0.5 * (_json["boundingPoly"]["vertices"][0]["y"] +
                               _json["boundingPoly"]["vertices"][3]["y"])
        if _json["description"].lower() == "unit" and unit_center_x < 0.15 * frame_width and \
                unit_center_y < 0.5 * frame_height:
            unit_number_candidates, left, right, top, bottom = self.get_json_candidates(
                json_info=sorted_json_info, f_json=_json, s_json=_json)
            for candi in unit_number_candidates:
                # Keep numeric tokens close to the label's right edge.
                if candi["boundingPoly"]["vertices"][1]["x"] < \
                        sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] + 30 \
                        and candi["description"].replace(",", "").replace(".", "").isdigit():
                    unit_number += candi["description"]
            # Fallback: OCR the cropped digit region directly.
            if unit_number == "" or unit_number == "0":
                unit_number = extract_digit_roi(
                    roi_frame=frame[top:bottom, left + 15:right - 15])
            if unit_number != "":
                unit_cnt += 1
                if unit_number == "0":
                    unit_number += str(unit_cnt)
                info[f"unit_{unit_cnt}"] = {}
                info[f"unit_{unit_cnt}"]["unit_number"] = unit_number.replace(",", "").replace(".", "")
            unit_number = ""
    # Phase 2: fill the fields of each unit; `cnt` tracks the current unit and
    # advances when both "injuries" and "seating_position" have been seen.
    if unit_cnt > 0:
        cnt = 1
        for i, _json in enumerate(sorted_json_info):
            if _json["description"].lower() == "name" and sorted_json_info[i + 2]["description"].lower() == "last":
                json_width = int(0.64 * frame_width) - \
                    sorted_json_info[i + 2]["boundingPoly"]["vertices"][1]["x"] - 5
                owner_name_candidates, _, _, _, _ = self.get_json_candidates(
                    json_info=sorted_json_info, f_json=_json,
                    s_json=sorted_json_info[i + 1], diff_bottom_height=40,
                    diff_left_width=5, diff_right_width=json_width)
                # Re-sort left-to-right so the name reads in order.
                sorted_owner_name_candidates = sorted(
                    owner_name_candidates,
                    key=lambda k: k["boundingPoly"]["vertices"][0]["x"])
                for candi in sorted_owner_name_candidates:
                    owner_name += candi["description"] + " "
                info[f"unit_{cnt}"]["name"] = owner_name
                owner_name = ""
            elif _json["description"].lower() == "date" and \
                    sorted_json_info[i + 2]["description"].lower() == "birth":
                json_width = sorted_json_info[i + 2]["boundingPoly"]["vertices"][1]["x"] - \
                    _json["boundingPoly"]["vertices"][0]["x"]
                birth_date_candidates = self.perform_ocr_roi_frame(
                    f_json=_json, s_json=sorted_json_info[i + 2], frame=frame,
                    diff_width=json_width, diff_bottom=50)
                for candi in birth_date_candidates:
                    if candi["description"].lower() not in ["date", "of", "birth"]:
                        birth_date += candi["description"]
                birth_date = birth_date.replace(",", "").replace(".", "").replace("|", "")
                # Normalize a 10-char date to MM/DD/YYYY.
                if len(birth_date) == 10:
                    birth_date = birth_date[:2] + "/" + birth_date[3:5] + "/" + birth_date[6:]
                info[f"unit_{cnt}"]["birth_date"] = birth_date
                birth_date = ""
            elif _json["description"].lower() == "age":
                json_width = _json["boundingPoly"]["vertices"][1]["x"] - _json["boundingPoly"]["vertices"][0]["x"]
                age_candidates = self.perform_ocr_roi_frame(
                    frame=frame, f_json=_json, s_json=_json, diff_width=json_width)
                for candi in age_candidates:
                    if candi["description"].lower() != "age":
                        age += candi["description"]
                info[f"unit_{cnt}"]["age"] = age.replace(",", "").replace(".", "")
                age = ""
            elif _json["description"].lower() == "gender":
                gender_candidates = self.perform_ocr_roi_frame(
                    frame=frame, f_json=_json, s_json=_json, diff_width=7)
                for candi in gender_candidates:
                    # "gende" tolerates a truncated OCR of the label itself.
                    if candi["description"].lower() not in ["gender", "gende"]:
                        gender += candi["description"]
                info[f"unit_{cnt}"]["gender"] = gender.replace(",", "").replace(".", "")
                gender = ""
            elif _json["description"].lower() == "address" and \
                    sorted_json_info[i + 1]["description"].lower() == ":":
                json_width = int(0.64 * frame_width) - \
                    sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] - 5
                address_candidates, _, _, _, _ = self.get_json_candidates(
                    json_info=sorted_json_info, f_json=_json,
                    s_json=sorted_json_info[i + 1],
                    diff_right_width=json_width, diff_left_width=5)
                for candi in address_candidates:
                    address += candi["description"] + " "
                info[f"unit_{cnt}"]["address"] = address.replace(",", "").replace(".", "")
                address = ""
            elif _json["description"].lower() == "contact" and \
                    sorted_json_info[i + 1]["description"].lower() == "phone":
                phone_candidates, _, _, _, _ = self.get_json_candidates(
                    json_info=sorted_json_info, f_json=_json,
                    s_json=sorted_json_info[i + 1], diff_left_width=5)
                for j, candi in enumerate(phone_candidates):
                    # Skip sliver boxes (width <= 2 px) — OCR noise.
                    if candi["boundingPoly"]["vertices"][1]["x"] - candi["boundingPoly"]["vertices"][0]["x"] > 2:
                        phone += candi["description"]
                info[f"unit_{cnt}"]["phone"] = phone.replace(",", "").replace(".", "")
                phone = ""
            elif _json["description"].lower() == "injuries":
                injuries_company_candidates, left, right, top, bottom = \
                    self.get_json_candidates(json_info=sorted_json_info,
                                             f_json=_json, s_json=_json,
                                             diff_right_width=5)
                for candi in injuries_company_candidates:
                    injuries += candi["description"] + " "
                if injuries == "":
                    injuries = extract_digit_roi(roi_frame=frame[top:bottom, left:right])
                info[f"unit_{cnt}"]["injuries"] = injuries.replace(",", "").replace(".", "")
                injuries = ""
                # Seating position already filled => this unit is complete.
                if "seating_position" in list(info[f"unit_{cnt}"].keys()):
                    if cnt == unit_cnt:
                        break
                    cnt += 1
            elif _json["description"].lower() == "seating" and \
                    sorted_json_info[i + 1]["description"].lower() == "position":
                seating_position_candidates, left, right, top, bottom = \
                    self.get_json_candidates(json_info=sorted_json_info,
                                             f_json=_json,
                                             s_json=sorted_json_info[i + 1],
                                             diff_left_width=5,
                                             diff_right_width=5)
                # Clamp the ROI to the label's own horizontal extent.
                left = max(left, _json["boundingPoly"]["vertices"][0]["x"] - 3)
                if right == 0:
                    right = sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] + 3
                else:
                    right = min(right,
                                sorted_json_info[i + 1]["boundingPoly"]["vertices"][1]["x"] + 3)
                for candi in seating_position_candidates:
                    if left <= 0.5 * (candi["boundingPoly"]["vertices"][0]["x"] +
                                      candi["boundingPoly"]["vertices"][1]["x"]) <= right:
                        seating_position += candi["description"]
                if seating_position == "":
                    seating_position = extract_digit_roi(roi_frame=frame[top:bottom, left:right])
                info[f"unit_{cnt}"]["seating_position"] = seating_position.replace(",", "").replace(".", "")
                seating_position = ""
                # Injuries already filled => this unit is complete.
                if "injuries" in list(info[f"unit_{cnt}"].keys()):
                    if cnt == unit_cnt:
                        break
                    cnt += 1
    # Drop units that yielded neither a name nor a birth date (or whose dict
    # lacks those keys entirely — the KeyError path below).
    del_keys = []
    for i_key in info.keys():
        try:
            if info[i_key]["name"] == "" and info[i_key]["birth_date"] == "":
                del_keys.append(i_key)
        except Exception as e:
            del_keys.append(i_key)
            log_print(e)
    for d_key in del_keys:
        info.pop(d_key, None)
    return info
def run(self):
    """Score pairwise similarity between control descriptions and save a CSV.

    Reads ``INPUT_EXCEL_PATH`` (columns "Risk Description" and
    "Control Description"), embeds every control description with
    ``self.feature_extractor``, finds for each row the top
    ``SIMILARITY_NUMBER`` other controls with cosine similarity >=
    ``SIMILARITY_THRESH``, and for those matches also scores the
    corresponding risk descriptions. Results are appended as six new columns
    and written to ``<input>_result.csv`` in ``OUTPUT_DIR``.

    Returns:
        None. Side effect: writes the CSV and prints progress.
    """
    control_similarities = []
    risk_similarities = []
    control_similarity_values = []
    similarity_rations = []
    risk_similarity_values = []
    avg_similarity_values = []
    file_name = ntpath.basename(INPUT_EXCEL_PATH).replace(".xlsx", "")
    output_file_path = os.path.join(OUTPUT_DIR, f"{file_name}_result.csv")
    input_df = pd.read_excel(INPUT_EXCEL_PATH)
    risk_descriptions = input_df["Risk Description"].values.tolist()
    control_descriptions = input_df["Control Description"].values.tolist()
    # Embed every control description once; None marks rows whose feature
    # extraction failed so they are skipped in the pairwise pass.
    control_features = []
    for c_des in control_descriptions:
        try:
            c_des_feature = self.feature_extractor.get_feature_token_words(text=c_des)
        except Exception as e:
            c_des_feature = None
            log_print(e)
        control_features.append(c_des_feature)
    for i, c_i_feature in enumerate(control_features):
        i_similarity = []
        if c_i_feature is not None:
            for j, c_j_feature in enumerate(control_features):
                if j == i or c_j_feature is None:
                    continue
                i_j_similarity = cosine_similarity([c_i_feature], [c_j_feature])
                if i_j_similarity[0][0] >= SIMILARITY_THRESH:
                    i_similarity.append([j, i_j_similarity[0][0]])
        if not i_similarity:
            # No match above threshold: mark every output column "NA".
            control_similarities.append("NA")
            risk_similarities.append("NA")
            control_similarity_values.append("NA")
            similarity_rations.append("NA")
            risk_similarity_values.append("NA")
            avg_similarity_values.append("NA")
        else:
            sorted_similarity = sorted(i_similarity, key=lambda k: k[1],
                                       reverse=True)[:SIMILARITY_NUMBER]
            # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin int is the documented equivalent.
            similarity_indices = np.array(sorted_similarity)[:, 0].astype(int)
            # PERF: the query row's risk feature does not depend on the match,
            # so compute it once instead of once per matched index.
            risk_des_feature = self.feature_extractor.get_feature_token_words(
                text=risk_descriptions[i])
            init_controls = ""
            init_risks = ""
            init_control_scores = ""
            init_rations = ""
            init_risk_scores = ""
            init_avg_scores = ""
            for m, s_index in enumerate(similarity_indices):
                risk_similar_feature = \
                    self.feature_extractor.get_feature_token_words(text=risk_descriptions[s_index])
                risk_similarity = cosine_similarity([risk_des_feature], [risk_similar_feature])
                init_controls += control_descriptions[s_index] + ","
                init_risks += str(risk_descriptions[s_index]) + ","
                init_control_scores += str(sorted_similarity[m][1]) + ","
                init_risk_scores += str(risk_similarity[0][0]) + ","
                init_avg_scores += str(0.5 * (risk_similarity[0][0] + sorted_similarity[m][1])) + ","
                # Bucket the control-similarity score into a rating label.
                if sorted_similarity[m][1] >= 0.75:
                    init_rations += "high" + ","
                elif 0.5 < sorted_similarity[m][1] < 0.75:
                    init_rations += "medium" + ","
                else:
                    init_rations += "low" + ","
            control_similarities.append(init_controls[:-1])
            risk_similarities.append(init_risks[:-1])
            control_similarity_values.append(init_control_scores[:-1])
            similarity_rations.append(init_rations[:-1])
            # BUGFIX: these two columns previously kept a trailing "," because
            # the [:-1] strip applied to the other four was missing.
            risk_similarity_values.append(init_risk_scores[:-1])
            avg_similarity_values.append(init_avg_scores[:-1])
        print(f"Processed Control Description {i + 1} rows")
    input_df["Similar Sentences"] = control_similarities
    input_df["Risk Sentences"] = risk_similarities
    input_df["Similar Values"] = control_similarity_values
    input_df["Similar Rations"] = similarity_rations
    input_df["Risk Values"] = risk_similarity_values
    input_df["Average Values"] = avg_similarity_values
    input_df.to_csv(output_file_path, index=True, header=True, mode="w")
    print(f"[INFO] Successfully saved in {output_file_path}")
    return
def extract_info_from_json(self, json_result):
    """Populate ``self.field_info`` from a Vision OCR result for one PDF page.

    Refines ``self.pdf_type`` (type11 -> type11_B, type7 -> type7_B) based on
    marker words, then matches each token against the per-type search words in
    ``TYPE_INFO`` and extracts the corresponding ROI value. German field names
    ("Lieferschein", "Gewicht") suggest delivery-note documents.

    Args:
        json_result: Vision-API-style response with a "textAnnotations" list.

    Returns:
        dict: ``self.field_info`` with extracted values; Volume/Gewicht get
        decimal points swapped to German commas.
    """
    # Page width taken from the full-text annotation's top-right vertex.
    json_width = json_result["textAnnotations"][0]["boundingPoly"][
        "vertices"][1]["x"]
    text_info = json_result["textAnnotations"][1:]
    if self.pdf_type == "type11":
        # Vision omits "y" for vertices at the image edge; default them to 0.
        for i, t_info in enumerate(text_info):
            for k, t_coord in enumerate(
                    t_info["boundingPoly"]["vertices"]):
                if "y" not in t_coord.keys():
                    text_info[i]["boundingPoly"]["vertices"][k]["y"] = 0
        # "Zelt" tolerates an OCR misread of "Zeit".
        if not search_specific_info(json_result=text_info,
                                    search_word=["Zeit", "Zelt"]):
            self.pdf_type = "type11_B"
    elif self.pdf_type == "type7":
        if not search_specific_info(json_result=text_info,
                                    search_word=["Anwendung"]):
            self.pdf_type = "type7_B"
    type_model_info = TYPE_INFO[self.pdf_type]
    for i, t_info in enumerate(text_info):
        try:
            t_info_vertices = t_info["boundingPoly"]["vertices"]
            t_left = t_info_vertices[0]["x"]
            t_right = t_info_vertices[1]["x"]
            t_top = t_info_vertices[0]["y"]
            t_bottom = t_info_vertices[2]["y"]
            # t_left/t_top/t_width/t_height are also read by the eval()'d ROI
            # expressions from TYPE_INFO below.
            t_width = t_right - t_left
            t_height = t_bottom - t_top
            t_des = t_info["description"]
            # Tokens like "Kies<digits>" carry the barcode value.
            if "Kies" in t_des:
                if t_des.replace(
                        "Kies", "").isdigit() and self.field_info["Barcode"] == "":
                    self.field_info["Barcode"] = t_des
            for type_key in type_model_info.keys():
                sub_type_info = type_model_info[type_key]
                # Only fill a field once (first match wins).
                if t_des in sub_type_info[
                        "search_word"] and self.field_info[type_key] == "":
                    # type4: "Lieferschein" must be followed by "- Nr".
                    if (self.pdf_type == "type4" and type_key == "Lieferschein_Nr") and \
                            (text_info[i + 1]["description"] != "-" or text_info[i + 2]["description"] != "Nr"):
                        continue
                    # type5/type11: the label must sit in the right page half.
                    if (self.pdf_type == "type5" or self.pdf_type == "type11") and type_key == "Lieferschein_Nr" \
                            and t_right < 0.5 * json_width:
                        continue
                    # NOTE(review): eval() of config-supplied ROI expressions —
                    # safe only while TYPE_INFO is trusted, local data.
                    self.field_info[type_key] = \
                        extract_roi_info(json_result=text_info[:i] + text_info[i + 1:],
                                         roi_left=eval(sub_type_info["left"]),
                                         pdf_type=self.pdf_type,
                                         roi_right=eval(sub_type_info["right"]),
                                         roi_top=eval(sub_type_info["top"]),
                                         roi_bottom=eval(sub_type_info["bottom"]),
                                         label=type_key)
            # Dead branch left from debugging; has no effect.
            if t_width > t_height:
                pass
        except Exception as e:
            log_print(info_str=e)
        # type11 fallback: take the number to the right of "Lieferschein"
        # ("Lleterscheln" tolerates an OCR misread).
        if (self.pdf_type == "type11" or self.pdf_type == "type11_B") and \
                t_info["description"] in ["Lleterscheln", "Lieferschein"]:
            self.field_info["Lieferschein_Nr"] = \
                extract_roi_info(json_result=text_info[i + 1:],
                                 roi_left=t_info["boundingPoly"]["vertices"][2]["x"],
                                 roi_right=2 * t_info["boundingPoly"]["vertices"][2]["x"] - t_info
                                 ["boundingPoly"]["vertices"][3]["x"],
                                 roi_top=-2,
                                 roi_bottom=1.2 * t_info["boundingPoly"]["vertices"][2]["y"],
                                 y_top=1,
                                 label="Lieferschein_Nr",
                                 pdf_type=self.pdf_type)
    # German number formatting: decimal point -> comma.
    self.field_info["Volume"] = self.field_info["Volume"].replace(".", ",")
    self.field_info["Gewicht"] = self.field_info["Gewicht"].replace(
        ".", ",")
    return self.field_info
def get_discovery_info(json_data, blog_width):
    """Extract the "Discovery" block (Reach / Impressions date ranges) from an
    OCR result.

    Args:
        json_data: Vision-API-style response; ``textAnnotations[0]`` is the
            full-text entry, the rest are word-level boxes (skipped below).
        blog_width: Expected pixel width of one stats block; bounds the search
            region to the right of the "Discovery" label.

    Returns:
        dict: ``{"Reach": {"Start Date", "End Date"},
        "Impression": {"Start Date", "End Date"}}``; fields stay empty on any
        failure (the exception is logged, not raised).
    """
    discovery_dict = {
        "Reach": {
            "Start Date": "",
            "End Date": ""
        },
        "Impression": {
            "Start Date": "",
            "End Date": ""
        }
    }
    discovery_data = []
    discovery_ranges = []
    discovery_json = json_data["textAnnotations"][1:]
    try:
        # Pass 1: each candidate block runs from a "Discovery" label down to
        # the matching "Interactions" label inside the same column.
        for _json in discovery_json:
            if _json["description"] == "Discovery":
                left = _json["boundingPoly"]["vertices"][0]["x"] - RANGE_MARGIN
                top = _json["boundingPoly"]["vertices"][0]["y"] - RANGE_MARGIN
                right = left + blog_width + RANGE_MARGIN
                for _json_1 in discovery_json:
                    interaction_center_x = int(
                        0.5 * (_json_1["boundingPoly"]["vertices"][0]["x"] +
                               _json_1["boundingPoly"]["vertices"][1]["x"]))
                    interaction_center_y = int(
                        0.5 * (_json_1["boundingPoly"]["vertices"][0]["y"] +
                               _json_1["boundingPoly"]["vertices"][2]["y"]))
                    if _json_1["description"] == "Interactions" and left <= interaction_center_x <= right and \
                            interaction_center_y >= top:
                        bottom = interaction_center_y + RANGE_MARGIN
                        discovery_ranges.append([left, top, right, bottom])
                        break
        # Pass 2: when several candidate blocks exist, keep one that actually
        # contains a "Reach" or "Impressions" token.
        correct_range_num = 0
        if len(discovery_ranges) > 1:
            for i, location_range in enumerate(discovery_ranges):
                for _json in discovery_json:
                    # BUGFIX: was `or`, which is true for every token (no
                    # single token contains both words) and skipped them all,
                    # so the range check never ran.
                    if "Reach" not in _json["description"] and \
                            "Impressions" not in _json["description"]:
                        continue
                    center_x = int(0.5 * (_json["boundingPoly"]["vertices"][0]["x"] +
                                          _json["boundingPoly"]["vertices"][1]["x"]))
                    center_y = int(0.5 * (_json["boundingPoly"]["vertices"][0]["y"] +
                                          _json["boundingPoly"]["vertices"][3]["y"]))
                    if location_range[0] <= center_x <= location_range[2] and \
                            location_range[1] <= center_y <= location_range[3]:
                        correct_range_num = i
                        break
        # IndexError here (no block found) falls through to the except below.
        left = discovery_ranges[correct_range_num][0]
        top = discovery_ranges[correct_range_num][1]
        right = discovery_ranges[correct_range_num][2]
        bottom = discovery_ranges[correct_range_num][3]
        # Pass 3: collect every token inside the chosen block rectangle.
        for _json in discovery_json:
            range_center_x = int(0.5 * (_json["boundingPoly"]["vertices"][0]["x"] +
                                        _json["boundingPoly"]["vertices"][1]["x"]))
            range_center_y = int(0.5 * (_json["boundingPoly"]["vertices"][0]["y"] +
                                        _json["boundingPoly"]["vertices"][2]["y"]))
            if left <= range_center_x <= right and top <= range_center_y < bottom:
                discovery_data.append(_json)
        # Vertical bands: Reach values sit between the "Reach" and
        # "Impressions" labels; Impression values between "Impressions" and
        # "Interactions". Missing labels leave these None, and the band
        # comparisons below then raise into the except handler.
        reach_top = None
        reach_bottom = None
        impression_top = None
        impression_bottom = None
        for range_data in discovery_data:
            if range_data["description"] == "Reach":
                reach_top = range_data["boundingPoly"]["vertices"][2]["y"]
            if range_data["description"] == "Impressions":
                reach_bottom = range_data["boundingPoly"]["vertices"][0]["y"]
                impression_top = range_data["boundingPoly"]["vertices"][2]["y"]
            if range_data["description"] == "Interactions":
                impression_bottom = range_data["boundingPoly"]["vertices"][0][
                    "y"]
        reach_data = ""
        impression_data = ""
        for range_data in discovery_data:
            center_y = int(0.5 * (range_data["boundingPoly"]["vertices"][0]["y"] +
                                  range_data["boundingPoly"]["vertices"][2]["y"]))
            if reach_top <= center_y <= reach_bottom:
                reach_data += range_data["description"] + " "
            if impression_top <= center_y <= impression_bottom:
                # BUGFIX: impression tokens were joined with "" while reach
                # tokens used " ", corrupting the rfind-based date parsing.
                impression_data += range_data["description"] + " "
        # The band text ends with "...<value>. <start> - <end>", so split on
        # the last "." and the last "-".
        discovery_dict["Reach"]["Start Date"] = reach_data[
            reach_data.rfind(".") + 1:reach_data.rfind("-")]
        discovery_dict["Reach"]["End Date"] = reach_data[reach_data.rfind("-") + 1:]
        discovery_dict["Impression"]["Start Date"] = \
            impression_data[impression_data.rfind(".") + 1:impression_data.rfind("-")]
        # BUGFIX: End Date was copy-pasted from reach_data; read it from the
        # impression band instead.
        discovery_dict["Impression"]["End Date"] = \
            impression_data[impression_data.rfind("-") + 1:]
        return discovery_dict
    except Exception as e:
        # Best-effort: log and return whatever was parsed so far.
        log_print(info_str=e)
        return discovery_dict
def get_age_range_values(frame_path, json_data):
    """Extract the "Age Range" histogram values (per age bucket, per audience
    tab) from an OCR'd analytics screenshot.

    Args:
        frame_path: Path of the screenshot image, forwarded to ``select_item``
            to decide which tab ("All"/"Men"/"Women") is currently active.
        json_data: Vision-API-style response; ``textAnnotations[0]`` is the
            full-text entry, the rest are word-level boxes (skipped below).

    Returns:
        tuple: ``(age_range_dict, blog_width)`` where ``age_range_dict`` maps
        each of "All"/"Men"/"Women" to a bucket->value dict (missing tabs get
        empty buckets) and ``blog_width`` is the average detected block width
        in pixels (0 on failure).
    """
    age_range_dict = {}
    age_items = ["All", "Men", "Women"]
    age_range_json = json_data["textAnnotations"][1:]
    age_ranges = []
    age_range_left = 0
    age_range_top = 0
    age_range_right = 0
    age_range_bottom = 0
    # One [center_y, right_x] anchor per age bucket row, filled below.
    # NOTE(review): these stay None if a bucket label is not found, and the
    # abs(... - None[0]) below then raises into the except handler.
    first_age_range_coordinate = None
    second_age_range_coordinate = None
    third_age_range_coordinate = None
    forth_age_range_coordinate = None
    fifth_age_range_coordinate = None
    sixth_age_range_coordinate = None
    seventh_age_range_coordinate = None
    all_region = None
    men_region = None
    women_region = None
    try:
        # Pass 1: locate each "Age Range" block — anchored by the "Age Range"
        # title on the left and the "All Men Women" tab row on the same line,
        # extending down to the "65+" bucket label.
        for i, _json_1 in enumerate(age_range_json):
            if _json_1["description"] == "Age" and age_range_json[i + 1]["description"] == "Range":
                tmp_center_y_left = int(0.5 * (_json_1["boundingPoly"]["vertices"][0]["y"] +
                                               _json_1["boundingPoly"]["vertices"][2]["y"]))
                for j, _json_2 in enumerate(age_range_json):
                    if _json_2["description"] == "Women" and age_range_json[j - 1]["description"] == "Men" and \
                            age_range_json[j - 2]["description"] == "All":
                        tmp_center_y_right = int(0.5 * (_json_2["boundingPoly"]["vertices"][0]["y"] +
                                                        _json_2["boundingPoly"]["vertices"][2]["y"]))
                        if abs(tmp_center_y_left - tmp_center_y_right) < RANGE_MARGIN:
                            age_range_left = _json_1["boundingPoly"]["vertices"][0]["x"] - RANGE_MARGIN
                            age_range_top = _json_1["boundingPoly"]["vertices"][0]["y"] - RANGE_MARGIN
                            age_range_right = _json_2["boundingPoly"]["vertices"][1]["x"] + RANGE_MARGIN
                            break
                for _json_3 in age_range_json:
                    if _json_3["description"] == "65" and \
                            age_range_left <= int(0.5 * (_json_3["boundingPoly"]["vertices"][0]["x"] +
                                                         _json_3["boundingPoly"]["vertices"][1]["x"])) <= \
                            age_range_right:
                        age_range_bottom = _json_3["boundingPoly"]["vertices"][2]["y"] + LINE_DIFF
                        break
                age_ranges.append([age_range_left, age_range_top, age_range_right, age_range_bottom])
        # Pass 2: keep only blocks that actually contain a "%" token.
        correct_range_nums = []
        for i, age_range in enumerate(age_ranges):
            for _json in age_range_json:
                if "%" not in _json["description"]:
                    continue
                center_x = int(0.5 * (_json["boundingPoly"]["vertices"][0]["x"] +
                                      _json["boundingPoly"]["vertices"][1]["x"]))
                center_y = int(0.5 * (_json["boundingPoly"]["vertices"][0]["y"] +
                                      _json["boundingPoly"]["vertices"][3]["y"]))
                if age_range[0] <= center_x <= age_range[2] and age_range[1] <= center_y <= age_range[3]:
                    correct_range_nums.append(i)
                    break
        blog_width = 0
        # Pass 3: per confirmed block, anchor each bucket row and collect its
        # value tokens.
        for correct_range_num in correct_range_nums:
            tmp_dict = {
                "13 - 17": "",
                "18 - 24": "",
                "25 - 34": "",
                "35 - 44": "",
                "45 - 54": "",
                "55 - 64": "",
                "65 + ": ""
            }
            age_range_data = []
            left = age_ranges[correct_range_num][0]
            top = age_ranges[correct_range_num][1]
            right = age_ranges[correct_range_num][2]
            bottom = age_ranges[correct_range_num][3]
            # Accumulated here, averaged over all blocks after the loop.
            blog_width += right - left
            for i, _json in enumerate(age_range_json):
                range_center_x = int(0.5 * (_json["boundingPoly"]["vertices"][0]["x"] +
                                            _json["boundingPoly"]["vertices"][1]["x"]))
                range_center_y = int(0.5 * (_json["boundingPoly"]["vertices"][0]["y"] +
                                            _json["boundingPoly"]["vertices"][2]["y"]))
                # Each bucket anchor = [row center y, right edge of the label],
                # e.g. "13 - 17" anchored on "13" followed by "-".
                if _json["description"] == "13" and age_range_json[i + 1]["description"] == "-" \
                        and left <= range_center_x < right and top <= range_center_y <= bottom:
                    first_age_range_coordinate = [range_center_y,
                                                  age_range_json[i + 2]["boundingPoly"]["vertices"][1]["x"]]
                if _json["description"] == "18" and age_range_json[i + 1]["description"] == "-" \
                        and left <= range_center_x < right and top <= range_center_y <= bottom:
                    second_age_range_coordinate = [range_center_y,
                                                   age_range_json[i + 2]["boundingPoly"]["vertices"][1]["x"]]
                if _json["description"] == "25" and age_range_json[i + 1]["description"] == "-" \
                        and left <= range_center_x < right and top <= range_center_y <= bottom:
                    third_age_range_coordinate = [range_center_y,
                                                  age_range_json[i + 2]["boundingPoly"]["vertices"][1]["x"]]
                if _json["description"] == "35" and age_range_json[i + 1]["description"] == "-" \
                        and left <= range_center_x < right and top <= range_center_y <= bottom:
                    forth_age_range_coordinate = [range_center_y,
                                                  age_range_json[i + 2]["boundingPoly"]["vertices"][1]["x"]]
                if _json["description"] == "45" and age_range_json[i + 1]["description"] == "-" \
                        and left <= range_center_x < right and top <= range_center_y <= bottom:
                    fifth_age_range_coordinate = [range_center_y,
                                                  age_range_json[i + 2]["boundingPoly"]["vertices"][1]["x"]]
                if _json["description"] == "55" and age_range_json[i + 1]["description"] == "-" \
                        and left <= range_center_x < right and top <= range_center_y <= bottom:
                    sixth_age_range_coordinate = [range_center_y,
                                                  age_range_json[i + 2]["boundingPoly"]["vertices"][1]["x"]]
                # "65 +" has no dash, so anchor on the next token directly.
                if _json["description"] == "65" and left <= range_center_x < right and top <= range_center_y <= bottom:
                    seventh_age_range_coordinate = [range_center_y,
                                                    age_range_json[i + 1]["boundingPoly"]["vertices"][1]["x"]]
                if left <= range_center_x <= right and top <= range_center_y < bottom:
                    age_range_data.append(_json)
            # Assign each in-block token to its bucket row (same line within
            # LINE_DIFF, to the right of the bucket label), and record the
            # All/Men/Women tab regions for select_item.
            for range_data in age_range_data:
                range_data_center_y = int(0.5 * (range_data["boundingPoly"]["vertices"][0]["y"] +
                                                 range_data["boundingPoly"]["vertices"][2]["y"]))
                range_data_center_x = int(0.5 * (range_data["boundingPoly"]["vertices"][0]["x"] +
                                                 range_data["boundingPoly"]["vertices"][1]["x"]))
                if range_data["description"] == "All":
                    all_region = [range_data["boundingPoly"]["vertices"][0]["x"],
                                  range_data["boundingPoly"]["vertices"][0]["y"],
                                  range_data["boundingPoly"]["vertices"][2]["x"],
                                  range_data["boundingPoly"]["vertices"][2]["y"]]
                if range_data["description"] == "Men":
                    men_region = [range_data["boundingPoly"]["vertices"][0]["x"],
                                  range_data["boundingPoly"]["vertices"][0]["y"],
                                  range_data["boundingPoly"]["vertices"][2]["x"],
                                  range_data["boundingPoly"]["vertices"][2]["y"]]
                if range_data["description"] == "Women":
                    women_region = [range_data["boundingPoly"]["vertices"][0]["x"],
                                    range_data["boundingPoly"]["vertices"][0]["y"],
                                    range_data["boundingPoly"]["vertices"][2]["x"],
                                    range_data["boundingPoly"]["vertices"][2]["y"]]
                if abs(range_data_center_y - first_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > first_age_range_coordinate[1]:
                        tmp_dict["13 - 17"] += range_data["description"]
                if abs(range_data_center_y - second_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > second_age_range_coordinate[1]:
                        tmp_dict["18 - 24"] += range_data["description"]
                if abs(range_data_center_y - third_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > third_age_range_coordinate[1]:
                        tmp_dict["25 - 34"] += range_data["description"]
                if abs(range_data_center_y - forth_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > forth_age_range_coordinate[1]:
                        tmp_dict["35 - 44"] += range_data["description"]
                if abs(range_data_center_y - fifth_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > fifth_age_range_coordinate[1]:
                        tmp_dict["45 - 54"] += range_data["description"]
                if abs(range_data_center_y - sixth_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > sixth_age_range_coordinate[1]:
                        tmp_dict["55 - 64"] += range_data["description"]
                if abs(range_data_center_y - seventh_age_range_coordinate[0]) < LINE_DIFF:
                    if range_data_center_x > seventh_age_range_coordinate[1]:
                        tmp_dict["65 + "] += range_data["description"]
            # Decide which audience tab this block belongs to from the image.
            age_item = select_item(frame_path=frame_path,
                                   region=[all_region, men_region, women_region],
                                   items=age_items)
            age_range_dict[age_item] = tmp_dict
        # ZeroDivisionError when no block was confirmed is caught below.
        blog_width /= len(correct_range_nums)
        # Fill missing tabs with empty bucket dicts so callers always see all
        # three keys.
        for item in age_items:
            tmp_dict = {
                "13 - 17": "",
                "18 - 24": "",
                "25 - 34": "",
                "35 - 44": "",
                "45 - 54": "",
                "55 - 64": "",
                "65 + ": ""
            }
            if item not in age_range_dict.keys():
                age_range_dict[item] = tmp_dict
        return age_range_dict, blog_width
    except Exception as e:
        # Best-effort: log, return empty buckets for every tab and width 0.
        log_print(info_str=e)
        for item in age_items:
            tmp_dict = {
                "13 - 17": "",
                "18 - 24": "",
                "25 - 34": "",
                "35 - 44": "",
                "45 - 54": "",
                "55 - 64": "",
                "65 + ": ""
            }
            age_range_dict[item] = tmp_dict
        blog_width = 0
        return age_range_dict, blog_width