def test_rh_put():
    """POST a metadata value, PUT a second one, and expect both to be stored.

    Talks to the live API through ``rh`` using the ``TESTUSER`` environment
    variable as the uid.
    """
    user = os.environ["TESTUSER"]
    resource = ("/metadata/1324_testcorpus_5768/" + user +
                "/xml/en/html/ajokielto.xml")

    def fetch_metadata():
        # Read the resource back and run it through the XML parser.
        raw = rh.get(resource, {"uid": user})
        return xml_parser.XmlParser(raw.split("\n")).getMetadata()

    rh.post(resource, {"uid": user, "testkey": "testvalue"})
    assert fetch_metadata()["testkey"] == "testvalue"

    rh.put(resource, {"uid": user, "testkey": "testvalue2"})
    stored = fetch_metadata()
    assert "testvalue" in stored["testkey"] and "testvalue2" in stored["testkey"]
def test_corpus_settings(client):
    """Create a corpus, change its settings, and check the stored metadata.

    The metadata is compared against the submitted form after creation and
    again after posting new values to the corpus settings page.
    """
    user = os.environ["TESTUSER"]
    login(client, user, os.environ["TESTPW"])

    def assert_form_stored(form):
        # Every submitted field except "name" must equal the metadata entry
        # that the field dictionary maps it to.
        raw = rh.get("/metadata/1233test_corpus5678/" + user, {"uid": user})
        stored = xml_parser.XmlParser(raw.split("\n")).getMetadata()
        fields = opusrepository.initialize_field_dict()
        for key in form.keys():
            if key != "name":
                assert form[key] == stored[fields[key][0]]

    # Start from a clean slate in case an earlier run left the corpus behind.
    rh.delete("/storage/1233test_corpus5678", {"uid": user})

    initial_form = {
        "name": "1233test_corpus5678",
        "group": "group",
        "domain": "domain",
        "origin": "origin",
        "description": "description",
        "pdf_reader": "pdf_reader",
        "document_alignment": "document_alignment",
        "sentence_alignment": "sentence_alignment",
        "sentence_splitter": "sentence_splitter",
        "autoalignment": "on",
    }
    client.post('/create_corpus', data=initial_form, follow_redirects=True)
    page = client.get('/corpus_settings/1233test_corpus5678')
    assert b'Corpus Settings' in page.data
    assert_form_stored(initial_form)

    updated_form = {
        "name": "1233test_corpus5678",
        "group": "group2",
        "domain": "domain2",
        "origin": "origin2",
        "description": "description2",
        "pdf_reader": "pdf_reader2",
        "document_alignment": "document_alignment2",
        "sentence_alignment": "sentence_alignment2",
        "sentence_splitter": "sentence_splitter2",
        "autoalignment": "off",
    }
    client.post('/corpus_settings/1233test_corpus5678',
                data=updated_form,
                follow_redirects=True)
    assert_form_stored(updated_form)

    rh.delete("/storage/1233test_corpus5678", {"uid": user})
def test_getFileContent():
    """getFileContent should return the entry's lines, each ending in a newline."""
    sentence = (
        '<s id="1">If a person is sentenced to a punishment for causing a serious traffic hazard, '
        'driving while intoxicated or driving while seriously intoxicated, the court also imposes a '
        'driving ban for at most five years.</s>'
    )
    document_lines = [
        '<?xml version="1.0" encoding="utf-8"?>',
        '<letsmt version="1.0">',
        '<p id="1">',
        sentence,
        '</p>',
    ]

    # The first document line shares its line with the opening <entry> tag.
    response_lines = ['<letsmt-ws version="56">',
                      '<list path="/testcorpus/testuser/xml/en/html/ajokielto.xml">',
                      '<entry>' + document_lines[0]]
    response_lines.extend(document_lines[1:])
    response_lines.extend([
        '</entry>',
        '</list>',
        # No space between the two pieces: the fixture is kept as-is.
        '<status code="0" location="/storage/testcorpus/testuser/xml/en/html/ajokielto.xml"'
        'operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ])

    content = xml_parser.XmlParser(response_lines).getFileContent()
    expected = "".join(line + "\n" for line in document_lines)
    assert content == expected
def test_recursiveCorpora():
    """recursiveCorpora collects corpus names from ``path`` attributes, without duplicates."""
    parser = xml_parser.XmlParser([])

    # An entry without a "path" attribute contributes nothing.
    without_path = ('<list path="">'
                    '<entry directory="testcorpus2/testuser" />'
                    '</list>')
    parser.recursiveCorpora(ET.fromstring(without_path))
    assert len(parser.elementList) == 0

    two_corpora = ('<list path="">'
                   '<entry path="testcorpus/testuser" />'
                   '<entry path="testcorpus2/testuser" />'
                   '</list>')
    parser.recursiveCorpora(ET.fromstring(two_corpora))
    assert parser.elementList[0] == "testcorpus"
    assert parser.elementList[1] == "testcorpus2"

    # Feeding the same listing again must not grow the list.
    parser.recursiveCorpora(ET.fromstring(two_corpora))
    assert len(parser.elementList) == 2
    assert parser.elementList[0] == "testcorpus"
    assert parser.elementList[1] == "testcorpus2"
def test_getGroupOwner():
    """getGroupOwner should return "testuser" for this group listing."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="/group/">',
        '<entry id="testuser" kind="group" owner="testuser" testattr="testvalue">',
        '<user>testuser</user>',
        '</entry>',
        '</list>',
        '<status code="0" location="/group/testuser" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    owner = xml_parser.XmlParser(response_lines).getGroupOwner()
    assert owner == "testuser"
def test_recursiveAttrTag():
    """recursiveAttrTag should leave the requested attribute value in elementString."""
    markup = ('<list path="/group/">'
              '<entry id="testuser" kind="group" owner="testuser" testattr="testvalue">'
              '<user>testuser</user>'
              '</entry>'
              '</list>')
    root = ET.fromstring(markup)

    parser = xml_parser.XmlParser([])
    parser.recursiveAttrTag(root, "entry", "testattr")
    assert parser.elementString == "testvalue"
def test_groupsForUser():
    """groupsForUser should list the groups from <member_of>, in order."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="group/testuser">',
        '<entry id="testuser" kind="user info">',
        '<member>testuser</member>',
        '<member_of>testuser,public</member_of>',
        '<my_group>testuser</my_group>',
        '</entry>',
        '</list>',
        '<status code="0" location="/group/testuser/" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    groups = xml_parser.XmlParser(response_lines).groupsForUser()
    for position, expected in enumerate(("testuser", "public")):
        assert groups[position] == expected
def test_itemExists():
    """itemExists is True for a normal listing and False for an error status."""
    found_response = [
        '<letsmt-ws version="56">',
        '<list path="/testcorpus">',
        '<entry kind="branch" path="/testcorpus/testuser">',
        '<name>testuser</name>',
        '<group>public</group>',
        '<owner>testuser</owner>',
        '</entry>',
        '</list>',
        '<status code="0" location="/storage/testcorpus" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    assert xml_parser.XmlParser(found_response).itemExists() == True

    missing_response = [
        '<letsmt-ws version="56">',
        '<status code="6" location="/storage/testcorpusdoesnotexist" operation="GET"'
        """ type="error">Cannot find/read slot 'testcorpusdoesnotexist'</status>""",
        '</letsmt-ws>',
    ]
    assert xml_parser.XmlParser(missing_response).itemExists() == False
def test_getJobs():
    """getJobs should expose each job's file and status as the first two items."""
    job_entry = (
        '<entry name="job_1543831841_642742498" file="testcorpus/testuser/uploads/html.tar.gz"'
        ' id="890" job="testcorpus/testuser/jobs/import/uploads/html.tar.gz.xml" status="RUNNING" />'
    )
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="jobs">',
        job_entry,
        '</list>',
        '<status code="0" location="job" operation="GET" type="ok" />',
        '</letsmt-ws>',
    ]
    first_job = xml_parser.XmlParser(response_lines).getJobs()[0]
    assert first_job[0] == "testcorpus/testuser/uploads/html.tar.gz"
    assert first_job[1] == "RUNNING"
def test_collectToList():
    """collectToList("group") should return the single <group> value."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="/testcorpus">',
        '<entry kind="branch" path="/testcorpus/testuser">',
        '<name>testuser</name>',
        '<group>public</group>',
        '<owner>testuser</owner>',
        '</entry>',
        '</list>',
        '<status code="0" location="/storage/testcorpus" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    collected = xml_parser.XmlParser(response_lines).collectToList("group")
    assert len(collected) == 1
    assert collected[0] == "public"
def test_branchesForCorpus():
    """branchesForCorpus should return the one branch name in the listing."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="/testcorpus">',
        '<entry kind="branch" path="/testcorpus/testuser">',
        '<name>testuser</name>',
        '<group>public</group>',
        '<owner>testuser</owner>',
        '</entry>',
        '</list>',
        '<status code="0" location="/storage/testcorpus" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    branch_names = xml_parser.XmlParser(response_lines).branchesForCorpus()
    assert len(branch_names) == 1
    assert branch_names[0] == "testuser"
def get_webview_roi(self, xml_str):
    """Return the bounds of the first ``android.webkit.WebView`` node.

    Args:
        xml_str: XML text handed to ``XmlParser.read_from_string``.

    Returns:
        A tuple ``(x1, y1, x2, y2)`` produced by ``coord_transform`` from the
        node's ``bounds`` attribute. All four values are ``None`` when the
        XML could not be read or no WebView node is present.
    """
    no_roi = (None, None, None, None)

    xml_parser = xp.XmlParser()
    node_list1 = xml_parser.read_from_string(xml_str)
    if node_list1 is None:
        # Stop here: iterating over None below would raise TypeError.
        log.info('Read xml file FAILURE!!!')
        return no_roi

    for n in node_list1:
        if n.get('class') == 'android.webkit.WebView':
            # Only the first WebView is considered.
            x1, y1, x2, y2 = xml_parser.coord_transform(n.get('bounds'))
            return (x1, y1, x2, y2)
    return no_roi
def test_recursiveCollect():
    """recursiveCollect should append the text of the requested tag to elementList."""
    markup = ('<list path="/testcorpus">'
              '<entry kind="branch" path="/testcorpus/testuser">'
              '<name>testuser</name>'
              '<group>public</group>'
              '<owner>testuser</owner>'
              '</entry>'
              '</list>')
    root = ET.fromstring(markup)
    parser = xml_parser.XmlParser([])

    # Collect one tag at a time and check the most recently appended value.
    for tag, expected in (("name", "testuser"), ("group", "public")):
        parser.recursiveCollect(root, tag)
        assert parser.elementList[-1] == expected
def test_getUsers():
    """getUsers should return all four <user> values in document order."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="/group/">',
        '<entry id="public" kind="group" owner="admin">',
        '<user>user1</user>',
        '<user>user2</user>',
        '<user>user3</user>',
        '<user>user4</user>',
        '</entry>',
        '</list>',
        '<status code="0" location="/group/public" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    users = xml_parser.XmlParser(response_lines).getUsers()
    assert len(users) == 4
    # Only the two ends of the list are checked.
    assert users[0] == "user1"
    assert users[3] == "user4"
def test_corporaForUser():
    """corporaForUser should return corpus names in the order they are listed."""
    # The whole response is deliberately ONE string, so the parser receives a
    # single-element list.
    single_line_response = (
        '<letsmt-ws version="56">'
        '<list path="">'
        '<entry path="testcorpus2/testuser" />'
        '<entry path="testcorpus/testuser" />'
        '</list>'
        '<status code="0" location="/metadata" operation="GET" type="ok">Found 2 matching entries</status>'
        '</letsmt-ws>'
    )
    corpora = xml_parser.XmlParser([single_line_response]).corporaForUser()
    for position, expected in enumerate(("testcorpus2", "testcorpus")):
        assert corpora[position] == expected
def test_recursiveGroups():
    """recursiveGroups should collect groups only when <member_of> is present."""
    parser = xml_parser.XmlParser([])

    # No <member_of> element: nothing is collected.
    without_member_of = ('<entry id="testuser" kind="user info">'
                         ' <member>testuser</member>'
                         ' <my_group>testuser</my_group>'
                         ' </entry>')
    parser.recursiveGroups(ET.fromstring(without_member_of))
    assert len(parser.elementList) == 0

    with_member_of = ('<entry id="testuser" kind="user info">'
                      ' <member>testuser</member>'
                      ' <member_of>testuser,public</member_of>'
                      ' <my_group>testuser</my_group>'
                      ' </entry>')
    parser.recursiveGroups(ET.fromstring(with_member_of))
    assert parser.elementList[0] == "testuser"
    assert parser.elementList[1] == "public"
def get_filecontent():
    """Return the parsed content of the file named by the ``path`` query argument.

    The file is fetched from ``/storage<path>`` with ``action=cat``. Paths
    containing ``/tmx/`` are parsed with ``parseTMX``, all others with
    ``parseDocXML``. The result is returned as JSON under the ``content`` key.
    """
    # NOTE(review): username stays unbound when the session is empty —
    # presumably a login guard outside this block prevents that; confirm.
    if session:
        username = session["username"]

    path = request.args.get("path", "", type=str)
    api_parameters = {"uid": username, "action": "cat"}
    file_text = get_from_api_and_parse("/storage" + path, api_parameters,
                                       "getFileContent")

    parser = xml_parser.XmlParser(file_text.split("\n"))
    parse = parser.parseTMX if "/tmx/" in path else parser.parseDocXML
    return jsonify(content=parse())
def get_from_api_and_parse(path, parameters, function):
    """GET *path* from the API and return the result of one XmlParser method.

    Args:
        path: API path passed to ``rh.get``.
        parameters: request parameters passed to ``rh.get``.
        function: name of the parser method to call; must be one of the
            names listed below.

    Raises:
        KeyError: if *function* is not a supported name.
    """
    supported = (
        "corporaForUser",
        "groupsForUser",
        "getMetadata",
        "getUsers",
        "branchesForCorpus",
        "navigateDirectory",
        "getMonolingualAndParallel",
        "getAlignCandidates",
        "getJobs",
        "getJobPath",
        "getFileContent",
        "itemExists",
    )
    response_text = rh.get(path, parameters)
    parser = xml_parser.XmlParser(response_text.split("\n"))
    # Bind every supported method up front, then dispatch by name.
    dispatch = {name: getattr(parser, name) for name in supported}
    return dispatch[function]()
def dom_check_login_ui(j_dict):
    """Pick out account, password and logon widget nodes from a DOM dump.

    Args:
        j_dict: mapping whose ``'dom_tree_url'`` entry locates the XML dump.

    Returns:
        ``None`` when the node list is empty; otherwise a tuple
        ``(account_nodes, password_nodes, logon_nodes)`` of node lists.
        A node may appear in more than one list.
    """
    parser = xp.XmlParser()
    parser.get_xml_doc(j_dict['dom_tree_url'])
    nodes = parser.get_node_list()
    if len(nodes) == 0:
        return None

    logon_nodes = []
    account_nodes = []
    password_nodes = []
    # (allowed classes, allowed texts, allowed resource ids, destination list)
    rules = (
        (LOGON_WIDGET_CLASS, LOGON_TEXT_ATTR, LOGON_RC_ID_ATTR, logon_nodes),
        (USER_WIDGET_CLASS, USER_TEXT_ATTR, USER_RC_ID_ATTR, account_nodes),
        (PWD_WIDGET_CLASS, PWD_TEXT_ATTR, PWD_RC_ID_ATTR, password_nodes),
    )

    for node in nodes:
        widget_class = node.get('class')
        text = node.get('text')
        resource_id = node.get('resource-id')
        for classes, texts, resource_ids, bucket in rules:
            # The class must match, plus either the text or the resource id.
            if check_in_cfg_itm_lis(widget_class, classes) and (
                    check_in_cfg_itm_lis(text, texts)
                    or check_in_cfg_itm_lis(resource_id, resource_ids)):
                bucket.append(node)

    return account_nodes, password_nodes, logon_nodes
def test_getMonolingualAndParallel():
    """getMonolingualAndParallel should return [name, "dir"] pairs for both lists."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="">',
        '<entry path="testcorpus/testuser">',
        '<name>testuser</name>',
        '<gid>public</gid>',
        '<import_queue></import_queue>',
        '<langs>fi,en,sv</langs>',
        '<modif>2018-11-27 14:05:14</modif>',
        '<origin></origin>',
        '<owner>testuser</owner>',
        '<parallel-langs>fi-sv,en-fi,en-sv</parallel-langs>',
        '</entry>',
        '</list>',
        '<status code="0" location="/metadata/testcorpus/testuser" operation="GET"'
        ' type="ok">Found matching path ID. Listing all of its properties</status>',
        '</letsmt-ws>',
    ]
    result = xml_parser.XmlParser(response_lines).getMonolingualAndParallel()

    assert len(result[0]) == 3
    assert len(result[1]) == 3
    # Index 0 holds the monolingual entries, index 1 the language pairs.
    for language in ("fi", "sv", "en"):
        assert [language, "dir"] in result[0]
    for pair in ("fi-sv", "en-sv", "en-fi"):
        assert [pair, "dir"] in result[1]
def test_recursiveMetadata():
    """recursiveMetadata should fill elementDict with each child tag and its text."""
    markup = ('<entry path="testcorpus/testuser/uploads/html.tar.gz">'
              '<description>test</description>'
              '<direction>unknown</direction>'
              '<gid>public</gid>'
              '<owner>testuser</owner>'
              '<status>job canceled</status>'
              '</entry>')
    parser = xml_parser.XmlParser([])
    parser.recursiveMetadata(ET.fromstring(markup))

    expected = {
        "description": "test",
        "direction": "unknown",
        "gid": "public",
        "owner": "testuser",
        "status": "job canceled",
    }
    for tag, value in expected.items():
        assert parser.elementDict[tag] == value
def test_getMetadata():
    """getMetadata should map each metadata tag of the entry to its text."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="">',
        '<entry path="testcorpus/testuser/uploads/html.tar.gz">',
        '<description>test</description>',
        '<direction>unknown</direction>',
        '<gid>public</gid>',
        '<owner>testuser</owner>',
        '<status>job canceled</status>',
        '</entry>',
        '</list>',
        '<status code="0" location="/metadata/testcorpus/testuser/uploads/html.tar.gz"'
        ' operation="GET" type="ok">Found matching path ID. Listing all of its properties</status>',
        '</letsmt-ws>',
    ]
    metadata = xml_parser.XmlParser(response_lines).getMetadata()

    expected = {
        "description": "test",
        "direction": "unknown",
        "gid": "public",
        "owner": "testuser",
        "status": "job canceled",
    }
    for tag, value in expected.items():
        assert metadata[tag] == value
def test_getAlignmentCandidates():
    """getAlignCandidates should map the English file to its Finnish candidate."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="testcorpus/testuser/">',
        '<entry path="testcorpus/testuser/xml/en/html/ajokielto.xml">',
        '<align-candidates>xml/fi/html/ajokielto.xml</align-candidates>',
        '<aligned_with>xml/fi/html/ajokielto.xml,xml/sv/html/ajokielto.xml</aligned_with>',
        '<gid>public</gid>',
        '<imported_from>uploads/html.tar.gz:html/en/ajokielto.html</imported_from>',
        '<language>en</language>',
        '<owner>testuser</owner>',
        '<parsed>ud/en/html/ajokielto.xml</parsed>',
        '<resource-type>corpusfile</resource-type>',
        '<size>38</size>',
        '<status>successfully aligned with testcorpus/testuser/xml/sv/html/ajokielto.xml</status>',
        '</entry>',
        '</list>',
        '<status code="0" location="/metadata/testcorpus/testuser" operation="GET" type="ok">Found 1 matching'
        ' entries</status>',
        '</letsmt-ws>',
    ]
    candidates = xml_parser.XmlParser(response_lines).getAlignCandidates()

    expected = {'en/html/ajokielto.xml': ['fi/html/ajokielto.xml']}
    for source_file, targets in expected.items():
        for target in targets:
            assert target in candidates[source_file]
def test_create_corpus(client):
    """Exercise /create_corpus: invalid names, a successful create, a duplicate."""
    user = os.environ["TESTUSER"]
    login(client, user, os.environ["TESTPW"])
    # Remove leftovers from earlier runs so creation can succeed.
    rh.delete("/storage/1233test_corpus5678", {"uid": user})

    page = client.get('/create_corpus')
    assert b'Create Corpus' in page.data

    post_data = {
        "name": "",
        "group": "group",
        "domain": "domain",
        "origin": "origin",
        "description": "description",
        "pdf_reader": "pdf_reader",
        "document_alignment": "document_alignment",
        "sentence_alignment": "sentence_alignment",
        "sentence_splitter": "sentence_splitter",
        "autoalignment": "on",
    }

    def submit():
        # Post the current form and follow the redirect to the result page.
        return client.post('/create_corpus',
                           data=post_data,
                           follow_redirects=True)

    # An empty name and a name with a space are both rejected.
    for invalid_name in ("", "with space"):
        post_data["name"] = invalid_name
        assert b'Name must be ASCII only and must not contain spaces' in submit().data

    post_data["name"] = "1233test_corpus5678"
    assert b'Corpus "1233test_corpus5678" created!' in submit().data

    raw = rh.get("/metadata/1233test_corpus5678/" + user, {"uid": user})
    stored = xml_parser.XmlParser(raw.split("\n")).getMetadata()
    fields = opusrepository.initialize_field_dict()
    for key in post_data.keys():
        if key != "name":
            assert post_data[key] == stored[fields[key][0]]

    # Submitting the same name again must report the duplicate.
    assert b'Corpus "1233test_corpus5678" already exists!' in submit().data

    rh.delete("/storage/1233test_corpus5678", {"uid": user})
def test_navigateDirectory():
    """navigateDirectory should return (name, kind) for each entry, in order."""
    response_lines = [
        '<letsmt-ws version="56">',
        '<list path="/testcorpus/testuser/uploads">',
        '<entry kind="file">',
        '<name>html.tar.gz</name>',
        '<commit revision="HEAD">',
        '<author>testuser</author>',
        '<date>unknown</date>',
        '</commit>',
        '<group>public</group>',
        '<owner>testuser</owner>',
        '<size>14210</size>',
        '</entry>',
        '<entry kind="dir">',
        '<name>html</name>',
        '<commit revision="HEAD">',
        '<author>testuser</author>',
        '<date>unknown</date>',
        '</commit>',
        '<group>public</group>',
        '<owner>testuser</owner>',
        '</entry>',
        '</list>',
        '<status code="0" location="/storage/testcorpus/testuser/uploads" operation="GET" type="ok"></status>',
        '</letsmt-ws>',
    ]
    listing = xml_parser.XmlParser(response_lines).navigateDirectory()

    assert len(listing) == 2
    expected = (("html.tar.gz", "file"), ("html", "dir"))
    for position, (name, kind) in enumerate(expected):
        assert listing[position][0] == name
        assert listing[position][1] == kind
def __init__(self):
    """Create the downloader, parser and outputer this object works with."""
    # A URL manager used to be created here as well; it is currently disabled.
    self.downloader = xml_downloader.XmlDownloader()
    self.parser = xml_parser.XmlParser()
    self.outputer = xml_outputer.XmlOutputer()
import hashlib
import cv2

if __name__ == '__main__':
    # Ad-hoc driver: load one screenshot and one DOM dump from fixed paths
    # and collect the leaf nodes of the dump.
    xml_parser = xp.XmlParser()
    xml_dom_url1 = '/root/workspace/devicepass-ai/devicepassai/devicepassai/ImageDiffer/1_1.xml'
    xml_dom_url2 = '/root/workspace/devicepass-ai/devicepassai/devicepassai/ImageDiffer/1_2.xml'
    shot_img_url = '/root/workspace/devicepass-ai/devicepassai/devicepassai/ImageDiffer/1_2.jpg'

    img_shot = cv2.imread(shot_img_url, cv2.IMREAD_COLOR)

    node_list1 = xml_parser.get_xml_data(xml_dom_url1)
    if node_list1 is None:
        # Exit here: the loop below cannot iterate over None.
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(" Read xml file FAILURE!!!")
        raise SystemExit(1)

    leaf_node_list1 = []
    for n in node_list1:
        # list(n) yields the direct children; it replaces getchildren(),
        # which newer ElementTree versions no longer provide.
        # NOTE(review): assumes nodes are ElementTree-style elements — confirm.
        if len(list(n)) == 0:
            leaf_node_list1.append(n)
def _select_edit_widget(img_proc, shot_img_url, ocr_key, attr_matches,
                        edit_text_widgets):
    """Pick one edit widget from the XML attribute matches, using OCR to break ties."""
    if len(attr_matches) == 1:
        return attr_matches[0]
    # No attribute match: OCR over every edit widget.
    # Several matches: OCR over the matches only.
    candidates = attr_matches if len(attr_matches) > 1 else edit_text_widgets
    return img_proc.ocr_detect(False, shot_img_url, ocr_key, candidates)


def main_entry(j_dict):
    """Locate the account box, password box and login button of a login screen.

    Args:
        j_dict: mapping with a ``'dom_tree_url'`` entry (UI XML dump) and a
            ``'screen_shot_url'`` entry (screenshot image).

    Returns:
        * a one-element list with an error message when the screenshot or
          the XML dump cannot be read;
        * ``None`` when the screen is not judged to be a login dialog, when
          neither edit box was found while the login button still needed
          OCR, or when no login button could be determined;
        * otherwise the result of ``verify_result(user_edit, pwd_edit,
          login_button)``.
    """
    trace_logger()
    xml_dom_url = j_dict['dom_tree_url']
    shot_img_url = j_dict['screen_shot_url']
    logging.info(
        "--------Auto Login Verification Start----------------------------")
    logging.info("dom tree file:%s", xml_dom_url)
    logging.info("screen shot file:%s", shot_img_url)

    # OCR starts with the English character set (is_chinese_sim=False).
    img_proc = image_processor.ImageProcessor(False)

    # Read the full screenshot, then every node of the XML dump.
    screen_img_color, _height, _width = img_proc.read_screen_img(shot_img_url)
    if screen_img_color is None:
        return [" Read image file FAILURE!!!"]

    xml_parser = xp.XmlParser()
    node_list = xml_parser.get_xml_data(xml_dom_url)
    if node_list is None:
        return [" Read xml file FAILURE!!!"]

    # Widget classes come from the XML; edit boxes and login buttons may
    # also appear under classes other than the configured ones.
    edit_text_widgets = xml_parser.get_edit_text_widgets(
        node_list, USER_WIDGET_CLASS)
    btn_widgets = xml_parser.get_btn_widget(node_list, LOGON_WIDGET_CLASS)
    for widget in btn_widgets:
        logging.info("candidate logon widget1: %s", widget.get('bounds'))

    estm_login_button = None
    if len(btn_widgets) == 0:
        # No button of the configured class: fall back to the supplementary
        # class and let OCR look for the login key.
        btn_widgets = xml_parser.get_btn_widget(node_list,
                                                LOGON_WIDGET_CLASS_SUPPLEMENT)
        # NOTE(review): this OCR result is always overwritten (or discarded
        # by an early return) further down; the call is kept in case
        # ocr_detect has side effects — confirm and remove if not.
        estm_login_button = img_proc.ocr_detect(True, shot_img_url, LOGON_KEY,
                                                btn_widgets)

    if not xml_parser.is_login_ui(edit_text_widgets, btn_widgets):
        logging.info("current ui is NOT logon dialog !!!")
        return None
    logging.info("current ui is logon dialog !")

    # Attribute information read from the XML is unreliable, so OCR decides
    # whenever the attributes give zero or several matches.
    estm_user_edits = xml_parser.xml_estm_edit(edit_text_widgets,
                                               USER_TEXT_ATTR,
                                               USER_RC_ID_ATTR)
    estm_user_edit = _select_edit_widget(img_proc, shot_img_url,
                                         USER_ACCOUNT_KEY, estm_user_edits,
                                         edit_text_widgets)
    if estm_user_edit is not None:
        logging.info("FINAL detect user edit: %s",
                     estm_user_edit.get('bounds'))

    estm_pwd_edit = None
    if len(edit_text_widgets) >= 1:
        estm_pwd_edits = xml_parser.xml_estm_edit(edit_text_widgets,
                                                  PWD_TEXT_ATTR,
                                                  PWD_RC_ID_ATTR)
        estm_pwd_edit = _select_edit_widget(img_proc, shot_img_url, PWD_KEY,
                                            estm_pwd_edits, edit_text_widgets)
    if estm_pwd_edit is not None:
        logging.info("FINAL detect password edit: %s",
                     estm_pwd_edit.get('bounds'))

    # XML attributes are unreliable here too: a single candidate is taken
    # as-is, anything else goes through OCR.
    candid_login_buttons = xml_parser.xml_estm_button(estm_user_edit,
                                                      estm_pwd_edit,
                                                      btn_widgets, LOGON_KEY)
    for widget in candid_login_buttons:
        logging.info("candidate logon widget2: %s", widget.get('bounds'))

    if len(candid_login_buttons) == 1:
        estm_login_button = candid_login_buttons[0]
    else:
        if estm_user_edit is None and estm_pwd_edit is None:
            return None
        # Use the password box as the reference widget when it is known,
        # otherwise the account box.
        edit_widget = (estm_pwd_edit
                       if estm_pwd_edit is not None else estm_user_edit)
        estm_login_button = img_proc.ocr_detect_logon_button(
            True, edit_widget, shot_img_url, LOGON_KEY, candid_login_buttons)

    if estm_login_button is None:
        return None
    logging.info("FINAL detect logon button: %s",
                 estm_login_button.get('bounds'))
    return verify_result(estm_user_edit, estm_pwd_edit, estm_login_button)
def main_entry2(json_dict):
    """OCR every leaf widget and print account / password / login candidates.

    Args:
        json_dict: mapping with a ``'dom_tree_url'`` entry (UI XML dump) and
            a ``'screen_shot_url'`` entry (screenshot image).

    Returns:
        A one-element list with an error message when the screenshot or the
        XML dump cannot be read; otherwise ``None`` after printing the
        ``bounds`` of each candidate.
    """
    trace_logger()
    xml_dom_url = json_dict['dom_tree_url']
    shot_img_url = json_dict['screen_shot_url']
    logging.info(
        "--------Auto Login Verification Start----------------------------")
    logging.info("dom tree file:%s", xml_dom_url)
    logging.info("screen shot file:%s", shot_img_url)

    # OCR starts with the English character set; each widget is tried with
    # English first and Simplified Chinese second.
    img_proc = image_processor.ImageProcessor(False)

    # Read the full screenshot, then every node of the XML dump.
    screen_img_color, _height, _width = img_proc.read_screen_img(shot_img_url)
    if screen_img_color is None:
        return [" Read image file FAILURE!!!"]

    xml_parser = xp.XmlParser()
    node_list = xml_parser.get_xml_data(xml_dom_url)
    if node_list is None:
        return [" Read xml file FAILURE!!!"]

    # Design notes from the original author (translated):
    # - A login dialog is recognised by keywords and by widget positions
    #   obtained through OCR. When several candidates come back (e.g. for
    #   the account box), both are needed to filter them.
    # - OCR accuracy is the key factor; the login button may need its edges
    #   cropped before OCR.
    # - Collect all candidates in one traversal, then filter by position.
    # - Rectangle detection over the whole image can find the login button
    #   before OCR; WebView UIs should go straight to shape detection + OCR.
    # - Account and password boxes are found by OCR rather than by the
    #   unreliable widget-class attributes of the XML.

    # Only leaf nodes are OCR-ed. list(n) yields the direct children; it
    # replaces getchildren(), which newer ElementTree versions no longer
    # provide.
    # NOTE(review): assumes nodes are ElementTree-style elements — confirm.
    leaf_node_list = [n for n in node_list if len(list(n)) == 0]

    account_candidate = []
    pwd_candidate = []
    login_candidate = []
    for node in leaf_node_list:
        coord = node.get('bounds')
        if coord is None:
            continue
        # Set the language explicitly on every pass. The previous code
        # switched to Chinese on the first miss and never switched back, so
        # later widgets were never tried in English.
        # NOTE(review): assumes set_language(False) selects English, like
        # the constructor flag — confirm.
        for use_chinese in (False, True):
            img_proc.set_language(use_chinese)
            flag_account = img_proc.ocr_detect_match(False, screen_img_color,
                                                     coord, USER_ACCOUNT_KEY)
            flag_pwd = img_proc.ocr_detect_match(False, screen_img_color,
                                                 coord, PWD_KEY)
            flag_login = img_proc.ocr_detect_match(True, screen_img_color,
                                                   coord, LOGON_KEY)
            if flag_account:
                account_candidate.append(node)
            elif flag_pwd:
                pwd_candidate.append(node)
            elif flag_login:
                login_candidate.append(node)
            else:
                # Nothing recognised with this character set; try the next.
                continue
            break

    # print(...) with one argument behaves the same on Python 2 and 3.
    print('Account Candidates:')
    for candidate in account_candidate:
        print(candidate.get('bounds'))
    print('Password Candidates:')
    for candidate in pwd_candidate:
        print(candidate.get('bounds'))
    print('Login Candidates:')
    for candidate in login_candidate:
        print(candidate.get('bounds'))
def test_parseLine():
    """parseLine should return quoted tag/text/tag strings plus the attribute dict."""
    line = '<div test="attribute">hello</div>'
    expected = ('"div"', '"hello"', '"div"', {'test': 'attribute'})

    parsed = xml_parser.XmlParser([]).parseLine(line)
    assert parsed == expected