class Data(object):
    """Collect image and page links from HTML text and download the images.

    Attributes:
        folder_path: Prefix prepended verbatim to each downloaded file name.
            No path separator is inserted, so it should normally end with one.
        page_parse: A ``PageParse`` instance used by ``parse_pages``.
        content: Value last passed to ``define_content`` (``None`` initially).
    """

    def __init__(self, folder_path):
        """Store the download prefix and create the page parser."""
        self.folder_path = folder_path
        self.page_parse = PageParse()
        self.content = None

    def define_content(self, data_format):
        """Record ``data_format`` as the current content description."""
        self.content = data_format

    def parse_pages(self, pages):
        """Feed ``pages`` to the ``PageParse`` helper and return its result.

        NOTE: this logic used to live in a first ``parsePage`` definition
        that was overridden by the second one of the same name and was
        therefore unreachable. It is kept under a distinct name so that
        ``parsePage`` retains its effective (regex based) behaviour.
        """
        self.page_parse.add_page(pages)
        return self.page_parse.parse()

    def parsePage(self, page):
        """Extract ``.jpg`` sources and ``.html`` links from ``page``.

        Args:
            page: HTML text to scan.

        Returns:
            A ``(img_list, html_list)`` tuple: the values of ``src="..."``
            attributes ending in ``.jpg`` and of ``href="..."`` attributes
            ending in ``.html``, each in order of appearance.
        """
        # Raw strings so that ``\.`` reaches the regex engine as an escaped
        # dot instead of being an invalid string escape.
        img_list = re.findall(r'src="(.+?\.jpg)"', page)
        html_list = re.findall(r'href="(.+?\.html)"', page)
        return img_list, html_list

    def download(self, file_list):
        """Download every URL in ``file_list`` to ``folder_path`` + file name.

        Args:
            file_list: Sized collection of image URLs. The text after the
                last ``/`` of each URL is used as the local file name.
        """
        print("Contains images: {} ".format(len(file_list)))
        for image_url in file_list:
            image_name = image_url.split('/')[-1]
            request.urlretrieve(image_url, self.folder_path + image_name)
def setup_class(cls):
    """Parse the page-source fixture once and keep its root on the class.

    Reads ``testdata/pageSource/pageSource.xml`` as UTF-8, builds a
    ``PageParse`` from the text and stores its ``current_page_root`` in
    ``cls.root``.
    """
    with open('testdata/pageSource/pageSource.xml', 'r', encoding='utf-8') as f:
        # One read yields the same text as concatenating every line.
        page_xml = f.read()
    page = PageParse(page_xml)
    cls.root = page.current_page_root
def get_page_info(self):
    """Return a ``PageParse`` for the page currently shown on the device.

    If no page source can be obtained, the device back button is pressed,
    the page source is requested once more, and the whole procedure is
    retried recursively. A ``FunctionTimedOut`` during that second request
    relaunches the app; a ``WebDriverException`` re-initialises appium.
    Any other exception propagates to the caller.

    Returns:
        page: PageParse class instance built from the current page source
            and the current activity.
    """
    current_page_source = self.get_page_source()
    if current_page_source:
        current_activity = self.driver.get_current_activity()
        return PageParse(current_page_source, current_activity)

    self.driver.click_device_back()
    # test: if after click back, still timeout, relaunch app.
    try:
        self.__get_page_source()
    except FunctionTimedOut:
        self.driver.launch_app()
        log.warning('Get page source timeout, relaunch app.')
    except WebDriverException:
        log.warning('refresh appium driver')
        self.init_appium()
    # Retry after the recovery step. This deliberately is not a ``return``
    # inside ``finally``: that form discards any in-flight exception, so
    # unexpected errors would be silently swallowed and retried.
    return self.get_page_info()
def test_page_parse_equal_str_error(self, path):
    """Expect ``XMLSyntaxError`` when a page is compared with a non-XML string.

    Loading the XML, constructing the ``PageParse`` and the comparison all
    run inside the ``pytest.raises`` block.
    NOTE(review): presumably the comparison against ``"test"`` is the
    statement that raises - confirm against ``PageParse``.
    """
    invalid_xml = "test"
    with pytest.raises(XMLSyntaxError):
        parsed_page = PageParse(self.get_page_xml(path))
        assert parsed_page != invalid_xml
def test_page_parse_not_equal_str(self, path):
    """A ``PageParse`` built from ``path[0]`` must differ from the XML of ``path[1]``."""
    first_xml = self.get_page_xml(path[0])
    second_xml = self.get_page_xml(path[1])
    assert PageParse(first_xml) != second_xml
def test_page_parse_equal_str(self, path):
    """A ``PageParse`` must compare equal to the XML string it was built from."""
    xml_text = self.get_page_xml(path)
    assert PageParse(xml_text) == xml_text
def test_page_parse_init(self):
    """Smoke test: build a ``PageParse`` from the fixture XML and print it."""
    fixture_xml = self.get_page_xml('testdata/pageSource/pageSource.xml')
    print(PageParse(fixture_xml))
def __init__(self, folder_path):
    """Set up the instance.

    Args:
        folder_path: Stored unchanged on ``self.folder_path``.
    """
    self.folder_path = folder_path
    # A fresh parser per instance; constructed with no arguments.
    self.page_parse = PageParse()
    # Nothing has been defined as content yet.
    self.content = None