def _test_get_category_urls(test_file): print("\n====================== category urls =========================\n") print("".join([ "Testing get_category_urls(), don't warry if you see some log on \n", "the fly, they are for the porpose of analyzing webpage, the fail \n", "of testing would be shown by [assert]\n" ])) if os.path.isfile(test_file) is False: print("".join([ "Error: can't find test_file: {}, please check filename or", " generate new test_file\n" ]).format(test_file)) return False f = open(test_file) obj = pkl.load(f) print_time(obj) ground_input = obj['ground_input'] ground_truth = obj['ground_truth'] for i, url in enumerate(ground_input): retry = 0 while retry < _RETRY_LIMIT: try: ret = bnext_parser.get_category_urls(url, back_counting_offset=3) ret = ret[-40:] if ret != ground_truth[i]: print('test failed: {}\n'.format(url)) return False break except ConnectionError: retry += 1 print('({}/{}) retrying...'.format(retry, _RETRY_LIMIT)) time.sleep(randint(10, 15)) sys.stdout.write('.') # time.sleep(1) return True print('\nSuccess')
def generate_get_category_testcase(
        ground_input,
        test_file='./bnext/resources/testcase/get_category_urls_testcase.pkl'):
    """Build and pickle a test case for bnext_parser.get_category_urls().

    For every url in ground_input the parser is called with
    back_counting_offset=3 and the last 40 entries of its result are kept
    as the expected value.  The responses needed for mocking
    requests.get() are stored in the module-level
    _pseudo_request_response_dict: the response for the url itself, plus
    the responses for the three '?p=N' pages ending at the page number
    found in the last pagination link.

    Args:
        ground_input: sequence of category urls.
        test_file: path the pickled test case is written to.

    Returns:
        The list of expected results, one entry per url in ground_input.
    """
    generating_time = datetime.now()
    ground_truth = []
    for url in ground_input:
        ret = bnext_parser.get_category_urls(url, back_counting_offset=3)
        ground_truth.append(ret[-40:])

        # ====================================================================
        # Getting needed responses for "mocking requests.get()"
        # ====================================================================
        res = requests.get(url)
        _pseudo_request_response_dict[url] = res

        soup = BeautifulSoup(res.content)
        page_list = soup.find('ul', 'pagination')
        # The href of the last pagination link ends with '=<page number>'.
        last_href = page_list.findAll('a')[-1]['href']
        midfix = '?p='
        end_page = int(last_href.split('=')[-1]) + 1
        for page in range(end_page - 3, end_page):
            page_url = url + midfix + str(page)
            _pseudo_request_response_dict[page_url] = try_response(page_url)

    # ========================================================================
    # Dumping testcases to file
    # ========================================================================
    obj = {'ground_input': ground_input,
           'ground_truth': ground_truth,
           'generating_time': generating_time}
    # Pickle data is binary, so the file must be opened in binary mode.
    with open(test_file, 'wb') as f:
        pkl.dump(obj, f)
    return ground_truth