import os
import time
import pickle as pkl
from datetime import datetime
from random import randint

from requests.exceptions import ConnectionError  # raised by try_response() on network failure

# bnext_parser, try_response(), print_time(), _RETRY_LIMIT and
# _pseudo_request_response_dict are defined elsewhere in this module.


def generate_parser_page_testcase(ground_input):
    generating_time = datetime.now()
    ground_truth = []
    global _pseudo_request_response_dict
    for i, url in enumerate(ground_input):
        # ====================================================================
        # First, get the testcase ground truth
        # ====================================================================
        print('({}/{}) {}'.format(i + 1, len(ground_input), url))
        retry = 0
        while retry < _RETRY_LIMIT:
            try:
                ret = bnext_parser.parser_page(url)
                ground_truth.append(ret)
                break
            except ConnectionError:
                retry += 1
                print('({}/{}) retrying...'.format(retry, _RETRY_LIMIT))
                time.sleep(randint(10, 15))
        assert retry < _RETRY_LIMIT, "maximum retry limit reached"

        # ====================================================================
        # Next, record the responses needed for mocking requests.get()
        # ====================================================================
        utility_string1 = ('https://graph.facebook.com/fql?q=SELECT%20like_count,'
                           '%20total_count,%20share_count,%20click_count,'
                           '%20commentsbox_count%20FROM%20link_stat%20WHERE'
                           '%20url%20=%20%22{}%22')  # + '&limit=3' (limit is just for testing)
        utility_string2 = ('https://graph.facebook.com/comments?id={}'
                           '&filter=stream&fields=parent.fields(id),message,'
                           'from,created_time,like_count{}')
        res = try_response(url)
        _pseudo_request_response_dict[url] = res  # was mistakenly keyed by the literal string 'url'
        res_fb1 = try_response(utility_string1.format(url))
        _pseudo_request_response_dict[utility_string1.format(url)] = res_fb1
        # Walk the Facebook comments pagination, recording every page.
        suffix = ''
        while True:
            res_fb2 = try_response(utility_string2.format(url, suffix))
            _pseudo_request_response_dict[utility_string2.format(url, suffix)] = res_fb2
            data = res_fb2.json()['data']
            if len(data) == 0:
                break
            paging = res_fb2.json()['paging']
            if 'next' not in paging:
                break
            suffix = paging['next']
            suffix = suffix[suffix.find('&after='):]

    # ========================================================================
    # Dump the testcases to file
    # ========================================================================
    with open('./bnext/resources/testcase/parser_page_testcase.pkl', 'wb') as f:
        obj = {'ground_input': ground_input,
               'ground_truth': ground_truth,
               'generating_time': generating_time}
        pkl.dump(obj, f)
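# The dictionary built above only pays off if a test can serve the recorded
# responses instead of hitting the network. Below is a minimal sketch of that
# replay, assuming bnext_parser ultimately calls requests.get(url) and that
# _pseudo_request_response_dict is keyed by the exact request URL; the names
# patched_get and run_offline are illustrative, not part of the original code.
from unittest import mock


def patched_get(url, *args, **kwargs):
    # Serve the response captured by generate_parser_page_testcase();
    # an unrecorded URL raises KeyError, which makes the gap obvious.
    return _pseudo_request_response_dict[url]


def run_offline(url):
    # While patched, every requests.get() inside parser_page() is answered
    # from the recorded dictionary rather than the live site.
    with mock.patch('requests.get', side_effect=patched_get):
        return bnext_parser.parser_page(url)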
def _test_parser_page(test_file):
    print("\n======================= parser page ==========================\n")
    print("".join([
        "Testing parser_page(); don't worry if you see some logs on the fly,\n",
        "they come from the webpage analysis itself. A mismatch against the\n",
        "ground truth is reported as 'test failed: <url>'\n"
    ]))
    if not os.path.isfile(test_file):
        print("".join([
            "Error: can't find test_file: {}, please check the filename or ",
            "generate a new test_file\n"
        ]).format(test_file))
        return False
    with open(test_file, 'rb') as f:
        obj = pkl.load(f)
    print_time(obj)
    ground_input = obj['ground_input']
    ground_truth = obj['ground_truth']
    for i, url in enumerate(ground_input):
        print('({}/{}) {}'.format(i + 1, len(ground_input), url))
        retry = 0
        while retry < _RETRY_LIMIT:
            try:
                ret = bnext_parser.parser_page(url)
                break
            except ConnectionError:
                retry += 1
                print('({}/{}) retrying...'.format(retry, _RETRY_LIMIT))
                time.sleep(randint(10, 15))
        # Mirror the guard in generate_parser_page_testcase(); without it,
        # ret would be unbound after an exhausted retry loop.
        assert retry < _RETRY_LIMIT, "maximum retry limit reached"
        if ret != ground_truth[i]:
            print('test failed: {}\n'.format(url))
            return False
        # time.sleep(randint(1, 3))
    print('\nSuccess')
    return True
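# A hypothetical driver showing the intended workflow: record the ground truth
# once, then verify parser_page() against it. The sample URL is illustrative
# only and not taken from the original code.
if __name__ == '__main__':
    urls = ['https://www.bnext.com.tw/article/12345/example']  # hypothetical
    generate_parser_page_testcase(urls)
    assert _test_parser_page('./bnext/resources/testcase/parser_page_testcase.pkl')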