Example #1
import os
import sys
import time
import pickle as pkl
from random import randint

# Assumed to be requests' ConnectionError, since the parser fetches pages
# over HTTP via requests.
from requests.exceptions import ConnectionError

# bnext_parser, print_time and _RETRY_LIMIT are provided by the
# surrounding test module.


def _test_get_category_urls(test_file):

    print("\n====================== category urls =========================\n")
    print("".join([
        "Testing get_category_urls(); don't worry if you see some logs on \n",
        "the fly, they are there for the purpose of analyzing the webpage. \n",
        "A test failure will be shown by [assert]\n"
    ]))

    if not os.path.isfile(test_file):
        print("".join([
            "Error: can't find test_file: {}, please check the filename or",
            " generate a new test_file\n"
        ]).format(test_file))
        return False

    # Test cases are pickled, so open the file in binary mode.
    with open(test_file, 'rb') as f:
        obj = pkl.load(f)
    print_time(obj)

    ground_input = obj['ground_input']
    ground_truth = obj['ground_truth']

    for i, url in enumerate(ground_input):
        retry = 0
        while retry < _RETRY_LIMIT:
            try:
                ret = bnext_parser.get_category_urls(url,
                                                     back_counting_offset=3)
                # Only the last 40 category URLs are recorded as ground truth.
                ret = ret[-40:]
                if ret != ground_truth[i]:
                    print('test failed: {}\n'.format(url))
                    return False
                break
            except ConnectionError:
                # Back off for a random interval before retrying.
                retry += 1
                print('({}/{}) retrying...'.format(retry, _RETRY_LIMIT))
                time.sleep(randint(10, 15))
        else:
            # The retry limit was exhausted without a successful fetch.
            print('test failed (connection): {}\n'.format(url))
            return False

        sys.stdout.write('.')
        # time.sleep(1)

    print('\nSuccess')
    return True
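
A minimal sketch of how this test might be driven, assuming the module is run as a script and the fixture lives at the path written by generate_get_category_testcase() in Example #2 (the __main__ driver itself is hypothetical):

if __name__ == '__main__':
    # Hypothetical driver; the path mirrors the one used by
    # generate_get_category_testcase() below.
    testcase = './bnext/resources/testcase/get_category_urls_testcase.pkl'
    ok = _test_get_category_urls(testcase)
    sys.exit(0 if ok else 1)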
Example #2
import pickle as pkl
from datetime import datetime

import requests
from bs4 import BeautifulSoup

# bnext_parser, try_response and _pseudo_request_response_dict are provided
# by the surrounding test module.


def generate_get_category_testcase(ground_input):
    global _pseudo_request_response_dict

    generating_time = datetime.now()
    ground_truth = []

    for url in ground_input:
        ret = bnext_parser.get_category_urls(url, back_counting_offset=3)
        # Only the last 40 category URLs are kept as the expected output.
        ground_truth.append(ret[-40:])

# ========================================================================================
# Getting the responses needed for mocking requests.get()
# ========================================================================================
        res = requests.get(url)
        _pseudo_request_response_dict[url] = res
        prefix = 'http://www.bnext.com.tw'
        soup = BeautifulSoup(res.content, 'html.parser')
        page_list = soup.find('ul', 'pagination')
        # The last pagination link points at the final page of the category.
        last_page = page_list.find_all('a')[-1]['href']
        midfix = '?p='
        last_page = int(last_page.split('=')[-1]) + 1
        starting_page = last_page - 3

        # Also record the responses for the last three pages.
        for page in range(starting_page, last_page):
            res = try_response(url + midfix + str(page))
            _pseudo_request_response_dict[url + midfix + str(page)] = res

# ========================================================================================
# Dumping testcases to file
# ========================================================================================

    obj = {'ground_input': ground_input, 'ground_truth': ground_truth,
           'generating_time': generating_time}

    testcase_path = './bnext/resources/testcase/get_category_urls_testcase.pkl'

    # Pickled objects must be written in binary mode.
    with open(testcase_path, 'wb') as f:
        pkl.dump(obj, f)

    return ground_truth
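
The responses collected in _pseudo_request_response_dict are meant for replaying requests.get() offline, as the comment above notes. A minimal sketch of that mocking step, assuming the dictionary has already been populated; pseudo_requests_get and run_offline_test are hypothetical names:

from unittest import mock


def pseudo_requests_get(url, *args, **kwargs):
    # Serve the previously recorded response instead of hitting the network.
    return _pseudo_request_response_dict[url]


def run_offline_test(test_file):
    # Patch requests.get so get_category_urls() reads the recorded responses.
    with mock.patch('requests.get', side_effect=pseudo_requests_get):
        return _test_get_category_urls(test_file)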