import os
import time
import pickle as pkl
from datetime import datetime
from random import randint

from requests.exceptions import ConnectionError  # raised by try_response() on network failure

# bnext_parser, try_response(), print_time(), _RETRY_LIMIT and
# _pseudo_request_response_dict are defined elsewhere in this module.


def generate_parser_page_testcase(ground_input):
    generating_time = datetime.now()
    ground_truth = []
    global _pseudo_request_response_dict
    for i, url in enumerate(ground_input):
        # ====================================================================
        # First, get the testcase ground truth
        # ====================================================================
        print('({}/{}) {}'.format(i + 1, len(ground_input), url))
        retry = 0
        while retry < _RETRY_LIMIT:
            try:
                ret = bnext_parser.parser_page(url)
                ground_truth.append(ret)
                break
            except ConnectionError:
                retry += 1
                print('({}/{}) retrying...'.format(retry, _RETRY_LIMIT))
                time.sleep(randint(10, 15))
        assert retry < _RETRY_LIMIT, "maximum retry limit reached"

        # ====================================================================
        # Next, record the responses needed for mocking requests.get()
        # ====================================================================
        utility_string1 = ('https://graph.facebook.com/fql?q=SELECT%20like_count,'
                           '%20total_count,%20share_count,%20click_count,'
                           '%20commentsbox_count%20FROM%20link_stat%20WHERE'
                           '%20url%20=%20%22{}%22')  # + '&limit=3' (limit is just for testing)
        utility_string2 = ('https://graph.facebook.com/comments?id={}'
                           '&filter=stream&fields=parent.fields(id),message,'
                           'from,created_time,like_count{}')
        res = try_response(url)
        _pseudo_request_response_dict[url] = res  # was mistakenly keyed by the literal string 'url'
        res_fb1 = try_response(utility_string1.format(url))
        _pseudo_request_response_dict[utility_string1.format(url)] = res_fb1
        # Walk the Facebook comments pagination, recording every page.
        suffix = ''
        while True:
            res_fb2 = try_response(utility_string2.format(url, suffix))
            _pseudo_request_response_dict[utility_string2.format(url, suffix)] = res_fb2
            data = res_fb2.json()['data']
            if len(data) == 0:
                break
            paging = res_fb2.json()['paging']
            if 'next' not in paging:
                break
            suffix = paging['next']
            suffix = suffix[suffix.find('&after='):]

    # ========================================================================
    # Dump the testcases to file
    # ========================================================================
    with open('./bnext/resources/testcase/parser_page_testcase.pkl', 'wb') as f:
        obj = {'ground_input': ground_input,
               'ground_truth': ground_truth,
               'generating_time': generating_time}
        pkl.dump(obj, f)
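# The dictionary built above only pays off if a test can serve the recorded
# responses instead of hitting the network. Below is a minimal sketch of that
# replay, assuming bnext_parser ultimately calls requests.get(url) and that
# _pseudo_request_response_dict is keyed by the exact request URL; the names
# patched_get and run_offline are illustrative, not part of the original code.
from unittest import mock


def patched_get(url, *args, **kwargs):
    # Serve the response captured by generate_parser_page_testcase();
    # an unrecorded URL raises KeyError, which makes the gap obvious.
    return _pseudo_request_response_dict[url]


def run_offline(url):
    # While patched, every requests.get() inside parser_page() is answered
    # from the recorded dictionary rather than the live site.
    with mock.patch('requests.get', side_effect=patched_get):
        return bnext_parser.parser_page(url)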
def _test_parser_page(test_file):
    print("\n======================= parser page ==========================\n")
    print("".join([
        "Testing parser_page(); don't worry if you see some logs on the fly,\n",
        "they come from the webpage analysis itself. A mismatch against the\n",
        "ground truth is reported as 'test failed: <url>'\n"
    ]))
    if not os.path.isfile(test_file):
        print("".join([
            "Error: can't find test_file: {}, please check the filename or ",
            "generate a new test_file\n"
        ]).format(test_file))
        return False
    with open(test_file, 'rb') as f:
        obj = pkl.load(f)
    print_time(obj)
    ground_input = obj['ground_input']
    ground_truth = obj['ground_truth']
    for i, url in enumerate(ground_input):
        print('({}/{}) {}'.format(i + 1, len(ground_input), url))
        retry = 0
        while retry < _RETRY_LIMIT:
            try:
                ret = bnext_parser.parser_page(url)
                break
            except ConnectionError:
                retry += 1
                print('({}/{}) retrying...'.format(retry, _RETRY_LIMIT))
                time.sleep(randint(10, 15))
        # Mirror the guard in generate_parser_page_testcase(); without it,
        # ret would be unbound after an exhausted retry loop.
        assert retry < _RETRY_LIMIT, "maximum retry limit reached"
        if ret != ground_truth[i]:
            print('test failed: {}\n'.format(url))
            return False
        # time.sleep(randint(1, 3))
    print('\nSuccess')
    return True
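# A hypothetical driver showing the intended workflow: record the ground truth
# once, then verify parser_page() against it. The sample URL is illustrative
# only and not taken from the original code.
if __name__ == '__main__':
    urls = ['https://www.bnext.com.tw/article/12345/example']  # hypothetical
    generate_parser_page_testcase(urls)
    assert _test_parser_page('./bnext/resources/testcase/parser_page_testcase.pkl')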