def test_open_file():
    """Build a Parser from an open handle on empty.html and sanity-check it."""
    fixture = os.path.join(TEST_DIR, 'empty.html')
    with open(fixture) as handle:
        parser = Parser(doc=handle)
        # The parser must hold a document and serialise to a plain dict.
        assert_true(parser.__doc__ is not None)
        assert_true(type(parser) is not None)
        as_dict = parser.to_dict()
        assert_true(type(as_dict) is dict)
def test_person_with_url():
    """Check the name and url properties parsed from person_with_url.html."""
    # Use a context manager so the fixture handle is closed deterministically,
    # even when parsing raises; a bare open() leaves it to the garbage
    # collector.
    with open("test/examples/person_with_url.html") as f:
        result = Parser(doc=f).to_dict()

    properties = result["items"][0]["properties"]
    assert_equal(properties["name"], ['Tom Morris'])
    assert_equal(properties["url"], ['http://tommorris.org/'])
def parse():
    """Parse the page named by the ``url`` request parameter and return JSON.

    The parameter is read from the query string for GET requests and from
    the form body otherwise.  The response carries the parser's
    pretty-printed JSON with status 200 and an ``application/json`` mimetype.
    """
    params = request.args if request.method == 'GET' else request.form
    u = params['url']
    print(u)
    # NOTE(review): `unicode` is a Python 2 builtin -- confirm the target
    # interpreter (or a compatibility alias) before running under Python 3.
    parser = Parser(url=unicode(u))
    body = parser.to_json(pretty_print=True)
    return Response(body, status=200, mimetype='application/json')
def test_doc_tag():
    """Strings, BeautifulSoup documents and BeautifulSoup tags all parse."""
    markup = '''<article class="h-entry"></article>'''
    soup = BeautifulSoup(markup)

    # raw string input
    from_string = Parser(markup).to_dict()
    assert 'h-entry' in from_string['items'][0]['type']

    # whole BeautifulSoup document
    from_soup = Parser(soup).to_dict()
    assert 'h-entry' in from_soup['items'][0]['type']

    # a single tag taken from the document
    from_tag = Parser(soup.article).to_dict()
    assert 'h-entry' in from_tag['items'][0]['type']
def test_photo_with_alt():
    """Compare img parsing with and without the ``img_with_alt`` flag.

    The same fixture is parsed twice: once through ``parse_fixture`` (flag
    not passed) and once with ``img_with_alt=True``.  The assertions pin
    where the flagged result reports ``value``/``alt`` entries while the
    unflagged result reports the bare URL.
    """
    path = 'experimental/img_with_alt.html'

    def props(parsed, index):
        # properties dict of the index-th top-level item
        return parsed['items'][index]['properties']

    def reply(parsed, index):
        # first in-reply-to value of the index-th top-level item
        return props(parsed, index)['in-reply-to'][0]

    # without flag
    result = parse_fixture(path)

    # experimental img_with_alt=True
    with open(os.path.join(TEST_DIR, path)) as f:
        exp_result = Parser(doc=f, html_parser='html5lib', img_with_alt=True).to_dict()

    # simple img with u-*
    assert_equal('/photo.jpg', props(result, 0)['photo'][0])
    assert_equal('/photo.jpg', props(exp_result, 0)['photo'][0])

    assert_equal('/photo.jpg', props(result, 1)['url'][0])
    assert_equal('/photo.jpg', props(exp_result, 1)['url'][0]['value'])
    assert_equal('alt text', props(exp_result, 1)['url'][0]['alt'])

    assert_equal('/photo.jpg', reply(result, 2))
    assert_equal('/photo.jpg', reply(exp_result, 2)['value'])
    assert_equal('', reply(exp_result, 2)['alt'])

    # img with u-* and h-* example
    # -- item 3
    assert_true('h-cite' in reply(result, 3)['type'])
    assert_equal('/photo.jpg', reply(result, 3)['properties']['photo'][0])
    assert_equal('/photo.jpg', reply(result, 3)['value'])
    assert_false('alt' in reply(result, 3))

    assert_true('h-cite' in reply(exp_result, 3)['type'])
    assert_equal('/photo.jpg', reply(exp_result, 3)['properties']['photo'][0])
    assert_equal('/photo.jpg', reply(exp_result, 3)['value'])
    assert_false('alt' in reply(exp_result, 3))

    # -- item 4
    assert_true('h-cite' in reply(result, 4)['type'])
    assert_equal('/photo.jpg', reply(result, 4)['properties']['photo'][0])
    assert_equal('/photo.jpg', reply(result, 4)['value'])
    assert_false('alt' in reply(result, 4))

    assert_true('h-cite' in reply(exp_result, 4)['type'])
    assert_equal('/photo.jpg', reply(exp_result, 4)['properties']['photo'][0]['value'])
    assert_equal('/photo.jpg', reply(exp_result, 4)['value'])
    assert_equal('alt text', reply(exp_result, 4)['properties']['photo'][0]['alt'])
    assert_equal('alt text', reply(exp_result, 4)['alt'])

    # -- item 5
    assert_true('h-cite' in reply(result, 5)['type'])
    assert_equal('/photo.jpg', reply(result, 5)['properties']['photo'][0])
    assert_equal('/photo.jpg', reply(result, 5)['value'])
    assert_false('alt' in reply(result, 5))

    assert_true('h-cite' in reply(exp_result, 5)['type'])
    assert_equal('/photo.jpg', reply(exp_result, 5)['properties']['photo'][0]['value'])
    assert_equal('/photo.jpg', reply(exp_result, 5)['value'])
    assert_equal('', reply(exp_result, 5)['properties']['photo'][0]['alt'])
    assert_equal('', reply(exp_result, 5)['alt'])
def test_input_tree_integrity():
    """Yield one comparison per fixture showing the input soup is untouched.

    For every file from ``get_all_files()`` the soup is prettified before
    and after it is handed to ``Parser``; the two renderings are yielded
    together with a labelled comparison callable.
    """
    for path in get_all_files():
        fixture = os.path.join(TEST_DIR, path)
        with open(fixture) as f:
            soup = BeautifulSoup(f, features='lxml')
            before = soup.prettify()
            # Constructed only for its effect (if any) on `soup`.
            p = Parser(doc=soup, html_parser='lxml')
            after = soup.prettify()
        label = "tree_integrity_" + path
        yield make_labelled_cmp(label), before, after
def test_user_agent(getter):
    """Check the default User-Agent, that it is sent, and that it can be set.

    Args:
        getter: mock standing in for the HTTP getter; its ``return_value``
            is set here and its call arguments are asserted.
            NOTE(review): presumably injected by a ``mock.patch`` decorator
            outside this block -- confirm.

    ``Parser.useragent`` is class-level state, so the value it had on entry
    is saved and restored in ``finally``.  This keeps a failing assertion
    from leaking ``'something else'`` into other tests, and restores the
    real default rather than a hard-coded prefix of it.
    """
    ua_expect = 'mf2py - microformats2 parser for python'
    saved_useragent = Parser.useragent
    try:
        assert_true(Parser.useragent.startswith(ua_expect))

        # Minimal fake response: empty body and no headers.
        resp = mock.MagicMock()
        resp.content = b''
        resp.text = ''
        resp.headers = {}
        getter.return_value = resp

        Parser(url='http://example.com')
        getter.assert_called_with('http://example.com',
                                  headers={'User-Agent': Parser.useragent})

        Parser.useragent = 'something else'
        assert_equal(Parser.useragent, 'something else')
    finally:
        # Always put back whatever was there before this test ran.
        Parser.useragent = saved_useragent
def test_doc_tag_backcompat():
    """Backcompat parsing works on strings, soups and tags without aliasing.

    For BeautifulSoup inputs the parser's ``__doc__`` must be neither the
    same object as the input nor equal to it.
    """
    markup = '''<article class="hentry"></article>'''
    soup = BeautifulSoup(markup)

    # raw string
    from_string = Parser(markup).to_dict()
    assert_true('h-entry' in from_string['items'][0]['type'])

    # whole document
    doc_parser = Parser(soup)
    assert_true('h-entry' in doc_parser.to_dict()['items'][0]['type'])
    assert_false(soup is doc_parser.__doc__)
    assert_false(soup == doc_parser.__doc__)

    # single tag
    tag_parser = Parser(soup.article)
    assert_true('h-entry' in tag_parser.to_dict()['items'][0]['type'])
    assert_false(soup.article is tag_parser.__doc__)
    assert_false(soup.article == tag_parser.__doc__)
def test_implied_name_empty_alt():
    """An img with empty alt must not hide sibling text from the implied name."""
    markup = (
        ' <a class="h-card" href="https://twitter.com/kylewmahan">'
        ' <img src="https://example.org/test.jpg" alt="">'
        ' @kylewmahan </a>'
    )
    parsed = Parser(doc=markup).to_dict()
    hcard = parsed['items'][0]

    expected = {
        'type': ['h-card'],
        'properties': {
            'name': ['@kylewmahan'],
            'url': ['https://twitter.com/kylewmahan'],
            'photo': ['https://example.org/test.jpg'],
        },
    }
    assert_equal(expected, hcard)
def test_complex_e_content():
    """An ``h-* e-*`` property keeps "html" and "value" on the nested item.

    The expected structure has both keys directly on the nested h-card
    object rather than under an additional "value" layer.
    """
    markup = (
        '<!DOCTYPE html><div class="h-entry">'
        ' <div class="h-card e-content"><p>Hello</p></div></div>'
    )
    result = Parser(doc=markup).to_dict()

    nested_card = {
        "type": ["h-card"],
        "properties": {"name": ["Hello"]},
        "html": "<p>Hello</p>",
        "value": "Hello",
    }
    expected = {
        "type": ["h-entry"],
        "properties": {
            "content": [nested_card],
            "name": ["Hello"],
        },
    }
    assert_equal(expected, result["items"][0])
def parse_fixture(path, url=None):
    """Parse a fixture under ``test/examples/`` with html5lib.

    Args:
        path: fixture path relative to ``test/examples/``.
        url: optional URL forwarded to ``Parser``.

    Returns:
        The result of ``Parser.to_dict()`` for the fixture.
    """
    fixture = os.path.join("test/examples/", path)
    with open(fixture) as handle:
        return Parser(doc=handle, url=url, html_parser='html5lib').to_dict()
def test_base():
    """The parser's ``__url__`` for base.html must be http://tantek.com/."""
    fixture = os.path.join(TEST_DIR, 'base.html')
    with open(fixture) as handle:
        parser = Parser(doc=handle)
        assert_equal(parser.__url__, "http://tantek.com/")
def test_base():
    """The parser's ``__url__`` for base.html must be http://tantek.com/."""
    # Open the fixture in a context manager so the handle is always closed;
    # a bare open() leaves it to the garbage collector.
    with open("test/examples/base.html") as f:
        p = Parser(doc=f)
        assert_equal(p.__url__, "http://tantek.com/")
def test_empty():
    """A Parser built with no arguments still serialises to a dict."""
    parser = Parser()
    assert_true(type(parser) is not None)
    as_dict = parser.to_dict()
    assert_true(type(as_dict) is dict)
def parse_fixture(path, url=None):
    """Parse a fixture under ``TEST_DIR`` with html5lib.

    Args:
        path: fixture path relative to ``TEST_DIR``.
        url: optional URL forwarded to ``Parser``.

    Returns:
        The result of ``Parser.to_dict()`` for the fixture.
    """
    fixture = os.path.join(TEST_DIR, path)
    with open(fixture) as handle:
        return Parser(doc=handle, url=url, html_parser='html5lib').to_dict()
def test_simple_person_reference_implied():
    """The first item's only property is the name 'Frances Berriman'."""
    # Open the fixture in a context manager so the handle is always closed;
    # a bare open() leaves it to the garbage collector.
    with open("test/examples/simple_person_reference_implied.html") as f:
        result = Parser(doc=f).to_dict()

    assert_equal(result["items"][0]["properties"],
                 {'name': ['Frances Berriman']})
# coding: utf-8
"""Run the parser over the HTML companion of every JSON fixture.

Each ``*.json`` file matched under ``./tests/tests/`` has a sibling
``*.html`` file; that file is parsed and the parser's JSON output is decoded
again.
"""
from mf2py import Parser
import os.path
import glob
import json

pattern = os.path.join('.', 'tests', 'tests', '**', '**', '*.json')
allfiles = glob.glob(pattern)

for jsonfile in allfiles:
    # swap the trailing "json" for "html" (the dot is kept by the slice)
    htmlfile = jsonfile[:-4] + 'html'
    with open(htmlfile) as f:
        serialized = Parser(doc=f).to_json(pretty_print=True)
        p = json.loads(serialized)
def test_open_file():
    """Build a Parser from an open handle on empty.html and sanity-check it."""
    # Open the fixture in a context manager so the handle is always closed;
    # a bare open() leaves it to the garbage collector.
    with open("test/examples/empty.html") as f:
        p = Parser(doc=f)
        assert_true(p.__doc__ is not None)
        assert_true(type(p) is not None)
        assert_true(type(p.to_dict()) is dict)