Esempio n. 1
0
def test_open_file():
    """Build a Parser from an open file handle and check its basic outputs."""
    fixture = os.path.join(TEST_DIR, 'empty.html')
    with open(fixture) as handle:
        parser = Parser(doc=handle)

    # The parser is inspected after the file has been closed.
    assert_true(parser.__doc__ is not None)
    # NOTE: type(...) never evaluates to None, so this check cannot fail.
    assert_true(type(parser) is not None)
    assert_true(type(parser.to_dict()) is dict)
Esempio n. 2
0
def test_person_with_url():
    """Check the name and url properties parsed from person_with_url.html."""
    # Open the fixture in a context manager so the handle is closed as soon
    # as the parser has been built, instead of leaking until collection.
    with open("test/examples/person_with_url.html") as f:
        p = Parser(doc=f)
    result = p.to_dict()
    properties = result["items"][0]["properties"]
    assert_equal(properties["name"], ['Tom Morris'])
    assert_equal(properties["url"], ['http://tommorris.org/'])
Esempio n. 3
0
def test_person_with_url():
    """Check the name and url properties parsed from person_with_url.html."""
    # A context manager closes the fixture deterministically; the bare
    # open() call previously left the handle open.
    with open("test/examples/person_with_url.html") as f:
        p = Parser(doc=f)
    result = p.to_dict()
    properties = result["items"][0]["properties"]
    assert_equal(properties["name"], ['Tom Morris'])
    assert_equal(properties["url"], ['http://tommorris.org/'])
Esempio n. 4
0
def parse():
    """Parse the page named by the request's ``url`` parameter.

    The parameter is read from the query string for GET requests and from
    the form body otherwise. The parser output is returned as pretty-printed
    JSON in a 200 response.
    """
    params = request.args if request.method == 'GET' else request.form
    target = params['url']
    print(target)
    # NOTE(review): `unicode` is a Python 2 builtin; confirm the runtime.
    parser = Parser(url=unicode(target))
    payload = parser.to_json(pretty_print=True)
    return Response(payload, status=200, mimetype='application/json')
Esempio n. 5
0
def parse():
    """Return the microformats parse of the requested ``url`` as JSON.

    GET requests supply ``url`` in the query string; any other method
    supplies it in the form data.
    """
    source = request.args if request.method == 'GET' else request.form
    requested_url = source['url']
    print(requested_url)
    # NOTE(review): `unicode` is a Python 2 builtin; confirm the runtime.
    parser = Parser(url=unicode(requested_url))
    body = parser.to_json(pretty_print=True)
    return Response(body, status=200, mimetype='application/json')
Esempio n. 6
0
def test_doc_tag():
    """Parser accepts a raw string, a BeautifulSoup document and a single tag."""
    markup = '''<article class="h-entry"></article>'''
    soup = BeautifulSoup(markup)

    # Raw markup string.
    from_string = Parser(markup).to_dict()
    assert 'h-entry' in from_string['items'][0]['type']

    # Whole BeautifulSoup document.
    from_soup = Parser(soup).to_dict()
    assert 'h-entry' in from_soup['items'][0]['type']

    # A single tag taken from the document.
    from_tag = Parser(soup.article).to_dict()
    assert 'h-entry' in from_tag['items'][0]['type']
Esempio n. 7
0
def test_photo_with_alt():
    """Confirm that img alt text is parsed when the img_with_alt feature flag
    is set, both for u-* properties and for the implied photo.

    The same fixture is parsed twice: `result` without the flag and
    `exp_result` with `img_with_alt=True`. The assertions compare the two.
    """

    path = 'experimental/img_with_alt.html'

    # without flag
    result = parse_fixture(path)

    # experimental img_with_alt=True
    with open(os.path.join(TEST_DIR, path)) as f:
        exp_result = Parser(doc=f, html_parser='html5lib', img_with_alt=True).to_dict()

    # simple img with u-*
    # item 0: the photo is a plain string in both parses
    assert_equal('/photo.jpg', result['items'][0]['properties']['photo'][0])
    assert_equal('/photo.jpg', exp_result['items'][0]['properties']['photo'][0])

    # item 1: with the flag the url entry is a dict carrying 'value' and 'alt'
    assert_equal('/photo.jpg', result['items'][1]['properties']['url'][0])
    assert_equal('/photo.jpg', exp_result['items'][1]['properties']['url'][0]['value'])
    assert_equal('alt text', exp_result['items'][1]['properties']['url'][0]['alt'])

    # item 2: same shape as item 1, but the alt text is the empty string
    assert_equal('/photo.jpg', result['items'][2]['properties']['in-reply-to'][0])
    assert_equal('/photo.jpg', exp_result['items'][2]['properties']['in-reply-to'][0]['value'])
    assert_equal('', exp_result['items'][2]['properties']['in-reply-to'][0]['alt'])

    # img with u-* and h-* example
    # item 3: nested h-cite has no 'alt' key in either parse
    assert_true('h-cite' in result['items'][3]['properties']['in-reply-to'][0]['type'])
    assert_equal('/photo.jpg', result['items'][3]['properties']['in-reply-to'][0]['properties']['photo'][0])
    assert_equal('/photo.jpg', result['items'][3]['properties']['in-reply-to'][0]['value'])
    assert_false('alt' in result['items'][3]['properties']['in-reply-to'][0])

    assert_true('h-cite' in exp_result['items'][3]['properties']['in-reply-to'][0]['type'])
    assert_equal('/photo.jpg', exp_result['items'][3]['properties']['in-reply-to'][0]['properties']['photo'][0])
    assert_equal('/photo.jpg', exp_result['items'][3]['properties']['in-reply-to'][0]['value'])
    assert_false('alt' in exp_result['items'][3]['properties']['in-reply-to'][0])

    # item 4: without the flag there is no 'alt'; with it, both the nested
    # photo and the h-cite itself carry 'alt text'
    assert_true('h-cite' in result['items'][4]['properties']['in-reply-to'][0]['type'])
    assert_equal('/photo.jpg', result['items'][4]['properties']['in-reply-to'][0]['properties']['photo'][0])
    assert_equal('/photo.jpg', result['items'][4]['properties']['in-reply-to'][0]['value'])
    assert_false('alt' in result['items'][4]['properties']['in-reply-to'][0])

    assert_true('h-cite' in exp_result['items'][4]['properties']['in-reply-to'][0]['type'])
    assert_equal('/photo.jpg', exp_result['items'][4]['properties']['in-reply-to'][0]['properties']['photo'][0]['value'])
    assert_equal('/photo.jpg', exp_result['items'][4]['properties']['in-reply-to'][0]['value'])
    assert_equal('alt text', exp_result['items'][4]['properties']['in-reply-to'][0]['properties']['photo'][0]['alt'])
    assert_equal('alt text', exp_result['items'][4]['properties']['in-reply-to'][0]['alt'])

    # item 5: same shape as item 4, but the alt text is the empty string
    assert_true('h-cite' in result['items'][5]['properties']['in-reply-to'][0]['type'])
    assert_equal('/photo.jpg', result['items'][5]['properties']['in-reply-to'][0]['properties']['photo'][0])
    assert_equal('/photo.jpg', result['items'][5]['properties']['in-reply-to'][0]['value'])
    assert_false('alt' in result['items'][5]['properties']['in-reply-to'][0])

    assert_true('h-cite' in exp_result['items'][5]['properties']['in-reply-to'][0]['type'])
    assert_equal('/photo.jpg', exp_result['items'][5]['properties']['in-reply-to'][0]['properties']['photo'][0]['value'])
    assert_equal('/photo.jpg', exp_result['items'][5]['properties']['in-reply-to'][0]['value'])
    assert_equal('', exp_result['items'][5]['properties']['in-reply-to'][0]['properties']['photo'][0]['alt'])
    assert_equal('', exp_result['items'][5]['properties']['in-reply-to'][0]['alt'])
Esempio n. 8
0
def test_input_tree_integrity():
    """Parsing a BS4 soup must not leak modifications into that soup.

    For every fixture, the soup is serialised before and after a Parser is
    built from it, and a labelled comparison of the two is yielded.
    """
    for path in get_all_files():
        fixture = os.path.join(TEST_DIR, path)
        with open(fixture) as handle:
            soup = BeautifulSoup(handle, features='lxml')
            before = soup.prettify()
            # Built only for its potential side effects on `soup`.
            parser = Parser(doc=soup, html_parser='lxml')
            after = soup.prettify()
        yield make_labelled_cmp("tree_integrity_" + path), before, after
Esempio n. 9
0
def test_user_agent(getter):
    """Check the default User-Agent, that it is sent, and that it can be overridden.

    `getter` is configured here to return a stub response; it is presumably
    the patched HTTP getter injected by a decorator outside this snippet.
    """
    ua_expect = 'mf2py - microformats2 parser for python'
    assert_true(Parser.useragent.startswith(ua_expect))

    # Remember the real default. It may be longer than the prefix asserted
    # above, so resetting to the bare prefix would corrupt shared class state.
    original_useragent = Parser.useragent
    try:
        resp = mock.MagicMock()
        resp.content = b''
        resp.text = ''
        resp.headers = {}
        getter.return_value = resp

        Parser(url='http://example.com')
        getter.assert_called_with('http://example.com',
                                  headers={'User-Agent': Parser.useragent})

        Parser.useragent = 'something else'
        assert_equal(Parser.useragent, 'something else')
    finally:
        # Restore the class attribute even if an assertion above fails, so a
        # failure here does not affect later tests.
        Parser.useragent = original_useragent
Esempio n. 10
0
def test_doc_tag_backcompat():
    """Strings, BS documents and BS tags all parse with backcompat applied.

    The markup uses the `hentry` class and each parse must report `h-entry`.
    For BS inputs the parser's document must be neither the same object as
    the input nor equal to it.
    """
    markup = '''<article class="hentry"></article>'''
    soup = BeautifulSoup(markup)

    # Raw markup string.
    from_string = Parser(markup).to_dict()
    assert_true('h-entry' in from_string['items'][0]['type'])

    # Whole BeautifulSoup document.
    doc_parser = Parser(soup)
    assert_true('h-entry' in doc_parser.to_dict()['items'][0]['type'])
    assert_false(soup is doc_parser.__doc__)
    assert_false(soup == doc_parser.__doc__)

    # A single tag taken from the document.
    tag_parser = Parser(soup.article)
    assert_true('h-entry' in tag_parser.to_dict()['items'][0]['type'])
    assert_false(soup.article is tag_parser.__doc__)
    assert_false(soup.article == tag_parser.__doc__)
Esempio n. 11
0
def test_implied_name_empty_alt():
    """An img with empty alt text must not stop the remaining children
    from contributing to the implied name.
    """
    markup = """
<a class="h-card" href="https://twitter.com/kylewmahan">
  <img src="https://example.org/test.jpg" alt="">
  @kylewmahan
</a>"""
    expected = {
        'type': ['h-card'],
        'properties': {
            'name': ['@kylewmahan'],
            'url': ['https://twitter.com/kylewmahan'],
            'photo': ['https://example.org/test.jpg'],
        },
    }

    parsed = Parser(doc=markup).to_dict()
    first_item = parsed['items'][0]

    assert_equal(expected, first_item)
Esempio n. 12
0
def test_complex_e_content():
    """For an element that is both h-* and e-*, the "value" and "html" keys
    should sit directly on the parsed microformat object rather than under
    an extra nested "value" layer.
    """
    markup = """<!DOCTYPE html><div class="h-entry">
<div class="h-card e-content"><p>Hello</p></div></div>"""

    nested_card = {
        "type": ["h-card"],
        "properties": {
            "name": ["Hello"]
        },
        "html": "<p>Hello</p>",
        "value": "Hello"
    }
    expected_entry = {
        "type": ["h-entry"],
        "properties": {
            "content": [nested_card],
            "name": ["Hello"]
        }
    }

    parsed = Parser(doc=markup).to_dict()

    assert_equal(expected_entry, parsed["items"][0])
Esempio n. 13
0
def parse_fixture(path, url=None):
    """Parse a fixture under ``test/examples/`` with html5lib and return the dict.

    `path` is joined onto the examples directory; `url` is passed through to
    the Parser unchanged.
    """
    fixture = os.path.join("test/examples/", path)
    with open(fixture) as handle:
        return Parser(doc=handle, url=url, html_parser='html5lib').to_dict()
Esempio n. 14
0
def test_base():
    """The parser's URL for base.html must be http://tantek.com/."""
    base_fixture = os.path.join(TEST_DIR, 'base.html')
    with open(base_fixture) as handle:
        parser = Parser(doc=handle)

    assert_equal(parser.__url__, "http://tantek.com/")
Esempio n. 15
0
def test_base():
    """The parser's URL for base.html must be http://tantek.com/."""
    # Close the fixture deterministically; a bare open() leaks the handle.
    with open("test/examples/base.html") as f:
        p = Parser(doc=f)
    assert_equal(p.__url__, "http://tantek.com/")
Esempio n. 16
0
def test_empty():
    """A Parser built with no arguments still produces a dict."""
    parser = Parser()
    # NOTE: type(...) never evaluates to None, so this check cannot fail.
    assert_true(type(parser) is not None)
    parsed = parser.to_dict()
    assert_true(type(parsed) is dict)
Esempio n. 17
0
def parse_fixture(path, url=None):
    """Parse a fixture under TEST_DIR with html5lib and return the dict.

    `path` is relative to TEST_DIR; `url` is passed through to the Parser
    unchanged.
    """
    fixture = os.path.join(TEST_DIR, path)
    with open(fixture) as handle:
        return Parser(doc=handle, url=url, html_parser='html5lib').to_dict()
Esempio n. 18
0
def test_simple_person_reference_implied():
    """The first item's only property must be the name 'Frances Berriman'."""
    # Close the fixture deterministically; a bare open() leaks the handle.
    with open("test/examples/simple_person_reference_implied.html") as f:
        p = Parser(doc=f)
    result = p.to_dict()
    assert_equal(result["items"][0]["properties"],
                 {'name': ['Frances Berriman']})
Esempio n. 19
0
# coding: utf-8

from mf2py import Parser
import os.path
import glob
import json

# Smoke run: for every JSON file matched below, parse the sibling .html file
# and check that the parser's JSON output can be loaded again.
pattern = os.path.join('.', 'tests', 'tests', '**', '**', '*.json')
allfiles = glob.glob(pattern)
for jsonfile in allfiles:
    # Swap the trailing "json" for "html", keeping the dot.
    htmlfile = jsonfile[:-4] + 'html'
    with open(htmlfile) as f:
        serialized = Parser(doc=f).to_json(pretty_print=True)
        p = json.loads(serialized)
Esempio n. 20
0
def test_open_file():
    """Build a Parser from an open file handle and check its basic outputs."""
    # Close the fixture deterministically; a bare open() leaks the handle.
    with open("test/examples/empty.html") as f:
        p = Parser(doc=f)
    assert_true(p.__doc__ is not None)
    assert_true(type(p) is not None)
    assert_true(type(p.to_dict()) is dict)
Esempio n. 21
0
def test_simple_person_reference_implied():
    """The first item's only property must be the name 'Frances Berriman'."""
    # The context manager releases the fixture handle once the parser is
    # built, instead of leaving it open.
    with open("test/examples/simple_person_reference_implied.html") as f:
        p = Parser(doc=f)
    result = p.to_dict()
    assert_equal(result["items"][0]["properties"],
                 {'name': ['Frances Berriman']})
Esempio n. 22
0
def test_open_file():
    """Build a Parser from an open file handle and check its basic outputs."""
    # The context manager releases the fixture handle once the parser is
    # built, instead of leaving it open.
    with open("test/examples/empty.html") as f:
        p = Parser(doc=f)
    assert_true(p.__doc__ is not None)
    assert_true(type(p) is not None)
    assert_true(type(p.to_dict()) is dict)
Esempio n. 23
0
def parse_fixture(path, url=None):
    """Load ``test/examples/<path>`` and return its html5lib parse as a dict.

    `url` is passed through to the Parser unchanged.
    """
    example = os.path.join("test/examples/", path)
    with open(example) as source:
        parser = Parser(doc=source, url=url, html_parser='html5lib')
        parsed = parser.to_dict()
    return parsed