Example #1
0
def test_content_fields():
    """Check that ``fields(PROFILE)`` yields the expected title and header lists."""
    expected_fields = {
        'title': ['Test content title'],
        'header': ['Test content header'],
    }

    page = Content(BASE_URL, CONTENT)
    assert page.fields(PROFILE) == expected_fields
Example #2
0
def test_content_select_xpath():
    """Check xpath selection of text nodes, attributes and whole elements."""
    # Each entry pairs an xpath query with the list extract() should produce.
    cases = (
        ('//title/text()', ['Test content title']),
        ('//a/@href', ['link1', 'link2', 'link3', 'link4', 'link5', 'link6']),
        ('//p/a/@href', ['link4', 'link5', 'link6']),
        ('//p/a', [
            '<a href="link4">Link4</a>',
            '<a href="link5">Link5</a>',
            '<a href="link6">Link6</a>',
        ]),
    )

    page = Content(BASE_URL, CONTENT)
    for query, expected in cases:
        assert page.select(query, method='xpath').extract() == expected
Example #3
0
def test_content_fields():
    """Extract fields using PROFILE and compare against the known values."""
    extracted = Content(BASE_URL, CONTENT).fields(PROFILE)

    assert extracted == {
        'title': ['Test content title'],
        'header': ['Test content header'],
    }
Example #4
0
def test_content_remove_xpath():
    """Remove head, p and body via xpath and expect a bare html element."""
    without_head = Content(BASE_URL, CONTENT).remove('head', method='xpath')
    # The result of remove() must itself be a Content for the calls below.
    assert isinstance(without_head, Content)

    stripped = without_head
    for target in ('p', 'body'):
        stripped = stripped.remove(target, method='xpath')

    # Newlines are dropped before comparing against the expected markup.
    flattened = stripped.extract().replace('\n', '')
    assert flattened == u'<html></html>'
Example #5
0
def test_content_select_css():
    """Check that both CSS selectors extract the same title element."""
    expected = ['<title>Test content title</title>']

    page = Content(BASE_URL, CONTENT)
    for selector in ('title', 'html title'):
        assert page.select(selector, method='css').extract() == expected
Example #6
0
def test_content_create():
    """Check that Content builds from valid arguments and rejects invalid ones.

    A base URL paired with an HTML string must produce a ``Content``
    instance. Each argument pair listed below is expected to raise
    ``RuntimeError`` from the constructor.
    """
    content = Content(BASE_URL, "<p>Test content</p>")
    assert isinstance(content, Content)

    # (base_url, content) pairs the constructor is expected to reject.
    invalid_arguments = [
        (None, "<p>Test content</p>"),  # base URL is None
        ([BASE_URL], None),             # base URL is a list, content is None
        (BASE_URL, None),               # content is None
        (BASE_URL, [CONTENT]),          # content is a list
    ]

    for base_url, markup in invalid_arguments:
        # Nothing is bound here: the call is expected to raise, so an
        # assignment would never complete, and the exception info is unused.
        with pytest.raises(RuntimeError):
            Content(base_url, markup)
Example #7
0
def test_content_remove_css():
    """Remove head, p and body via CSS selectors and expect a bare html element."""
    headless = Content(BASE_URL, CONTENT).remove('head', method='css')
    # The result of remove() must itself be a Content so it can be chained.
    assert isinstance(headless, Content)

    emptied = headless.remove('p', method='css').remove('body', method='css')

    # Newlines are dropped before comparing against the expected markup.
    markup = emptied.extract().replace('\n', '')
    assert markup == u'<html></html>'
Example #8
0
def test_content_select_xpath():
    """Run four xpath queries and compare each extraction to known values."""
    page = Content(BASE_URL, CONTENT)

    # Text node of the title element.
    title_text = page.select('//title/text()', method='xpath').extract()
    assert title_text == ['Test content title']

    # href attributes of every anchor.
    all_hrefs = page.select('//a/@href', method='xpath').extract()
    assert all_hrefs == ['link1', 'link2', 'link3', 'link4', 'link5', 'link6']

    # href attributes of anchors that are direct children of a paragraph.
    paragraph_hrefs = page.select('//p/a/@href', method='xpath').extract()
    assert paragraph_hrefs == ['link4', 'link5', 'link6']

    # The same anchors, extracted as whole elements.
    paragraph_anchors = page.select('//p/a', method='xpath').extract()
    assert paragraph_anchors == [
        '<a href="link4">Link4</a>',
        '<a href="link5">Link5</a>',
        '<a href="link6">Link6</a>',
    ]
Example #9
0
def test_content_select_css():
    """Select the title with a bare and a descendant CSS selector."""
    page = Content(BASE_URL, CONTENT)

    by_tag = page.select('title', method='css').extract()
    assert by_tag == ['<title>Test content title</title>']

    by_descendant = page.select('html title', method='css').extract()
    assert by_descendant == ['<title>Test content title</title>']
Example #10
0
def test_content_make_links_absolute():
    """Check that a relative href is rewritten to sit under BASE_URL."""
    expected_markup = u'<a href="%s/link">Test link</a>' % BASE_URL

    page = Content(BASE_URL, '<a href="link">Test link</a>')
    # Called for its effect on ``page``; the return value is not used.
    page.make_links_absolute()

    assert page.extract() == expected_markup
Example #11
0
def test_content_transform():
    """Check the markup produced by transforming the content with PROFILE."""
    expected = '<div><p>Test content header</p></div>'

    transformed = Content(BASE_URL, CONTENT).transform(PROFILE)
    # Newlines are removed before comparing against the expected markup.
    flattened = transformed.extract().replace('\n', '')

    assert flattened == expected
Example #12
0
def test_content_make_links_absolute():
    """Make the link in a one-anchor document absolute and check the markup."""
    document = Content(BASE_URL, '<a href="link">Test link</a>')
    # The return value is discarded; the test reads the result off ``document``.
    document.make_links_absolute()

    rendered = document.extract()
    assert rendered == u'<a href="%s/link">Test link</a>' % BASE_URL
Example #13
0
def test_content_transform():
    """Transform the content with PROFILE and compare the newline-free markup."""
    source = Content(BASE_URL, CONTENT)
    result = source.transform(PROFILE)

    rendered = result.extract()
    # Strip newlines before comparing against the expected markup.
    single_line = rendered.replace('\n', '')

    assert single_line == '<div><p>Test content header</p></div>'