from pytest import fixture
from lxml import etree as _etree

from libextract.core import parse_html
from tests import asset_path

# Path to the HTML asset shared by the file-based fixtures below.
FOO = asset_path('full_of_foos.html')


@fixture
def element():
    """Return an element parsed from a small inline ``<tag>`` document.

    The document has a ``tag`` root with two ``nest`` children whose
    texts are ``Hi`` and ``Bye``.
    """
    markup = '<tag><nest>Hi</nest><nest>Bye</nest></tag>'
    return _etree.fromstring(markup)


@fixture
def foo_file(request):
    """Return the ``FOO`` asset opened in binary mode.

    The handle's ``close`` is registered as a finalizer on ``request``,
    so the test using this fixture does not close it itself.
    """
    handle = open(FOO, 'rb')
    request.addfinalizer(handle.close)
    return handle


@fixture
def etree():
    """Return the result of ``parse_html`` on the raw bytes of ``FOO``."""
    with open(FOO, 'rb') as handle:
        raw = handle.read()
    # The bytes are fully read, so parsing does not need the open handle.
    return parse_html(raw)
import pytest

# NOTE(review): ``extract`` and ``Goose`` are not referenced in the visible
# portion of this module; kept because other code may rely on them.
from libextract.api import extract
from goose import Goose
from tests import asset_path

# Path to the asset served by the ``foo_file`` fixture.
TEST_FILENAME = asset_path('test_file')


@pytest.fixture
def foo_file(request):
    """Return ``TEST_FILENAME`` opened in binary mode.

    The handle's ``close`` is registered as a finalizer on ``request``,
    so the test using this fixture does not close it itself.
    """
    handle = open(TEST_FILENAME, 'rb')
    request.addfinalizer(handle.close)
    return handle
import pytest

from tests import asset_path
from libextract.core import parse_html

# Path to the HTML asset shared by both fixtures below.
FOOS_FILENAME = asset_path('full_of_foos.html')


@pytest.fixture
def foo_file(request):
    """Return ``FOOS_FILENAME`` opened in binary mode.

    The handle's ``close`` is registered as a finalizer on ``request``,
    so the test using this fixture does not close it itself.
    """
    handle = open(FOOS_FILENAME, 'rb')
    request.addfinalizer(handle.close)
    return handle


@pytest.fixture
def etree():
    """Return the result of ``parse_html`` on the open asset file.

    The file object itself (not its bytes) is handed to ``parse_html``
    together with ``encoding='utf8'``; the call happens while the file
    is still open.
    """
    with open(FOOS_FILENAME, 'rb') as handle:
        tree = parse_html(handle, encoding='utf8')
    return tree
from unittest import TestCase

from tests import asset_path
from libextract.html import parse_html

# Path to the HTML asset parsed before each test run.
FOO_ASSET = asset_path('full_of_foos.html')


class TestParseHtml(TestCase):
    """Check ``parse_html`` against the ``full_of_foos.html`` asset."""

    def setUp(self):
        """Parse the asset from an open binary file into ``self.etree``."""
        with open(FOO_ASSET, 'rb') as handle:
            tree = parse_html(handle)
        self.etree = tree

    def runTest(self):
        """Assert every ``//body/article/div`` reads ``foo.`` and there are 9."""
        matches = self.etree.xpath('//body/article/div')
        texts_ok = all(node.text == 'foo.' for node in matches)
        # Text check comes first so the failure order matches the original.
        assert texts_ok
        assert len(matches) == 9