def test_grabber(httpserver: HTTPServer):
    """Check the exceptions raised by ``LinkGrabber.get_content``.

    A set of endpoints is registered on the local test server; each one is
    then fetched with a grabber configured so that the request is expected
    to raise a specific exception.
    """
    sample = get_sample("generic/h1-p-desc.html")

    class FakeResponse(Response):
        # Turn off the base class's ``automatically_set_content_length`` flag.
        automatically_set_content_length = False

    def stall(_request):
        # Sleeps 0.6s, longer than the 0.5s initial timeout used below.
        return sleep(0.6)

    def expect_failure(error, path, **grabber_options):
        # Build a grabber with the given options and require that fetching
        # ``path`` from the test server raises ``error``.
        grabber = LinkGrabber(**grabber_options)
        with pytest.raises(error):
            grabber.get_content(httpserver.url_for(path))

    # --- endpoint registration -------------------------------------------
    httpserver.expect_request("/h1-p-desc").respond_with_data(
        sample,
        headers={"content-type": "text/html"},
    )
    httpserver.expect_request("/lazy").respond_with_handler(stall)
    httpserver.expect_request("/huge").respond_with_response(
        FakeResponse(response=b"x" * 100000, mimetype='text/html')
    )
    httpserver.expect_request("/badmime").respond_with_data(
        "{}",
        headers={"content-type": "application/json"},
    )
    httpserver.expect_request("/nomime").respond_with_response(
        Response(mimetype="")
    )
    httpserver.expect_request("/large").respond_with_response(
        FakeResponse(
            mimetype="text/html",
            headers={"content-length": "100000"},
        )
    )

    # --- expectations (same order as the requests were originally made) ---
    # Regular HTML sample fetched with maxsize=100: size error expected.
    expect_failure(exceptions.MaximumContentSizeError, "/h1-p-desc",
                   maxsize=100)

    # Initial timeout: the handler sleeps longer than initial_timeout.
    expect_failure(ReadTimeout, "/lazy", initial_timeout=0.5)

    # Receive timeout: 100000-byte body read with chunk_size=1.
    expect_failure(TimeoutError, "/huge", receive_timeout=0.1, chunk_size=1)

    # Maxsize: 100000-byte body against maxsize=20.
    expect_failure(exceptions.MaximumContentSizeError, "/huge",
                   receive_timeout=10000, chunk_size=1024, maxsize=20)

    # Large: content-length header of 100000 against maxsize=100.
    expect_failure(exceptions.MaximumContentSizeError, "/large", maxsize=100)

    # No mimetype on the response.
    expect_failure(exceptions.InvalidContentError, "/nomime")

    # Non-HTML mimetype (application/json).
    expect_failure(exceptions.InvalidContentError, "/badmime")
def get_samples(sample_names: typing.Iterable[str]) -> SampleGenerator:
    """Lazily load samples by name.

    Arguments:
        sample_names {typing.Iterable[str]} -- names passed to ``get_sample``

    Returns:
        SampleGenerator -- yields ``get_sample(name)`` for each name, in order
    """
    yield from (get_sample(name) for name in sample_names)
def get_sample_sentence_ids(sample_names: typing.Iterable[str]) -> StringList:
    """Build a friendly ID string for every sentence of the named samples.

    Each ID has the form ``'src: <sample name>, sent: <sentence id>'``; the
    collected list is passed through ``pad_to_longest`` before being returned.

    Arguments:
        sample_names {typing.Iterable[str]} -- names passed to ``get_sample``

    Returns:
        StringList -- result of ``pad_to_longest`` applied to the IDs
    """
    labels = [
        'src: {}, sent: {}'.format(loaded.name, sent.id)
        for loaded in map(get_sample, sample_names)
        for sent in loaded.sentences
    ]  # type: StringList
    return pad_to_longest(labels)
def get_sample_sentences(sample_names: StringList) -> SampleSentenceGenerator:
    """Iterate over every sentence of every named sample.

    Arguments:
        sample_names {StringList} -- names of samples

    Returns:
        SampleSentenceGenerator -- yields ``(sample, sentence)`` pairs, one
        per sentence, with samples visited in the order given
    """
    for loaded in map(get_sample, sample_names):
        yield from ((loaded, sent) for sent in loaded.sentences)
def test_twitter_card(tin, tout):
    """Compare ``TwitterCard`` attributes against expected values.

    ``tin`` names a sample under ``twitter-card/``; ``tout`` maps attribute
    names to the values the preview is expected to expose.
    """
    content = get_sample("twitter-card/%s" % tin)
    preview = TwitterCard(
        Link("http://localhost", content=content),
        parser="html.parser",
    )
    for attribute, expected in tout.items():
        assert getattr(preview, attribute) == expected
def test_opengraph(tin, tout):
    """Compare ``OpenGraph`` attributes against expected values.

    ``tin`` names a sample under ``open-graph/``; ``tout`` maps attribute
    names to the values the preview is expected to expose.
    """
    content = get_sample("open-graph/%s" % tin)
    preview = OpenGraph(
        Link("http://localhost", content=content),
        parser="html.parser",
    )
    for attribute, expected in tout.items():
        assert getattr(preview, attribute) == expected
def test_link_preview(httpserver: HTTPServer):
    """Exercise ``link_preview`` against served pages and inline content."""
    # Two HTML endpoints backed by sample files, plus one JSON endpoint.
    httpserver.expect_request("/preview1").respond_with_data(
        get_sample("twitter-card/with-image.html"),
        headers={"content-type": "text/html"},
    )
    httpserver.expect_request("/preview2").respond_with_data(
        get_sample("generic/h1-img.html"),
        headers={"content-type": "text/html"},
    )
    httpserver.expect_request("/preview-3.json").respond_with_data(
        '{}',
        headers={"content-type": "application/json"},
    )

    # /preview1: title and image are checked; absolute_image is expected to
    # be the page URL followed by the image value.
    url = httpserver.url_for("/preview1")
    preview = link_preview(url)
    assert preview.title == "a title"
    assert preview.force_title == "a title"
    assert preview.description is None
    assert preview.image == "/img/heck.jpg"
    assert preview.absolute_image == "%s%s" % (url, preview.image)

    # /preview2: generic sample with an h1 title.
    url = httpserver.url_for("/preview2")
    preview = link_preview(url)
    assert preview.title == "This title is from the first h1 tag."
    assert preview.description is None
    # NOTE(review): the next statement looks damaged in this copy of the file
    # (masked "*****" placeholders and an unbalanced closing parenthesis); it
    # presumably fuses an image assertion with a following
    # ``link_preview(<url with credentials>, content="OK")`` call -- restore
    # the original text from version control before relying on this test.
    assert preview.image == "http://*****:*****@abc.com/the-bunny(720p)", content="OK")
    assert preview.force_title == "abc.com/the-bunny(720p)"

    # Inline content ("OK"): force_title is expected to equal the URL with
    # its scheme removed.
    preview = link_preview("https://192.168.1.1", content="OK")
    assert preview.force_title == "192.168.1.1"
    preview = link_preview("https://192.168.1.1:9696", content="OK")
    assert preview.force_title == "192.168.1.1:9696"

    # JSON endpoint: every preview field is None, while force_title is
    # still expected to be "Preview 3".
    preview = link_preview(httpserver.url_for('/preview-3.json'))
    assert preview.title is None
    assert preview.description is None
    assert preview.image is None
    assert preview.absolute_image is None
    assert preview.force_title == "Preview 3"
def test_generic(tin, tout):
    """Compare ``Generic`` attributes against expected values.

    ``tin`` names a sample under ``generic/``; ``tout`` maps attribute names
    to the values the preview is expected to expose.
    """
    content = get_sample("generic/%s" % tin)
    preview = Generic(
        Link("http://localhost", content=content),
        parser="html.parser",
    )
    for attribute, expected in tout.items():
        assert getattr(preview, attribute) == expected