def test_curseforge_mod_page(response):
    """
    Check the requests and the partially built item that
    :class:`CurseforgeSpider` produces for a sample curseforge mod page.

    The spider returns two requests, one used to fetch mod files and the
    other to complete the mod item generation by adding the license. Both
    requests must carry the same item in their ``meta``.
    """
    crawler = CurseforgeSpider()
    crawler._follow_links = True
    requests = list(crawler.parse_mod_page(response))

    # resolve the expected urls against the url of the sample page
    expected = []
    for target in expected_urls_mod:
        expected.append(urlparse.urljoin(response.url, target))

    # check request urls
    assert_parse_requests(requests, expected)

    # check request meta: both requests share the item built so far
    assert requests[0].meta["item"] == requests[1].meta["item"]

    # check the item extracted so far
    mod = requests[0].meta["item"]
    expected_description = (
        "Description FAQ\nA link "
        "http://link_a Minefactory Reloaded end."
    )
    assert mod["name"] == "Tinkers Construct"
    assert mod["description"] == expected_description
    assert mod["created"] == date(2014, 2, 8)
    assert mod["updated"] == date(2015, 5, 10)
    assert mod["downloads"] == 1000

    # categories and authors are compared regardless of extraction order
    expected_categories = sorted(
        ["armor", "weapons", "tools", "technology", "processing"]
    )
    assert sorted(mod["categories"]) == expected_categories
    expected_authors = sorted(["mDiyo", "boni", "jadedcat"])
    assert sorted(mod["authors"]) == expected_authors

    assert mod["source_url"] == "https://github.com/source_url"
    assert mod["donation_url"] == "https://www.paypal.com/donation_url"
    assert mod["mod_url"] == "http://foo.org"
def test_curseforge_index_urls(response):
    """
    Check the urls that :class:`CurseforgeSpider` extracts from the
    curseforge index sample.
    """
    crawler = CurseforgeSpider()
    crawler._follow_links = True
    requests = iter(crawler.parse(response))

    # resolve the expected urls against the url of the sample page
    expected = []
    for target in expected_urls_index:
        expected.append(urlparse.urljoin(response.url, target))

    assert_parse_requests(requests, expected)
def test_curseforge_mod_list_page(response):
    """
    Check the urls that :class:`CurseforgeSpider` extracts from a sample
    curseforge mod list page.

    Pagination is active and this extracts data from the first page.
    """
    crawler = CurseforgeSpider()
    crawler._follow_links = True
    requests = iter(crawler.parse_mod_list_page(response))

    # resolve the expected urls against the url of the sample page
    expected = []
    for target in expected_urls_list_page:
        expected.append(urlparse.urljoin(response.url, target))

    assert_parse_requests(requests, expected)
def test_curseforge_mod_license_and_finalization(response):
    """
    Check :class:`CurseforgeSpider` mod license parsing, which is also the
    finishing step of the mod item parsing.

    The response carries an empty :class:`ModItem` in its ``meta``; exactly
    one result is expected back, with its license filled in.
    """
    crawler = CurseforgeSpider()
    crawler._follow_links = True
    response.meta["item"] = ModItem()

    results = list(crawler.parse_mod_license(response))
    assert len(results) == 1

    finished = results[0]
    assert finished["mod_license"] == "Creative Commons Full Text"