def parsed_node_no_splash():
    """Return the record parsed from an entry without a splash page URL.

    The fake listing response contains a single document that only carries
    a ``json_uri`` key, so there is no ``uri`` key for the spider to use as
    a splash page.

    Returns:
        The ``record`` attribute of the first item yielded by
        ``BrownSpider.parse``.
    """
    spider = brown_spider.BrownSpider()
    body = """ { "items": { "docs": [ { "json_uri": "https://repository.library.brown.edu/api/pub/items/bdr:11303/" } ] } } """
    response = fake_response_from_string(body)
    jsonresponse = json.loads(response.body_as_unicode())
    jsonrecord = jsonresponse["items"]["docs"][0]
    response.meta["jsonrecord"] = jsonrecord

    # The built-in next() works for both Python 2 and Python 3 iterators,
    # unlike the Python-2-only ``.next()`` method.
    parsed_item = next(spider.parse(response))
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record
def record():
    """Scrape the stored Brown splash page and return the spider's result.

    The first document of ``brown/test_1.json`` is pointed at the local
    splash page file and attached to the splash response as
    ``meta["jsonrecord"]`` before ``scrape_splash`` is called.
    """
    brown = brown_spider.BrownSpider()

    listing = fake_response_from_file('brown/test_1.json')
    first_doc = json.loads(listing.body_as_unicode())["items"]["docs"][0]
    first_doc["uri"] = "brown/test_splash.html"

    splash_page = fake_response_from_file('brown/test_splash.html')
    splash_page.meta["jsonrecord"] = first_doc
    return brown.scrape_splash(splash_page)
def parsed_node():
    """Return the first result of parsing a full record.

    The first document of ``brown/test_1.json`` is attached to the response
    as ``meta["jsonrecord"]`` before the spider parses it.

    Returns:
        The first object yielded by ``BrownSpider.parse``; it is expected
        to be a Scrapy Request object.
    """
    spider = brown_spider.BrownSpider()
    response = fake_response_from_file('brown/test_1.json')
    jsonresponse = json.loads(response.body_as_unicode())
    jsonrecord = jsonresponse["items"]["docs"][0]
    response.meta["jsonrecord"] = jsonrecord

    # The built-in next() works for both Python 2 and Python 3 iterators,
    # unlike the Python-2-only ``.next()`` method.
    return next(spider.parse(response))
def no_year_no_author():
    """Return a fake response whose thesis note has no year and no author.

    The HTML body holds a single "Notes" entry reading
    ``Thesis (Ph.D. -- Brown University``, i.e. the thesis info line
    carries neither a year nor an author name.

    Returns:
        Whatever ``fake_response_from_string`` builds from that HTML body.
    """
    body = """ <html> <div class="panel-body"> <dl class=""> <dt>Notes</dt> <dd>Thesis (Ph.D. -- Brown University</dd> </dl> </div> </html> """
    return fake_response_from_string(body)
def parsed_node():
    """Return the first result of parsing a full record.

    The first document of ``brown/test_1.json`` is attached to the response
    as ``meta["jsonrecord"]``. HEAD requests to the record's PDF URL are
    mocked to answer with ``Content-Type: application/pdf`` while the
    spider parses the response.

    Returns:
        The first object yielded by ``BrownSpider.parse``; it is expected
        to be a Scrapy Request object.
    """
    spider = brown_spider.BrownSpider()
    response = fake_response_from_file('brown/test_1.json')
    jsonresponse = json.loads(response.body_as_unicode())
    jsonrecord = jsonresponse["items"]["docs"][0]
    response.meta["jsonrecord"] = jsonrecord

    with requests_mock.Mocker() as mock:
        mock.head(
            'https://repository.library.brown.edu/studio/item/bdr:11303/PDF/',
            headers={
                'Content-Type': 'application/pdf',
            },
        )
        # Advance the iterator while the mock is still active. The built-in
        # next() works for both Python 2 and Python 3 iterators, unlike
        # the Python-2-only ``.next()`` method.
        return next(spider.parse(response))
def record():
    """Scrape the stored Brown splash page and return the parsed record.

    HEAD requests to the example.com PDF URL are mocked to answer with
    ``Content-Type: text/html`` for the duration of the scrape. The first
    document of ``brown/test_1.json`` is pointed at the local splash page
    file and attached to the splash response as ``meta["jsonrecord"]``.
    """
    brown = brown_spider.BrownSpider()

    with requests_mock.Mocker() as http_mock:
        http_mock.head(
            'http://www.example.com/studio/item/bdr:11303/PDF/',
            headers={'Content-Type': 'text/html'},
        )

        listing = fake_response_from_file('brown/test_1.json')
        first_doc = json.loads(listing.body_as_unicode())["items"]["docs"][0]
        first_doc["uri"] = "brown/test_splash.html"

        splash_page = fake_response_from_file('brown/test_splash.html')
        splash_page.meta["jsonrecord"] = first_doc

        scraped = brown.scrape_splash(splash_page)
        # Fail early if the spider produced nothing usable.
        assert scraped
        assert scraped.record

        return scraped.record
def test_no_author_in_thesis(no_year_no_author):
    """Check that the spider extracts no authors from the given response."""
    extracted_authors = brown_spider.BrownSpider()._get_authors(
        no_year_no_author
    )

    assert not extracted_authors
def test_no_year_in_thesis(no_year_no_author):
    """Check that the spider extracts no PhD year from the given response."""
    extracted_year = brown_spider.BrownSpider()._get_phd_year(
        no_year_no_author
    )

    assert not extracted_year