def test_aps_aff_note():
    """Parse the affiliation-note fixture and compare the authors of its record.

    The fixture is expected to produce exactly one record whose ``authors``
    entry matches the two authors listed below, affiliations included.
    """
    response = fake_response_from_file(
        'aps/aps_note_affiliation.json',
        response_type=TextResponse,
    )
    parsed = list(aps_spider.APSSpider().parse(response))

    assert parsed
    assert len(parsed) == 1

    # Both authors carry the same Turin affiliation string.
    turin = (
        u'Department of Physics, University of Turin and INFN, Turin, '
        u'Via Pietro Giuria 1, I-10125 Turin, Italy'
    )
    trieste = (
        u'SISSA and INFN, Sezione di Trieste, Via Bonomea 265, 34136 '
        u'Trieste, Italy'
    )
    caselle = {
        'affiliations': [{'value': turin}],
        'full_name': u'Caselle, Michele',
        'given_names': u'Michele',
        'raw_name': u'Michele Caselle',
        'surname': u'Caselle',
    }
    sorba = {
        'affiliations': [{'value': turin}, {'value': trieste}],
        'full_name': u'Sorba, Marianna',
        'given_names': u'Marianna',
        'raw_name': u'Marianna Sorba',
        'surname': u'Sorba',
    }

    assert parsed[0]['authors'] == [caselle, sorba]
def json_spider_record(tmpdir):
    """Return the APS spider together with the first item it parses.

    The spider is fed the ``aps/aps_single_response.json`` fixture wrapped in
    a ``TextResponse``.

    Args:
        tmpdir: accepted for interface compatibility; not used by the body.

    Returns:
        tuple: ``(spider, first_item)`` where ``first_item`` is the first
        element produced by ``spider.parse``.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    items = spider.parse(
        fake_response_from_file(
            'aps/aps_single_response.json',
            response_type=TextResponse,
        )
    )
    # Use the builtin next(): the ``.next()`` method only exists on Python 2
    # iterators, while next() works on Python 2.6+ and Python 3 alike.
    return spider, next(items)
def test_get_file_name_from_url():
    """Check the file name the spider derives from an article URL."""
    article_url = (
        "http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevX.7.041045"
    )
    spider = aps_spider.APSSpider(aps_token="secret")

    assert spider._file_name_from_url(article_url) == "PhysRevX.7.041045.xml"
def results():
    """Return what the APS spider's ``parse`` gives for the single-response fixture."""
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    response = fake_response_from_file(
        'aps/aps_single_response.json',
        response_type=TextResponse,
    )
    return spider.parse(response)
def test_results_jats_parser_handle_date_absence():
    """Parse the JATS fixture without date nodes and validate its record.

    The test passes when ``validate`` returns ``None`` for the parsed record
    checked against ``'hep'``.
    """
    from scrapy.http import XmlResponse

    spider = aps_spider.APSSpider(aps_token="secret")
    response = fake_response_from_file(
        'aps/PhysRevD.96.095036_no_date_nodes.xml',
        response_type=XmlResponse,
    )
    parsed_item = spider._parse_jats(response)

    assert validate(parsed_item.record, 'hep') is None
def inspire_record():
    """Return the first APS item after it went through the push pipeline.

    The spider parses the ``aps/aps_single_response.json`` fixture; its first
    item is handed to ``InspireAPIPushPipeline.process_item`` together with
    the spider.

    Returns:
        Whatever ``process_item`` returns for that first item.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    items = spider.parse(
        fake_response_from_file(
            'aps/aps_single_response.json',
            response_type=TextResponse,
        )
    )
    pipeline = InspireAPIPushPipeline()
    # Use the builtin next(): the ``.next()`` method only exists on Python 2
    # iterators, while next() works on Python 2.6+ and Python 3 alike.
    return pipeline.process_item(next(items), spider)
def test_results_from_jats():
    """Parse a mocked JATS response and validate the resulting record."""
    from scrapy.http import XmlResponse

    spider = aps_spider.APSSpider(aps_token="secret")
    response = fake_response_from_file(
        'aps/PhysRevD.96.095036.xml',
        response_type=XmlResponse,
    )
    parsed_item = spider._parse_jats(response)

    assert validate(parsed_item.record, 'hep') is None
def results():
    """Return the list of records the APS spider parses from the JSON fixture.

    Each parsed item contributes its ``record`` attribute; the list is
    asserted to be non-empty before it is returned.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    response = fake_response_from_file(
        'aps/aps_single_response.json',
        response_type=TextResponse,
    )
    items = list(spider.parse(response))

    records = []
    for item in items:
        records.append(item.record)

    assert records
    return records
def results():
    """Return the list of items the APS spider parses from the JSON fixture.

    Exactly one item is expected; the function asserts this before returning.
    """
    fixture_paths = ('aps/aps_single_response.json',)

    records = []
    for fixture_path in fixture_paths:
        response = fake_response_from_file(
            fixture_path,
            response_type=TextResponse,
        )
        # A new spider is built for every fixture file.
        records.extend(aps_spider.APSSpider().parse(response))

    assert records
    assert len(records) == 1
    return records
def results_from_json():
    """Return the record built by the spider's JSON-on-failure handler.

    The JSON fixture is parsed first; the first parsed item is then exposed
    as the ``request`` of a mock failure that is passed to
    ``_parse_json_on_failure``. The result is a one-element list holding
    the ``record`` of what that handler returns.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    response = fake_response_from_file(
        'aps/aps_single_response.json',
        response_type=TextResponse,
    )
    parsed_items = list(spider.parse(response))

    class MockFailure:
        """Mock twisted.python.failure.Failure, failure on JATS request."""

        def __init__(self):
            # Only ``request`` is set on the mock.
            self.request = parsed_items[0]

    fallback_item = spider._parse_json_on_failure(MockFailure())
    records = [fallback_item.record]
    assert records
    return records