Esempio n. 1
0
def test_aps_aff_note():
    """Check the authors parsed from the APS note/affiliation fixture.

    The ``aps/aps_note_affiliation.json`` fixture is wrapped in a fake
    ``TextResponse`` and parsed by the APS spider; exactly one record is
    expected, and its ``authors`` entry must match the expected list.
    """
    response = fake_response_from_file(
        'aps/aps_note_affiliation.json',
        response_type=TextResponse,
    )
    parsed = list(aps_spider.APSSpider().parse(response))

    assert parsed
    assert len(parsed) == 1

    # Both authors share this affiliation, so spell it out only once.
    turin = (
        u'Department of Physics, University of Turin and INFN, Turin, '
        u'Via Pietro Giuria 1, I-10125 Turin, Italy'
    )
    trieste = (
        u'SISSA and INFN, Sezione di Trieste, Via Bonomea 265, 34136 '
        u'Trieste, Italy'
    )

    caselle = {
        'affiliations': [{'value': turin}],
        'full_name': u'Caselle, Michele',
        'given_names': u'Michele',
        'raw_name': u'Michele Caselle',
        'surname': u'Caselle',
    }
    sorba = {
        'affiliations': [{'value': turin}, {'value': trieste}],
        'full_name': u'Sorba, Marianna',
        'given_names': u'Marianna',
        'raw_name': u'Marianna Sorba',
        'surname': u'Sorba',
    }

    assert parsed[0]['authors'] == [caselle, sorba]
Esempio n. 2
0
def json_spider_record(tmpdir):
    """Return the APS spider together with the first item it parses.

    The spider is fed a fake ``TextResponse`` built from the
    ``aps/aps_single_response.json`` fixture.

    Args:
        tmpdir: not used by the body; kept so the signature is unchanged.

    Returns:
        tuple: ``(spider, first_item)`` where ``first_item`` is the first
        element produced by ``spider.parse``.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    items = spider.parse(
        fake_response_from_file(
            'aps/aps_single_response.json',
            response_type=TextResponse,
        )
    )
    # Use the built-in next(): the ``.next()`` method only exists on
    # Python 2 iterators, while next() works on Python 2.6+ and Python 3.
    return spider, next(items)
Esempio n. 3
0
def test_get_file_name_from_url():
    """Check the file name the APS spider derives from an article URL."""
    article_url = (
        "http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevX.7.041045"
    )
    spider = aps_spider.APSSpider(aps_token="secret")

    assert spider._file_name_from_url(article_url) == "PhysRevX.7.041045.xml"
Esempio n. 4
0
def results():
    """Return what the APS spider's ``parse`` gives for the single-response fixture.

    The ``aps/aps_single_response.json`` fixture is wrapped in a fake
    ``TextResponse`` and handed to ``APSSpider.parse``; the result of that
    call is returned as-is.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    response = fake_response_from_file(
        'aps/aps_single_response.json',
        response_type=TextResponse,
    )
    return spider.parse(response)
Esempio n. 5
0
def test_results_jats_parser_handle_date_absence():
    """Validate the record parsed from the ``*_no_date_nodes.xml`` JATS fixture.

    The record produced by ``_parse_jats`` must pass ``validate`` against
    the ``hep`` schema, i.e. ``validate`` must return ``None``.
    """
    from scrapy.http import XmlResponse

    spider = aps_spider.APSSpider(aps_token="secret")
    response = fake_response_from_file(
        'aps/PhysRevD.96.095036_no_date_nodes.xml',
        response_type=XmlResponse,
    )
    parsed_item = spider._parse_jats(response)

    assert validate(parsed_item.record, 'hep') is None
Esempio n. 6
0
def inspire_record():
    """Return the first parsed APS item after it has gone through the pipeline.

    The ``aps/aps_single_response.json`` fixture is parsed by the APS spider
    and the first resulting item is passed to
    ``InspireAPIPushPipeline.process_item``.

    Returns:
        Whatever ``process_item`` returns for that first item.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    items = spider.parse(
        fake_response_from_file(
            'aps/aps_single_response.json',
            response_type=TextResponse,
        )
    )
    pipeline = InspireAPIPushPipeline()
    # Use the built-in next(): the ``.next()`` method only exists on
    # Python 2 iterators, while next() works on Python 2.6+ and Python 3.
    return pipeline.process_item(next(items), spider)
Esempio n. 7
0
def test_results_from_jats():
    """Validate the record parsed from a mocked JATS response.

    The record produced by ``_parse_jats`` for the
    ``aps/PhysRevD.96.095036.xml`` fixture must pass ``validate`` against
    the ``hep`` schema, i.e. ``validate`` must return ``None``.
    """
    from scrapy.http import XmlResponse

    spider = aps_spider.APSSpider(aps_token="secret")
    response = fake_response_from_file(
        'aps/PhysRevD.96.095036.xml',
        response_type=XmlResponse,
    )
    parsed_item = spider._parse_jats(response)

    assert validate(parsed_item.record, 'hep') is None
Esempio n. 8
0
def results():
    """Return the list of records the APS spider parses from the JSON fixture.

    The ``aps/aps_single_response.json`` fixture is parsed by the spider and
    the ``record`` attribute of every parsed item is collected. The list is
    asserted to be non-empty before it is returned.
    """
    from scrapy.http import TextResponse

    spider = aps_spider.APSSpider()
    response = fake_response_from_file(
        'aps/aps_single_response.json',
        response_type=TextResponse,
    )
    items = list(spider.parse(response))

    records = [item.record for item in items]

    assert records
    return records
Esempio n. 9
0
def results():
    """Return the items the APS spider parses from the listed fixture files.

    Each fixture is wrapped in a fake ``TextResponse`` and parsed by a
    freshly created spider; everything parsed is gathered into one list,
    which is asserted to hold exactly one element.
    """
    fixture_paths = ('aps/aps_single_response.json',)
    collected = []

    for fixture_path in fixture_paths:
        response = fake_response_from_file(
            fixture_path,
            response_type=TextResponse,
        )
        spider = aps_spider.APSSpider()
        collected.extend(spider.parse(response))

    assert collected
    assert len(collected) == 1

    return collected
Esempio n. 10
0
def results_from_json():
    """Return the record obtained through the spider's JSON failure handler.

    The ``aps/aps_single_response.json`` fixture is parsed first; the first
    parsed item is then exposed as the ``request`` attribute of a mock
    failure object, which is passed to ``spider._parse_json_on_failure``.
    The ``record`` of that call's result is returned in a one-element list.
    """
    from scrapy.http import TextResponse

    class MockFailure:
        """Mock twisted.python.failure.Failure, failure on JATS request."""
        def __init__(self, request):
            self.request = request

    spider = aps_spider.APSSpider()
    response = fake_response_from_file(
        'aps/aps_single_response.json',
        response_type=TextResponse,
    )
    items = list(spider.parse(response))

    failure = MockFailure(items[0])
    records = [spider._parse_json_on_failure(failure).record]

    assert records
    return records