Exemple #1
0
def parsed_node_no_splash():
    """Return the parsed record for a JSON entry without a splash page URL.

    The fake JSON document only carries a ``json_uri`` key, and its first
    entry is attached to the response as ``meta["jsonrecord"]`` before the
    spider's ``parse`` method is invoked.

    Returns:
        The ``record`` attribute of the first item produced by ``parse``.
    """
    spider = brown_spider.BrownSpider()
    body = """
    {
    "items": {
        "docs": [
            {
                "json_uri": "https://repository.library.brown.edu/api/pub/items/bdr:11303/"

            }
        ]
    }
    }
    """

    response = fake_response_from_string(body)
    jsonresponse = json.loads(response.body_as_unicode())
    response.meta["jsonrecord"] = jsonresponse["items"]["docs"][0]

    # Use the ``next()`` builtin instead of the Python-2-only ``.next()``
    # method so this works on both Python 2 and Python 3.
    parsed_item = next(spider.parse(response))
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record
Exemple #2
0
def record():
    """Scrape the Brown splash test page and return the spider's output.

    The first entry of ``brown/test_1.json`` gets its ``uri`` pointed at the
    splash test file and is passed to ``scrape_splash`` through the response
    meta under the ``jsonrecord`` key.
    """
    brown = brown_spider.BrownSpider()
    listing = fake_response_from_file('brown/test_1.json')
    first_doc = json.loads(listing.body_as_unicode())["items"]["docs"][0]
    first_doc["uri"] = "brown/test_splash.html"

    splash_page = fake_response_from_file('brown/test_splash.html')
    splash_page.meta["jsonrecord"] = first_doc
    return brown.scrape_splash(splash_page)
Exemple #3
0
def parsed_node():
    """Return a parse call to a full record.

    The first entry of ``brown/test_1.json`` is attached to the response as
    ``meta["jsonrecord"]`` before ``parse`` is invoked.

    Return type should be a Scrapy Request object.
    """
    spider = brown_spider.BrownSpider()
    response = fake_response_from_file('brown/test_1.json')
    jsonresponse = json.loads(response.body_as_unicode())
    response.meta["jsonrecord"] = jsonresponse["items"]["docs"][0]

    # ``next()`` works on Python 2.6+ and Python 3, unlike the ``.next()``
    # method, which only exists on Python 2 iterators.
    return next(spider.parse(response))
Exemple #4
0
def no_year_no_author():
    """Return a fake response whose thesis note has no year and no author.

    The HTML body contains a single "Notes" entry reading
    ``Thesis (Ph.D. -- Brown University`` with nothing after the
    institution name.
    """
    body = """
    <html>
        <div class="panel-body">
            <dl class="">
                <dt>Notes</dt>
                <dd>Thesis (Ph.D. -- Brown University</dd>
            </dl>
        </div>
    </html>

    """
    return fake_response_from_string(body)
Exemple #5
0
def parsed_node():
    """Return a parse call to a full record.

    The first entry of ``brown/test_1.json`` is attached to the response as
    ``meta["jsonrecord"]``. HEAD requests to the record's PDF URL are mocked
    to answer with an ``application/pdf`` content type while ``parse`` runs.

    Return type should be a Scrapy Request object.
    """
    spider = brown_spider.BrownSpider()
    response = fake_response_from_file('brown/test_1.json')
    jsonresponse = json.loads(response.body_as_unicode())
    response.meta["jsonrecord"] = jsonresponse["items"]["docs"][0]

    with requests_mock.Mocker() as mock:
        mock.head(
            'https://repository.library.brown.edu/studio/item/bdr:11303/PDF/',
            headers={
                'Content-Type': 'application/pdf',
            },
        )
        # ``next()`` works on Python 2.6+ and Python 3, unlike the
        # ``.next()`` method, which only exists on Python 2 iterators.
        return next(spider.parse(response))
Exemple #6
0
def record():
    """Scrape the Brown splash test page and return the resulting record.

    HEAD requests to the example PDF URL are mocked to answer with a
    ``text/html`` content type for the duration of the scrape. The first
    entry of ``brown/test_1.json`` gets its ``uri`` pointed at the splash
    test file and is passed to ``scrape_splash`` through the response meta.
    """
    brown = brown_spider.BrownSpider()
    with requests_mock.Mocker() as mocker:
        html_headers = {'Content-Type': 'text/html'}
        mocker.head(
            'http://www.example.com/studio/item/bdr:11303/PDF/',
            headers=html_headers,
        )
        listing = fake_response_from_file('brown/test_1.json')
        first_doc = json.loads(listing.body_as_unicode())["items"]["docs"][0]
        first_doc["uri"] = "brown/test_splash.html"

        splash_page = fake_response_from_file('brown/test_splash.html')
        splash_page.meta["jsonrecord"] = first_doc

        scraped = brown.scrape_splash(splash_page)
        # Fail early if the spider produced nothing usable.
        assert scraped
        assert scraped.record

        return scraped.record
Exemple #7
0
def test_no_author_in_thesis(no_year_no_author):
    """Check that ``_get_authors`` yields nothing for the given response."""
    extracted = brown_spider.BrownSpider()._get_authors(no_year_no_author)
    assert not extracted
Exemple #8
0
def test_no_year_in_thesis(no_year_no_author):
    """Check that ``_get_phd_year`` yields nothing for the given response."""
    extracted = brown_spider.BrownSpider()._get_phd_year(no_year_no_author)
    assert not extracted