Example #1
0
def test_pretendbook():
    """Check that a ten-fold copy of the fixture text is typed as 'book'.

    The content, word count and page count are all scaled by the same
    factor before the document is handed to ``doctyper.evaluate``.
    """
    url = 'http://umsu.de/papers/variations.pdf'
    repeat = 10  # same factor for the text and both counts
    fixture_path = os.path.join(testdir, 'attitudes.txt')

    doc = scraper.Doc(url=url)
    link = scraper.Link(url=url)
    doc.link = link
    link.anchortext = 'Download'
    link.context = 'Foo bar'

    doc.content = readfile(fixture_path) * repeat
    doc.numwords = 10200 * repeat
    doc.numpages = 22 * repeat

    doctype = doctyper.evaluate(doc)
    assert doctype == 'book'
Example #2
0
def test_notreview():
    """Check that a paper mentioning 'Philosophical Review' is typed 'article'.

    Both the link context and the first lines of the content say the paper
    is forthcoming in (The) Philosophical Review; the expected result of
    ``doctyper.evaluate`` is still 'article'.
    """
    url = 'http://umsu.de/papers/variations.pdf'
    header = 'blah blah foo bar xyz\nForthcoming in The Philosophical Review\n'
    fixture_path = os.path.join(testdir, 'attitudes.txt')

    doc = scraper.Doc(url=url)
    link = scraper.Link(url=url)
    doc.link = link
    link.anchortext = 'xyz'
    link.context = 'xyz forthcoming in Philosophical Review'

    # Header lines first, then the full fixture text.
    doc.content = header + readfile(fixture_path)
    doc.numwords = 10200
    doc.numpages = 22

    doctype = doctyper.evaluate(doc)
    assert doctype == 'article'
Example #3
0
def test_pretendreview():
    """Check that a short text with review-style cues is typed as 'review'.

    The anchor text and link context start with 'Review of', and the content
    opens with a book citation line followed by the first 1000 characters of
    the fixture text.
    """
    url = 'http://umsu.de/papers/variations.pdf'
    citation = 'Hans Kamp: xyz, Oxford University Press 2009, 210 pages\n'
    fixture_path = os.path.join(testdir, 'attitudes.txt')

    doc = scraper.Doc(url=url)
    link = scraper.Link(url=url)
    doc.link = link
    link.anchortext = 'Review of xyz'
    link.context = 'Review of xyz abc'

    # Only a 1000-character excerpt of the fixture is appended.
    excerpt = readfile(fixture_path)[:1000]
    doc.content = citation + excerpt
    doc.numwords = 1000
    doc.numpages = 3

    doctype = doctyper.evaluate(doc)
    assert doctype == 'review'