Example #1
0
def test_ignore_regular_url(ext):

    """
    When the syllabus was scraped from a regular URL, don't write a row.

    The `ext` fixture is called with a plain (non-archive) URL; afterwards
    no `Document_Date_Archive_Url` rows should exist.
    """

    # Only the side effect matters here, so the return value is not kept.
    ext('http://yale.edu/syllabus.html')

    # Shouldn't write a row.
    assert Document_Date_Archive_Url.select().count() == 0
Example #2
0
def test_ignore_future_timestamp(ext):

    """
    Don't index timestamps from the future.

    Builds a web.archive.org URL whose timestamp segment is one year ahead
    of the current time, passes it to the `ext` fixture, and checks that no
    `Document_Date_Archive_Url` rows exist afterwards.
    """

    # Get now + 1 year, rendered with `date_format`.
    future = datetime.now() + relativedelta(years=1)
    timestamp = future.strftime(date_format)

    # Archive URL layout used here: <archive prefix>/<timestamp>/<original URL>.
    archive_prefix = 'https://web.archive.org/web/' + timestamp
    original_url = 'http://yale.edu/syllabus.html'

    # Only the side effect matters here, so the return value is not kept.
    ext(archive_prefix + '/' + original_url)

    # Shouldn't write a row.
    assert Document_Date_Archive_Url.select().count() == 0