def test_ignore_regular_url(ext):

    """
    A syllabus scraped from a regular (non-archive) URL must not produce a
    row.
    """

    regular_url = 'http://yale.edu/syllabus.html'

    # Run the extractor; its return value is not needed for this check.
    ext(regular_url)

    # The table should still be empty afterwards.
    row_count = Document_Date_Archive_Url.select().count()
    assert row_count == 0
def test_ignore_future_timestamp(ext):

    """
    An archive URL whose timestamp lies in the future must not be indexed.
    """

    # Format a moment one year ahead of the current time.
    one_year_ahead = datetime.now() + relativedelta(years=1)
    timestamp = one_year_ahead.strftime(date_format)

    # Assemble <archive prefix>/<timestamp>/<original URL>.
    archived_url = '/'.join((
        'https://web.archive.org/web',
        timestamp,
        'http://yale.edu/syllabus.html',
    ))

    # Run the extractor; its return value is not needed for this check.
    ext(archived_url)

    # The table should still be empty afterwards.
    row_count = Document_Date_Archive_Url.select().count()
    assert row_count == 0