def test_most_recent_download_date(self):
    """Exercise ``FileManager.get_most_recent_download_date`` on a scratch directory.

    A uniquely named directory next to this test file is used. The method is
    expected to return ``None`` while the directory is missing, empty, or
    holds no ``research_gov_<start>_<end>.xml`` style file names, and
    otherwise the latest end date found among the file names.

    Paths are built with ``os.path.join`` so the test is not tied to the
    Windows path separator, and everything the test creates is removed in a
    ``finally`` block so a failing assertion does not leave files behind.
    """
    test_directory = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "test_" + datetime.datetime.today().strftime("%Y%m%d%H%M%S%f")
    )
    created_files = []

    def touch(filename):
        # Create an empty file in the scratch directory and remember it for clean up.
        path = os.path.join(test_directory, filename)
        created_files.append(path)
        open(path, "a").close()

    file_manager = FileManager(test_directory)

    # Make sure directory doesn't already exist. Checked outside the try block so that clean up never touches a
    # directory this test did not create.
    self.assertFalse(os.path.isdir(test_directory))

    try:
        # Directory doesn't exist. Should return None.
        self.assertIsNone(file_manager.get_most_recent_download_date())

        # Directory is empty. Should return None.
        os.mkdir(test_directory)
        self.assertIsNone(file_manager.get_most_recent_download_date())

        # Directory with no properly formatted file names. Should return None.
        touch("this_is_a_test.xml")
        self.assertIsNone(file_manager.get_most_recent_download_date())

        # Only one valid file name. Make sure the date is what we expected.
        touch("research_gov_2001-07-02_2005-01-30.xml")
        self.assertEqual(file_manager.get_most_recent_download_date(), date(2005, 1, 30))

        # Several valid file names. Make sure the correct date is returned.
        touch("research_gov_2001-07-02_2006-02-28.xml")
        touch("research_gov_2001-07-02_2002-03-14.xml")
        touch("research_gov_2001-07-02_2006-02-27.xml")
        self.assertEqual(file_manager.get_most_recent_download_date(), date(2006, 2, 28))
    finally:
        # Clean up, even when an assertion above failed. Only the files created here are removed (rather than
        # using shutil.rmtree), and os.rmdir refuses to delete a directory that still has other content.
        for path in created_files:
            if os.path.exists(path):
                os.remove(path)
        if os.path.isdir(test_directory):
            os.rmdir(test_directory)
def main():
    """Download all grant-award periods newer than what is already on disk.

    The start date is the day after the most recent download date reported
    by ``FileManager``, or ``constants.EARLIEST_GRANT_AWARD_DATE`` when no
    previous download is found. If that start date is later than
    ``constants.MOST_RECENT_GRANT_AWARD_DATE`` there is nothing to do and a
    message is printed instead.
    """
    file_manager = FileManager()
    file_manager.create_download_directory()

    last_run_date = file_manager.get_most_recent_download_date()
    if last_run_date is None:
        # No earlier download found: start from the beginning.
        start_date = constants.EARLIEST_GRANT_AWARD_DATE
    else:
        # Resume on the day after the last downloaded date.
        start_date = last_run_date + datetime.timedelta(days=1)

    if start_date <= constants.MOST_RECENT_GRANT_AWARD_DATE:
        download_period = DownloadPeriod(start_date)
        # The loop runs for as long as download_period is truthy.
        # NOTE(review): presumably increment_period() returns a falsy value once the last period has been
        # processed -- confirm against DownloadPeriod.
        while download_period:
            # This is really inefficient, but we're going to create a new browser session for each request. This is
            # because of how report results are presented. It would be difficult to determine which results go with
            # which date ranges otherwise.
            with Scraper() as scraper:
                if scraper.get_query_results(file_manager, download_period):
                    # Success: move on to the period returned by increment_period().
                    download_period = download_period.increment_period()
                else:
                    # Failure: adjust the same period object in place and retry it on the next pass.
                    download_period.downgrade_period()
    else:
        # A single parenthesised argument prints the same text with the Python 2 print statement and with the
        # Python 3 print function.
        print("Downloads directory is up to date. Quitting.")