Esempio n. 1
0
    def test_most_recent_download_date(self):
        """Check FileManager.get_most_recent_download_date() against a scratch directory.

        A uniquely named (timestamped) directory next to this test file is
        used.  The method is expected to return None while the directory is
        missing, empty, or holds only file names that are not in the
        "research_gov_<date>_<date>.xml" form, and otherwise the latest
        *second* date found among those file names.

        Paths are built with os.path.join so the files really end up inside
        the scratch directory on every platform, and clean up runs in a
        ``finally`` block so a failing assertion does not leave files behind.
        """
        test_directory = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "test_" + datetime.datetime.today().strftime("%Y%m%d%H%M%S%f"),
        )

        file_manager = FileManager(test_directory)

        # Make sure directory doesn't already exist.  Checked before the
        # try block so that clean up never touches a pre-existing directory.
        self.assertFalse(os.path.isdir(test_directory))

        created_paths = []

        def touch(file_name):
            """Create an empty file with the given name inside the scratch directory."""
            file_path = os.path.join(test_directory, file_name)
            created_paths.append(file_path)
            with open(file_path, "a"):
                pass

        try:
            # Directory doesn't exist.  Should return None.
            self.assertIsNone(file_manager.get_most_recent_download_date())

            os.mkdir(test_directory)
            # Directory is empty.  Should return None.
            self.assertIsNone(file_manager.get_most_recent_download_date())

            touch("this_is_a_test.xml")
            # Directory with no properly formatted file names.  Should return None.
            self.assertIsNone(file_manager.get_most_recent_download_date())

            touch("research_gov_2001-07-02_2005-01-30.xml")
            # Only one valid file name.  Make sure the date is what we expected.
            self.assertEqual(
                file_manager.get_most_recent_download_date(), date(2005, 1, 30)
            )

            touch("research_gov_2001-07-02_2006-02-28.xml")
            touch("research_gov_2001-07-02_2002-03-14.xml")
            touch("research_gov_2001-07-02_2006-02-27.xml")
            # Several valid file names.  Make sure the correct date is returned.
            self.assertEqual(
                file_manager.get_most_recent_download_date(), date(2006, 2, 28)
            )
        finally:
            # Clean up.  Only the files this test created are removed (rather
            # than using shutil.rmtree), so nothing unexpected is ever deleted.
            for file_path in created_paths:
                if os.path.exists(file_path):
                    os.remove(file_path)
            if os.path.isdir(test_directory):
                os.rmdir(test_directory)
Esempio n. 2
0
def main():
    """Download query results for every period after the last one on disk.

    The start date is the day after the most recent download date reported
    by the FileManager, or constants.EARLIEST_GRANT_AWARD_DATE when there
    is no previous download.  If that start date is later than
    constants.MOST_RECENT_GRANT_AWARD_DATE there is nothing to do and a
    message is printed instead.
    """
    file_manager = FileManager()
    file_manager.create_download_directory()

    last_run_date = file_manager.get_most_recent_download_date()
    if last_run_date is None:
        start_date = constants.EARLIEST_GRANT_AWARD_DATE
    else:
        start_date = last_run_date + datetime.timedelta(days=1)

    if start_date <= constants.MOST_RECENT_GRANT_AWARD_DATE:

        download_period = DownloadPeriod(start_date)
        # NOTE(review): the loop only ends when increment_period() returns a
        # falsy value - presumably once the last period is done; confirm.
        while download_period:

            # This is really inefficient, but we're going to create a new browser session for each request.  This is
            # because of how report results are presented.  It would be difficult to determine which results go with
            # which date ranges otherwise.
            with Scraper() as scraper:
                if scraper.get_query_results(file_manager, download_period):
                    download_period = download_period.increment_period()
                else:
                    # The return value is not used, so downgrade_period()
                    # presumably changes the period in place before the
                    # retry - TODO confirm.
                    download_period.downgrade_period()

    else:

        # Call form of print: same output under Python 2 for a single string
        # argument, and valid syntax under Python 3.
        print("Downloads directory is up to date.  Quitting.")