def test_SF_state_data_files_2005_1yr():
    """Verify Summary File data-file URLs for the 2005 1-year release.

    2005 used per-state directories (e.g. NewYork/) holding an all_XX.zip
    plus a XXgeo.2005-1yr geography file; 'us' maps to 0UnitedStates/.
    """
    # print() call form works on Python 2 (single arg) and Python 3 alike,
    # unlike the original py2-only `print` statement.
    print("Creating 2005 server")
    server = AcsServer(years=[2005], durs=[1], pums=False)
    assert_equals(server.state_data_files(2005, 1, ['ny', 'us', 'ma']), [
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/NewYork/all_ny.zip',
         'state': 'ny'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/NewYork/nygeo.2005-1yr',
         'state': 'ny'},
        {'url': 'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/0UnitedStates/all_us.zip',
         'state': 'us'},
        {'url': 'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/0UnitedStates/usgeo.2005-1yr',
         'state': 'us'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/Massachusetts/all_ma.zip',
         'state': 'ma'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/Massachusetts/mageo.2005-1yr',
         'state': 'ma'}])
def sf(states, baseurl, startyear, endyear, durs, overwrite, outdir, dryrun):
    """Download Summary File datafiles.

    Iterates over every requested year/duration combination, skips
    combinations the loop's guard treats as never published, limits the
    state list to files not already present locally (unless ``overwrite``),
    and downloads both the per-state data files and the shared
    stubs/documentation files. ``dryrun`` logs what would be fetched
    without downloading anything.
    """
    click.echo("Downloading SF")
    years = range(startyear, endyear + 1)
    durations = [int(dur) for dur in durs]
    logger.debug("Years: {0}".format(years))
    logger.debug("Durations: {0}".format(durations))
    acs = AcsServer(baseurl=baseurl, years=years, durs=durations, pums=False)
    local = Local(os.path.normpath(outdir), overwrite=overwrite, pums=False)
    for year in years:
        for dur in durations:
            # Guard against combinations treated as invalid: multi-year
            # estimates through 2006, 5-year through 2008, 3-year after 2013.
            if (year <= 2006 and dur != 1) or (year <= 2008 and dur == 5) or (year > 2013 and dur == 3):
                logger.info("Skipping invalid year/duration combination: {0} {1}-year".format(year, dur))
                continue
            # Limit states to those whose zip files don't already exist,
            # unless overwrite was requested.
            new_states = [s for s in states
                          if overwrite or not os.path.exists(
                              local.destination_paths(year, dur, s)['zip_path'])]
            if len(new_states) == 0:
                logger.info("Skipping {year} {dur}-year: All states already downloaded ({states})".format(year=year, dur=dur, states=states))
                # BUG FIX: previously fell through despite logging "Skipping",
                # still announcing the batch and re-downloading stubs/docs.
                continue
            logger.info(Fore.GREEN + Style.BRIGHT + "{0} {1}-year: {2}".format(year, dur, new_states) + Fore.RESET + Style.RESET_ALL)
            state_data_files = acs.state_data_files(year, dur, new_states)
            logger.debug("Data files to download: \n{0}{1}{2}".format(
                Fore.MAGENTA,
                pprint.pformat([str(f['url']).replace(acs.urlroot, "")
                                for f in state_data_files]),
                Fore.RESET))
            stubs_and_doc_files = acs.stubs_and_documentation(year, dur)
            logger.debug("Documentation files to download: \n{0}{1}{2}".format(
                Fore.GREEN, pprint.pformat(stubs_and_doc_files), Fore.RESET))
            if not dryrun:
                local.download_data_files(state_data_files, year, dur)
                local.download_stubs_and_docs(stubs_and_doc_files, year, dur)
def pums(states, baseurl, startyear, endyear, durs, overwrite, outdir, dryrun):
    """Download Public Use Microdata Sample (PUMS) datafiles.

    Iterates over every requested year/duration combination, skips
    combinations the loop's guard treats as never published, limits the
    state list to files not already present locally (unless ``overwrite``),
    and downloads the per-state data files. ``dryrun`` logs what would be
    fetched without downloading anything.
    """
    click.echo("Downloading PUMS")
    years = range(startyear, endyear + 1)
    durations = [int(dur) for dur in durs]
    logger.debug("Years: {0}".format(years))
    logger.debug("Durations: {0}".format(durations))
    acs = AcsServer(baseurl=baseurl, years=years, durs=durations, pums=True)
    local = Local(os.path.normpath(outdir), overwrite=overwrite, pums=True)
    click.echo(pprint.pformat(acs.rooturls))
    for year in years:
        for dur in durations:
            # Guard against combinations treated as invalid: multi-year
            # estimates through 2006, 5-year through 2008, 3-year after 2013.
            if (year <= 2006 and dur != 1) or (year <= 2008 and dur == 5) or (year > 2013 and dur == 3):
                logger.info("Skipping invalid year/duration combination: {0} {1}-year".format(year, dur))
                continue
            # Limit states to those whose zip files don't already exist,
            # unless overwrite was requested.
            new_states = [s for s in states
                          if overwrite or not os.path.exists(
                              local.destination_paths(year, dur, s)['zip_path'])]
            if len(new_states) == 0:
                logger.info("Skipping {year} {dur}-year: All states already downloaded ({states})".format(year=year, dur=dur, states=states))
                # BUG FIX: previously fell through despite logging "Skipping",
                # still announcing the batch and fetching an empty file list.
                continue
            click.secho("{0} {1}-year: {2}".format(year, dur, new_states), fg='green')
            state_data_files = acs.state_data_files(year, dur, new_states)
            if not dryrun:
                local.download_data_files(state_data_files, year, dur)
def pums(states, baseurl, startyear, endyear, durs, overwrite, outdir, dryrun):
    """Download Public Use Microdata Sample (PUMS) datafiles.

    NOTE(review): this is a duplicate definition of ``pums`` within the
    same file; the later definition wins at import time — confirm which
    copy is intended and delete the other.

    Iterates over every requested year/duration combination, skips
    combinations the loop's guard treats as never published, limits the
    state list to files not already present locally (unless ``overwrite``),
    and downloads the per-state data files. ``dryrun`` logs what would be
    fetched without downloading anything.
    """
    click.echo("Downloading PUMS")
    years = range(startyear, endyear + 1)
    durations = [int(dur) for dur in durs]
    logger.debug("Years: {0}".format(years))
    logger.debug("Durations: {0}".format(durations))
    acs = AcsServer(baseurl=baseurl, years=years, durs=durations, pums=True)
    local = Local(os.path.normpath(outdir), overwrite=overwrite, pums=True)
    click.echo(pprint.pformat(acs.rooturls))
    for year in years:
        for dur in durations:
            # Guard against combinations treated as invalid: multi-year
            # estimates through 2006, 5-year through 2008, 3-year after 2013.
            if (year <= 2006 and dur != 1) or (year <= 2008 and dur == 5) or (year > 2013 and dur == 3):
                logger.info("Skipping invalid year/duration combination: {0} {1}-year".format(year, dur))
                continue
            # Limit states to those whose zip files don't already exist,
            # unless overwrite was requested.
            new_states = [s for s in states
                          if overwrite or not os.path.exists(
                              local.destination_paths(year, dur, s)['zip_path'])]
            if len(new_states) == 0:
                logger.info("Skipping {year} {dur}-year: All states already downloaded ({states})".format(year=year, dur=dur, states=states))
                # BUG FIX: previously fell through despite logging "Skipping",
                # still announcing the batch and fetching an empty file list.
                continue
            click.secho("{0} {1}-year: {2}".format(year, dur, new_states), fg='green')
            state_data_files = acs.state_data_files(year, dur, new_states)
            if not dryrun:
                local.download_data_files(state_data_files, year, dur)
def test_SF_state_data_files_2009_13_5yr():
    """Verify Summary File 5-year data-file URLs for 2009 and 2013.

    5-year releases live under data/5_year_by_state/ with two zips per
    state: all-geographies-except-tracts and tracts/block-groups-only.
    """
    # print() call form works on Python 2 (single arg) and Python 3 alike,
    # unlike the original py2-only `print` statement.
    print("Creating 2009/2013 server")
    server = AcsServer(years=[2009, 2013], durs=[1, 5], pums=False)
    assert_equals(server.state_data_files(2009, 5, ['ny']), [
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2009/data/5_year_by_state/NewYork_All_Geographies_Not_Tracts_Block_Groups.zip',
         'state': 'ny'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2009/data/5_year_by_state/NewYork_Tracts_Block_Groups_Only.zip',
         'state': 'ny'}])
    assert_equals(server.state_data_files(2013, 5, ['ny']), [
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2013/data/5_year_by_state/NewYork_All_Geographies_Not_Tracts_Block_Groups.zip',
         'state': 'ny'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2013/data/5_year_by_state/NewYork_Tracts_Block_Groups_Only.zip',
         'state': 'ny'}])
def test_SF_state_data_files_2009_13_5yr():
    """Verify Summary File 5-year data-file URLs for 2009 and 2013.

    NOTE(review): duplicate of an earlier identically-named test in this
    file — nose/pytest will only collect one; confirm and remove one copy.
    """
    # print() call form works on Python 2 (single arg) and Python 3 alike,
    # unlike the original py2-only `print` statement.
    print("Creating 2009/2013 server")
    server = AcsServer(years=[2009, 2013], durs=[1, 5], pums=False)
    assert_equals(server.state_data_files(2009, 5, ['ny']), [
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2009/data/5_year_by_state/NewYork_All_Geographies_Not_Tracts_Block_Groups.zip',
         'state': 'ny'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2009/data/5_year_by_state/NewYork_Tracts_Block_Groups_Only.zip',
         'state': 'ny'}])
    assert_equals(server.state_data_files(2013, 5, ['ny']), [
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2013/data/5_year_by_state/NewYork_All_Geographies_Not_Tracts_Block_Groups.zip',
         'state': 'ny'},
        {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2013/data/5_year_by_state/NewYork_Tracts_Block_Groups_Only.zip',
         'state': 'ny'}])
def test_SF_state_data_files_2005_1yr():
    """Verify Summary File data-file URLs for the 2005 1-year release.

    NOTE(review): duplicate of an earlier identically-named test in this
    file — nose/pytest will only collect one; confirm and remove one copy.
    """
    # print() call form works on Python 2 (single arg) and Python 3 alike,
    # unlike the original py2-only `print` statement.
    print("Creating 2005 server")
    server = AcsServer(years=[2005], durs=[1], pums=False)
    assert_equals(
        server.state_data_files(2005, 1, ['ny', 'us', 'ma']), [
            {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/NewYork/all_ny.zip',
             'state': 'ny'},
            {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/NewYork/nygeo.2005-1yr',
             'state': 'ny'},
            {'url': 'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/0UnitedStates/all_us.zip',
             'state': 'us'},
            {'url': 'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/0UnitedStates/usgeo.2005-1yr',
             'state': 'us'},
            {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/Massachusetts/all_ma.zip',
             'state': 'ma'},
            {'url': u'http://www2.census.gov/programs-surveys/acs/summary_file/2005/data/Massachusetts/mageo.2005-1yr',
             'state': 'ma'}])
def test_PUMS_state_data_files():
    """Smoke-print PUMS data-file URL generation for 2009 1-year.

    NOTE(review): no assertion — this only exercises the call path and
    prints the result; consider pinning expected URLs like the SF tests.
    """
    server = AcsServer(years=[2009, 2013], durs=[1, 5], pums=True)
    # print() call form works on Python 2 (single arg) and Python 3 alike,
    # unlike the original py2-only `print` statement.
    print(server.state_data_files(2009, 1, ['ny', 'us']))
def test_SF_stubs_and_doc_files():
    """Smoke-print stub/documentation file generation for 2009 1- and 5-year.

    NOTE(review): no assertion — this only exercises the call path and
    prints the results; consider pinning expected values.
    """
    server = AcsServer(years=[2009, 2013], durs=[1, 5], pums=False)
    # print() call form works on Python 2 (single arg) and Python 3 alike,
    # unlike the original py2-only `print` statements.
    print(server.stubs_and_documentation(2009, 1))
    print(server.stubs_and_documentation(2009, 5))
def create_server(years, durs, pums):
    """Factory: build an AcsServer for the given years, durations, and PUMS flag."""
    server = AcsServer(years=years, durs=durs, pums=pums)
    return server
def sf(states, baseurl, startyear, endyear, durs, overwrite, outdir, dryrun):
    """Download Summary File datafiles.

    NOTE(review): this is a duplicate definition of ``sf`` within the
    same file; the later definition wins at import time — confirm which
    copy is intended and delete the other.

    Iterates over every requested year/duration combination, skips
    combinations the loop's guard treats as never published, limits the
    state list to files not already present locally (unless ``overwrite``),
    and downloads both the per-state data files and the shared
    stubs/documentation files. ``dryrun`` logs what would be fetched
    without downloading anything.
    """
    click.echo("Downloading SF")
    years = range(startyear, endyear + 1)
    durations = [int(dur) for dur in durs]
    logger.debug("Years: {0}".format(years))
    logger.debug("Durations: {0}".format(durations))
    acs = AcsServer(baseurl=baseurl, years=years, durs=durations, pums=False)
    local = Local(os.path.normpath(outdir), overwrite=overwrite, pums=False)
    for year in years:
        for dur in durations:
            # Guard against combinations treated as invalid: multi-year
            # estimates through 2006, 5-year through 2008, 3-year after 2013.
            if (year <= 2006 and dur != 1) or (year <= 2008 and dur == 5) or (year > 2013 and dur == 3):
                logger.info(
                    "Skipping invalid year/duration combination: {0} {1}-year".
                    format(year, dur))
                continue
            # Limit states to those whose zip files don't already exist,
            # unless overwrite was requested.
            new_states = [s for s in states
                          if overwrite or not os.path.exists(
                              local.destination_paths(year, dur, s)['zip_path'])]
            if len(new_states) == 0:
                logger.info(
                    "Skipping {year} {dur}-year: All states already downloaded ({states})"
                    .format(year=year, dur=dur, states=states))
                # BUG FIX: previously fell through despite logging "Skipping",
                # still announcing the batch and re-downloading stubs/docs.
                continue
            logger.info(Fore.GREEN + Style.BRIGHT +
                        "{0} {1}-year: {2}".format(year, dur, new_states) +
                        Fore.RESET + Style.RESET_ALL)
            state_data_files = acs.state_data_files(year, dur, new_states)
            logger.debug("Data files to download: \n{0}{1}{2}".format(
                Fore.MAGENTA,
                pprint.pformat([str(f['url']).replace(acs.urlroot, "")
                                for f in state_data_files]),
                Fore.RESET))
            stubs_and_doc_files = acs.stubs_and_documentation(year, dur)
            logger.debug("Documentation files to download: \n{0}{1}{2}".format(
                Fore.GREEN, pprint.pformat(stubs_and_doc_files), Fore.RESET))
            if not dryrun:
                local.download_data_files(state_data_files, year, dur)
                local.download_stubs_and_docs(stubs_and_doc_files, year, dur)