# Code example #1
# Iterate through all the sources, and for anything that is an AUTOMATIC_DOWNLOAD
# get the file from the source url and store it at the desired path.

# Select every configured source (across all supported license categories) whose
# fetch method is AUTOMATIC_DOWNLOAD. `filter_not_approved` is driven by the
# caller's whitelist flag, so unapproved sources can be included on demand.
automatic_downloads = config.read_config(
    cc_by=True,
    cc_by_sa=True,
    google_tos=True,
    cc_by_nc=True,
    filter_by_fetch_method='AUTOMATIC_DOWNLOAD',
    filter_no_load_func=False,
    filter_no_data=False,
    filter_not_approved=args.whitelist)
todays_date = datetime.today().strftime('%Y-%m-%d')

# Download each source's file into its dated output location.
for k, params in automatic_downloads.items():
    source_url = params['fetch']['source_url']
    path_for_today = path_utils.path_to_data_for_date(params, todays_date)
    print('Downloading data for: ', k)
    print('Source url: ', source_url)
    out_dir = path_for_today['dir']
    out_file = path_for_today['file']
    out_path = os.path.join(out_dir, out_file)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)
    output = wget.download(source_url, out_path)
    print('\nFile written to: ', output)

print('Done with fetch_automatic_downloads.py')
# Code example #2
                             filter_not_approved=args.allowlist)

# Locate the hospitalizations spreadsheet to ingest: either the explicitly
# requested date (args.date) or the most recent dated subdirectory that
# contains the file.
spreadsheet_file = 'hospitalizations.xlsx'

most_recent_spreadsheet = path_utils.most_recent_subdir(
    path_utils.path_to('spreadsheets_dir'), spreadsheet_file)
if args.date:
    spreadsheet_date = str(args.date[0])
else:
    spreadsheet_date = str(most_recent_spreadsheet['date'])
spreadsheet_path = os.path.join(path_utils.path_to('spreadsheets_dir'),
                                spreadsheet_date, spreadsheet_file)

print('Fetching spreadsheet for date: ', spreadsheet_date)
print('Spreadsheet path: ', spreadsheet_path)

# This assumes that every data source with params['fetch']['method'] == 'SCRAPED' comes from a single spreadsheet.
# If that stops being the case, will need to update this.

# Each scraped source maps to one sheet (keyed by source name) in the
# spreadsheet; write that sheet out as a CSV under the source's dated path.
for k, params in scraped.items():
    path_for_data = path_utils.path_to_data_for_date(params, spreadsheet_date)
    # sheet_name selects the per-source tab within the workbook.
    df = pd.read_excel(spreadsheet_path, sheet_name=k)
    print('Fetched data will be written to: ', path_for_data)
    out_dir = path_for_data['dir']
    out_file = path_for_data['file']
    out_path = os.path.join(out_dir, out_file)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)
    df.to_csv(out_path, index=False)