def test_move_most_recent_files(self):
    """Only dataset files written after the download-start timestamp are
    moved to the target directory; unrelated files in Downloads stay put."""
    downloads = get_home_dir() + '/Downloads'
    dataset_path = (downloads + '/Britain Coronavirus Disease Prevention Map '
                    'Mar 06 2020 Id Id Colocation Map_2020-03-31.csv')
    # Decoy file: lives in Downloads but does not match a dataset name.
    self.data1 = pd.DataFrame({'data': [1, 2, 3, 4, 5]})
    self.data1.to_csv(downloads + '/test1.csv')
    download_start = datetime.now().timestamp()
    # Dataset-style file created after download_start: this one should move.
    self.data2 = pd.DataFrame({'data': [1, 2, 3, 4, 5]})
    self.data2.to_csv(dataset_path)
    # Both files exist before the move.
    self.assertTrue(os.path.exists(downloads + '/test1.csv'))
    self.assertTrue(os.path.exists(dataset_path))
    move_most_recent_files('./tmp1', ['url'], download_start)
    # Decoy is untouched; the dataset file was renamed and relocated.
    self.assertTrue(os.path.exists(downloads + '/test1.csv'))
    self.assertTrue(os.path.exists('./tmp1/Britain_2020_03_31.csv'))
    os.remove(downloads + '/test1.csv')
def main(_args):
    '''
    Download colocation data, prompting interactively for credentials.

    Parameters
    ----------
    _args : list
        Arg list: csv file specifying download countries and ids, outdir
        (last element).

    Returns
    -------
    None.

    '''
    # NOTE(review): the credential block below was reconstructed around a
    # redacted (******) section in the original source — confirm the exact
    # prompts against the upstream repository.
    import getpass
    username = input("Username: ")
    # getpass hides the password while typing, unlike plain input().
    password = getpass.getpass("Password: ")
    keys = [username, password]
    update = input("Update datasets? (y/n): ")
    if update == 'y':
        update = True
    elif update == 'n':
        update = False
    else:
        sys.exit('Unknown update input. Choose "y", "n". Exiting.')
    # read target datasets
    data_target = pd.read_csv(_args[1])
    # Output root is always the last CLI argument.
    outdir = _args[-1]
    for i, dataset_id in enumerate(data_target['id']):
        country_output = (outdir + "/" +
                          data_target.loc[i, 'country'] + '_mobility')
        base_url = ('https://www.facebook.com/geoinsights-portal/downloads/'
                    'vector/?id=' + str(dataset_id) + '&ds=')
        earliest_date = datetime(int(data_target.loc[i, 'year']),
                                 int(data_target.loc[i, 'month']),
                                 int(data_target.loc[i, 'day']),
                                 int(data_target.loc[i, 'hour']))
        data_dates = get_file_dates(earliest_date)
        if update:
            # Keep only dates newer than the latest file already downloaded.
            data_dates = list(
                compress(data_dates,
                         [x > get_update_date(country_output)
                          for x in data_dates]))
            if len(data_dates) == 0:
                sys.exit('No datasets to download. Exiting.')
        urls = get_urls(base_url, data_dates)
        download_data(urls, keys)
        move_most_recent_files(country_output, urls)
    print('Success.')
def main(_args):
    '''
    Download colocation data using stored, encrypted credentials.

    Parameters
    ----------
    _args : list
        Arg list: secret key file, username/password key directory, csv file
        specifying download countries and ids, outdir (last element).

    Returns
    -------
    None.

    '''
    # Load the Fernet secret and the two encrypted credential files.
    with open(_args[1], 'rb') as key_file:
        fernet = Fernet(key_file.read())
    with open(_args[2] + '/username.key', 'rb') as enc_file:
        enc_username = enc_file.read()
    with open(_args[2] + '/password.key', 'rb') as enc_file:
        enc_password = enc_file.read()
    keys = [
        fernet.decrypt(enc_username).decode("utf-8"),
        fernet.decrypt(enc_password).decode("utf-8"),
    ]
    # read target datasets
    data_target = pd.read_csv(_args[3])
    outdir = _args[len(_args) - 1]
    for row, dataset_id in enumerate(data_target['id']):
        base_url = ('https://www.facebook.com/geoinsights-portal/downloads/'
                    'vector/?id=' + str(dataset_id) + '&ds=')
        earliest_date = datetime(int(data_target.loc[row, 'year']),
                                 int(data_target.loc[row, 'month']),
                                 int(data_target.loc[row, 'day']),
                                 int(data_target.loc[row, 'hour']))
        file_dates = get_file_dates(earliest_date)
        urls = get_urls(base_url, file_dates)
        download_data(urls, keys)
        destination = (outdir + "/" +
                       data_target.loc[row, 'country'] + '_mobility')
        move_most_recent_files(destination, urls)
    print('Success.')
def pull_population(outdir, keys, country, dl_variables, update,
                    population_type):
    '''
    Download Facebook Geoinsights population (raster) datasets for a country.

    Parameters
    ----------
    outdir : str
        Output directory.
    keys : list
        user credentials [username, password].
    country : str
        Country name - must match .config file exactly (names with spaces
        must replace ' ' with '_').
    dl_variables : dict
        download specific variables in a dict, 'id' = dataset id,
        'origin' = dataset origin datetime.datetime object.
    update : boolean
        Whether an existing dataset is being updated.
    population_type : str
        Population dataset variant; appended to the country name to form
        the output subdirectory suffix.

    Returns
    -------
    None.

    '''
    country_output = outdir + "/" + country + '_' + population_type
    base_url = 'https://www.facebook.com/geoinsights-portal/downloads/raster/?id=' + str(
        dl_variables['id']) + '&ds='
    earliest_date = dl_variables['origin']
    # Candidate dataset timestamps from the dataset origin onward.
    data_dates = get_file_dates(earliest_date)
    if update:
        # Keep only dates newer than the most recent file already on disk.
        data_dates = list(
            compress(data_dates,
                     [x > get_update_date(country_output) for x in data_dates]))
        if len(data_dates) == 0:
            sys.exit('No datasets to download. Exiting.')
    urls = get_urls(base_url, data_dates)
    # download_data returns the download start time, which the mover uses to
    # pick only files newer than that instant.
    start_time = download_data(urls, keys)
    move_most_recent_files(country_output, urls, start_time)
    remove_empty_files(country_output)
    print('Success.')