def main(_args):
    """Download colocation data interactively.

    Prompts for Facebook Geoinsights credentials and whether this run
    updates an existing dataset, then downloads every dataset listed in
    the target CSV.

    Parameters
    ----------
    _args : list
        Arg list: script name, csv file specifying download countries
        and ids, ..., outdir (last element).

    Returns
    -------
    None.
    """
    # NOTE(review): the credential prompts in the original source were
    # corrupted/redacted ('******'); reconstructed here as interactive
    # username/password prompts feeding `keys`, which download_data()
    # consumes below — confirm against the non-interactive main() variant,
    # where keys == [username, password].
    from getpass import getpass  # local stdlib import: interactive use only

    username = input("Username: ")
    password = getpass("Password: ")
    keys = [username, password]

    update = input("Update datasets? (y/n): ")
    if update == 'y':
        update = True
    elif update == 'n':
        update = False
    else:
        sys.exit('Unknown update input. Choose "y", "n". Exiting.')

    # Read target datasets (one row per country/dataset id).
    data_target = pd.read_csv(_args[1])
    for i, dataset_id in enumerate(data_target['id']):
        # Per-country output directory, rooted at the last CLI argument.
        country_output = (_args[len(_args) - 1] + "/"
                          + data_target.loc[i, 'country'] + '_mobility')
        base_url = ('https://www.facebook.com/geoinsights-portal/downloads/vector/?id='
                    + str(dataset_id) + '&ds=')
        # Earliest file timestamp for this dataset, from the CSV row.
        earliest_date = datetime(int(data_target.loc[i, 'year']),
                                 int(data_target.loc[i, 'month']),
                                 int(data_target.loc[i, 'day']),
                                 int(data_target.loc[i, 'hour']))
        data_dates = get_file_dates(earliest_date)
        if update:
            # Keep only dates newer than the newest file already on disk.
            data_dates = list(compress(
                data_dates,
                [x > get_update_date(country_output) for x in data_dates]))
            # NOTE(review): nesting of this emptiness check was ambiguous in
            # the collapsed original; placed inside `if update:` since only
            # the update filter can empty the list — confirm.
            if len(data_dates) == 0:
                sys.exit('No datasets to download. Exiting.')
        urls = get_urls(base_url, data_dates)
        download_data(urls, keys)
        move_most_recent_files(country_output, urls)
    print('Success.')
def main(_args):
    """Download colocation data using stored, encrypted credentials.

    Parameters
    ----------
    _args : list
        Arg list: secret key file, directory holding username/password
        key files, csv file specifying download countries and ids,
        outdir (last element).

    Returns
    -------
    None.
    """
    # Load the Fernet secret and decrypt the stored credentials.
    with open(_args[1], 'rb') as key_file:
        fernet = Fernet(key_file.read())
    with open(_args[2] + '/username.key', 'rb') as user_file:
        enc_username = user_file.read()
    with open(_args[2] + '/password.key', 'rb') as pass_file:
        enc_password = pass_file.read()
    keys = [
        fernet.decrypt(enc_username).decode("utf-8"),
        fernet.decrypt(enc_password).decode("utf-8"),
    ]

    # Read target datasets (one row per country/dataset id).
    data_target = pd.read_csv(_args[3])
    for row, dataset_id in enumerate(data_target['id']):
        base_url = ('https://www.facebook.com/geoinsights-portal/downloads/vector/?id='
                    + str(dataset_id) + '&ds=')
        # Earliest file timestamp for this dataset, from the CSV row.
        earliest_date = datetime(int(data_target.loc[row, 'year']),
                                 int(data_target.loc[row, 'month']),
                                 int(data_target.loc[row, 'day']),
                                 int(data_target.loc[row, 'hour']))
        file_dates = get_file_dates(earliest_date)
        urls = get_urls(base_url, file_dates)
        download_data(urls, keys)
        destination = (_args[len(_args) - 1] + "/"
                       + data_target.loc[row, 'country'] + '_mobility')
        move_most_recent_files(destination, urls)
    print('Success.')
def pull_mobility(outdir, keys, country, dl_variables, update, movement_type):
    """Download one country's mobility dataset from Geoinsights.

    Parameters
    ----------
    outdir : str
        Output directory.
    keys : list
        User credentials [username, password].
    country : str
        Country name - must match .config file exactly (names with
        spaces must replace ' ' with '_').
    dl_variables : dict
        Download-specific variables: 'id' = dataset id, 'origin' =
        dataset origin datetime.datetime object.
    update : boolean
        Whether an existing dataset is being updated.
    movement_type : str
        Suffix appended to the country output directory name.

    Returns
    -------
    None.
    """
    country_output = outdir + "/" + country + '_' + movement_type
    base_url = ('https://www.facebook.com/geoinsights-portal/downloads/vector/?id='
                + str(dl_variables['id']) + '&ds=')

    data_dates = get_file_dates(dl_variables['origin'])
    if update:
        # Drop any dates not newer than the newest file already on disk.
        data_dates = [d for d in data_dates
                      if d > get_update_date(country_output)]
        # NOTE(review): nesting of this check was ambiguous in the collapsed
        # original; only the update filter can empty the list — confirm.
        if not data_dates:
            sys.exit('No datasets to download. Exiting.')

    urls = get_urls(base_url, data_dates)
    start_time = download_data(urls, keys)
    move_most_recent_files(country_output, urls, start_time)
    remove_empty_files(country_output)
    print('Success.')
def test_get_urls(self):
    """get_urls should produce a list whose elements are URL strings."""
    dates = get_file_dates(self.date1)
    self.assertIsInstance(get_urls('url', dates), list)
    self.assertIsInstance(get_urls('url', dates)[0], str)
def test_get_file_dates(self):
    """get_file_dates should produce a list of datetime objects."""
    result = get_file_dates(self.date1)
    self.assertIsInstance(result, list)
    self.assertIsInstance(result[0], datetime.datetime)