Example #1
    def test_move_most_recent_files(self):

        # Write a csv to Downloads before the recorded start time; it should be
        # left in place by move_most_recent_files.
        self.data1 = pd.DataFrame({'data': [1, 2, 3, 4, 5]})
        self.data1.to_csv(get_home_dir() + '/Downloads/test1.csv')

        download_start = datetime.now().timestamp()

        # Write a second csv with a Geoinsights-style filename after the start
        # time; this one should be moved and renamed.
        self.data2 = pd.DataFrame({'data': [1, 2, 3, 4, 5]})
        self.data2.to_csv(
            get_home_dir() +
            '/Downloads/Britain Coronavirus Disease Prevention Map Mar 06 2020 Id Id Colocation Map_2020-03-31.csv'
        )

        # Both files exist in Downloads before the move.
        self.assertTrue(os.path.exists(get_home_dir() +
                                       '/Downloads/test1.csv'))
        self.assertTrue(
            os.path.exists(
                get_home_dir() +
                '/Downloads/Britain Coronavirus Disease Prevention Map Mar 06 2020 Id Id Colocation Map_2020-03-31.csv'
            ))

        move_most_recent_files('./tmp1', ['url'], download_start)

        # The older file stays in Downloads; the newer one has been moved to
        # ./tmp1 and renamed.
        self.assertTrue(os.path.exists(get_home_dir() +
                                       '/Downloads/test1.csv'))
        self.assertTrue(os.path.exists('./tmp1/Britain_2020_03_31.csv'))

        # Clean up the file left behind in Downloads.
        os.remove(get_home_dir() + '/Downloads/test1.csv')
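
# The test above relies on two helpers, get_home_dir() and
# move_most_recent_files(), that are not shown in this excerpt. Below is a
# minimal sketch of get_home_dir() under the assumption that it simply returns
# the user's home directory; the real implementation may differ.
def get_home_dir():
    # Assumed behaviour: resolve the current user's home directory.
    return os.path.expanduser('~')
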
def main(_args):
    '''
    Download colocation data.

    Parameters
    ----------
    _args : list
        Arg list: csv file specifying download countries and ids, outdir.
        Username, password and the update flag are entered interactively.

    Returns
    -------
    None.

    '''
    
    username = input("Username: "******"Update datasets? (y/n): ")
    
    if update == 'y':
        update = True
    elif update == 'n':
        update = False
    else:
        sys.exit('Unknown update input. Choose "y", "n". Exiting.')
    
    #read target datasets
    data_target = pd.read_csv(_args[1])
    
    for i, dataset_id in enumerate(data_target['id']):
        
        country_output = _args[-1] + "/" + data_target.loc[i, 'country'] + '_mobility'

        base_url = 'https://www.facebook.com/geoinsights-portal/downloads/vector/?id=' + str(
            dataset_id) + '&ds='

        earliest_date = datetime(int(data_target.loc[i, 'year']),
                                 int(data_target.loc[i, 'month']),
                                 int(data_target.loc[i, 'day']),
                                 int(data_target.loc[i, 'hour']))
        
        data_dates = get_file_dates(earliest_date)
                
        if update:
            data_dates = list(compress(data_dates, [x > get_update_date(country_output) for x in data_dates]))
        
        if len(data_dates) == 0:
            sys.exit('No datasets to download. Exiting.')
            
        urls = get_urls(base_url, data_dates)
        
        download_data(urls, keys)

        move_most_recent_files(country_output, urls)
    
    print('Success.')
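
# Hedged usage sketch (not part of the original source): the interactive main()
# above reads the target-dataset csv from _args[1] and writes to the directory
# given as the last argument, so it could be driven from the command line,
# e.g. `python download_colocation.py target_datasets.csv ./output`.
if __name__ == '__main__':
    main(sys.argv)
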
def main(_args):
    '''
    Download colocation data.

    Parameters
    ----------
    _args : list
        Arg list: secret key file, directory holding the username and password keys, csv file specifying download countries and ids, outdir.

    Returns
    -------
    None.

    '''

    with open(_args[1], 'rb') as file:
        fernet = Fernet(file.read())

    with open(_args[2] + '/username.key', 'rb') as file:
        username = file.read()

    with open(_args[2] + '/password.key', 'rb') as file:
        password = file.read()

    keys = [
        fernet.decrypt(username).decode("utf-8"),
        fernet.decrypt(password).decode("utf-8")
    ]

    #read target datasets
    data_target = pd.read_csv(_args[3])

    for i, dataset_id in enumerate(data_target['id']):

        base_url = 'https://www.facebook.com/geoinsights-portal/downloads/vector/?id=' + str(
            dataset_id) + '&ds='

        earliest_date = datetime(int(data_target.loc[i, 'year']),
                                 int(data_target.loc[i, 'month']),
                                 int(data_target.loc[i, 'day']),
                                 int(data_target.loc[i, 'hour']))

        data_dates = get_file_dates(earliest_date)
        urls = get_urls(base_url, data_dates)

        download_data(urls, keys)

        move_most_recent_files(
            _args[-1] + "/" + data_target.loc[i, 'country'] + '_mobility', urls)

    print('Success.')
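
# Hedged setup sketch (assumed helper, not from the original source): one way
# the secret key file (_args[1]) and the username.key / password.key files
# (_args[2]) consumed by the main() above could be produced with
# cryptography.fernet.
from cryptography.fernet import Fernet

def write_credentials(secret_path, key_dir, username, password):
    # Generate and persist a Fernet secret; its path is later passed as _args[1].
    secret = Fernet.generate_key()
    with open(secret_path, 'wb') as file:
        file.write(secret)

    # Encrypt the credentials into the directory later passed as _args[2].
    fernet = Fernet(secret)
    with open(key_dir + '/username.key', 'wb') as file:
        file.write(fernet.encrypt(username.encode('utf-8')))
    with open(key_dir + '/password.key', 'wb') as file:
        file.write(fernet.encrypt(password.encode('utf-8')))
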
def pull_population(outdir, keys, country, dl_variables, update,
                    population_type):
    '''
    Download population data for a single country.

    Parameters
    ----------
    outdir : str
        Output directory.
    keys : list
        User credentials [username, password].
    country : str
        Country name - must match .config file exactly (names with spaces must replace ' ' with '_').
    dl_variables : dict
        Download-specific variables: 'id' = dataset id, 'origin' = dataset origin datetime.datetime object.
    update : boolean
        Whether an existing dataset is being updated.
    population_type : str
        Population dataset type; appended to the country name to form the output directory name.

    Returns
    -------
    None.

    '''

    country_output = outdir + "/" + country + '_' + population_type

    base_url = 'https://www.facebook.com/geoinsights-portal/downloads/raster/?id=' + str(
        dl_variables['id']) + '&ds='

    earliest_date = dl_variables['origin']

    data_dates = get_file_dates(earliest_date)

    if update:
        data_dates = list(
            compress(data_dates,
                     [x > get_update_date(country_output)
                      for x in data_dates]))

    if len(data_dates) == 0:
        sys.exit('No datasets to download. Exiting.')

    urls = get_urls(base_url, data_dates)

    start_time = download_data(urls, keys)

    move_most_recent_files(country_output, urls, start_time)

    remove_empty_files(country_output)

    print('Success.')
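
# Hedged usage sketch with placeholder values (dataset id, credentials, dates
# and population_type are illustrative only): pull_population() expects the
# dataset id and origin timestamp inside dl_variables, plus decrypted
# credentials.
if __name__ == '__main__':
    pull_population(outdir='./output',
                    keys=['user@example.com', 'example-password'],
                    country='Britain',
                    dl_variables={'id': 123456789,
                                  'origin': datetime(2020, 3, 6, 0)},
                    update=False,
                    population_type='population')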