def clean_Twitter_csv(data_path):
    twitter_data = pd.read_csv(data_path)
    # Normalize county spellings so they match the FIPS lookup table
    twitter_data = twitter_data.replace('St. Tammany Parish',
                                        'St Tammany Parish')
    twitter_data = twitter_data.replace('St. Joseph County',
                                        'St Joseph County')
    results_data_list = []
    for no_accounts in [1, 10, 50, 100]:
        for no_tweets in [1, 10, 50, 100, 200, 500]:
            suffix = f'{no_accounts}_accounts_{no_tweets}_tweets '
            thresholded_accounts = threshold_accounts(twitter_data,
                                                      no_accounts, no_tweets)
            if len(thresholded_accounts) < 1:
                continue
            thresholded_accounts = thresholded_accounts.rename(columns={
                'county': 'County',
                'state': 'State'
            })
            fips_map = Geo().get_county_state_to_fips_map(unique_fips=False)
            twitter_data_with_fips = pd.merge(thresholded_accounts,
                                              fips_map,
                                              on=['County', 'State'],
                                              how='left')
            missing = twitter_data_with_fips[
                twitter_data_with_fips.fips_code.isna()][['County', 'State']]
            if len(missing) > 0:
                print("For ", suffix)
                print("Missing the following counties' fips codes")
                print(missing)
            # These are the data columns from the Twitter data file. We prepend
            # the threshold prefix (stored in `suffix`) to each of them, so the
            # output column name records the account/tweet thresholds used.
            columns = ('Mean % low-credibility,Stderr % low-credibility,'
                       'Min % low-credibility,Max % low-credibility,'
                       'No. accounts,No. tweets').split(',')
            output_columns = {c: suffix + c for c in columns}
            twitter_data_to_output = twitter_data_with_fips.rename(
                columns=output_columns)
            data = twitter_data_to_output.rename(columns={'fips_code': 'FIPS'})
            data = data.melt(id_vars=["FIPS", "State", "County"],
                             value_vars=list(output_columns.values()))
            data = data[~data.FIPS.isna()]
            results_data_list += [data]
    return pd.concat(results_data_list, sort=False)
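# `threshold_accounts` is defined elsewhere in this codebase. A minimal sketch
# of the behavior implied by the caller above -- keep accounts with at least
# `no_tweets` tweets, keep counties with at least `no_accounts` such accounts,
# and summarize each county's low-credibility share. This is an assumption,
# not the project's actual helper, and the raw column names (`county`,
# `state`, `no_tweets`, `perc_low_credibility`) are hypothetical.
def _threshold_accounts_sketch(twitter_data, no_accounts, no_tweets):
    """Hypothetical stand-in for threshold_accounts(); illustrative only."""
    active = twitter_data[twitter_data['no_tweets'] >= no_tweets]
    grouped = active.groupby(['county', 'state'])
    summary = grouped['perc_low_credibility'].agg(['mean', 'sem', 'min', 'max'])
    summary = summary.rename(
        columns={
            'mean': 'Mean % low-credibility',
            'sem': 'Stderr % low-credibility',
            'min': 'Min % low-credibility',
            'max': 'Max % low-credibility'
        })
    summary['No. accounts'] = grouped.size()
    summary['No. tweets'] = grouped['no_tweets'].sum()
    summary = summary[summary['No. accounts'] >= no_accounts]
    return summary.reset_index()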
def clean_Twitter_csv_state(data_path, just_state_identified_accounts=False):
    twitter_data = pd.read_csv(data_path)
    if just_state_identified_accounts:
        # Keep only accounts geolocated to a state but not to a county.
        # (The original referenced an undefined `df` here; it should be
        # `twitter_data`.)
        state_data = twitter_data[(twitter_data.county.isna()) |
                                  (twitter_data.county == 'None')].groupby(
                                      'state').apply(
                                          lambda x: get_summary_stats(x))
    else:
        state_data = twitter_data.groupby('state').apply(
            lambda x: get_summary_stats(x))
    state_data = state_data.reset_index()
    state_data = state_data.rename(columns={
        'county': 'County',
        'state': 'State'
    })
    fips_map = Geo().get_state_to_fips_map()
    data = pd.merge(state_data, fips_map, on='State')
    data['County'] = ''
    data = data.melt(id_vars=["FIPS", "State", "County"],
                     value_vars=[
                         'No. accounts',
                         'No. tweets',
                         'Mean % low-credibility',
                         'Stderr % low-credibility',
                         'Min % low-credibility',
                         'Max % low-credibility',
                     ])
    return data
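# `get_summary_stats` is another helper defined elsewhere; judging from the
# value_vars melted above, it reduces one state's accounts to a single summary
# row. A minimal sketch under that assumption (the raw column names
# `perc_low_credibility` and `no_tweets` are hypothetical):
def _get_summary_stats_sketch(group):
    """Hypothetical stand-in for get_summary_stats(); illustrative only."""
    perc = group['perc_low_credibility']
    return pd.Series({
        'No. accounts': len(group),
        'No. tweets': group['no_tweets'].sum(),
        'Mean % low-credibility': perc.mean(),
        'Stderr % low-credibility': perc.sem(),
        'Min % low-credibility': perc.min(),
        'Max % low-credibility': perc.max(),
    })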
def clean_OWID_vaccine_uptake_csv(config, early=False):
    """Clean OWID state-level vaccine uptake data."""
    data_path = os.path.join(config["PATHS"]["STATE_DATA_DIR"],
                             config["FILES"]["OWID_DATA_FILE"])
    cols = [
        "daily_vaccinations_per_million", "people_vaccinated_per_hundred",
        "people_fully_vaccinated_per_hundred", "share_doses_used"
    ]
    # Build the dtype mapping up front; dict.update() returns None, so it
    # cannot be chained inline inside the read_csv() call.
    dtypes = {"location": str, "date": str}
    dtypes.update({c: float for c in cols})
    data = pd.read_csv(data_path,
                       usecols=["date", "location"] + cols,
                       dtype=dtypes)
    data['DateTime'] = pd.to_datetime(data['date'])
    start_date = "UPTAKE_START"
    end_date = "UPTAKE_END"
    if early:
        start_date = "UPTAKE_EARLY_START"
        end_date = "UPTAKE_EARLY_END"
    start = pd.to_datetime(config["DATES"][start_date])
    end = pd.to_datetime(config["DATES"][end_date])
    time_period = data[(data.DateTime >= start)
                       & (data.DateTime <= end)].copy()
    # numeric_only=True keeps the mean from failing on the date columns
    grouped_data = time_period.groupby(
        time_period.location).mean(numeric_only=True).reset_index()
    # Fix NY bug in OWID data
    grouped_data.loc[grouped_data.location == 'New York State',
                     'location'] = 'New York'
    merged_data = pd.merge(grouped_data,
                           Geo().get_state_to_fips_map(),
                           left_on='location',
                           right_on='State')
    merged_data['County'] = ''
    if early:
        old_cols = cols
        merged_data = merged_data.rename(
            columns={col: 'early_' + col for col in old_cols}).copy()
        cols = ['early_' + col for col in old_cols]
    data = merged_data.melt(id_vars=["FIPS", "State", "County"],
                            value_vars=cols)
    return data
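# The date-window keys read above come straight from the config file. A sketch
# of the [DATES] section this function expects (the key names are taken from
# the code above; the values are illustrative placeholders, not the study's
# actual dates):
#
#   [DATES]
#   UPTAKE_START = 2021-04-01
#   UPTAKE_END = 2021-05-01
#   UPTAKE_EARLY_START = 2021-01-01
#   UPTAKE_EARLY_END = 2021-02-01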
def clean_FB_survey_csv(data_path, state_level=False):
    """Clean Facebook survey vaccine-acceptance data (county- or state-level).

    Relies on the module-level `config` object loaded in the script entry
    point below.
    """
    data = pd.read_csv(
        data_path,
        usecols=["geo_value", "time_value", "value", "stderr", "sample_size"],
        dtype={
            "geo_value": str,
            "time_value": str,
            "value": float,
            "stderr": float,
            "sample_size": float  # Must be float or the data doesn't load
        })
    data['DateTime'] = pd.to_datetime(data['time_value'])
    start_date = "REFUSAL_START"
    end_date = "REFUSAL_END"
    start = pd.to_datetime(config["DATES"][start_date])
    end = pd.to_datetime(config["DATES"][end_date])
    data = data[(data.DateTime >= start) & (data.DateTime <= end)].copy()
    data['num_accept'] = data.sample_size * (data.value / 100.0)
    # Sum only the numeric survey columns; summing the full frame would fail
    # on the string/date columns.
    aggregate = data.groupby('geo_value', as_index=False)[[
        'num_accept', 'sample_size'
    ]].sum()
    # Calculate the new variables for the aggregates
    aggregate['mean_smoothed_covid_vaccinated_or_accept'] = (
        aggregate.num_accept / aggregate.sample_size)
    aggregate['stderr_smoothed_covid_vaccinated_or_accept'] = aggregate.apply(
        lambda x: get_stderr(x['num_accept'], x['sample_size']), axis=1)
    if not state_level:
        fips_map = Geo().get_county_state_to_fips_map(unique_fips=True)
        data = pd.merge(aggregate,
                        fips_map,
                        left_on='geo_value',
                        right_on='fips_code')
    else:
        state_abbr = Geo().get_state_to_fips_map()
        data = pd.merge(aggregate,
                        state_abbr,
                        left_on='geo_value',
                        right_on='abbr_lower')
        data['County'] = ''
    data = data.rename(
        columns={
            'fips_code': 'FIPS',
            'num_accept': 'num_smoothed_covid_vaccinated_or_accept',
            'sample_size': 'sample_size_for_covid_vaccinated_or_accept_question'
        })
    # This gives every cleaner a standard output shape:
    # ["FIPS", "State", "County", "variable", "value"]
    data = data.melt(id_vars=["FIPS", "State", "County"],
                     value_vars=[
                         "mean_smoothed_covid_vaccinated_or_accept",
                         "stderr_smoothed_covid_vaccinated_or_accept",
                         "num_smoothed_covid_vaccinated_or_accept",
                         "sample_size_for_covid_vaccinated_or_accept_question"
                     ])
    return data
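# `get_stderr` is defined elsewhere. For a proportion estimated from
# `num_accept` acceptances out of `sample_size` responses, the standard
# binomial standard error is sqrt(p * (1 - p) / n); a minimal sketch under
# the assumption that the project's helper computes exactly that:
import math


def _get_stderr_sketch(num_accept, sample_size):
    """Hypothetical stand-in for get_stderr(); binomial stderr of a proportion."""
    p = num_accept / sample_size
    return math.sqrt(p * (1.0 - p) / sample_size)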
# This script must be run from the repository's `src` directory; the original
# raise was clearly guarded by a working-directory check, reconstructed here.
if os.path.basename(os.getcwd()) != 'src':
    raise Exception(
        "CHANGE CURRENT WORKING DIRECTORY TO THE `src` PATH BEFORE RUNNING!!")

# Load config_file_path from commandline input
args = parse_cl_args()
config_file_path = args.config_file
state_level = args.state_level
keywords_filter = args.keywords_filter

# Get config file object
config = parse_config_file(config_file_path)

# Initialize the Geo class and load lookup tables/dicts
g = Geo()
fip_lookup = g.load_fip_code_lookup()
state_lookup = g.load_state_abbrv_lookup(as_dict=True)

# Get base dirs for county/state-level data and set data file paths
county_data_dir = config["PATHS"]["COUNTY_DATA_DIR"]
state_data_dir = config["PATHS"]["STATE_DATA_DIR"]
covid_data_dir = config["PATHS"]["COVID_DATA_DIR"]
intermediate_data_dir = config["PATHS"]["INTERMEDIATE_DATA_DIR"]

people_file_path = os.path.join(county_data_dir,
                                config["FILES"]["COUNTY_PEOPLE"])
income_file_path = os.path.join(county_data_dir,
                                config["FILES"]["COUNTY_INCOME"])
gini_file_path = os.path.join(county_data_dir,
                              config["FILES"]["COUNTY_GINI"])
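# A sketch of the config sections the script reads above (section and key
# names are taken from the code; the values are illustrative placeholders):
#
#   [PATHS]
#   COUNTY_DATA_DIR = ../data/county
#   STATE_DATA_DIR = ../data/state
#   COVID_DATA_DIR = ../data/covid
#   INTERMEDIATE_DATA_DIR = ../data/intermediate
#
#   [FILES]
#   COUNTY_PEOPLE = people.csv
#   COUNTY_INCOME = income.csv
#   COUNTY_GINI = gini.csv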
    def __init__(self):
        """Set up logging, a shared HTTP session, and geographic lookups."""
        self._logger = logging.getLogger('POI')
        self._session = requests.Session()
        self._geo = Geo()