# Example no. 1
0
import pandas as pd
import numpy as np
import DataPreprocessing as DPre
import sqlite3
import time
import sys

# Short alias for NaN, kept at module level so any code that refers to `nan`
# (including pandas query strings using `@nan`) keeps working.
nan = np.nan

# init data preprocessing class
covid_data_processing = DPre.CovidDataProcessing()

# data source paths
covid_link = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
population_link = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv'
# Passed as the last argument when opening the population CSV.
population_encoding = 'ISO-8859-1'

# create DFs of interest
print('---Beginning Preprocessing and Data Validation---')

# County-level covid data. The dict requests per-column dtypes; the trailing
# None sits in the slot where the population read passes its encoding
# (presumably "no explicit encoding" -- confirm against DPre.open_csv).
covid_df = covid_data_processing.open_csv(
    'url',
    covid_link,
    {'fips': 'str', 'cases': 'int', 'deaths': 'int'},
    None,
)
covid_df['date'] = pd.to_datetime(covid_df['date'])

# Drop rows that have no county FIPS code.
# The previous filter, `query('fips != @nan')`, never removed anything: NaN
# compares unequal to every value (itself included), so the mask was True for
# every row. dropna() tests for missing values directly.
# NOTE(review): assumes open_csv leaves missing fips values as NaN/NA rather
# than a placeholder string -- confirm against DPre.open_csv.
covid_df.dropna(subset=['fips'], inplace=True)
print(len(covid_df))

# County population estimates; STATE and COUNTY are requested as strings so
# they can be concatenated below.
population_df = covid_data_processing.open_csv(
    'url',
    population_link,
    {'STATE': 'str', 'COUNTY': 'str', 'POPESTIMATE2019': 'int'},
    population_encoding,
)

print('Read covid data')
print('Read population data')

# Create fips column from STATE and COUNTY columns in population df
# (string concatenation of the two code columns).
population_df.loc[:, 'fips'] = population_df['STATE'] + population_df['COUNTY']