def wrangle_nyc_percentage_daily_change():
    """Return the row-over-row change of the 'New York' column as percent strings.

    Loads the combined county table via ``get_combined_counties()``, reverses
    the row order, and for the 'New York' column computes
    ``diff / current value``.  Missing and infinite ratios become 0.  Each
    ratio is rounded to two decimals and rendered as a whole-number percent
    string such as ``"12%"``.

    Returns:
        pandas.DataFrame: columns ``'New York'`` (percent strings) and
        ``'date'`` (copied from the combined table), indexed like the
        reversed combined table.
    """
    records = get_combined_counties()
    df_confirmed = pd.DataFrame(
        records,
        columns=[
            'Albany', 'Allegany', 'Bronx', 'Broome', 'Cattaraugus', 'Cayuga',
            'Chautauqua', 'Chemung', 'Chenango', 'Clinton', 'Columbia',
            'Cortland', 'Delaware', 'Dutchess', 'Erie', 'Essex', 'Franklin',
            'Fulton', 'Genesee', 'Greene', 'Hamilton', 'Herkimer',
            'Jefferson', 'Kings', 'Lewis', 'Livingston', 'Madison',
            'Manhattan', 'Monroe', 'Montgomery', 'Nassau', 'New York',
            'Niagara', 'Oneida', 'Onondaga', 'Ontario', 'Orange', 'Orleans',
            'Oswego', 'Otsego', 'Putnam', 'Queens', 'Rensselaer', 'Richmond',
            'Rockland', 'Saratoga', 'Schenectady', 'Schoharie', 'Schuyler',
            'Seneca', 'St Lawrence', 'St. Lawrence', 'Steuben', 'Suffolk',
            'Sullivan', 'Tioga', 'Tompkins', 'Ulster', 'Warren',
            'Washington', 'Wayne', 'Westchester', 'Wyoming', 'Yates', 'date',
            'total',
        ])

    # Rows are reversed before differencing; presumably the table is stored
    # newest-first so this yields chronological order -- TODO confirm.
    reversed_rows = df_confirmed.iloc[::-1]

    # Only the 'New York' series is returned, so only it is converted and
    # differenced (the other county columns never influenced the result).
    nyc_counts = reversed_rows['New York'].astype('float64')

    # NOTE(review): the denominator is the current row's count, not the
    # previous row's, so this is not the textbook percentage change.
    # Kept as-is because consumers may depend on it -- confirm intent.
    ratios = nyc_counts.diff() / nyc_counts
    ratios = ratios.fillna(0).replace([np.inf, -np.inf], 0)

    # Build a fresh frame instead of assigning into a slice of another frame,
    # which is what triggered SettingWithCopyWarning before.
    df_nyc_percentage_changes = pd.DataFrame(index=reversed_rows.index)
    df_nyc_percentage_changes['New York'] = [
        "{0:.0f}%".format(round(val, 2) * 100) for val in ratios
    ]
    df_nyc_percentage_changes['date'] = list(reversed_rows['date'])
    return df_nyc_percentage_changes
def combine_counties_scraped_and_historical():
    """Append the latest scraped confirmed counts to the combined county table.

    1. Reads the scraped county table (``get_scraped_counties()``) and the
       stored combined table (``get_combined_counties()``).
    2. Pivots the scrape so every county becomes a column, keeps only the
       'Confirmed' row, and stamps it with today's date formatted like
       ``April 08``.
    3. Copies the 'New York City a' figure into the 'New York', 'Queens',
       'Kings', 'Richmond' and 'Bronx' columns and concatenates the row
       below the historical rows.

    Alternative history sources used during development were
    ``get_historic_counties_records()`` (database) and
    ``wrangle_historical_county_df()`` (rebuild from the static data).

    NOTE(review): other definitions with this same name appear later in this
    file; if they share one module, the last definition is the one that is
    actually bound.

    Returns:
        pandas.DataFrame: historical rows followed by the new scraped row.
        'total' is not computed for the scraped row here.
    """
    counties = [
        'Albany', 'Allegany', 'Broome', 'Cattaraugus', 'Cayuga', 'Chautauqua',
        'Chemung', 'Chenango', 'Clinton', 'Columbia', 'Cortland', 'Delaware',
        'Dutchess', 'Erie', 'Essex', 'Franklin', 'Fulton', 'Genesee',
        'Greene', 'Hamilton', 'Herkimer', 'Jefferson', 'Lewis', 'Livingston',
        'Madison', 'Monroe', 'Montgomery', 'Nassau', 'Niagara', 'Oneida',
        'Onondaga', 'Ontario', 'Orange', 'Orleans', 'Oswego', 'Otsego',
        'Putnam', 'Rensselaer', 'Rockland', 'Saratoga', 'Schenectady',
        'Schoharie', 'Schuyler', 'Seneca', 'St Lawrence', 'Steuben',
        'Suffolk', 'Sullivan', 'Tioga', 'Tompkins', 'Ulster', 'Warren',
        'Washington', 'Wayne', 'Westchester', 'Wyoming', 'Yates',
    ]
    nyc_counties = ['New York', 'Queens', 'Kings', 'Richmond', 'Bronx']

    # Scraped table first, then the combined table (same order as before).
    df = pd.DataFrame(
        get_scraped_counties(),
        columns=[
            'County', 'Confirmed', 'Deaths', 'Recoveries', 'Population',
            'Deaths2Confirmed', 'Confirmed2Population', 'lastupdate',
        ])
    df = df.drop(columns=['lastupdate'])

    df_confirmed_historical_T = pd.DataFrame(
        get_combined_counties(),
        columns=counties + ['date'] + nyc_counties + ['total'])

    # Pivot: one row per metric, one column per county; the first transposed
    # row carries the county names and becomes the header.
    df = df.T.reset_index()
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])
    df = df.rename(columns={'County': 'Status'})

    # Plain column assignment with the formatted string: writing strings into
    # an existing datetime column through .loc[:, col] can keep or coerce the
    # old dtype depending on the pandas version.
    # NOTE(review): pytz 'EST' is a fixed-offset zone -- confirm whether
    # 'US/Eastern' was intended.
    df['date'] = datetime.now(tz=pytz.timezone('EST')).strftime('%B %d')
    df['Status'] = df['Status'].str.replace(' ', '')

    # Keep only the confirmed-cases row, drop repeated county columns, and
    # take an explicit copy so the assignments below do not target a slice.
    df_confirmed = df[df['Status'] == 'Confirmed']
    del df
    df_confirmed = df_confirmed.loc[:, ~df_confirmed.columns.duplicated()].copy()

    # 'New York City a' matches the wikipedia scrape header; may need to be
    # changed later.  The single city-wide figure is repeated for each of the
    # five NYC county columns.
    nyc_confirmed = df_confirmed['New York City a']
    for county in nyc_counties:
        df_confirmed[county] = nyc_confirmed

    df_confirmed = pd.concat([df_confirmed_historical_T, df_confirmed])
    return df_confirmed.drop(columns=['Status', 'New York City a'])
def combine_counties_scraped_and_historical():
    """Prepend the latest scraped confirmed counts to the combined county table.

    1. Loads the combined table (``get_combined_counties()``) and the scraped
       table (``get_scraped_counties()``).
    2. Pivots the scrape so every county becomes a column, keeps only the
       'Confirmed' row, converts the counts to integers, and adds a 'total'
       and a date formatted like ``April 08``.
    3. If the scraped 'New York City' count is greater than the 'New York'
       value in the first historical row, returns the new row stacked on top
       of the history together with the current time as a string (so the
       caller can record when the update happened).  Otherwise prints both
       numbers and returns ``(None, None)``.

    ``get_historic_counties_records()`` was an alternative history source
    during development.

    NOTE(review): other definitions with this same name appear in this file;
    if they share one module, the last definition is the one that is bound.

    Returns:
        tuple: ``(DataFrame, str)`` with the combined table and a timestamp
        formatted ``'%B %d, %Y %H:%M'``, or ``(None, None)`` when the scraped
        NYC count is not greater than the stored one.
    """
    # NOTE(review): pytz 'EST' is a fixed-offset zone -- confirm whether
    # 'US/Eastern' was intended.
    eastern = pytz.timezone('EST')

    df_confirmed_historical_T = pd.DataFrame(
        get_combined_counties(),
        columns=[
            'Albany', 'Allegany', 'Bronx', 'Broome', 'Cattaraugus', 'Cayuga',
            'Chautauqua', 'Chemung', 'Chenango', 'Clinton', 'Columbia',
            'Cortland', 'Delaware', 'Dutchess', 'Erie', 'Essex', 'Franklin',
            'Fulton', 'Genesee', 'Greene', 'Hamilton', 'Herkimer',
            'Jefferson', 'Kings', 'Lewis', 'Livingston', 'Madison', 'Monroe',
            'Montgomery', 'Nassau', 'New York', 'Niagara', 'Oneida',
            'Onondaga', 'Ontario', 'Orange', 'Orleans', 'Oswego', 'Otsego',
            'Putnam', 'Queens', 'Rensselaer', 'Richmond', 'Rockland',
            'Saratoga', 'Schenectady', 'Schoharie', 'Schuyler', 'Seneca',
            'St. Lawrence', 'Steuben', 'Suffolk', 'Sullivan', 'Tioga',
            'Tompkins', 'Ulster', 'Warren', 'Washington', 'Wayne',
            'Westchester', 'Wyoming', 'Yates', 'date', 'total',
        ])
    # Positional index so label 0 is the first stored row, which is the one
    # compared against the fresh scrape below.
    df_confirmed_historical_T.reset_index(drop=True, inplace=True)

    # TODO: retrieve column headers from database
    df = pd.DataFrame(
        get_scraped_counties(),
        columns=[
            'County', 'Confirmed', 'Deaths', 'Recoveries', 'Population',
            'lastupdate',
        ])

    # Pivot: one row per metric, one column per county; the first transposed
    # row carries the county names and becomes the header.
    df = df.T.reset_index()
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])
    df = df.rename(columns={'County': 'Status'})

    # Plain column assignment with the formatted string: writing strings into
    # an existing datetime column through .loc[:, col] can keep or coerce the
    # old dtype depending on the pandas version.
    # (Subtract timedelta(days=1) from now() here to stamp yesterday.)
    df['date'] = datetime.now(tz=eastern).strftime('%B %d')
    df['Status'] = df['Status'].str.replace(' ', '')

    # Explicit copy so the assignments below do not target a filtered slice.
    df_confirmed = df[df['Status'] == 'Confirmed'].copy()
    del df

    # Direct column assignment so the counts really become integer columns
    # (an in-place .loc[:, cols] write can leave them as object dtype).
    count_cols = df_confirmed.columns.drop(['Status', 'date'])
    df_confirmed[count_cols] = df_confirmed[count_cols].astype('int')
    df_confirmed['total'] = df_confirmed.drop(
        columns=['date', 'Status']).sum(axis=1)

    last_historical_nyc_num = df_confirmed_historical_T['New York'].loc[0]
    # Exactly one 'Confirmed' row is expected; unpacking enforces that.
    [scraped_nyc_num] = df_confirmed['New York City'].values.T.tolist()

    if not (last_historical_nyc_num < scraped_nyc_num):
        print(
            f'The last confirmed number for nyc is: {last_historical_nyc_num}, and the just scraped confirmed number for nyc is: {scraped_nyc_num}'
        )
        return None, None

    # This update showed a larger nyc number than before: build the new row.
    df_confirmed = df_confirmed.loc[:, ~df_confirmed.columns.duplicated()].copy()

    # Match the wikipedia scrape: the single city-wide figure is repeated for
    # each of the five NYC county columns.
    nyc_confirmed = df_confirmed['New York City']
    for county in ('New York', 'Queens', 'Kings', 'Richmond', 'Bronx'):
        df_confirmed[county] = nyc_confirmed
    df_confirmed = df_confirmed.drop(columns=['Status', 'New York City'])

    # Combine tables: new row first, history after, columns sorted by name.
    df_confirmed = pd.concat(
        [df_confirmed, df_confirmed_historical_T],
        axis=0, ignore_index=True, sort=True)
    del df_confirmed_historical_T

    current_time = datetime.now(tz=eastern).strftime('%B %d, %Y %H:%M')

    # HACK: bandaid solution -- hard-codes Albany's count in the newest row.
    # NOTE(review): 'total' was computed before this override, so it does not
    # reflect the patched value; remove once the upstream number is fixed.
    df_confirmed.at[0, 'Albany'] = 619

    # Tuple of the newly combined dataframe and the update time as a string.
    return df_confirmed, current_time
def combine_counties_scraped_and_historical():
    """Build the updated combined county table from scraped plus stored data.

    1. Reads the scraped county table (``get_scraped_counties()``) and the
       stored combined table (``get_combined_counties()``).
    2. Pivots the scrape so every county becomes a column, keeps only the
       'Confirmed' row, and stamps it with today's date formatted like
       ``April 08``.
    3. Copies the 'New York City a' figure into the 'New York', 'Queens',
       'Kings', 'Richmond' and 'Bronx' columns, stacks the new row on top of
       the stored rows, and recomputes 'total' for every row.

    ``get_historic_counties_records()`` and ``wrangle_historical_county_df()``
    were alternative history sources during development.

    NOTE(review): earlier definitions with this same name appear in this
    file; if they share one module, this later one replaces them.

    Returns:
        pandas.DataFrame: the scraped row followed by the stored rows, with
        'total' set to the row sum of the listed counties plus 'New York'
        (the other four NYC columns are not added to the sum).
    """
    counties = [
        'Albany', 'Allegany', 'Broome', 'Cattaraugus', 'Cayuga', 'Chautauqua',
        'Chemung', 'Chenango', 'Clinton', 'Columbia', 'Cortland', 'Delaware',
        'Dutchess', 'Erie', 'Essex', 'Franklin', 'Fulton', 'Genesee',
        'Greene', 'Hamilton', 'Herkimer', 'Jefferson', 'Lewis', 'Livingston',
        'Madison', 'Monroe', 'Montgomery', 'Nassau', 'Niagara', 'Oneida',
        'Onondaga', 'Ontario', 'Orange', 'Orleans', 'Oswego', 'Otsego',
        'Putnam', 'Rensselaer', 'Rockland', 'Saratoga', 'Schenectady',
        'Schoharie', 'Schuyler', 'Seneca', 'St Lawrence', 'Steuben',
        'Suffolk', 'Sullivan', 'Tioga', 'Tompkins', 'Ulster', 'Warren',
        'Washington', 'Wayne', 'Westchester', 'Wyoming', 'Yates',
    ]
    nyc_counties = ['New York', 'Queens', 'Kings', 'Richmond', 'Bronx']

    # Scraped table first, then the combined table (same order as before).
    df = pd.DataFrame(
        get_scraped_counties(),
        columns=[
            'County', 'Confirmed', 'Deaths', 'Recoveries', 'Population',
            'Deaths2Confirmed', 'Confirmed2Population', 'lastupdate',
        ])
    df = df.drop(columns=['lastupdate'])

    df_confirmed_historical_T = pd.DataFrame(
        get_combined_counties(),
        columns=counties + ['date'] + nyc_counties + ['total'])

    # Pivot: one row per metric, one column per county; the first transposed
    # row carries the county names and becomes the header.
    df = df.T.reset_index()
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])
    df = df.rename(columns={'County': 'Status'})

    # The date label is formatted straight from the current time; no
    # intermediate timestamp column is needed.
    # NOTE(review): pytz 'EST' is a fixed-offset zone -- confirm whether
    # 'US/Eastern' was intended.
    df['date'] = datetime.now(tz=pytz.timezone('EST')).strftime('%B %d')
    df['Status'] = df['Status'].str.replace(' ', '')

    # Explicit copy so the column assignments below do not target a filtered
    # slice of ``df`` (which raised SettingWithCopyWarning).
    df_confirmed = df[df['Status'] == 'Confirmed'].copy()
    del df

    # The single city-wide figure is repeated for each NYC county column.
    nyc_confirmed = df_confirmed['New York City a']
    for county in nyc_counties:
        df_confirmed[county] = nyc_confirmed

    df_confirmed = pd.concat([df_confirmed, df_confirmed_historical_T], axis=0)
    df_confirmed = df_confirmed.drop(['New York City a', 'Status'], axis=1)

    # 'New York' already stands for the whole city, so only it (not the other
    # four NYC columns holding the same number) is added to the total.
    df_confirmed['total'] = df_confirmed[counties + ['New York']].sum(axis=1)
    return df_confirmed
def wrangle_counties_new_daily_cases():
    """Return row-over-row differences of confirmed cases for every county.

    The combined county table is loaded with ``get_combined_counties()``,
    its rows are reversed, and each county column is cast to float and
    differenced against the preceding row.  The result also carries:

    * ``date``    -- the dates of the reversed table,
    * ``total``   -- row sum over the non-NYC counties plus 'New York',
    * ``average`` -- row mean over the same columns.

    Remaining NaN values (e.g. the first differenced row) are set to 0.
    """
    other_counties = [
        'Albany', 'Allegany', 'Broome', 'Cattaraugus', 'Cayuga', 'Chautauqua',
        'Chemung', 'Chenango', 'Clinton', 'Columbia', 'Cortland', 'Delaware',
        'Dutchess', 'Erie', 'Essex', 'Franklin', 'Fulton', 'Genesee',
        'Greene', 'Hamilton', 'Herkimer', 'Jefferson', 'Lewis', 'Livingston',
        'Madison', 'Monroe', 'Montgomery', 'Nassau', 'Niagara', 'Oneida',
        'Onondaga', 'Ontario', 'Orange', 'Orleans', 'Oswego', 'Otsego',
        'Putnam', 'Rensselaer', 'Rockland', 'Saratoga', 'Schenectady',
        'Schoharie', 'Schuyler', 'Seneca', 'St Lawrence', 'Steuben',
        'Suffolk', 'Sullivan', 'Tioga', 'Tompkins', 'Ulster', 'Warren',
        'Washington', 'Wayne', 'Westchester', 'Wyoming', 'Yates',
    ]
    nyc_counties = ['New York', 'Queens', 'Kings', 'Richmond', 'Bronx']
    # 'New York' alone represents the city in the summary columns.
    summary_columns = other_counties + ['New York']

    history = pd.DataFrame(
        get_combined_counties(),
        columns=other_counties + ['date'] + nyc_counties + ['total'])
    flipped = history.iloc[::-1]

    # Difference each county column against the preceding (reversed) row.
    ddiff = flipped[other_counties + nyc_counties].astype('float64').diff()

    ddiff['date'] = list(flipped['date'])
    del history, flipped

    ddiff['total'] = ddiff[summary_columns].sum(axis=1)
    ddiff['average'] = ddiff[summary_columns].mean(axis=1)
    ddiff.fillna(0, inplace=True)
    return ddiff


# def wrangle_counties_for_timeslider():
#     pass