def write_death_lisa_usafacts():
    # Date-suffixed download produced by the USAFacts fetch step
    # (the "_{}" filename pattern is assumed, matching the confirmed-case file)
    month_day = get_month_day()
    gdf = geopandas.read_file(
        os.path.join(repo_root,
                     "download/usafacts_deaths_{}.geojson".format(month_day)))
    gdf.columns = rename_column_usafacts(gdf.columns.tolist())
    cluster = gdf.copy()

    # Build queen contiguity weights and run batch local Moran over all date columns
    counties = pygeoda.geopandas_to_geoda(gdf)
    w = pygeoda.weights.queen(counties)
    select_vars = list(gdf.columns[12:-1])
    int_data = [gdf[c].tolist() for c in select_vars]
    n = len(select_vars)
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i in range(n):
        cluster.iloc[:, i + 12] = lisa.GetClusterIndicators(i)

    # Drop the geometry column and serialize the cluster indicators per county
    death = cluster.iloc[:, :-1].to_dict(orient="records")
    death = {"type": "death", "source": "USAFacts", "features": death}
    with open(
            os.path.join(dir_path, '_working/lisa_county_death_usafacts.json'),
            'w') as fp:
        json.dump(death, fp)
    print("Successfully wrote USAFacts death lisa")
    write_to_s3('lisa_county_death_usafacts.json')
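
# --- Assumed module-level setup (a minimal sketch, not the project's actual code) ---
# The functions in this file reference names defined elsewhere in the repo:
# repo_root, dir_path, the boto3 `s3` resource, and helpers such as
# get_month_day(), rename_column_usafacts() and write_to_s3() (some functions
# reach them through a `util` or `write_lisa_json` module instead). The sketch
# below shows one plausible shape for that setup; the directory layout and the
# helper bodies are assumptions, while the bucket name and the "%-m.%-d" date
# format are taken from the download_file() and strftime() calls in this file.
import os
import json
from datetime import datetime

import boto3
import geopandas
import pygeoda

dir_path = os.path.dirname(os.path.abspath(__file__))
repo_root = os.path.abspath(os.path.join(dir_path, ".."))  # assumed layout
s3 = boto3.resource('s3')


def get_month_day():
    # e.g. "9.14" -- matches the usafacts_confirmed_9.14.geojson naming used below
    return datetime.today().strftime("%-m.%-d")


def write_to_s3(filename):
    # Upload a file from the _working directory to the public atlas bucket
    s3.Bucket('geoda-covid-atlas').upload_file(
        os.path.join(dir_path, '_working/{}'.format(filename)), filename)
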
def write_new_json_state():
    gdf = geopandas.read_file(
        os.path.join(repo_root, "data/states_update.geojson"))
    cluster = gdf.copy()
    states = pygeoda.geopandas_to_geoda(gdf)
    w = pygeoda.weights.queen(states)

    # Date columns (confirmed and 'd'-prefixed death dates) start at index 16;
    # the last three columns are not dates
    select_vars = list(gdf.columns[16:-3])
    int_data = [gdf[c].tolist() for c in select_vars]
    n = len(select_vars)
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i in range(n):
        cluster.iloc[:, i + 16] = lisa.GetClusterIndicators(i)

    # Confirmed: info columns plus confirmed-case dates (everything before the
    # first death column, 'd2020-01-21')
    confirmed = cluster.loc[:, :'d2020-01-21'].iloc[:, :-1]
    confirmed = confirmed.to_dict(orient="records")
    confirmed = {"type": "confirmed", "source": "1P3A", "features": confirmed}
    with open(
            os.path.join(dir_path, '_working/lisa_state_confirmed_1P3A.json'),
            'w') as fp:
        json.dump(confirmed, fp)

    # Death: info columns plus death-date columns, dropping the trailing
    # non-date columns, then strip the 'd' prefix from the date keys only
    death = cluster[list(cluster.iloc[:, 0:16]) +
                    list(cluster.loc[:, "d2020-01-21":])].iloc[:, :-3]
    death = death.to_dict(orient="records")
    death = [{(k[1:] if k.startswith("d20") else k): v
              for k, v in d.items()} for d in death]
    death = {"type": "death", "source": "1P3A", "features": death}
    with open(os.path.join(dir_path, '_working/lisa_state_death_1P3A.json'),
              'w') as fp:
        json.dump(death, fp)
    print("Successfully wrote 1P3A state lisa")
    write_to_s3('lisa_state_confirmed_1P3A.json')
    write_to_s3('lisa_state_death_1P3A.json')
def update_lisa_usafacts(type_):
    # Check if a new file exists
    file = os.path.join(repo_root,
                        "download/usafacts_{}.geojson".format(type_))
    if not os.path.isfile(file):
        print("USAFacts - {}: No Updates!".format(type_))
        return None

    # Download the previously published LISA json and read the new geojson
    type_ = "death" if type_ == "deaths" else type_
    s3.Bucket('geoda-covid-atlas').download_file(
        'lisa_county_{}_usafacts.json'.format(type_),
        os.path.join(dir_path,
                     '_working/lisa_county_{}_usafacts.json'.format(type_)))
    with open(
            os.path.join(
                dir_path,
                '_working/lisa_county_{}_usafacts.json'.format(type_))) as f:
        old_lisa = json.load(f)
    gdf = geopandas.read_file(file)
    gdf.columns = write_lisa_json.rename_column_usafacts(gdf.columns.tolist())

    # Find new dates
    lisa_dates = list(old_lisa["features"][0].keys())
    dates = list(gdf.filter(regex='^[0-9]+', axis=1).columns)
    select_vars = [i for i in dates if i not in lisa_dates]
    if not select_vars:
        print("USAFacts - {}: Already Updated!".format(type_))
        return None

    # Calculate new lisa
    counties = pygeoda.geopandas_to_geoda(gdf)
    w = pygeoda.weights.queen(counties)
    int_data = [gdf[c].tolist() for c in select_vars]
    n = len(select_vars)
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    cluster = gdf.loc[:, ["GEOID"]]
    for i in range(n):
        cluster[select_vars[i]] = lisa.GetClusterIndicators(i)

    # Append the new dates to each county's existing record
    features = []
    for county in old_lisa["features"]:
        geoid = county["GEOID"]
        county.update(
            cluster[cluster["GEOID"] == geoid].to_dict(orient="records")[0])
        features.append(county)
    data = {"type": type_, "source": "USAFacts", "features": features}
    filename = 'lisa_county_{}_usafacts.json'.format(type_)
    with open(os.path.join(dir_path, '_working/{}'.format(filename)),
              'w') as fp:
        json.dump(data, fp)
    write_to_s3(filename)
    print("Update for {} completed!".format(
        datetime.today().strftime("%-m.%-d")))
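
# A minimal sketch of how update_lisa_usafacts() might be driven for both
# USAFacts downloads; the "confirmed"/"deaths" values mirror the filenames
# checked above, but this wrapper itself is an assumption, not existing code.
def update_all_usafacts():
    for type_ in ("confirmed", "deaths"):
        update_lisa_usafacts(type_)
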
def write_confirmed_lisa_usafacts():
    gdf = geopandas.read_file("../download/usafacts_confirmed_9.14.geojson")
    gdf.columns = rename_column_usafacts(gdf.columns.tolist())
    cluster = gdf.copy()
    counties = pygeoda.geopandas_to_geoda(gdf)
    w = pygeoda.weights.queen(counties)
    select_vars = list(gdf.columns[12:-1])
    int_data = [gdf[c].tolist() for c in select_vars]
    n = len(select_vars)
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i in range(n):
        cluster.iloc[:, i + 12] = lisa.GetClusterIndicators(i)
    confirmed = cluster.iloc[:, :-1].to_dict(orient="records")
    confirmed = {
        "type": "confirmed",
        "source": "USAFacts",
        "features": confirmed
    }
    with open('lisa/lisa_county_confirmed_usafacts.json', 'w') as fp:
        json.dump(confirmed, fp)
    print("Successfully wrote USAFacts confirmed lisa")
def county_update_lisa_1P3A():
    # Read in files
    with open('lisa/lisa_county_confirmed_1P3A.json') as f:
        old_lisa_confirmed = json.load(f)
    with open('lisa/lisa_county_death_1P3A.json') as f:
        old_lisa_death = json.load(f)
    gdf = geopandas.read_file("../data/counties_update.geojson")

    # Find new dates: all columns before the first death column; non-date info
    # columns are filtered out below because they already appear in lisa_dates
    lisa_dates = list(old_lisa_confirmed["features"][0].keys())
    dates = list(gdf.loc[:, :'d2020-01-21'].iloc[:, :-1].columns)
    select_vars = [i for i in dates if i not in lisa_dates]
    if not select_vars:
        print("1P3A - County: Already Updated!")
        return None
    # Add the matching death columns ('d' prefix) for the new dates
    select_vars.extend(["d" + i for i in select_vars])

    # Calculate moran
    counties = pygeoda.geopandas_to_geoda(gdf)
    w = pygeoda.weights.queen(counties)
    int_data = [gdf[c].tolist() for c in select_vars]
    n = len(select_vars)
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    cluster = gdf.loc[:, ["GEOID"]]
    for i in range(n):
        cluster[select_vars[i]] = lisa.GetClusterIndicators(i)

    # Update Confirmed LISA json
    confirmed = cluster[["GEOID"] +
                        cluster.filter(regex='^202').columns.tolist()]
    features = []
    for county in old_lisa_confirmed["features"]:
        geoid = county["GEOID"]
        county.update(
            confirmed[confirmed["GEOID"] == geoid].to_dict(
                orient="records")[0])
        features.append(county)
    confirmed = {"type": "confirmed", "source": "1P3A", "features": features}
    with open('lisa/lisa_county_confirmed_1P3A.json', 'w') as fp:
        json.dump(confirmed, fp)

    # Update Death LISA json
    death = cluster[["GEOID"] + cluster.filter(regex='^d').columns.tolist()]
    features = []
    for county in old_lisa_death["features"]:
        geoid = county["GEOID"]
        county.update(
            death[death["GEOID"] == geoid].to_dict(orient="records")[0])
        features.append(county)
    # Strip the 'd' prefix from the death date keys
    features = [{(k[1:] if k.startswith("d20") else k): v
                 for k, v in d.items()} for d in features]
    death = {"type": "death", "source": "1P3A", "features": features}
    with open('lisa/lisa_county_death_1P3A.json', 'w') as fp:
        json.dump(death, fp)
def calculate_lisa(lisa_dic, k, seven_dates):
    '''Calculate LISA cluster indicators for lisa_dic[k] over seven_dates and
    replace the entry with a serializable feature dict.'''
    df = lisa_dic[k]
    counties = pygeoda.geopandas_to_geoda(df)
    w = pygeoda.weights.queen(counties)
    int_data = [df[c].tolist() for c in seven_dates]
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i, col in enumerate(seven_dates):
        df[col] = lisa.GetClusterIndicators(i)
    dic = df.to_dict(orient="records")
    dic = {"type": k, "source": "USAFacts", "features": dic}
    lisa_dic[k] = dic
    print(k + " updated!")
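
# A hypothetical caller for calculate_lisa(): build a dict of GeoDataFrames
# keyed by variable type, then run the batch LISA for each key over the same
# seven dates. The dict keys and this wrapper are assumptions used only to
# illustrate the calling convention; calculate_lisa() itself is from this file.
def run_seven_day_lisa(seven_day, seven_day_adjusted, seven_dates):
    lisa_dic = {
        "7 day average unadjusted": seven_day,
        "7 day average adjusted": seven_day_adjusted,
    }
    for k in list(lisa_dic):
        # each call replaces lisa_dic[k] with a {"type", "source", "features"} dict
        calculate_lisa(lisa_dic, k, seven_dates)
    return lisa_dic
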
def test_batch_moran(self):
    blm = pygeoda.batch_local_moran(self.queen_w, self.data)

    # get results for first variable: Crm_prp
    lms = blm.lisa_values(0)
    self.assertAlmostEqual(lms[0], 0.015431978309803657)
    self.assertAlmostEqual(lms[1], 0.3270633223656033)
    self.assertAlmostEqual(lms[2], 0.021295296214118884)

    pvals = blm.lisa_pvalues(0)
    self.assertAlmostEqual(pvals[0], 0.41399999999999998)
    self.assertAlmostEqual(pvals[1], 0.123)
    self.assertAlmostEqual(pvals[2], 0.001)

    # get results for second variable: Crm_prs
    lms = blm.lisa_values(1)
    self.assertAlmostEqual(lms[0], 0.516120231288079)
    self.assertAlmostEqual(lms[1], 0.818275138495031)
    self.assertAlmostEqual(lms[2], 0.794086559694542)

    pvals = blm.lisa_pvalues(1)
    self.assertAlmostEqual(pvals[0], 0.197000000000000)
    self.assertAlmostEqual(pvals[1], 0.013000000000000)
    self.assertAlmostEqual(pvals[2], 0.023000000000000)
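
# The JSON files written above store GetClusterIndicators() codes per date.
# This helper is only an illustrative sketch for consumers of those files:
# codes 0-4 follow the usual GeoDa local Moran convention, while the labels for
# 5 and 6 (undefined / isolated observations) may differ across pygeoda
# versions, so treat the mapping as an assumption rather than a canonical legend.
LISA_CLUSTER_LABELS = {
    0: "Not significant",
    1: "High-High",
    2: "Low-Low",
    3: "Low-High",
    4: "High-Low",
    5: "Undefined",
    6: "Isolated",
}


def label_clusters(indicators):
    """Translate a list of LISA cluster codes into readable labels."""
    return [LISA_CLUSTER_LABELS.get(code, "Unknown") for code in indicators]
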
def calculate_seven_day_lisa():
    month_day = util.get_month_day()
    gdf = geopandas.read_file(
        "../download/usafacts_confirmed_{}.geojson".format(month_day))
    gdf.columns = util.rename_column_usafacts(gdf.columns.tolist())

    # Select informational columns and calculate the 7-day average for the last
    # 7 days (explicit copies avoid pandas SettingWithCopy warnings)
    seven_day = gdf.iloc[:, 1:13].copy()
    seven_day_adjusted = gdf.iloc[:, 1:13].copy()
    stable_adjusted = gdf.iloc[:, 1:13].copy()
    stable_unadjusted = gdf.iloc[:, 1:13].copy()
    for i in range(-8, -1):
        seven_day[gdf.columns[i]] = (gdf.iloc[:, i] - gdf.iloc[:, i - 7]) / 7
        seven_day_adjusted[gdf.columns[i]] = (
            (gdf.iloc[:, i] - gdf.iloc[:, i - 7]) /
            7) * 100000 / gdf['population']
        stable_unadjusted[gdf.columns[i]] = gdf.iloc[:, i]
        stable_adjusted[
            gdf.columns[i]] = gdf.iloc[:, i] * 100000 / gdf['population']
    seven_day["average"] = seven_day.iloc[:, -7:].mean(axis=1)
    seven_day_adjusted["average"] = seven_day_adjusted.iloc[:, -7:].mean(axis=1)
    stable_unadjusted["average"] = stable_unadjusted.iloc[:, -7:].mean(axis=1)
    stable_adjusted["average"] = stable_adjusted.iloc[:, -7:].mean(axis=1)
    seven_day["geometry"] = gdf["geometry"]
    seven_day_adjusted["geometry"] = gdf["geometry"]
    stable_unadjusted["geometry"] = gdf["geometry"]
    stable_adjusted["geometry"] = gdf["geometry"]

    # Weight parameters for LISA
    counties = pygeoda.geopandas_to_geoda(seven_day)
    w = pygeoda.weights.queen(counties)

    # Unadjusted Seven Day
    select_col = util.get_date(ndays=7)
    int_data = [seven_day[c].tolist() for c in select_col]
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i, col in enumerate(select_col):
        seven_day[col] = lisa.GetClusterIndicators(i)
    seven_day = seven_day.to_dict(orient="records")
    seven_day = {
        "type": "7 day average unadjusted",
        "source": "USAFacts",
        "features": seven_day
    }
    print("7 day average unadjusted")

    # Adjusted Seven Day
    int_data = [seven_day_adjusted[c].tolist() for c in select_col]
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i, col in enumerate(select_col):
        seven_day_adjusted[col] = lisa.GetClusterIndicators(i)
    seven_day_adjusted = seven_day_adjusted.to_dict(orient="records")
    seven_day_adjusted = {
        "type": "7 day average adjusted",
        "source": "USAFacts",
        "features": seven_day_adjusted
    }
    print("7 day average adjusted")

    # Unadjusted Stable
    int_data = [stable_unadjusted[c].tolist() for c in select_col]
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i, col in enumerate(select_col):
        stable_unadjusted[col] = lisa.GetClusterIndicators(i)
    stable_unadjusted = stable_unadjusted.to_dict(orient="records")
    stable_unadjusted = {
        "type": "stable unadjusted",
        "source": "USAFacts",
        "features": stable_unadjusted
    }
    print("stable unadjusted")

    # Adjusted Stable
    int_data = [stable_adjusted[c].tolist() for c in select_col]
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    for i, col in enumerate(select_col):
        stable_adjusted[col] = lisa.GetClusterIndicators(i)
    stable_adjusted = stable_adjusted.to_dict(orient="records")
    stable_adjusted = {
        "type": "stable adjusted",
        "source": "USAFacts",
        "features": stable_adjusted
    }
    print("stable adjusted")

    return seven_day, seven_day_adjusted, stable_unadjusted, stable_adjusted
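
# A minimal sketch of how the four dicts returned by calculate_seven_day_lisa()
# could be persisted, reusing the _working/ + write_to_s3() pattern from the
# other writers in this file. The output filenames and the geometry handling are
# assumptions, not the names or behavior the project actually publishes.
def write_seven_day_lisa():
    results = calculate_seven_day_lisa()
    names = [
        'lisa_county_7day_unadjusted_usafacts.json',
        'lisa_county_7day_adjusted_usafacts.json',
        'lisa_county_stable_unadjusted_usafacts.json',
        'lisa_county_stable_adjusted_usafacts.json',
    ]
    for data, filename in zip(results, names):
        # shapely geometries are not JSON serializable, so drop them here
        # (assumed; the real pipeline may keep geometry and write GeoJSON instead)
        for feature in data["features"]:
            feature.pop("geometry", None)
        with open(os.path.join(dir_path, '_working/{}'.format(filename)),
                  'w') as fp:
            json.dump(data, fp)
        write_to_s3(filename)
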
def state_update_lisa_1P3A():
    # Read in files
    s3.Bucket('geoda-covid-atlas').download_file(
        'lisa_state_confirmed_1P3A.json',
        os.path.join(dir_path, '_working/lisa_state_confirmed_1P3A.json'))
    with open(os.path.join(dir_path,
                           '_working/lisa_state_confirmed_1P3A.json')) as f:
        old_lisa_confirmed = json.load(f)
    s3.Bucket('geoda-covid-atlas').download_file(
        'lisa_state_death_1P3A.json',
        os.path.join(dir_path, '_working/lisa_state_death_1P3A.json'))
    with open(os.path.join(dir_path,
                           '_working/lisa_state_death_1P3A.json')) as f:
        old_lisa_death = json.load(f)
    gdf = geopandas.read_file(
        os.path.join(repo_root, "data/states_update.geojson"))

    # Find new dates (date columns are 10 characters long, e.g. '2020-01-21')
    lisa_dates = list(old_lisa_confirmed["features"][0].keys())
    dates = [col for col in gdf.columns if len(col) == 10]
    select_vars = [i for i in dates if i not in lisa_dates]
    if not select_vars:
        print("1P3A - State: Already Updated!")
        return None
    # Add the matching death columns ('d' prefix) for the new dates
    select_vars.extend(["d" + i for i in select_vars])

    # Calculate moran
    states = pygeoda.geopandas_to_geoda(gdf)
    w = pygeoda.weights.queen(states)
    int_data = [gdf[c].tolist() for c in select_vars]
    n = len(select_vars)
    lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999)
    cluster = gdf.loc[:, ["GEOID"]]
    for i in range(n):
        cluster[select_vars[i]] = lisa.GetClusterIndicators(i)

    # Update Confirmed LISA json
    confirmed = cluster[["GEOID"] +
                        cluster.filter(regex='^202').columns.tolist()]
    features = []
    for state in old_lisa_confirmed["features"]:
        geoid = state["GEOID"]
        state.update(
            confirmed[confirmed["GEOID"] == geoid].to_dict(
                orient="records")[0])
        features.append(state)
    confirmed = {"type": "confirmed", "source": "1P3A", "features": features}
    with open(
            os.path.join(dir_path, '_working/lisa_state_confirmed_1P3A.json'),
            'w') as fp:
        json.dump(confirmed, fp)

    # Update Death LISA json
    death = cluster[["GEOID"] + cluster.filter(regex='^d').columns.tolist()]
    features = []
    for state in old_lisa_death["features"]:
        geoid = state["GEOID"]
        state.update(
            death[death["GEOID"] == geoid].to_dict(orient="records")[0])
        features.append(state)
    # Strip the 'd' prefix from the death date keys
    features = [{(k[1:] if k.startswith("d20") else k): v
                 for k, v in d.items()} for d in features]
    death = {"type": "death", "source": "1P3A", "features": features}
    with open(os.path.join(dir_path, '_working/lisa_state_death_1P3A.json'),
              'w') as fp:
        json.dump(death, fp)
    write_to_s3('lisa_state_confirmed_1P3A.json')
    write_to_s3('lisa_state_death_1P3A.json')