# Script that agglomerates by radius based on the unified data # Imports all the necesary functions import agglomeration_functions as agg import general_functions as gf # Other imports import os, sys from pathlib import Path import pandas as pd from global_config import config data_dir = config.get_property('data_dir') key_string = config.get_property('key_string') # Method Name method_name = 'radial' # Reads the parameters from excecution location_name = sys.argv[1] # location namme location_folder_name = sys.argv[2] # location folder namme radius = int(sys.argv[3]) # radius # Sets the location location_folder = os.path.join(data_dir, 'data_stages', location_folder_name) # Checks if its encrypted encrypted = gf.is_encrypted(location_folder_name) # Creates the folders if the don't exist
import pandas as pd
import numpy as np
import constants as con
import general_functions as gf
from datetime import timedelta
import locations.colombia_functions as col_fun

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")

# Directories
from global_config import config
data_dir = config.get_property('data_dir')
analysis_dir = config.get_property('analysis_dir')

# Mis-encoded city names (encoding-mangled source data) -> canonical names.
# Built once at module level so the table is not reconstructed on every call.
_NAME_FIXES = {
    'Santaf- de Bogot-': 'Bogotá',
    'Medell-n': 'Medellín',
    'Santiago de Cali': 'Cali',
    'Cartagena de Indias': 'Cartagena',
    'Santa Marta (Dist. Esp.)': 'Santa Marta',
}


def clean_name(s):
    '''
    Normalizes a (possibly mis-encoded) Colombian city name.

    Parameters
    ----------
    s : str
        Raw name as it appears in the source data.

    Returns
    -------
    str
        The name with every known garbled variant replaced by its
        canonical spelling; unknown names pass through unchanged.
    '''
    for bad, good in _NAME_FIXES.items():
        s = s.replace(bad, good)
    # BUG FIX: the visible original mutated the local `s` but never
    # returned it, so callers always received None.
    return s
import os
import numpy as np
import pandas as pd
from shapely import wkt
import geopandas as gpd

# Directories
from global_config import config
data_dir = config.get_property('data_dir')

# Input: GADM level-2 administrative boundaries for Colombia (shapefile).
GADM = os.path.join(data_dir, "data_stages", "colombia", "raw", "geo",
                    "gadm36_COL_shp", "gadm36_COL_2.shp")
# Input: agglomerated polygons (geometry stored as WKT text in a CSV).
polygons = os.path.join(data_dir, "data_stages", "colombia", "agglomerated",
                        "geometry", "polygons.csv")
# Output: population density per GADM level-2 unit.
output_path = os.path.join(data_dir, "data_stages", "colombia", "raw", "geo",
                           "gadm36_COL_shp",
                           "gadm36_COL_2_population_density.csv")

# Loads the boundaries; source data is WGS84, then reprojected so that
# areas can be computed correctly.
gdf_GADM = gpd.read_file(GADM)
gdf_GADM.crs = 'epsg:4326'
gdf_GADM.to_crs("epsg:3410", inplace=True)  # Use equal area projection

# Loads the polygons CSV and parses the WKT geometry column into shapes.
df_polygons = pd.read_csv(polygons)
df_polygons['geometry'] = df_polygons['geometry'].apply(wkt.loads)
gdf_polygons = gpd.GeoDataFrame(df_polygons, geometry='geometry')
gdf_polygons.crs = 'epsg:4326'
gdf_polygons.to_crs("epsg:3410", inplace=True)  # Use equal area projection


def get_population_density(polygon, gdf):
    # NOTE(review): this function is truncated in this chunk — only the first
    # statement is visible; the remainder continues past this view.
    polygons = gdf[['poly_id', 'attr_population', 'attr_area', 'geometry']].copy()
# Encrypts a new file.
#
# Usage: <script> <origin> <destination>
# Reads a CSV or XLSX table from <origin> and writes it encrypted to
# <destination> using the project-wide key.
import sys

import pandas as pd

import general_functions as gf
from global_config import config

key_string = config.get_property('key_string')

# Command-line arguments
origin = sys.argv[1]       # origin
destination = sys.argv[2]  # destination

# Load the source table according to its extension (case-insensitive).
upper_origin = origin.upper()
if upper_origin.endswith('.CSV'):
    df = pd.read_csv(origin)
elif upper_origin.endswith('.XLSX'):
    df = pd.read_excel(origin)
else:
    t = origin.split('.')[-1]
    raise ValueError("Unsupported file extension: " + t)

origin_name = origin.split('/')[-1]
destination_name = destination.split('/')[-1]
print(f"Encrypting File: {origin_name} into {destination_name}")

gf.encrypt_df(df, destination, key_string)

print('Done')
# Script that edits and copies the website core data
import pandas as pd
import shutil
import os

# Directories
from global_config import config
data_dir = config.get_property('data_dir')
analysis_dir = config.get_property('analysis_dir')
reports_dir = config.get_property('report_dir')
website_dir = config.get_property('website_dir')


def save_pandas(df, name, folder):
    '''
    Saves a Pandas DataFrame as CSV (without the index) under the
    website directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export.
    name : str
        Output file name.
    folder : str
        Sub-folder of website_dir where the file is written.
    '''
    final_folder = os.path.join(website_dir, folder)
    # exist_ok=True replaces the original check-then-create
    # (`if not os.path.exists(...)`) which races with concurrent runs.
    os.makedirs(final_folder, exist_ok=True)
    df.to_csv(os.path.join(final_folder, name), index=False)


ident = ' '
print(ident + 'Generating Website Data')
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# NOTE(review): duplicate alias for the same module kept intentionally —
# code elsewhere in this file may refer to either `EC` or `expect`.
from selenium.webdriver.support import expected_conditions as expect

import geo_functions as geo

# Directories
from global_config import config
data_dir = config.get_property('data_dir')
analysis_dir = config.get_property('analysis_dir')

# Facebook user and password
user = config.get_property('fb_user')
pwd = config.get_property('fb_pwd')

# NOTE(review): `os` is used below but not imported in this chunk —
# presumably imported earlier in the file; verify.
data_stages_location = os.path.join(data_dir, 'data_stages')

# Expected column set of a Facebook movement export.
# Set literal instead of set([...]) — no throwaway list is built.
MOVEMENT_COLS = {
    'geometry', 'date_time', 'start_polygon_id', 'start_polygon_name',
    'end_polygon_id', 'end_polygon_name', 'length_km', 'tile_size',
    'country', 'level', 'n_crisis', 'n_baseline', 'n_difference',
    'percent_change', 'is_statistically_significant', 'z_score',
    'start_lat', 'start_lon', 'end_lat', 'end_lon',
    'start_quadkey', 'end_quadkey'
}
# Imports the necessary libraries import pandas as pd import numpy as np import os import sys # Excecution Functions import excecution_functions as ef # Attribute agglomerator from attr_agglomeration_functions import * # Directories from global_config import config data_dir = config.get_property('data_dir') # Data Directory # Ident ident = ' ' # Reads the parameter inputs location_name = sys.argv[1] # location name location_folder_name = sys.argv[2] # location folder name source_agglomeration = sys.argv[ 3] # Type of agglomeration to build upon (source) # Debug #location_name = 'Colombia' #location_folder_name = 'colombia' #source_agglomeration = 'geometry'
# Bogota Extractor # Extracts the cases from the BigQuery Database mantained by Servinformacion # Loads the different libraries import numpy as np import pandas as pd import os, sys import bigquery_functions as bqf import general_functions as gf # Global Directories from global_config import config data_dir = config.get_property('data_dir') analysis_dir = config.get_property('analysis_dir') key_string = config.get_property('key_string') # For Encryption # Starts the BigQuery Client client = bqf.get_client() # Constants location_folder_name = "bogota" # Ident for printing ident = ' ' # Location Folder location_folder = os.path.join(data_dir, 'data_stages', location_folder_name) # Extracts the description df_description = gf.get_description(location_folder_name)
def build_cases_geo(self):
    '''
    Builds the geo-referenced cases DataFrame from the encrypted raw file.

    Reads the encrypted cases file from the raw folder, normalizes the
    patient state, expands it into per-status indicator columns, and
    aggregates case counts by (date_time, geo_id, location, lon, lat).

    Returns
    -------
    pandas.DataFrame
        One row per (date_time, geo_id, location, lon, lat) with summed
        num_cases / num_recovered / num_infected / num_diseased /
        num_infected_in_hospital / num_infected_in_house /
        num_infected_in_icu columns.

    NOTE(review): the *_col column-name constants and `gf`/`config` are
    defined elsewhere in this module — verify their values against this
    location's raw schema.
    '''
    # Reads and decrypts the raw cases file
    cases = gf.decrypt_df(os.path.join(self.raw_folder, 'cases', self.get('cases_file_name')), config.get_property('key_string'))
    # Unparseable dates become NaT (dropped later by dropna)
    cases[symptoms_start_col] = cases[symptoms_start_col].apply(lambda x: pd.to_datetime(x, errors="coerce"))
    # Keeps only the needed columns and renames them to the pipeline schema
    cases = cases[[symptoms_start_col, current_state_col, geo_id_col, geo_name_col, x_coord_col, y_coord_col, location_col]].rename(columns={symptoms_start_col: 'date_time', geo_id_col: 'geo_id', geo_name_col: 'location', x_coord_col: 'lon', y_coord_col: 'lat'})

    # Cleans the state: missing/blank states default to 'Infectado',
    # embedded spaces are stripped, anything outside the three known
    # states (or rows with other missing fields) is dropped.
    cases[current_state_col].fillna('Infectado', inplace=True)
    cases[current_state_col] = cases[current_state_col].apply(lambda s: s.replace(' ', ''))
    cases.loc[cases[current_state_col] == '', current_state_col] = 'Infectado'
    cases = cases[cases[current_state_col].isin(['Recuperado', 'Fallecido', 'Infectado'])].dropna()

    # Discriminates by status: one indicator column per outcome
    cases['num_cases'] = 1
    cases.loc[cases[current_state_col] == 'Recuperado', 'num_recovered'] = 1
    cases.loc[cases[current_state_col] == 'Infectado', 'num_infected'] = 1
    cases.loc[cases[current_state_col] == 'Fallecido', 'num_diseased'] = 1

    # Add in Hospital: active cases split by whether they are hospitalized
    cases.loc[(cases[current_state_col] == 'Infectado') & (cases[location_col].isin(['Hospital UCI', 'Hospital'])), 'num_infected_in_hospital'] = 1
    cases.loc[(cases[current_state_col] == 'Infectado') & (~cases[location_col].isin(['Hospital UCI', 'Hospital'])), 'num_infected_in_house'] = 1

    # Add in ICU (subset of hospitalized)
    cases.loc[(cases[current_state_col] == 'Infectado') & (cases[location_col].isin(['Hospital UCI'])), 'num_infected_in_icu'] = 1

    # Removes temporary columns; unset indicators become 0
    cases = cases.fillna(0).drop([current_state_col, location_col], axis=1)

    # Convert coordinates to numeric (source uses comma decimal separator)
    cases.lon = cases.lon.apply(lambda l: float(str(l).replace(',', '.')))
    cases.lat = cases.lat.apply(lambda l: float(str(l).replace(',', '.')))

    # Groups: sums the indicator columns per place and day
    cases = cases.groupby(['date_time', 'geo_id', 'location', 'lon', 'lat']).sum().reset_index()

    return(cases)