def get_layers(tempdir, minio_edge_access, minio_edge_secret,
               layers=LAYER_FILES,
               minio_lake_access=None, minio_lake_secret=None):
    """Download every configured layer from Minio and yield it once loaded.

    :param tempdir: directory into which the layer files are downloaded
    :param minio_edge_access: access key used for EDGE-classified layers
    :param minio_edge_secret: secret used for EDGE-classified layers
    :param layers: iterable of ``(layer, classification, bucket, minio_prefix)``
        tuples describing what to fetch
    :param minio_lake_access: access key used for every non-EDGE layer
    :param minio_lake_secret: secret used for every non-EDGE layer
    :return: generator of ``(layer, local_path, layer_gdf)`` tuples
    """
    for layer, layer_classification, layer_bucket, layer_minio_prefix in layers:
        local_path = os.path.join(tempdir, layer)

        # EDGE layers use the edge credentials, anything else the lake ones
        use_edge = layer_classification is minio_utils.DataClassification.EDGE
        minio_access = minio_edge_access if use_edge else minio_lake_access
        minio_secret = minio_edge_secret if use_edge else minio_lake_secret

        minio_utils.minio_to_file(
            filename=local_path,
            minio_filename_override=layer_minio_prefix + layer,
            minio_bucket=layer_bucket,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=layer_classification,
        )

        # GeoJSON files are read with geopandas; everything else as plain JSON
        if local_path.endswith(".geojson"):
            layer_gdf = geopandas.read_file(local_path)
        else:
            layer_gdf = pandas.read_json(local_path)

        yield layer, local_path, layer_gdf
def get_data(minio_key, minio_access, minio_secret):
    """Fetch a CSV from the restricted data prefix in Minio as a dataframe.

    :param minio_key: file name, appended to ``DATA_RESTRICTED_PREFIX``
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe parsed from the downloaded CSV
    """
    remote_path = DATA_RESTRICTED_PREFIX + minio_key

    # The CSV is parsed while the temporary file still exists
    with tempfile.NamedTemporaryFile() as scratch_file:
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=remote_path,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )
        return pandas.read_csv(scratch_file.name)
def get_layers(directorate_file_prefix, tempdir, minio_access, minio_secret):
    """Download the service request map layers and yield them with metadata.

    Choropleth layers are fetched under a name prefixed with
    ``directorate_file_prefix``; all other layers keep their plain file name.

    :param directorate_file_prefix: prefix applied to choropleth layer names
    :param tempdir: directory into which the files are downloaded
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: generator of ``(layer_filename, (local_path, layer_gdf,
        is_choropleth, layer_metadata, layer_props))`` tuples
    """

    def _pull(remote_name):
        """Download ``remote_name`` into ``tempdir`` and return its local path."""
        destination = os.path.join(tempdir, remote_name)
        remote_path = (
            f"{service_request_map_layers_to_minio.SERVICE_REQUEST_MAP_PREFIX}"
            f"{remote_name}")
        minio_utils.minio_to_file(
            filename=destination,
            minio_filename_override=remote_path,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )
        return destination

    for layer_filename, layer_props in LAYER_PROPERTIES_TUPLES:
        is_choropleth = layer_filename in service_request_map_layers_to_minio.CHOROPLETH_LAYERS

        # Only choropleth layers are directorate (and time period) specific
        if is_choropleth:
            directorate_layer_filename = f'{directorate_file_prefix}_{layer_filename}'
        else:
            directorate_layer_filename = layer_filename

        local_path = _pull(directorate_layer_filename)
        layer_gdf = geopandas.read_file(local_path)

        # The last entry of the layer properties flags a metadata sidecar file
        *_, has_metadata = layer_props
        layer_metadata = {}
        if has_metadata:
            metadata_filename = os.path.splitext(
                directorate_layer_filename)[0] + ".json"
            metadata_local_path = _pull(metadata_filename)
            with open(metadata_local_path, "r") as metadata_file:
                layer_metadata = json.load(metadata_file)

        yield directorate_layer_filename, (
            local_path, layer_gdf, is_choropleth, layer_metadata, layer_props)
def get_subdistrict_populations(minio_access, minio_secret):
    """Load the subdistrict population CSV from Minio, indexed by subdistrict.

    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe indexed on
        ``epi_map_case_layers_to_minio.SUBDISTRICT_COL``
    """
    remote_path = (epi_map_case_layers_to_minio.DATA_RESTRICTED_PREFIX
                   + SUBDISTRICT_POP_FILE)

    with tempfile.NamedTemporaryFile() as scratch_file:
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=remote_path,
            minio_bucket=epi_map_case_layers_to_minio.MINIO_COVID_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=epi_map_case_layers_to_minio.EDGE_MINIO_CLASSIFICATION,
        )
        raw_df = pandas.read_csv(scratch_file.name)

    return raw_df.set_index(epi_map_case_layers_to_minio.SUBDISTRICT_COL)
def minio_to_df(minio_filename_override, minio_bucket, data_classification, reader="csv"):
    """Pull a file from Minio and load it into a pandas dataframe.

    The process exits with status -1 when ``reader`` is unsupported or when
    the download reports failure.

    :param minio_filename_override: (str) minio override string (prefix and file name)
    :param minio_bucket: (str) minio bucket name
    :param data_classification: minio classification passed on to ``minio_to_file``
    :param reader: (str) ``"csv"`` or ``"parquet"``; selects the pandas reader
    :return: pandas dataframe
    """
    logging.debug("Pulling data from Minio bucket...")

    if reader == "csv":
        file_reader = pd.read_csv
    elif reader == "parquet":
        file_reader = pd.read_parquet
    else:
        logging.error("reader is not 'csv' or 'parquet'")
        sys.exit(-1)

    with tempfile.NamedTemporaryFile() as temp_data_file:
        result = minio_utils.minio_to_file(
            filename=temp_data_file.name,
            minio_filename_override=minio_filename_override,
            minio_bucket=minio_bucket,
            data_classification=data_classification,
        )
        if not result:
            # Fatal: log at ERROR so the reason for the exit is visible at
            # default log levels (it used to be emitted at DEBUG only)
            logging.error("Could not get data from minio bucket")
            sys.exit(-1)

        logging.debug(f"Reading in raw data from '{temp_data_file.name}'...")
        # Parse while the temporary file still exists
        df = file_reader(temp_data_file.name)

    return df
def minio_csv_to_df(minio_filename_override, minio_bucket, minio_key, minio_secret, data_classification):
    """
    function to pull a minio csv file into a pandas dataframe

    The process exits with status -1 when the download reports failure.

    :param minio_filename_override: (str) minio override string (prefix and file name)
    :param minio_bucket: (str) minio bucket name
    :param minio_key: (str) the minio access key
    :param minio_secret: (str) the minio key secret
    :param data_classification: minio classification (edge | lake)
    :return: pandas dataframe
    """
    logging.debug("Pulling data from Minio bucket...")
    with tempfile.NamedTemporaryFile() as temp_data_file:
        result = minio_utils.minio_to_file(
            filename=temp_data_file.name,
            minio_filename_override=minio_filename_override,
            minio_bucket=minio_bucket,
            minio_key=minio_key,
            minio_secret=minio_secret,
            data_classification=data_classification,
        )
        if not result:
            # Fatal: log at ERROR so the reason for the exit is visible at
            # default log levels (it used to be emitted at DEBUG only)
            logging.error("Could not get data from minio bucket")
            sys.exit(-1)

        logging.debug(f"Reading in raw data from '{temp_data_file.name}'...")
        # Parse while the temporary file still exists
        df = pd.read_csv(temp_data_file.name)

    return df
def get_plot_df(minio_key, minio_access, minio_secret):
    """Read the HR org unit status .csv file from Minio.

    :param minio_key: full Minio path of the CSV (used as-is, no prefix added)
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe parsed from the downloaded CSV
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        local_name = scratch_file.name
        minio_utils.minio_to_file(
            filename=local_name,
            minio_filename_override=minio_key,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )
        # Parse while the temporary file still exists
        return pd.read_csv(local_name)
def get_case_data(minio_access, minio_secret):
    """Load the provincial case data CSV from Minio with parsed date columns.

    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe in which the diagnosis, admission and death
        date columns have been converted with ``pandas.to_datetime``
    """
    date_columns = (DATE_DIAGNOSIS_COL, DATE_ADMITTED_COL, DATE_DEATH_COL)
    remote_path = DATA_RESTRICTED_PREFIX + PROV_CASE_DATA_FILENAME

    with tempfile.NamedTemporaryFile() as scratch_file:
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=remote_path,
            minio_bucket=MINIO_COVID_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=EDGE_MINIO_CLASSIFICATION,
        )
        case_data_df = pandas.read_csv(
            scratch_file.name, encoding=PROV_CASE_FILE_ENCODING)

    for date_col in date_columns:
        case_data_df[date_col] = pandas.to_datetime(case_data_df[date_col])

    return case_data_df
def get_mobile_data(minio_access, minio_secret):
    """Load the mobile metrics CSV from Minio as a sorted, indexed dataframe.

    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe indexed on
        ``[TIMESTAMP_COL, HOURLY_METRIC_POLYGON_ID]`` and sorted by that index
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=CT_MOBILE_METRICS,
            minio_bucket=MINIO_COVID_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_EDGE_CLASSIFICATION,
        )
        raw_df = pandas.read_csv(scratch_file.name)

    # The timestamp must be a real datetime before it becomes an index level
    raw_df[TIMESTAMP_COL] = pandas.to_datetime(raw_df[TIMESTAMP_COL])

    indexed_df = raw_df.set_index([TIMESTAMP_COL, HOURLY_METRIC_POLYGON_ID])
    return indexed_df.sort_index()
def _fetch_layer(tempdir, layer_filename_prefix, layer_suffix, apply_prefix,
                 has_metadata, minio_path_prefix, minio_access, minio_secret):
    """Download one layer, plus its optional metadata file, from Minio.

    :param tempdir: directory into which the files are downloaded
    :param layer_filename_prefix: prefix joined to ``layer_suffix`` with ``_``
        when ``apply_prefix`` is truthy
    :param layer_suffix: base layer file name
    :param apply_prefix: whether the layer file name carries the prefix
    :param has_metadata: whether a sibling ``.json`` metadata file is fetched
    :param minio_path_prefix: Minio path prefix under which the files live
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: ``(local_path, layer_gdf, layer_metadata)``; ``layer_metadata``
        is ``{}`` when ``has_metadata`` is falsy
    """

    def _pull(local_target, remote_path):
        """Copy ``remote_path`` from the Minio bucket to ``local_target``."""
        minio_utils.minio_to_file(
            filename=local_target,
            minio_filename_override=remote_path,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )

    if apply_prefix:
        layer_filename = f"{layer_filename_prefix}_{layer_suffix}"
    else:
        layer_filename = layer_suffix

    local_path = os.path.join(tempdir, layer_filename)
    layer_minio_path = f"{minio_path_prefix}{layer_filename}"
    logging.debug(layer_minio_path)

    _pull(local_path, layer_minio_path)
    layer_gdf = geopandas.read_file(local_path)

    # Without a metadata file the caller gets an empty dict
    if not has_metadata:
        return local_path, layer_gdf, {}

    # Getting the layer's metadata
    metadata_filename = os.path.splitext(layer_filename)[0] + ".json"
    metadata_local_path = os.path.join(tempdir, metadata_filename)
    _pull(metadata_local_path, f"{minio_path_prefix}{metadata_filename}")

    with open(metadata_local_path, "r") as metadata_file:
        layer_metadata = json.load(metadata_file)

    return local_path, layer_gdf, layer_metadata
def get_data(minio_key, minio_access, minio_secret):
    """Fetch a CSV from the restricted data prefix in Minio and tidy it up.

    The date column is converted with ``pandas.to_datetime`` and missing
    values in the social network and category columns are replaced with
    ``NA_VALUE``.

    :param minio_key: file name, appended to ``DATA_RESTRICTED_PREFIX``
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe
    """
    with tempfile.NamedTemporaryFile() as temp_datafile:
        minio_utils.minio_to_file(
            filename=temp_datafile.name,
            minio_filename_override=DATA_RESTRICTED_PREFIX + minio_key,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )
        data_df = pandas.read_csv(temp_datafile.name)

    data_df[DATE_COL] = pandas.to_datetime(data_df[DATE_COL])

    # Assign the filled column back instead of `df[col].fillna(inplace=True)`:
    # the chained in-place form warns in pandas 2.x and leaves the dataframe
    # unchanged once Copy-on-Write is enabled.
    for fill_col in (SOCIAL_NETWORK_COL, CATEGORY_COL):
        data_df[fill_col] = data_df[fill_col].fillna(NA_VALUE)

    logging.debug(f"data_df.columns=\n{data_df.columns}")
    logging.debug(
        f"data_df.dtypes=\n{pprint.pformat(data_df.dtypes.to_dict())}")

    return data_df
def get_data_df(filename, minio_access, minio_secret):
    """Pull a CSV from Minio and load it into a pandas dataframe.

    :param filename: Minio path of the CSV, passed as ``minio_filename_override``
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: pandas dataframe parsed from the downloaded CSV
    :raises AssertionError: if ``minio_to_file`` returns a falsy result
    """
    with tempfile.NamedTemporaryFile() as temp_data_file:
        logging.debug("Pulling data from Minio bucket...")
        result = minio_utils.minio_to_file(
            temp_data_file.name,
            BUCKET,
            minio_access, minio_secret,
            CLASSIFICATION,
            minio_filename_override=filename,
        )
        # Explicit check instead of a bare `assert`, which is stripped under
        # `python -O`; the exception type is kept for existing callers.
        if not result:
            raise AssertionError(
                f"Could not get '{filename}' from minio bucket")

        logging.debug(f"Reading in raw data from '{temp_data_file.name}'...")
        # Read by path rather than via the open handle: the handle only sees
        # the data if the download wrote into that exact file in place.
        # NOTE(review): whether minio_to_file replaces the file is not visible
        # here; reading by path works in both cases.
        data_df = pandas.read_csv(temp_data_file.name)

    return data_df
def minio_csv_to_df(minio_filename_override, minio_bucket, minio_key, minio_secret):
    """Pull an ISO-8859-1 encoded csv file from Minio into a pandas dataframe.

    The process exits with status -1 when the download reports failure.

    :param minio_filename_override: (str) minio override string (prefix and file name)
    :param minio_bucket: (str) minio bucket name
    :param minio_key: (str) the minio access key
    :param minio_secret: (str) the minio key secret
    :return: pandas dataframe
    """
    logging.debug("Pulling data from Minio bucket...")
    with tempfile.NamedTemporaryFile() as temp_data_file:
        result = minio_utils.minio_to_file(
            filename=temp_data_file.name,
            minio_filename_override=minio_filename_override,
            minio_bucket=minio_bucket,
            minio_key=minio_key,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )
        if not result:
            # Fatal: log at ERROR so the reason for the exit is visible at
            # default log levels (it used to be emitted at DEBUG only)
            logging.error("Could not get data from minio bucket")
            sys.exit(-1)

        logging.debug(f"Reading in raw data from '{temp_data_file.name}'...")
        # Read by path rather than via the open handle: the handle only sees
        # the data if the download wrote into that exact file in place.
        # NOTE(review): whether minio_to_file replaces the file is not visible
        # here; reading by path works in both cases.
        df = pd.read_csv(temp_data_file.name, engine='c', encoding='ISO-8859-1')

    return df
logging.info("Fetch[ed] current totals from dfs") # get plot files logging.info("Fetch[ing] email plot files") attachments_file_paths_dict = defaultdict(str) attachment_zip = [] with tempfile.TemporaryDirectory() as temp_dir: for plot_level in [ TOP_LEVEL, DEPARTMENT, SUBDISTRICT, STAFF_TYPE, RISK ]: plot_file = f"{OUTFILE_PREFIX}_{plot_level}.png" minio_filename_override = f"{VACC_PLOT_PREFIX}{plot_file}" tmp_file_name = str(pathlib.Path(temp_dir, plot_file)) minio_result = minio_utils.minio_to_file( filename=tmp_file_name, minio_filename_override=minio_filename_override, minio_bucket=COVID_BUCKET, data_classification=EDGE_CLASSIFICATION, ) if not minio_result: logging.debug(f"Could not get data from minio bucket") sys.exit(-1) attachments_file_paths_dict[plot_level] = plot_file attachment_zip.append((plot_file, tmp_file_name)) logging.info("Fetch[ed] email plot files") # set email params logging.info("Load[ing] email template") email_template_path = os.path.join(RESOURCES_PATH, EMAIL_TEMPLATE_FILENAME)