def ingest_to_db(InRaster, run_id, *, model_name, start, included_months, total_months, params, basin):
    """Ingest a multi-band PIHM raster (one band per month) into the DB.

    Writes a Metadata row, one Parameters row per entry in ``params``, and
    then one bulk Output insert per monthly band of ``InRaster``.

    NOTE(review): ``total_months`` is accepted but never used in this body —
    confirm whether that is intentional.
    """
    # Add metadata object to DB
    meta = Metadata(run_id=run_id, model=model_name, raw_output_link=
            f"https://model-service.worldmodelers.com/results/PIHM_results/{run_id}.tif",
            run_label=f"{model_name} run for {basin} Basin.",
            point_resolution_meters=200)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        # 'basin' is the only non-numeric parameter; everything else is a float
        if pp == 'basin':
            p_type = 'string'
        else:
            p_type = 'float'
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
    db_session.commit()

    # iterate over the bands that should be included (1 per month)
    # NOTE(review): range(1, included_months + 2) yields included_months + 1
    # iterations — looks like an off-by-one unless one extra trailing month is
    # deliberate; confirm against the raster's band count.
    for month in range(1, included_months + 2):
        date_ = start + relativedelta(months=month-1)
        date_str = date_.strftime("%m/%d/%Y")
        print(f"Processing {model_name} {date_str}")
        # Convert Raster to GeoPandas
        # NOTE(review): ``m`` is not a parameter of this function — this relies
        # on a module-level ``m`` (model config dict); sibling ingest_to_db
        # variants take it explicitly. Confirm it is defined at module scope.
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster, feature_name, band=month)

        print(f"Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Set run fields: datetime, run_id, model
        gdf['datetime'] = date_
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        # Drop the merge artifacts before the bulk insert; guarded because the
        # same columns may already have been removed.
        if 'geometry' in gdf:
            del(gdf['geometry'])
            del(gdf['index_right'])

        # perform bulk insert of entire geopandas DF
        print(f"Ingesting {date_str} of {model_name} for basin {basin} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
def ingest2db(df_, fsc, params, run_id, model_name):
    """Persist one model run — metadata, parameters, and output rows — to the DB.

    ``df_`` is mutated in place: run bookkeeping columns are appended before
    the rows are bulk-inserted as Output records.
    """
    # Record run-level metadata first so output rows can reference the run.
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
        run_label=f"{model_name} run for {params['shocked_region']} region.",
        point_resolution_meters=100000)
    db_session.add(meta)
    db_session.commit()

    # Every supplied parameter is stored as a string-typed record.
    for name, value in params.items():
        db_session.add(Parameters(run_id=run_id,
                                  model=model_name,
                                  parameter_name=name,
                                  parameter_value=value,
                                  parameter_type="string"))
    db_session.commit()

    # Tag each output row with run/feature columns, then bulk insert.
    feature = fsc['outputs'][0]
    df_['datetime'] = datetime(year=2018, month=1, day=1)
    df_['run_id'] = run_id
    df_['model'] = model_name
    df_['feature_name'] = feature['name']
    df_['feature_description'] = feature['description']
    df_['feature_value'] = df_[feature['name']].apply(int)
    db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
    db_session.commit()
def ingest_to_db(InRaster, run_id, *, model_name, params, m):
    """Ingest a single-band commodity raster into the DB.

    The band is looked up from the module-level ``bands`` table by commodity;
    a Metadata row, one Parameters row per config entry, and a bulk Output
    insert are written.
    """
    # Run-level metadata goes in first.
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=1000000)
    db_session.add(meta)
    db_session.commit()

    # Persist run parameters; year/month are integers, the rest strings.
    print("Storing parameters...")
    for name, value in params.items():
        p_type = 'integer' if name in ('year', 'month') else 'string'
        db_session.add(Parameters(run_id=run_id,
                                  model=model_name,
                                  parameter_name=name,
                                  parameter_value=value,
                                  parameter_type=p_type))
    db_session.commit()

    # Convert the commodity's raster band to point features.
    band = bands[params['commodity']]
    feature = m['outputs'][0]
    feature_name = feature['name']
    feature_description = feature['description']
    gdf = raster2gpd(InRaster, feature_name, band=band)
    print(f"GDF size is {gdf.shape[0]} before rounding lat/lon")
    gdf = gdf.drop_duplicates()
    print(f"GDF size is {gdf.shape[0]} after rounding lat/lon")

    print(f"Performing spatial merge")
    # Spatial join against GADM admin-2 shapes to attach admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Run bookkeeping columns for every output row.
    gdf['datetime'] = datetime(year=params['year'], month=params['month'], day=1)
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    gdf['feature_description'] = feature_description
    # Drop merge artifacts before the insert.
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    # Bulk insert the whole frame as Output records.
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
def batch_create(cls, user_ids):
    """Bulk-insert one User row per id in ``user_ids``.

    Uses a single bulk mapping insert, which is faster than creating and
    adding each User object individually.
    """
    rows = [{"id": uid} for uid in user_ids]
    db_session.bulk_insert_mappings(User, rows)
    db_session.commit()
def ingest_json(class_table):
    """Load test fixture rows for ``class_table`` from JSON into the database.

    Arguments:
        class_table {class} -- sqlalchemy model (DeclarativeMeta); its
            ``__tablename__`` selects the fixture file under tests/data.
    """
    fixture = f"tests/data/{class_table.__tablename__}.json"
    with open(fixture) as json_file:
        records = json.load(json_file)
    # One bulk insert of all fixture rows, then a single commit.
    db_session.bulk_insert_mappings(class_table, [dict(r) for r in records])
    db_session.commit()
def ingest_to_db(InRaster, run_id, *, model_name, m):
    """Ingest an annual multi-band raster (band 1 = 2009 … band 11 = 2019).

    Writes a Metadata row, then one bulk Output insert per year/band.
    """
    # Run-level metadata first.
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=480)
    db_session.add(meta)
    db_session.commit()

    # Feature identity is the same for every band.
    feature_name = m['outputs'][0]['name']
    feature_description = m['outputs'][0]['description']

    # One band per year: band 1 corresponds to 2009.
    for band, year in enumerate(range(2009, 2020), start=1):
        print(f"Processing {model_name} for year {year}")
        # Convert the year's band to point features; 0.0 marks nodata.
        gdf = raster2gpd(InRaster, feature_name, band=band, nodataval=np.float64(0.0))

        print(f"Performing spatial merge")
        # Spatial join against GADM admin-2 shapes to attach admin areas.
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Run bookkeeping columns.
        gdf['datetime'] = datetime(year=year, month=1, day=1)
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        # Drop merge artifacts before the insert.
        if 'geometry' in gdf:
            del gdf['geometry']
            del gdf['index_right']

        # Bulk insert the year's frame as Output records.
        print(f"Ingesting {year} of {model_name} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    """Ingest this run's GeoTIFF output into the database.

    Loads GADM admin-2 shapes, stores run metadata and parameters, converts
    the result tiff to points, spatially joins admin areas, and bulk-inserts
    the rows as Output records.
    """
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.features[self._type]['run_description'],
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    point_resolution_meters=5000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Store every config entry except run_id as a typed parameter row.
    logging.info("Storing parameters...")
    for name, value in self.model_config.items():
        if name == 'run_id':
            continue
        if name == 'bbox':
            kind, value = 'array', json.dumps(value)
        elif name == 'dekad':
            kind, value = 'integer', int(value)
        elif name == 'year':
            kind = 'integer'
        else:
            kind = 'string'
        db_session.add(Parameters(run_id=self.run_id,
                                  model=self.name,
                                  parameter_name=name,
                                  parameter_value=value,
                                  parameter_type=kind))
    db_session.commit()

    # Convert the tiff into point features.
    logging.info("Processing tiff...")
    InRaster = f"{self.result_path}/{self.result_name}.tiff"
    feature_name = self.features[self._type]['feature_name']
    feature_description = self.features[self._type]['feature_description']
    gdf = raster2gpd(InRaster, feature_name)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model.
    # Convert dekad of year to day of year: a dekad is a 10 day period so
    # dekad 25 ends on the 250th day; dekad 01 (days 1-10) yields Jan 1.
    day_offset = (int(self.dekad) * 10) - 11
    gdf['datetime'] = datetime(self.year, 1, 1) + timedelta(day_offset)
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    # Drop merge artifacts before the insert.
    del gdf['geometry']
    del gdf['index_right']

    # Bulk insert the whole frame as Output records.
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
# iterate over the 4 bands for band, years in bands.items(): print(f"Processing {model_name} band {band}") # Convert Raster to GeoPandas InRaster = f"data/{atlas_lookup[model_name]['tif']}" feature_name = atlas_lookup[model_name]['feature_name'] feature_description = atlas_lookup[model_name][ 'feature_description'] gdf = raster2gpd(InRaster, feature_name, band=band) print(f"Performing spatial merge") # Spatial merge on GADM to obtain admin areas gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects') # Iterate over years for each band to ensure that there is continous # annual data for year in years: # Set run fields: datetime, run_id, model gdf['datetime'] = datetime(year=year, month=1, day=1) gdf['run_id'] = run_id gdf['model'] = model_config['name'] gdf['feature_description'] = feature_description if 'geometry' in gdf: del (gdf['geometry']) del (gdf['index_right']) # perform bulk insert of entire geopandas DF print(f"Ingesting {year} of {model_name} to database\n") db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records")) db_session.commit()
def ingest2db(self):
    """Ingest DSSAT pp.csv results into the database.

    Loads GADM admin-2 shapes, stores run metadata and parameters, normalizes
    the points CSV (geometry, harvest date, production), spatially joins admin
    areas, and bulk-inserts one Output frame per feature.

    Fixes vs. previous revision: removed a duplicated "Storing metadata..."
    log call, replaced the `if ...: pass / else:` parameter guard with
    `continue`, and copied the per-feature column slice before mutating it
    (avoids pandas SettingWithCopyWarning / silent chained assignment).
    """
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{self.config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    logging.info("Storing metadata...")
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.descriptions['management_practice'][self.model_config['management_practice']],
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    # 5 arc minutes (~10km)
                    point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB (everything except the run_id itself).
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            continue
        param = Parameters(run_id=self.run_id,
                           model=self.name,
                           parameter_name=param_name,
                           parameter_value=param_val,
                           parameter_type=self.descriptions['parameters'][param_name])
        db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it
    logging.info("Processing points...")
    if self.model_config["management_practice"] == "combined":
        # combined CSV
        path = f"{self.result_path}/out/eth_docker/test/pp.csv"
    else:
        # individual management practices
        m = self.model_config["management_practice"]
        path = f"{self.result_path}/out/eth_docker/test/{m}/pp_{m}.csv"

    df = pd.read_csv(path, index_col=False)
    df['latitude'] = df['LATITUDE']
    df['longitude'] = df['LONGITUDE']
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    # HDAT encodes harvest date as YYYYDDD (year + day of year).
    df['year'] = df['HDAT'].apply(lambda x: int(str(x)[:4]))
    df['days'] = df['HDAT'].apply(lambda x: int(str(x)[4:]))
    df['datetime'] = df.apply(lambda x: datetime(x.year, 1, 1) + timedelta(x.days - 1), axis=1)
    df['run_id'] = self.run_id
    df['model'] = self.name
    df['Production'] = df['HWAH'] * df['HARVEST_AREA']

    # for combined runs only we need to convert the run name to an encoded
    # float so that it can go into the database
    if 'RUN_NAME' in df:
        df['management_practice'] = df['RUN_NAME'].apply(lambda x: self.descriptions['encoding'][x])

    gdf = gpd.GeoDataFrame(df)
    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature, then
    # upload the GDF per feature so rows are added for each feature.
    for feature_name, feature_description in self.descriptions['features'].items():
        # 'management_practice' only exists as a column in "combined" runs.
        if (feature_name == 'management_practice'
                and self.model_config["management_practice"] != "combined"):
            continue
        # .copy() so the column assignments below hit a real frame, not a view
        gdf_ = gdf[base_cols + [feature_name]].copy()
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        logging.info(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(year, df, filename):
    """Ingest one year of flood-index model output into the database.

    Args:
        year: model year; used for the run id and the stored 'year' parameter.
        df: DataFrame of points with latitude/longitude/datetime and one
            column per entry in the module-level ``features`` mapping.
        filename: stem of the raw NetCDF output, interpolated into the
            raw_output_link.

    Fixes vs. previous revision: the raw_output_link contained the literal
    text "(unknown)" instead of interpolating ``filename`` (which was
    otherwise unused); a duplicated "Storing metadata..." print was removed;
    the per-feature column slice is copied before mutation (avoids pandas
    SettingWithCopyWarning).
    """
    model_name = "flood_index_model"
    run_id = gen_run_id(year)
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    print("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    print("Storing metadata...")
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        run_description=f"{model_name} run for {year}",
        # TODO(review): confirm ``filename`` excludes the .nc extension.
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/flood_index_model/{filename}.nc',
        # 0.1 degrees (~10km)
        point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB: only the model year.
    print("Storing parameters...")
    param = Parameters(run_id=run_id,
                       model=model_name,
                       parameter_name="year",
                       parameter_value=year,
                       parameter_type="integer")
    db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it
    print("Processing points...")
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['run_id'] = run_id
    df['model'] = model_name
    gdf = gpd.GeoDataFrame(df)
    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature, then
    # upload the GDF per feature so rows are added for each feature.
    for feature_name, feature_description in features.items():
        # .copy() so the column assignments below hit a real frame, not a view
        gdf_ = gdf[base_cols + [feature_name]].copy()
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        print(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    """Ingest multi_twist scenario timeseries output into the database.

    Stores run metadata and parameters, pivots the output CSV so each column
    is a feature and each row a month, then bulk-inserts one Output frame per
    feature present in the scenario's output.

    Fixes vs. previous revision: removed a duplicated "Storing metadata..."
    log call, replaced the `if ...: pass / else:` parameter guard with
    `continue`, and copied the per-feature column slice before mutating it
    (avoids pandas SettingWithCopyWarning).
    """
    init_db()

    # Add metadata object to DB
    desc = f"{self.name} run for {self.scenario_type} scenario with start year {self.start_year} and end year {self.end_year}"
    if self.scenario_type == 'production_failure_scenario':
        desc += f". Shock severity was set to {self.shock_severity} and the shocked region was {self.shocked_region}."
    logging.info("Storing metadata...")
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_label=f"{self.name}: {self.scenario_type}",
                    run_description=desc,
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}')
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB (everything except the run_id itself).
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            continue
        param = Parameters(
            run_id=self.run_id,
            model=self.name,
            parameter_name=param_name,
            parameter_value=param_val,
            parameter_type=self.descriptions['parameters'][param_name])
        db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it: transpose so the date row becomes a
    # column, promote the first row to headers.
    logging.info("Processing timeseries...")
    df = pd.read_csv(self.output)
    df = df.transpose().reset_index()
    df = df.rename(columns=dict(zip(list(df.columns), list(df.iloc[0]))))[1:]
    df = df.rename(columns={'Unnamed: 0': 'Date'})
    # Date is formatted "MM.YYYY".
    df['datetime'] = df.Date.apply(lambda x: datetime(
        year=int(x.split('.')[1]), month=int(x.split('.')[0]), day=1))
    del df['Date']
    df['run_id'] = self.run_id
    df['model'] = self.name

    base_cols = ['run_id', 'model', 'datetime']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    for feature_name, feature_description in self.descriptions['features'].items():
        # some multi_twist outputs will not be present depending on the
        # scenario type, so first check
        if feature_name not in df:
            continue
        logging.info(f"Storing point data output for {feature_name}...")
        # .copy() so the column assignments below hit a real frame, not a view
        df_ = df[base_cols + [feature_name]].copy()
        df_['feature_name'] = feature_name
        df_['feature_description'] = feature_description.split('.')[0]
        df_['feature_value'] = df_[feature_name]
        df_ = df_[base_cols + feature_cols]

        # perform bulk insert of entire DF
        db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    """Ingest this run's GeoTIFF output into the database.

    Loads GADM admin-2 shapes, stores run metadata and parameters, converts
    the output raster band to points, spatially joins admin areas, and
    bulk-inserts the rows as Output records.

    Fixes vs. previous revision: replaced the `if ...: pass / else:` guard
    with `continue` and collapsed the redundant year/month parameter-type
    branches into one membership test.
    """
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.run_description,
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    point_resolution_meters=1000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB (everything except the run_id itself).
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config['config'].items():
        if param_name == 'run_id':
            continue
        # year and month are numeric; everything else is stored as a string
        param_type = 'integer' if param_name in ('year', 'month') else 'string'
        param = Parameters(run_id=self.run_id,
                           model=self.name,
                           parameter_name=param_name,
                           parameter_value=param_val,
                           parameter_type=param_type)
        db_session.add(param)
    db_session.commit()

    # Process tiff file into point data.
    logging.info("Processing tiff...")
    InRaster = f"{self.install_path}/output/{self.key}"
    feature_name = self.feature_name
    feature_description = self.feature_description
    gdf = raster2gpd(InRaster, feature_name, band=self.band)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model
    gdf['datetime'] = self.start_time
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    # Drop merge artifacts before the insert.
    del gdf['geometry']
    del gdf['index_right']

    # perform bulk insert of entire geopandas DF
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()