def insert_data(self, timeseries, tms_id, fgt, upsert=False):
    """
    Insert a timeseries into the `data` table.

    :param timeseries: list of [time, value] lists
    :param tms_id: hash value identifying the timeseries (event id)
    :param fgt: forecast generated time
    :param upsert: if True, update `value` ON DUPLICATE KEY instead of
        failing on duplicate rows. Default is False.
        Ref: 1) https://stackoverflow.com/a/14383794/1461060
             2) https://chartio.com/resources/tutorials/how-to-insert-if-row-does-not-exist-upsert-in-mysql/
    :return: row count if insertion was successful, else re-raises the
        underlying exception (after rollback)
    """
    # Build [id, time, fgt, value] rows WITHOUT mutating the caller's lists
    # (the original did t.insert(...) in place, corrupting the input), and
    # take exactly [time, value] so over-long rows cannot break executemany.
    new_timeseries = []
    for entry in timeseries:
        if len(entry) > 1:
            new_timeseries.append([tms_id, entry[0], fgt, entry[1]])
        else:
            logger.warning('Invalid timeseries data:: %s', entry)

    row_count = 0
    connection = self.pool.connection()
    try:
        with connection.cursor() as cursor:
            if upsert:
                sql_statement = "INSERT INTO `data` (`id`, `time`, `fgt`, `value`) VALUES (%s, %s, %s, %s) " \
                                "ON DUPLICATE KEY UPDATE `value`=VALUES(`value`)"
            else:
                sql_statement = "INSERT INTO `data` (`id`, `time`, `fgt`, `value`) VALUES (%s, %s, %s, %s)"
            row_count = cursor.executemany(sql_statement, new_timeseries)
        connection.commit()
        return row_count
    except Exception as exception:
        connection.rollback()
        # Use tms_id directly: the original formatted timeseries[0][0], which
        # raises IndexError on an empty input and only equalled the id because
        # the rows had been mutated in place.
        error_message = "Data insertion to data table for tms id {}, upsert={} failed.".format(
            tms_id, upsert)
        logger.error(error_message)
        traceback.print_exc()
        raise exception
    finally:
        if connection is not None:
            connection.close()
def extract_distrubuted_hechms_outputs(output_file_name, output_dir, run_date, run_time):
    """
    Read a HEC-HMS discharge output CSV and push the timeseries to the
    curw_fcst database.

    :param output_file_name: e.g. "DailyDischarge.csv"
    :param output_dir: directory that contains the output file
    :param run_date: model run date, e.g. "2019-05-24"
    :param run_time: model run time, e.g. "00:00:00"

    Expected config.json keys (sample):
    {
      "utc_offset": "", "sim_tag": "hourly_run",
      "model": "HECHMS", "version": "single",
      "unit": "m3/s", "unit_type": "Instantaneous",
      "variable": "Discharge", "station_name": "Hanwella"
    }
    """
    try:
        # Close the config file deterministically (the original leaked the
        # handle via open(...).read()).
        with open('config.json') as config_file:
            config = json.loads(config_file.read())

        # utc offset is optional; fall back to an empty string
        utc_offset = read_attribute_from_config_file('utc_offset', config, False)
        if utc_offset is None:
            utc_offset = ''

        # sim tag
        sim_tag = read_attribute_from_config_file('sim_tag', config, True)
        # source details
        model = read_attribute_from_config_file('model', config, True)
        version = read_attribute_from_config_file('version', config, True)
        # unit details
        unit = read_attribute_from_config_file('unit', config, True)
        unit_type = UnitType.getType(
            read_attribute_from_config_file('unit_type', config, True))
        # variable details
        variable = read_attribute_from_config_file('variable', config, True)
        # station details
        station_name = read_attribute_from_config_file('station_name', config, True)

        out_file_path = os.path.join(output_dir, output_file_name)
        if not os.path.exists(out_file_path):
            msg = 'no file :: {}'.format(out_file_path)
            logger.warning(msg)
            print(msg)
            exit(1)

        # file modification time is taken as the forecast generated time
        fgt = get_file_last_modified_time(out_file_path)
        print("fgt, ", fgt)

        timeseries = read_csv(out_file_path)

        pool = get_Pool(host=CURW_FCST_HOST, port=CURW_FCST_PORT, db=CURW_FCST_DATABASE,
                        user=CURW_FCST_USERNAME, password=CURW_FCST_PASSWORD)

        hechms_stations = get_hechms_stations(pool=pool)
        # single lookup instead of three for the same station entry
        station = hechms_stations.get(station_name)
        station_id = station[0]
        lat = str(station[1])
        lon = str(station[2])

        source_id = get_source_id(pool=pool, model=model, version=version)
        variable_id = get_variable_id(pool=pool, variable=variable)
        unit_id = get_unit_id(pool=pool, unit=unit, unit_type=unit_type)

        tms_meta = {
            'sim_tag': sim_tag,
            'model': model,
            'version': version,
            'variable': variable,
            'unit': unit,
            'unit_type': unit_type.value,
            'latitude': lat,
            'longitude': lon,
            'station_id': station_id,
            'source_id': source_id,
            'variable_id': variable_id,
            'unit_id': unit_id,
        }

        utcOffset = getUTCOffset(utc_offset, default=True)
        if utcOffset != timedelta():
            tms_meta['utcOffset'] = utcOffset

        # Push timeseries to database
        save_forecast_timeseries_to_db(pool=pool, timeseries=timeseries,
                                       run_date=run_date, run_time=run_time,
                                       tms_meta=tms_meta, fgt=fgt)
    except Exception:
        logger.error('JSON config data loading error.')
        print('JSON config data loading error.')
        traceback.print_exc()
    finally:
        logger.info("Process finished.")
        print("Process finished.")
unit = read_attribute_from_config_file('unit', config, True) unit_type = UnitType.getType( read_attribute_from_config_file('unit_type', config, True)) # variable details variable = read_attribute_from_config_file('variable', config, True) # station details station_name = read_attribute_from_config_file('station_name', config, True) out_file_path = os.path.join(output_dir, output_file_name) if not os.path.exists(out_file_path): msg = 'no file :: {}'.format(out_file_path) logger.warning(msg) print(msg) exit(1) fgt = get_file_last_modified_time(out_file_path) print("fgt, ", fgt) timeseries = read_csv(out_file_path) pool = get_Pool(host=CURW_FCST_HOST, port=CURW_FCST_PORT, db=CURW_FCST_DATABASE, user=CURW_FCST_USERNAME, password=CURW_FCST_PASSWORD) hechms_stations = get_hechms_stations(pool=pool)
def extract_distrubuted_hechms_outputs(target_model, db_user, db_pwd, db_host, db_name,
                                       out_file_path, run_date, run_time):
    """
    Read a distributed HEC-HMS discharge output file and push the timeseries
    to the forecast database.

    :param target_model: key into the config's "version_config" mapping
    :param db_user: database user name
    :param db_pwd: database password
    :param db_host: database host
    :param db_name: database name
    :param out_file_path: absolute path of the discharge CSV to extract
    :param run_date: model run date, e.g. "2019-05-24"
    :param run_time: model run time, e.g. "00:00:00"
    :return: {'Result': 'Success'} on success, {'Result': 'Fail'} on error

    Expected config.json keys (sample):
    {
      "utc_offset": "", "sim_tag": "hourly_run",
      "model": "HECHMS", "version_config": {...},
      "unit": "m3/s", "unit_type": "Instantaneous",
      "variable": "Discharge", "station_name": "Hanwella"
    }
    """
    try:
        # Close the config file deterministically (the original leaked the
        # handle via open(...).read()).
        with open('/home/curw/git/distributed_hechms/uploads/config.json') as config_file:
            config = json.loads(config_file.read())

        # utc offset is optional; fall back to an empty string
        utc_offset = read_attribute_from_config_file('utc_offset', config, False)
        if utc_offset is None:
            utc_offset = ''

        # sim tag
        sim_tag = read_attribute_from_config_file('sim_tag', config, True)
        print("extract_distrubuted_hechms_outputs|sim_tag : ", sim_tag)

        # source details: the version is selected per target model
        model = read_attribute_from_config_file('model', config, True)
        print("extract_distrubuted_hechms_outputs|model : ", model)
        version_config = read_attribute_from_config_file('version_config', config, True)
        print("extract_distrubuted_hechms_outputs|version_config : ", version_config)
        version = version_config[target_model]
        print("extract_distrubuted_hechms_outputs|version : ", version)

        # unit details
        unit = read_attribute_from_config_file('unit', config, True)
        print("extract_distrubuted_hechms_outputs|unit : ", unit)
        unit_type = UnitType.getType(
            read_attribute_from_config_file('unit_type', config, True))
        print("extract_distrubuted_hechms_outputs|unit_type : ", unit_type)

        # variable details
        variable = read_attribute_from_config_file('variable', config, True)
        print("extract_distrubuted_hechms_outputs|variable : ", variable)

        # station details
        station_name = read_attribute_from_config_file('station_name', config, True)
        print("extract_distrubuted_hechms_outputs|station_name : ", station_name)

        if not os.path.exists(out_file_path):
            msg = 'no file :: {}'.format(out_file_path)
            logger.warning(msg)
            print(msg)
            exit(1)

        # file modification time is taken as the forecast generated time
        fgt = get_file_last_modified_time(out_file_path)
        print("extract_distrubuted_hechms_outputs|fgt : ", fgt)

        timeseries = read_csv(out_file_path)

        pool = get_Pool(host=db_host, port=3306, db=db_name, user=db_user, password=db_pwd)

        hechms_stations = get_hechms_stations(pool=pool)
        print("extract_distrubuted_hechms_outputs|hechms_stations : ", hechms_stations)
        # single lookup instead of three for the same station entry
        station = hechms_stations.get(station_name)
        station_id = station[0]
        lat = str(station[1])
        lon = str(station[2])
        print("extract_distrubuted_hechms_outputs|[station_id, lat, lon] : ",
              [station_id, lat, lon])

        source_id = get_source_id(pool=pool, model=model, version=version)
        print("extract_distrubuted_hechms_outputs|source_id : ", source_id)
        variable_id = get_variable_id(pool=pool, variable=variable)
        print("extract_distrubuted_hechms_outputs|variable_id : ", variable_id)
        unit_id = get_unit_id(pool=pool, unit=unit, unit_type=unit_type)
        print("extract_distrubuted_hechms_outputs|unit_id : ", unit_id)

        tms_meta = {
            'sim_tag': sim_tag,
            'model': model,
            'version': version,
            'variable': variable,
            'unit': unit,
            'unit_type': unit_type.value,
            'latitude': lat,
            'longitude': lon,
            'station_id': station_id,
            'source_id': source_id,
            'variable_id': variable_id,
            'unit_id': unit_id,
        }
        print("extract_distrubuted_hechms_outputs|tms_meta : ", tms_meta)

        utcOffset = getUTCOffset(utc_offset, default=True)
        if utcOffset != timedelta():
            tms_meta['utcOffset'] = utcOffset

        # Push timeseries to database
        save_forecast_timeseries_to_db(pool=pool, timeseries=timeseries,
                                       run_date=run_date, run_time=run_time,
                                       tms_meta=tms_meta, fgt=fgt)
        return {'Result': 'Success'}
    except Exception:
        logger.error('JSON config data loading error.')
        print('JSON config data loading error.')
        traceback.print_exc()
        return {'Result': 'Fail'}
# NOTE(review): this assignment precedes the function definition below in this
# chunk; it presumably relies on earlier definitions outside the visible range.
CURW_SIM_DATABASE = read_attribute_from_config_file('CURW_SIM_DATABASE', config)


def read_attribute_from_config_file(attribute, config):
    """
    Look up *attribute* in the loaded config dict.

    :param attribute: key name of the config json file
    :param config: loaded json file
    :return: the value when present and non-empty, otherwise None (an error
        is logged in that case)
    """
    # guard clause: missing or empty values are reported and yield None
    if attribute not in config or config[attribute] == "":
        logger.error("{} not specified in config file.".format(attribute))
        return None
    return config[attribute]


def set_db_config_file_path(db_config_file_path):
    """Point the adapter at a new db config file and reload its variables."""
    global DB_CONFIG_FILE_PATH
    DB_CONFIG_FILE_PATH = db_config_file_path
    set_variables()


# Load the adapter variables once at import time; a missing config file is
# tolerated with a warning.
try:
    set_variables()
except FileNotFoundError:
    logger.warning("db_adapter_config.json file does not exists in the calling directory path !!!")
def read_netcdf_file(pool, rainnc_net_cdf_file_path, tms_meta):
    """
    Extract RAINNC rainfall from a WRF netcdf file and push per-grid-point
    timeseries to the database.

    :param pool: database connection pool
    :param rainnc_net_cdf_file_path: path to the RAINNC netcdf file
    :param tms_meta: timeseries metadata dict; 'latitude'/'longitude' are
        overwritten per grid point inside the loop
    :return: True on success, False when the file is missing or reading fails

    Observed units (from the source files):
    rainc_unit_info: mm
    lat_unit_info: degree_north
    time_unit_info: minutes since 2019-04-02T18:00:00
    """
    if not os.path.exists(rainnc_net_cdf_file_path):
        msg = 'no rainnc netcdf :: {}'.format(rainnc_net_cdf_file_path)
        logger.warning(msg)
        # record the failure for the notification email, keyed by timestamp
        email_content[datetime.now().strftime(COMMON_DATE_TIME_FORMAT)] = msg
        return False
    else:
        try:
            """
            RAINNC netcdf data extraction
            """
            # file modification time is used as the forecast generated time
            fgt = get_file_last_modified_time(rainnc_net_cdf_file_path)

            nnc_fid = Dataset(rainnc_net_cdf_file_path, mode='r')

            # e.g. "minutes since 2019-04-02T18:00:00"; index [2] below is the
            # ISO timestamp token
            time_unit_info = nnc_fid.variables['XTIME'].units
            time_unit_info_list = time_unit_info.split(' ')

            lats = nnc_fid.variables['XLAT'][0, :, 0]
            lons = nnc_fid.variables['XLONG'][0, 0, :]

            lon_min = lons[0].item()
            lat_min = lats[0].item()
            lon_max = lons[-1].item()
            lat_max = lats[-1].item()

            # NOTE(review): bounds come from the endpoints of the same arrays,
            # so these masks select every index — presumably a leftover from a
            # configurable-extent version; verify before changing.
            lat_inds = np.where((lats >= lat_min) & (lats <= lat_max))
            lon_inds = np.where((lons >= lon_min) & (lons <= lon_max))

            rainnc = nnc_fid.variables['RAINNC'][:, lat_inds[0], lon_inds[0]]

            times = nnc_fid.variables['XTIME'][:]

            # run window is recorded as [fgt, fgt]
            start_date = fgt
            end_date = fgt

            nnc_fid.close()

            # per-time-slot increments derived from the cumulative RAINNC field
            diff = get_per_time_slot_values(rainnc)

            width = len(lons)
            height = len(lats)

            ts = Timeseries(pool)

            # one timeseries per grid point
            for y in range(height):
                for x in range(width):
                    lat = float('%.6f' % lats[y])
                    lon = float('%.6f' % lons[x])

                    tms_meta['latitude'] = str(lat)
                    tms_meta['longitude'] = str(lon)

                    station_prefix = 'wrf_{}_{}'.format(lat, lon)

                    station_id = wrf_v3_stations.get(station_prefix)

                    # register the grid point as a station on first sight
                    if station_id is None:
                        add_station(pool=pool, name=station_prefix, latitude=lat,
                                    longitude=lon, description="WRF point",
                                    station_type=StationEnum.WRF)
                        station_id = get_station_id(
                            pool=pool, latitude=lat, longitude=lon,
                            station_type=StationEnum.WRF)

                    tms_id = ts.get_timeseries_id_if_exists(tms_meta)

                    # NOTE(review): original indentation was lost; the run
                    # entry is placed inside this branch (registered only for
                    # newly generated timeseries ids) — confirm against the
                    # upstream source.
                    if tms_id is None:
                        tms_id = ts.generate_timeseries_id(tms_meta)

                        run_meta = {
                            'tms_id': tms_id,
                            'sim_tag': tms_meta['sim_tag'],
                            'start_date': start_date,
                            'end_date': end_date,
                            'station_id': station_id,
                            'source_id': tms_meta['source_id'],
                            'unit_id': tms_meta['unit_id'],
                            'variable_id': tms_meta['variable_id']
                        }
                        try:
                            ts.insert_run(run_meta)
                        except Exception:
                            logger.error(
                                "Exception occurred while inserting run entry {}"
                                .format(run_meta))
                            traceback.print_exc()

                    data_list = []
                    # generate timeseries for each station
                    for i in range(len(diff)):
                        # absolute timestamp = base time from the units string
                        # plus the minute offset of slot i+1
                        ts_time = datetime.strptime(
                            time_unit_info_list[2], '%Y-%m-%dT%H:%M:%S') + timedelta(
                                minutes=times[i + 1].item())
                        # convert UTC to Sri Lanka local time
                        t = datetime_utc_to_lk(ts_time, shift_mins=0)
                        data_list.append([
                            tms_id,
                            t.strftime('%Y-%m-%d %H:%M:%S'),
                            fgt,
                            float(diff[i, y, x])
                        ])

                    push_rainfall_to_db(ts=ts, ts_data=data_list, tms_id=tms_id, fgt=fgt)

            return True
        except Exception as e:
            msg = "netcdf file at {} reading error.".format(
                rainnc_net_cdf_file_path)
            logger.error(msg)
            traceback.print_exc()
            email_content[datetime.now().strftime(
                COMMON_DATE_TIME_FORMAT)] = msg
            return False