def set_glob_attr(nc_file_obj, data, metadata):
    """
    Set generic global attributes in netcdf file object
    :param nc_file_obj: NetCDF4 object already opened
    :param data:
    :param metadata:
    :return:
    """
    setattr(nc_file_obj, 'title', metadata['title'])
    setattr(nc_file_obj, 'site_code', metadata['site_code'])
    setattr(nc_file_obj, 'site_name', metadata['site_name'])
    setattr(nc_file_obj, 'instrument_maker', metadata['instrument_maker'])
    setattr(nc_file_obj, 'instrument_model', metadata['instrument_model'])
    setattr(nc_file_obj, 'waverider_type', metadata['waverider_type'])
    setattr(nc_file_obj, 'water_depth', metadata['water_depth'])
    setattr(nc_file_obj, 'water_depth_units', metadata['water_depth_units'])
    setattr(nc_file_obj, 'wmo_id', metadata['wmo_id'])
    setattr(nc_file_obj, 'geospatial_lat_min', metadata['latitude'])
    setattr(nc_file_obj, 'geospatial_lat_max', metadata['latitude'])
    setattr(nc_file_obj, 'geospatial_lon_min', metadata['longitude'])
    setattr(nc_file_obj, 'geospatial_lon_max', metadata['longitude'])
    setattr(nc_file_obj, 'time_coverage_start',
            data.datetime.dt.strftime('%Y-%m-%dT%H:%M:%SZ').values.min())
    setattr(nc_file_obj, 'time_coverage_end',
            data.datetime.dt.strftime('%Y-%m-%dT%H:%M:%SZ').values.max())
    # pd.datetime was removed in pandas 1.x; use the datetime module (assumed imported) instead
    setattr(nc_file_obj, 'date_created',
            datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
    setattr(nc_file_obj, 'local_time_zone', metadata['timezone'])
    setattr(nc_file_obj, 'method', METHOD_COMMENT)
    setattr(nc_file_obj, 'original_filename', metadata['original_filename'])

    github_comment = 'Product created with %s' % get_git_revision_script_url(
        os.path.realpath(__file__))
    nc_file_obj.lineage = '%s %s' % (getattr(nc_file_obj, 'lineage', ''), github_comment)

def set_glob_attr(nc_file_obj, data, metadata):
    """
    Set generic global attributes in netcdf file object
    :param nc_file_obj: NetCDF4 object already opened
    :param data:
    :param metadata:
    :return:
    """
    setattr(nc_file_obj, 'title',
            'Waverider buoys measurements during {deploy} deployment at {sitename}.'.format(
                deploy=metadata['DEPLOYMENT CODE'],
                sitename=metadata['SITE NAME']))
    setattr(nc_file_obj, 'data_collected_readme_url', README_URL)
    setattr(nc_file_obj, 'instrument_maker', metadata['INSTRUMENT MAKE'])
    setattr(nc_file_obj, 'instrument_model', metadata['INSTRUMENT MODEL'])
    setattr(nc_file_obj, 'deployment_code', metadata['DEPLOYMENT CODE'])
    setattr(nc_file_obj, 'site_code', metadata['SITE CODE'])
    setattr(nc_file_obj, 'site_name', metadata['SITE NAME'])
    setattr(nc_file_obj, 'waverider_type', metadata['DATA TYPE'])
    if isinstance(metadata['DEPTH'], str):
        # DEPTH may be given with a unit suffix (e.g. '...m') in the metadata file
        setattr(nc_file_obj, 'water_depth', float(metadata['DEPTH'].strip('m')))
        setattr(nc_file_obj, 'water_depth_units', 'meters')
    setattr(nc_file_obj, 'geospatial_lat_min', metadata['LATITUDE'])
    setattr(nc_file_obj, 'geospatial_lat_max', metadata['LATITUDE'])
    setattr(nc_file_obj, 'geospatial_lon_min', metadata['LONGITUDE'])
    setattr(nc_file_obj, 'geospatial_lon_max', metadata['LONGITUDE'])
    setattr(nc_file_obj, 'time_coverage_start',
            data.datetime.dt.strftime('%Y-%m-%dT%H:%M:%SZ').values.min())
    setattr(nc_file_obj, 'time_coverage_end',
            data.datetime.dt.strftime('%Y-%m-%dT%H:%M:%SZ').values.max())
    # pd.datetime was removed in pandas 1.x; use the datetime module (assumed imported) instead
    setattr(nc_file_obj, 'date_created',
            datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
    setattr(nc_file_obj, 'local_time_zone', metadata['TIMEZONE'])

    github_comment = 'Product created with %s' % get_git_revision_script_url(
        os.path.realpath(__file__))
    nc_file_obj.lineage = '%s %s' % (getattr(nc_file_obj, 'lineage', ''), github_comment)

def set_glob_attr(nc_file_obj, data, metadata, site_info):
    """
    Set generic global attributes in netcdf file object
    :param nc_file_obj: NetCDF4 object already opened
    :param data:
    :param metadata: information of site from metadata file
    :param site_info: information of site from KML
    :return:
    """
    deployment_code = metadata[1]['deployment_code']
    if deployment_code in metadata[0].index:
        # Special case: a corrupted metadata file where the same deployment code
        # is wrongly listed more than once. In this case, we assume the following:
        if len(metadata[0].loc[deployment_code]) > 1:
            setattr(nc_file_obj, 'instrument_maker', "NORTEK")
            setattr(nc_file_obj, 'instrument_model', "1 MHz AWAC")
        else:
            setattr(nc_file_obj, 'instrument_maker', metadata[0].loc[deployment_code]['instrument_maker'])
            setattr(nc_file_obj, 'instrument_model', metadata[0].loc[deployment_code]['instrument_model'])
            if metadata[0].loc[deployment_code]['comment']:
                setattr(nc_file_obj, 'comment', metadata[0].loc[deployment_code]['comment'])
    else:
        # the deployment code is not known in the metadata file
        setattr(nc_file_obj, 'instrument_maker', "NORTEK")
        setattr(nc_file_obj, 'instrument_model', "1 MHz AWAC")

    setattr(nc_file_obj, 'data_collected_readme_url', README_URL)
    setattr(nc_file_obj, 'deployment_code', deployment_code)
    setattr(nc_file_obj, 'site_code', metadata[1]['site_code'])
    setattr(nc_file_obj, 'site_name', metadata[1]['site_name'])
    setattr(nc_file_obj, 'water_depth', metadata[1]['water_depth'])
    setattr(nc_file_obj, 'water_depth_units', 'meters')
    setattr(nc_file_obj, 'geospatial_lat_min', metadata[1]['lat_lon'][0])
    setattr(nc_file_obj, 'geospatial_lat_max', metadata[1]['lat_lon'][0])
    setattr(nc_file_obj, 'geospatial_lon_min', metadata[1]['lat_lon'][1])
    setattr(nc_file_obj, 'geospatial_lon_max', metadata[1]['lat_lon'][1])
    setattr(nc_file_obj, 'time_coverage_start',
            data.datetime.dt.strftime('%Y-%m-%dT%H:%M:%SZ').values.min())
    setattr(nc_file_obj, 'time_coverage_end',
            data.datetime.dt.strftime('%Y-%m-%dT%H:%M:%SZ').values.max())
    # pd.datetime was removed in pandas 1.x; use the datetime module (assumed imported) instead
    setattr(nc_file_obj, 'date_created',
            datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
    setattr(nc_file_obj, 'local_time_zone', metadata[1]['timezone'])
    setattr(nc_file_obj, 'original_data_url', site_info['text_zip_url'])

    github_comment = 'Product created with %s' % get_git_revision_script_url(
        os.path.realpath(__file__))
    nc_file_obj.lineage = '%s %s' % (getattr(nc_file_obj, 'lineage', ''), github_comment)

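
# Usage sketch (illustrative only, not part of the production scripts): the three
# set_glob_attr variants above share one pattern -- write scalar metadata as netCDF
# global attributes, then append a provenance note to any pre-existing 'lineage'
# attribute. The metadata values and the diskless in-memory file below are
# assumptions made for the demo.
def _demo_set_global_attributes():
    import datetime
    from netCDF4 import Dataset

    metadata = {'site_code': 'SITE1', 'site_name': 'Example Site',
                'latitude': -32.1, 'longitude': 115.4}  # made-up values

    with Dataset('demo_gatts.nc', 'w', diskless=True) as nc:
        for attr in ('site_code', 'site_name'):
            setattr(nc, attr, metadata[attr])
        # point measurement: min and max of the bounding box are identical
        nc.geospatial_lat_min = nc.geospatial_lat_max = metadata['latitude']
        nc.geospatial_lon_min = nc.geospatial_lon_max = metadata['longitude']
        nc.date_created = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
        # append to lineage without clobbering anything already there
        nc.lineage = ('%s %s' % (getattr(nc, 'lineage', ''),
                                 'Product created with <script_url>')).strip()
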
def generate_qld_netcdf(resource_id, metadata, output_path):
    """
    generate a netcdf file (wave or current) for a resource_id
    :param resource_id:
    :param metadata:
    :param output_path:
    :return:
    """
    last_mod_date = get_last_modification_date_resource_id(metadata['package_name'], resource_id)
    if last_mod_date is None:
        # creating an epoch date
        last_mod_date = datetime.datetime(1970, 1, 1, 0, 0)

    wave_df = retrieve_json_data(resource_id)
    if wave_df is None:
        logger.error('No valid data to process for resource_id {resource_id}'.format(
            resource_id=resource_id))
        return

    if 'Current Speed' in wave_df.columns.values or 'Current Direction' in wave_df.columns.values:
        logger.info('Processing Current data')
        data_code = 'V'
    else:
        logger.info('Processing Wave data')
        data_code = 'W'

    var_mapping = param_mapping_parser(QLD_WAVE_PARAMETER_MAPPING)
    date_start_str = wave_df.index.strftime('%Y%m%dT%H%M%SZ').values.min()
    date_end_str = wave_df.index.strftime('%Y%m%dT%H%M%SZ').values.max()
    nc_file_name = 'DES-QLD_{data_code}_{date_start}_{deployment_code}_WAVERIDER_FV01_END-{date_end}.nc'.format(
        date_start=date_start_str,
        data_code=data_code,
        deployment_code=metadata['site_name'].replace(' ', '-'),
        date_end=date_end_str)
    nc_file_path = os.path.join(output_path, nc_file_name)
    logger.info('Creating NetCDF {netcdf} from resource_id {resource_id}'.format(
        netcdf=os.path.basename(nc_file_path), resource_id=resource_id))

    with Dataset(nc_file_path, 'w', format='NETCDF4') as nc_file_obj:
        nc_file_obj.createDimension("TIME", wave_df.index.shape[0])
        nc_file_obj.createDimension("station_id_strlen", 30)
        nc_file_obj.createVariable("LATITUDE", "d", fill_value=FILLVALUE)
        nc_file_obj.createVariable("LONGITUDE", "d", fill_value=FILLVALUE)
        nc_file_obj.createVariable("STATION_ID", "S1", ("TIME", "station_id_strlen"))
        nc_file_obj["LATITUDE"][:] = metadata['latitude']
        nc_file_obj["LONGITUDE"][:] = metadata['longitude']
        nc_file_obj["STATION_ID"][:] = [stringtochar(np.array(metadata['site_name'], 'S30'))] * wave_df.shape[0]
        var_time = nc_file_obj.createVariable("TIME", "d", "TIME")

        # add gatts and variable attributes as stored in config files
        generate_netcdf_att(nc_file_obj, NC_ATT_CONFIG, conf_file_point_of_truth=True)

        time_val_dateobj = date2num(wave_df.index.to_pydatetime(), var_time.units, var_time.calendar)
        var_time[:] = time_val_dateobj

        df_varname_ls = list(wave_df.columns.values)
        for df_varname in df_varname_ls:
            df_varname_mapped_equivalent = df_varname
            mapped_varname = var_mapping.loc[df_varname_mapped_equivalent]['VARNAME']

            dtype = wave_df[df_varname].values.dtype
            if dtype == np.dtype('int64'):
                dtype = np.dtype('int16')  # short
            else:
                dtype = np.dtype('f')

            nc_file_obj.createVariable(mapped_varname, dtype, "TIME", fill_value=FILLVALUE)
            set_var_attr(nc_file_obj, var_mapping, mapped_varname, df_varname_mapped_equivalent, dtype)
            setattr(nc_file_obj[mapped_varname], 'coordinates', "TIME LATITUDE LONGITUDE")
            try:
                nc_file_obj[mapped_varname][:] = wave_df[df_varname].values
            except ValueError:
                # leave the variable filled with _FillValue if the values cannot be cast
                pass

        setattr(nc_file_obj, 'operator', metadata['owner'])
        setattr(nc_file_obj, 'title',
                'Delayed mode wave data measured at {site}'.format(site=metadata['site_name']))
        setattr(nc_file_obj, 'site_code', metadata['site_code'])
        setattr(nc_file_obj, 'site_name', metadata['site_name'])
        if not np.isnan(metadata['wmo_id']):
            setattr(nc_file_obj, 'wmo_id', int(metadata['wmo_id']))
        setattr(nc_file_obj, 'geospatial_lat_min', metadata['latitude'])
        setattr(nc_file_obj, 'geospatial_lat_max', metadata['latitude'])
        setattr(nc_file_obj, 'geospatial_lon_min', metadata['longitude'])
        setattr(nc_file_obj, 'geospatial_lon_max', metadata['longitude'])
        setattr(nc_file_obj, 'time_coverage_start',
                wave_df.index.strftime('%Y-%m-%dT%H:%M:%SZ').values.min())
        setattr(nc_file_obj, 'time_coverage_end',
                wave_df.index.strftime('%Y-%m-%dT%H:%M:%SZ').values.max())
        # pd.datetime was removed in pandas 1.x; use the datetime module directly
        setattr(nc_file_obj, 'date_created',
                datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
        data_url = '{base_url_data}{id}&limit={limit}'.format(
            base_url_data=BASE_URL_DATA, id=resource_id, limit=LIMIT_VALUES)
        setattr(nc_file_obj, 'data_original_url', data_url)
        setattr(nc_file_obj, 'glossary',
                'https://www.qld.gov.au/environment/coasts-waterways/beach/waves-glossary')
        setattr(nc_file_obj, 'wave_monitoring_faq',
                'https://www.qld.gov.au/environment/coasts-waterways/beach/waves')
        setattr(nc_file_obj, 'first_deployment_date',
                metadata.first_deployment_date.strftime("%Y-%m-%dT%H:%M:%SZ"))
        setattr(nc_file_obj, 'water_depth', metadata.water_depth)
        setattr(nc_file_obj, 'water_depth_units', 'meters')
        setattr(nc_file_obj, 'site_information_url', metadata.source_url)
        setattr(nc_file_obj, 'owner', metadata.owner)
        setattr(nc_file_obj, 'instrument_model', metadata.instrument_model)
        setattr(nc_file_obj, 'instrument_maker', metadata.instrument_maker)
        setattr(nc_file_obj, 'waverider_type', metadata.waverider_type)

        github_comment = 'Product created with %s' % get_git_revision_script_url(
            os.path.realpath(__file__))
        nc_file_obj.lineage = '%s %s' % (getattr(nc_file_obj, 'lineage', ''), github_comment)

    # save to pickle file the new last downloaded date for future run
    pickle_file = os.path.join(WIP_DIR, 'last_downloaded_date_resource_id.pickle')
    last_downloaded_date_resources = load_pickle_db(pickle_file)
    if not last_downloaded_date_resources:
        last_downloaded_date_resources = dict()
    last_downloaded_date_resources[resource_id] = last_mod_date
    with open(pickle_file, 'wb') as p_write:
        pickle.dump(last_downloaded_date_resources, p_write)

    return nc_file_path

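
# Sketch of the STATION_ID encoding used in generate_qld_netcdf (values are
# illustrative): classic netCDF stores a string per time stamp as a fixed-width
# char array, so the site name is padded to 30 bytes with numpy dtype 'S30',
# split into single characters with netCDF4.stringtochar, then repeated along TIME.
def _demo_station_id_encoding():
    import numpy as np
    from netCDF4 import Dataset, stringtochar

    with Dataset('demo_station_id.nc', 'w', diskless=True) as nc:
        nc.createDimension('TIME', 3)
        nc.createDimension('station_id_strlen', 30)
        station_id = nc.createVariable('STATION_ID', 'S1',
                                       ('TIME', 'station_id_strlen'))
        # pad/encode the name into a (30,) array of single bytes,
        # then write it once per time stamp
        name_chars = stringtochar(np.array('Example Site', 'S30'))
        station_id[:] = [name_chars] * 3
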
def generate_fv02_netcdf(temp_gridded, time_1d_interp, depth_1d_interp, nc_file_list, output_dir):
    """ generate the FV02 temperature gridded product netcdf file """
    output_netcdf_file_path = os.path.join(output_dir,
                                           generate_fv02_filename(time_1d_interp, nc_file_list))

    with Dataset(nc_file_list[0], 'r') as input_netcdf_obj, \
            Dataset(output_netcdf_file_path, "w", format="NETCDF4") as output_netcdf_obj:
        output_netcdf_obj.date_created = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")

        # read gatts from input, add them to output. Some gatts will be overwritten
        input_gatts = input_netcdf_obj.ncattrs()
        gatt_to_dispose = ['author', 'author_email', 'cdm_data_type', 'comment', 'Conventions',
                           'toolbox_input_file', 'toolbox_version', 'file_version',
                           'file_version_quality_control', 'quality_control_set',
                           'quality_control_log', 'CoordSysBuilder_', 'date_created',
                           'netcdf_filename', 'metadata', 'instrument',
                           'instrument_serial_number', 'instrument_nominal_depth',
                           'instrument_nominal_height', 'instrument_sample_interval',
                           'compliance_checker_version', 'compliance_checker_last_updated',
                           'geospatial_vertical_min', 'geospatial_vertical_max', 'keywords',
                           'featureType', 'compliance_checks_passed',
                           'compliance_checker_imos_version',
                           'time_deployment_start_origin', 'time_deployment_end_origin']

        for gatt in input_gatts:
            if gatt not in gatt_to_dispose:
                setattr(output_netcdf_obj, gatt, getattr(input_netcdf_obj, gatt))

        setattr(output_netcdf_obj, 'featureType', "timeSeriesProfile")
        setattr(output_netcdf_obj, 'temporal_resolution', np.float64(temporal_res_in_minutes))
        setattr(output_netcdf_obj, 'vertical_resolution', np.float32(vertical_res_in_metres))
        setattr(output_netcdf_obj, 'history',
                output_netcdf_obj.date_created + " - " + os.path.basename(__file__) + ".")
        setattr(output_netcdf_obj, 'keywords',
                'Temperature regridded, TIME, LATITUDE, LONGITUDE, DEPTH, TEMP')

        nc_file_list, instrument_nominal_depth, instrument_sample_interval, instrument_serial_number = \
            list_instrument_meta(nc_file_list)
        setattr(output_netcdf_obj, 'input_file',
                ", ".join([os.path.basename(x) for x in nc_file_list]))
        setattr(output_netcdf_obj, 'instrument_nominal_depth',
                ", ".join(map(str, instrument_nominal_depth)))
        setattr(output_netcdf_obj, 'instrument_sample_interval',
                ", ".join(map(str, instrument_sample_interval)))
        setattr(output_netcdf_obj, 'instrument_serial_number',
                ", ".join(instrument_serial_number))

        output_netcdf_obj.createDimension("TIME", temp_gridded.shape[1])
        output_netcdf_obj.createDimension("DEPTH", temp_gridded.shape[0])

        var_time = output_netcdf_obj.createVariable("TIME", "d", "TIME")
        var_time.comment = "Time stamp corresponds to the centre of the averaging cell."

        var_depth = output_netcdf_obj.createVariable("DEPTH", "f", "DEPTH")
        var_depth.axis = "Z"
        var_depth[:] = depth_1d_interp

        var_id = output_netcdf_obj.createVariable("TIMESERIESPROFILE", "i", ())
        var_id.long_name = "unique_identifier_for_each_timeseriesprofile_feature_instance_in_this_file"
        var_id.cf_role = "timeseries_id"
        var_id[:] = 1

        var_lat = output_netcdf_obj.createVariable("LATITUDE", "d", ())
        var_lon = output_netcdf_obj.createVariable("LONGITUDE", "d", ())
        var_lat[:] = input_netcdf_obj['LATITUDE'][:]
        var_lon[:] = input_netcdf_obj['LONGITUDE'][:]

        var_temp = output_netcdf_obj.createVariable(
            "TEMP", "f", ("TIME", "DEPTH"),
            fill_value=get_imos_parameter_info('TEMP', '_FillValue'),
            zlib=True, complevel=1, shuffle=True,
            chunksizes=(temp_gridded.shape[1], temp_gridded.shape[0]))
        var_temp.coordinates = "TIME LATITUDE LONGITUDE DEPTH"
        var_temp[:] = np.transpose(temp_gridded)

        # add gatts and variable attributes as stored in config files
        conf_file_generic = os.path.join(os.path.dirname(__file__), 'generate_nc_file_att')
        generate_netcdf_att(output_netcdf_obj, conf_file_generic, conf_file_point_of_truth=True)

        def add_var_att_from_input_nc_to_output_nc(var):
            input_var_object = input_netcdf_obj[var]
            input_var_list_att = input_var_object.ncattrs()
            var_att_disposable = ['name', '_FillValue', 'ancillary_variables',
                                  'ChunkSize', 'coordinates', 'comment', 'quality_control_set']
            for var_att in [att for att in input_var_list_att if att not in var_att_disposable]:
                setattr(output_netcdf_obj[var], var_att, getattr(input_netcdf_obj[var], var_att))

        add_var_att_from_input_nc_to_output_nc('TIME')
        add_var_att_from_input_nc_to_output_nc('LATITUDE')
        add_var_att_from_input_nc_to_output_nc('LONGITUDE')
        add_var_att_from_input_nc_to_output_nc('TEMP')

        if 'DEPTH' in input_netcdf_obj.variables:
            add_var_att_from_input_nc_to_output_nc('DEPTH')
        else:
            var_depth.standard_name = "depth"
            var_depth.long_name = "depth"
            var_depth.units = "m"
            var_depth.valid_min = np.float32(0)
            var_depth.valid_max = np.float32(12000)  # was wrongly a second valid_min assignment
            var_depth.reference_datum = "sea surface"
            var_depth.positive = "down"
            var_depth.comment = "Depth values were actually documented from instrument_nominal_depth values."

        time_val_dateobj = date2num(time_1d_interp, var_time.units, var_time.calendar)
        var_time[:] = time_val_dateobj

        output_netcdf_obj.time_coverage_start = min(time_1d_interp).strftime('%Y-%m-%dT%H:%M:%SZ')
        output_netcdf_obj.time_coverage_end = max(time_1d_interp).strftime('%Y-%m-%dT%H:%M:%SZ')
        output_netcdf_obj.geospatial_vertical_min = float(np.min(depth_1d_interp))
        output_netcdf_obj.geospatial_vertical_max = float(np.max(depth_1d_interp))

        output_netcdf_obj.abstract = (
            "This product aggregates Temperature logger data collected on a mooring line "
            "during a deployment by averaging them temporally in cells %s minutes wide and "
            "interpolating them vertically every %s metres at consistent depths. "
            % (output_netcdf_obj.temporal_resolution, output_netcdf_obj.vertical_resolution)
            + output_netcdf_obj.abstract)

        github_comment = 'Product created with %s' % get_git_revision_script_url(
            os.path.realpath(__file__))
        output_netcdf_obj.lineage = '%s %s' % (getattr(output_netcdf_obj, 'lineage', ''),
                                               github_comment)

    return output_netcdf_file_path

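
# Sketch of the TIME encoding used above: date2num converts datetime objects into
# numbers relative to a CF units string and calendar taken from the TIME variable
# (the units below are a plausible example, not necessarily what the config file
# sets), and num2date inverts the conversion.
def _demo_time_encoding():
    from datetime import datetime, timedelta
    from netCDF4 import date2num, num2date

    units = 'days since 1950-01-01 00:00:00 UTC'
    calendar = 'gregorian'
    # four time stamps, six hours apart
    times = [datetime(2020, 1, 1) + timedelta(hours=6 * i) for i in range(4)]
    num_vals = date2num(times, units, calendar)   # e.g. [25567.0, 25567.25, ...]
    return num2date(num_vals, units, calendar)    # round-trips to the datetimes
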
def create_burst_average_netcdf(input_netcdf_file_path, output_dir):
    """
    generate the burst netcdf file for WQM product.
    see variable conf_file if editing of gatt and var att need to be done
    """
    input_file_rel_path = get_input_file_rel_path(input_netcdf_file_path)
    input_netcdf_obj = Dataset(input_netcdf_file_path, 'r')

    global INSTRUMENT_SAMPLE_INTERVAL
    INSTRUMENT_SAMPLE_INTERVAL = getattr(input_netcdf_obj, 'instrument_sample_interval', 1)

    burst_vars = create_burst_average_var(input_netcdf_obj)
    # dict views are not indexable in Python 3; take the first entry explicitly
    time_burst_vals = next(iter(burst_vars.values()))['time_mean']
    tmp_netcdf_dir = tempfile.mkdtemp()

    output_netcdf_file_path = os.path.join(tmp_netcdf_dir,
                                           generate_netcdf_burst_filename(input_netcdf_file_path, burst_vars))
    output_netcdf_obj = Dataset(output_netcdf_file_path, "w", format="NETCDF4")

    # read gatts from input, add them to output. Some gatts will be overwritten
    input_gatts = input_netcdf_obj.ncattrs()
    gatt_to_dispose = ['author', 'file_version_quality_control', 'quality_control_set',
                       'compliance_checker_version', 'compliance_checker_last_updated',
                       'quality_control_log']

    for gatt in input_gatts:
        if gatt not in gatt_to_dispose:
            setattr(output_netcdf_obj, gatt, getattr(input_netcdf_obj, gatt))

    if 'WQM' in output_netcdf_obj.instrument:
        output_netcdf_obj.title = 'Burst-averaged biogeochemical measurements at %s' % input_netcdf_obj.site_code
    elif 'CTD' in output_netcdf_obj.instrument:
        output_netcdf_obj.title = 'Burst-averaged moored CTD measurements at %s' % input_netcdf_obj.site_code

    m = re.match(r'.*\.nc', input_file_rel_path)
    output_netcdf_obj.input_file = m.group()
    output_netcdf_obj.date_created = DATE_UTC_NOW.strftime("%Y-%m-%dT%H:%M:%SZ")

    depth_burst_mean_val = burst_vars['DEPTH']['var_mean']
    if np.isnan(depth_burst_mean_val).all():
        output_netcdf_obj.geospatial_vertical_min = np.double(input_netcdf_obj['NOMINAL_DEPTH'][:])
        output_netcdf_obj.geospatial_vertical_max = np.double(input_netcdf_obj['NOMINAL_DEPTH'][:])
    else:
        output_netcdf_obj.geospatial_vertical_min = np.nanmin(depth_burst_mean_val)
        output_netcdf_obj.geospatial_vertical_max = np.nanmax(depth_burst_mean_val)

    # set up dimensions and variables
    output_netcdf_obj.createDimension("TIME", len(time_burst_vals))
    var_time = output_netcdf_obj.createVariable("TIME", input_netcdf_obj["TIME"].dtype, ("TIME",))

    dimensionless_var = list_dimensionless_var(input_netcdf_obj)
    # No FillValue for dimensions as per IMOS conventions
    for var in dimensionless_var:
        output_netcdf_obj.createVariable(var, input_netcdf_obj[var].dtype)
        output_netcdf_obj[var][:] = input_netcdf_obj[var][:]

    for var in burst_vars.keys():
        var_dtype = input_netcdf_obj[var].dtype
        fillvalue = getattr(input_netcdf_obj[var], '_FillValue', None)
        output_var_mean = output_netcdf_obj.createVariable(var, var_dtype, ("TIME",),
                                                           fill_value=fillvalue)
        output_var_min = output_netcdf_obj.createVariable('%s_burst_min' % var, var_dtype,
                                                          ("TIME",), fill_value=fillvalue)
        output_var_max = output_netcdf_obj.createVariable('%s_burst_max' % var, var_dtype,
                                                          ("TIME",), fill_value=fillvalue)
        output_var_sd = output_netcdf_obj.createVariable('%s_burst_sd' % var, var_dtype,
                                                         ("TIME",), fill_value=fillvalue)
        output_var_num_obs = output_netcdf_obj.createVariable('%s_num_obs' % var, "i4", ("TIME",))

        # copy 'bonus' var atts from the original FV01 file into the FV02 file
        input_var_object = input_netcdf_obj[var]
        input_var_list_att = input_var_object.ncattrs()
        var_att_disposable = ['name', 'long_name', '_FillValue', 'ancillary_variables',
                              'ChunkSize', 'coordinates']
        for var_att in [att for att in input_var_list_att if att not in var_att_disposable]:
            setattr(output_netcdf_obj[var], var_att, getattr(input_netcdf_obj[var], var_att))
            if var_att != 'comment':
                setattr(output_var_min, var_att, getattr(input_netcdf_obj[var], var_att))
                setattr(output_var_max, var_att, getattr(input_netcdf_obj[var], var_att))
                setattr(output_var_sd, var_att, getattr(input_netcdf_obj[var], var_att))

        # make sure the standard_deviation variable doesn't have a standard_name attribute
        if hasattr(output_var_sd, 'standard_name'):
            delattr(output_var_sd, 'standard_name')

        setattr(output_var_mean, 'coordinates', getattr(input_netcdf_obj[var], 'coordinates', ''))
        setattr(output_var_mean, 'ancillary_variables',
                '%s_num_obs %s_burst_sd %s_burst_min %s_burst_max' % (var, var, var, var))

        setattr(output_var_mean, 'cell_methods', 'TIME: mean')
        setattr(output_var_min, 'cell_methods', 'TIME: minimum')
        setattr(output_var_max, 'cell_methods', 'TIME: maximum')
        setattr(output_var_sd, 'cell_methods', 'TIME: standard_deviation')

        setattr(output_var_sd, 'long_name',
                'Standard deviation of values in burst, after rejection of flagged data')
        setattr(output_var_num_obs, 'long_name',
                'Number of observations included in the averaging process')
        setattr(output_var_min, 'long_name',
                'Minimum data value in burst, after rejection of flagged data')
        setattr(output_var_max, 'long_name',
                'Maximum data value in burst, after rejection of flagged data')
        setattr(output_var_mean, 'long_name',
                'Mean of %s values in burst, after rejection of flagged data'
                % getattr(input_netcdf_obj[var], 'standard_name',
                          getattr(input_netcdf_obj[var], 'long_name', '')))

        output_var_num_obs.units = "1"
        var_units = getattr(input_netcdf_obj[var], 'units')
        if var_units:
            output_var_mean.units = var_units
            output_var_min.units = var_units
            output_var_max.units = var_units
            output_var_sd.units = var_units

        var_stdname = getattr(input_netcdf_obj[var], 'standard_name', '')
        if var_stdname != '':
            output_var_num_obs.standard_name = "%s number_of_observations" % var_stdname

        # set up var values
        output_var_mean[:] = np.ma.masked_invalid(burst_vars[var]['var_mean'])
        output_var_min[:] = np.ma.masked_invalid(burst_vars[var]['var_min'])
        output_var_max[:] = np.ma.masked_invalid(burst_vars[var]['var_max'])
        output_var_sd[:] = np.ma.masked_invalid(burst_vars[var]['var_sd'])
        output_var_num_obs[:] = np.ma.masked_invalid(burst_vars[var]['var_num_obs'])

    # add gatts and variable attributes as stored in config files
    conf_file_generic = os.path.join(os.path.dirname(__file__), 'generate_nc_file_att')
    generate_netcdf_att(output_netcdf_obj, conf_file_generic, conf_file_point_of_truth=True)

    # restore original var atts for the dimensionless variables and TIME
    varnames = dimensionless_var
    varnames.append('TIME')
    for varname in varnames:
        for varatt in input_netcdf_obj[varname].ncattrs():
            output_netcdf_obj.variables[varname].setncattr(varatt,
                                                           getattr(input_netcdf_obj[varname], varatt))

    time_comment = '%s. Time stamp corresponds to the middle of the burst measurement.' % \
        getattr(input_netcdf_obj['TIME'], 'comment', '')
    output_netcdf_obj.variables['TIME'].comment = time_comment.lstrip('. ')

    time_burst_val_dateobj = num2date(time_burst_vals, input_netcdf_obj['TIME'].units,
                                      input_netcdf_obj['TIME'].calendar)
    output_netcdf_obj.time_coverage_start = time_burst_val_dateobj.min().strftime('%Y-%m-%dT%H:%M:%SZ')
    output_netcdf_obj.time_coverage_end = time_burst_val_dateobj.max().strftime('%Y-%m-%dT%H:%M:%SZ')

    # append original gatt to burst average gatt
    gatt = 'comment'
    if hasattr(input_netcdf_obj, gatt):
        setattr(output_netcdf_obj, gatt, getattr(input_netcdf_obj, gatt))

    gatt = 'history'
    setattr(output_netcdf_obj, gatt,
            ('%s. %s' % (getattr(input_netcdf_obj, gatt, ''),
                         'Created %s' % time.ctime(time.time()))).lstrip('. '))

    gatt = 'abstract'
    setattr(output_netcdf_obj, gatt,
            ('%s. %s' % (getattr(output_netcdf_obj, gatt, ''),
                         'Data from the bursts have been cleaned and averaged to create data '
                         'products. This file is one such product.')).lstrip('. '))

    # add burst keywords
    gatt = 'keywords'
    keywords_burst = 'AVERAGED, BINNED'
    setattr(output_netcdf_obj, gatt,
            ('%s, %s' % (getattr(input_netcdf_obj, gatt, ''), keywords_burst)).lstrip(', '))

    # add values to variables
    output_netcdf_obj['TIME'][:] = np.ma.masked_invalid(time_burst_vals)

    github_comment = 'Product created with %s' % get_git_revision_script_url(
        os.path.realpath(__file__))
    output_netcdf_obj.lineage = ('%s. %s' % (getattr(output_netcdf_obj, 'lineage', ''),
                                             github_comment)).lstrip('. ')

    output_netcdf_obj.close()
    input_netcdf_obj.close()
    shutil.move(output_netcdf_file_path, output_dir)
    shutil.rmtree(tmp_netcdf_dir)

    return os.path.join(output_dir, os.path.basename(output_netcdf_file_path))
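
# Sketch of the burst-statistics idiom relied on above (create_burst_average_var
# itself is not shown in this section, so the burst data and the exact statistics
# are assumptions): compute NaN-aware statistics per burst, then mask invalid
# results with np.ma.masked_invalid so they are written to the file as _FillValue.
def _demo_burst_statistics():
    import numpy as np

    # one burst of QC'd samples; flagged values already replaced with NaN
    burst = np.array([20.1, np.nan, 20.3, 19.9, 20.0])
    stats = {
        'var_mean': np.nanmean(burst),
        'var_min': np.nanmin(burst),
        'var_max': np.nanmax(burst),
        'var_sd': np.nanstd(burst),
        'var_num_obs': np.count_nonzero(~np.isnan(burst)),
    }
    # masked entries become _FillValue when assigned to a netCDF variable
    return np.ma.masked_invalid(list(stats.values()))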