def processL1a(fp, calibrationMap):
    '''
    Reads a raw binary file and generates a L1a HDF5 file.

    fp: path to the raw binary instrument file.
    calibrationMap: mapping of calibration-file name -> calibration object
        (each provides id, instrumentType, media, measMode, frameType,
        sensorType).
    Returns the populated HDFRoot on success, or None on failure (missing
    essential groups, failed GPS import, failed column conversion, or SZA
    filter rejection).
    '''
    (_, fileName) = os.path.split(fp)

    # Generate root attributes
    root = HDFRoot()
    root.id = "/"
    root.attributes["HYPERINSPACE"] = MainConfig.settings["version"]
    root.attributes["CAL_FILE_NAMES"] = ','.join(calibrationMap.keys())
    root.attributes["WAVELENGTH_UNITS"] = "nm"
    # All radiometry is still in raw instrument counts at L1a.
    root.attributes["LI_UNITS"] = "count"
    root.attributes["LT_UNITS"] = "count"
    root.attributes["ES_UNITS"] = "count"
    root.attributes["SATPYR_UNITS"] = "count"
    root.attributes["RAW_FILE_NAME"] = fileName
    root.attributes["PROCESSING_LEVEL"] = "1a"
    now = dt.datetime.now()
    timestr = now.strftime("%d-%b-%Y %H:%M:%S")
    root.attributes["FILE_CREATION_TIME"] = timestr
    # SZA Filter configuration parameter added to attributes below

    msg = f"ProcessL1a.processL1a: {timestr}"
    print(msg)
    Utilities.writeLogFile(msg)

    # One empty HDFGroup per calibration context, keyed by calibration id;
    # RawFileReader fills these as it matches frames in the raw stream.
    contextMap = collections.OrderedDict()
    for key in calibrationMap:
        cf = calibrationMap[key]
        gp = HDFGroup()
        gp.id = cf.instrumentType
        contextMap[cf.id] = gp

    # print("contextMap:", list(contextMap.keys()))
    # print("calibrationMap:", list(calibrationMap.keys()))
    print('Reading in raw binary data may take a moment.')
    RawFileReader.readRawFile(fp, calibrationMap, contextMap, root)

    # Populate HDF group attributes
    for key in calibrationMap:
        cf = calibrationMap[key]
        gp = contextMap[cf.id]
        # Don't add contexts that did not match any data in RawFileReader
        if 'CalFileName' not in gp.attributes:
            continue
        gp.attributes["InstrumentType"] = cf.instrumentType
        gp.attributes["Media"] = cf.media
        gp.attributes["MeasMode"] = cf.measMode
        gp.attributes["FrameType"] = cf.frameType
        gp.getTableHeader(cf.sensorType)
        gp.attributes["DISTANCE_1"] = "Pressure " + cf.sensorType + " 1 1 0"
        gp.attributes["DISTANCE_2"] = "Surface " + cf.sensorType + " 1 1 0"
        # gp.attributes["SensorDataList"] = ", ".join([x for x in gp.datasets.keys()])
        gp.attributes["SensorDataList"] = ", ".join(list(gp.datasets.keys()))
        # 'SAS' and 'Reference' contexts are containers only; their sensor
        # groups are attached elsewhere, so don't append them to root.
        if gp.id != 'SAS' and gp.id != 'Reference':
            root.groups.append(gp)

    # Insure essential data groups are present before proceeding.
    # Expected cruise setup: 2x HLD (shutter dark), 2x HSL (light),
    # 1x HSE (Es), 1x HED (Es dark), 1x GP (GPS).
    hld = 0
    hsl = 0
    hse = 0
    hed = 0
    gps = 0
    for gp in root.groups:
        if gp.id.startswith("HLD"):
            hld += 1
        if gp.id.startswith("HSL"):
            hsl += 1
        if gp.id.startswith("HSE"):
            hse += 1
        if gp.id.startswith("HED"):
            hed += 1
        if gp.id.startswith("GP"):
            gps += 1
    if hld != 2 or hsl != 2 or hse != 1 or hed != 1 or gps != 1:
        msg = "ProcessL1a.processL1a: Essential dataset missing. Check your configuration calibration files match cruise setup. Aborting."
        msg = f'{msg}\ngps: {gps} :1'
        msg = f'{msg}\nhed: {hed} :1'
        msg = f'{msg}\nhld: {hld} :2'
        msg = f'{msg}\nhse: {hse} :1'
        msg = f'{msg}\nhsl: {hsl} :2'
        print(msg)
        Utilities.writeLogFile(msg)
        return None

    # Update the GPS group to include a datasets for DATETAG and TIMETAG2
    for gp in root.groups:
        if gp.id.startswith("GP"):
            gpsGroup = gp
        # Need year-gang and sometimes Datetag from one of the sensors
        if gp.id.startswith("HSE"):
            esDateTag = gp.datasets["DATETAG"].columns["NONE"]
            esTimeTag2 = gp.datasets["TIMETAG2"].columns["NONE"]
            esSec = []
            for time in esTimeTag2:
                esSec.append(Utilities.timeTag2ToSec(time))

    # NOTE(review): gpsGroup/esDateTag are bound inside the loop above; the
    # essential-group check guarantees exactly one GP and one HSE group exist.
    gpsGroup.addDataset("DATETAG")
    gpsGroup.addDataset("TIMETAG2")
    if "UTCPOS" in gpsGroup.datasets:
        gpsTime = gpsGroup.datasets["UTCPOS"].columns["NONE"]
    elif "TIME" in gpsGroup.datasets:
        # prepSAS output
        gpsTime = gpsGroup.datasets["TIME"].columns["UTC"]
    else:
        msg = 'Failed to import GPS data.'
        print(msg)
        Utilities.writeLogFile(msg)
        return None

    # Another case for GPGGA input...
    if gpsGroup.id.startswith("GPGGA"):
        # No date is provided in GPGGA, need to find nearest time in Es and take the Datetag from Es
        ''' Catch-22. In order to covert the gps time, we need the year and day, which GPGGA does not have.
            To get these, could compare to find the nearest DATETAG in Es. In order to compare the gps time
            to the Es time to find the nearest, I would need to convert them to datetimes ... which would
            require the year and day.
            Instead, I use either the first or last Datetag from Es, depending on whether UTC 00:00 was crossed.'''
        # If the date does not change in Es, then no problem, use the Datetag of Es first element.
        # Otherwise, change the datetag at midnight by one day
        gpsDateTag = []
        gpsTimeTag2 = []

        if esDateTag[0] != esDateTag[-1]:
            msg = "ProcessL1a.processL1a: Warning: File crosses UTC 00:00. Adjusting timestamps for matchup of Datetag."
            print(msg)
            Utilities.writeLogFile(msg)
            newDay = False
            for time in gpsTime:
                gpsSec = Utilities.utcToSec(time)
                # First iteration only: seed the prior-sample value.
                if not 'gpsSecPrior' in locals():
                    gpsSecPrior = gpsSec
                # Test for a change of ~24 hrs between this sample and the last sample
                # To cross 0, gpsSecPrior would need to be approaching 86400 seconds
                # In that case, take the final Es Datetag
                if (gpsSecPrior - gpsSec) > 86000:
                    # Once triggered the first time, this will remain true for remainder of file
                    newDay = True
                if newDay is True:
                    gpsDateTag.append(esDateTag[-1])
                    dtDate = Utilities.dateTagToDateTime(esDateTag[-1])
                    gpsTimeTag2.append(
                        Utilities.datetime2TimeTag2(
                            Utilities.utcToDateTime(dtDate, time)))
                else:
                    gpsDateTag.append(esDateTag[0])
                    dtDate = Utilities.dateTagToDateTime(esDateTag[0])
                    gpsTimeTag2.append(
                        Utilities.datetime2TimeTag2(
                            Utilities.utcToDateTime(dtDate, time)))
                gpsSecPrior = gpsSec
        else:
            # No date change in Es: every GPS sample takes the first Es Datetag.
            for time in gpsTime:
                gpsDateTag.append(esDateTag[0])
                dtDate = Utilities.dateTagToDateTime(esDateTag[0])
                gpsTimeTag2.append(
                    Utilities.datetime2TimeTag2(
                        Utilities.utcToDateTime(dtDate, time)))

        gpsGroup.datasets["DATETAG"].columns["NONE"] = gpsDateTag
        gpsGroup.datasets["TIMETAG2"].columns["NONE"] = gpsTimeTag2

    # Converts gp.columns to numpy array
    for gp in root.groups:
        if gp.id.startswith("SATMSG"):  # Don't convert these strings to datasets.
            # NOTE(review): columnsToDataset is still called here, but its
            # return value is deliberately ignored for SATMSG groups.
            for ds in gp.datasets.values():
                ds.columnsToDataset()
        else:
            for ds in gp.datasets.values():
                # columnsToDataset returns falsy on conversion failure.
                if not ds.columnsToDataset():
                    msg = "ProcessL1a.processL1a: Essential column cannot be converted to Dataset. Aborting."
                    print(msg)
                    Utilities.writeLogFile(msg)
                    return None

    # Apply SZA filter; Currently only works with SolarTracker data at L1A (again possible in L2)
    if ConfigFile.settings["bL1aCleanSZA"]:
        root.attributes['SZA_FILTER_L1A'] = ConfigFile.settings["fL1aCleanSZAMax"]
        for gp in root.groups:
            # try:
            if 'FrameTag' in gp.attributes:
                if gp.attributes["FrameTag"].startswith("SATNAV"):
                    elevData = gp.getDataset("ELEVATION")
                    elevation = elevData.data.tolist()
                    szaLimit = float(ConfigFile.settings["fL1aCleanSZAMax"])
                    ''' It would be good to add local time as a printed output with SZA'''
                    # SZA = 90 - elevation; reject the whole file if even the
                    # best (max) elevation exceeds the SZA limit.
                    # NOTE(review): the message says "SZA too low" but the
                    # condition rejects when SZA is ABOVE the limit — confirm
                    # intended wording.
                    if (90 - np.nanmax(elevation)) > szaLimit:
                        msg = f'SZA too low. Discarding entire file. {round(90-np.nanmax(elevation))}'
                        print(msg)
                        Utilities.writeLogFile(msg)
                        return None
                    else:
                        msg = f'SZA passed filter: {round(90-np.nanmax(elevation))}'
                        print(msg)
                        Utilities.writeLogFile(msg)
            else:
                print(f'No FrameTag in {gp.id} group')

    return root
def formatHeader(fp,node, level):
    '''
    Build the SeaBASS header block for an output file.

    fp: path of the data file being written (only the basename is used).
    node: HDFRoot for the current level; must contain an IRRADIANCE group,
        and at level '2' an ANCILLARY group as well.
    level: processing level string; assumes '1e' or '2' — any other value
        leaves esData unbound (NameError below). TODO confirm callers only
        pass these two values.
    Returns the (shared, mutated) SeaBASSHeader.settings dict with blank
    fields filled from the data.
    '''
    seaBASSHeaderFileName = ConfigFile.settings["seaBASSHeaderFileName"]
    seaBASSFP = os.path.join(os.getcwd(), 'Config', seaBASSHeaderFileName)
    SeaBASSHeader.loadSeaBASSHeader(seaBASSFP)
    # NOTE: headerBlock aliases SeaBASSHeader.settings — edits below mutate
    # the module-level settings, not a copy.
    headerBlock = SeaBASSHeader.settings

    # Dataset leading columns can be taken from any sensor
    referenceGroup = node.getGroup("IRRADIANCE")
    if level == '1e':
        esData = referenceGroup.getDataset("ES")
    if level == '2':
        # referenceGroup = node.getGroup("IRRADIANCE")
        esData = referenceGroup.getDataset("ES_HYPER")
        # if ConfigFile.settings["bL1cSolarTracker"]:
        ancillaryGroup = node.getGroup("ANCILLARY")
        # else:
        #     ancillaryGroup = node.getGroup("ANCILLARY_METADATA")
        wind = ancillaryGroup.getDataset("WINDSPEED")
        wind.datasetToColumns()
        winCol = wind.columns["WINDSPEED"]
        aveWind = np.nanmean(winCol)

    headerBlock['original_file_name'] = node.attributes['RAW_FILE_NAME']
    headerBlock['data_file_name'] = os.path.split(fp)[1]
    headerBlock['comments'] = headerBlock['comments'] + f'\n! DateTime Processed = {time.asctime()}'

    # Convert Dates and Times
    # timeDT = esData.data['Datetime'].tolist() # Datetime has already been stripped off for saving the HDF
    dateDay = esData.data['Datetag'].tolist()
    dateDT = [Utilities.dateTagToDateTime(x) for x in dateDay]
    timeTag2 = esData.data['Timetag2'].tolist()
    timeDT = []
    for i in range(len(dateDT)):
        timeDT.append(Utilities.timeTag2ToDateTime(dateDT[i],timeTag2[i]))

    # Python 2 format operator
    startTime = "%02d:%02d:%02d[GMT]" % (min(timeDT).hour, min(timeDT).minute, min(timeDT).second)
    endTime = "%02d:%02d:%02d[GMT]" % (max(timeDT).hour, max(timeDT).minute, max(timeDT).second)
    startDate = "%04d%02d%02d" % (min(timeDT).year, min(timeDT).month, min(timeDT).day)
    endDate = "%04d%02d%02d" % (max(timeDT).year, max(timeDT).month, max(timeDT).day)

    # Convert Position
    # Python 3 format syntax
    southLat = "{:.4f}[DEG]".format(min(esData.data['LATITUDE'].tolist()))
    northLat = "{:.4f}[DEG]".format(max(esData.data['LATITUDE'].tolist()))
    eastLon = "{:.4f}[DEG]".format(max(esData.data['LONGITUDE'].tolist()))
    westLon = "{:.4f}[DEG]".format(min(esData.data['LONGITUDE'].tolist()))

    # Only fill header fields the operator left blank; explicit values in the
    # loaded SeaBASS header file take precedence.
    if headerBlock['station'] == '':
        headerBlock['station'] = node.attributes['RAW_FILE_NAME'].split('.')[0]
    if headerBlock['start_time'] == '':
        headerBlock['start_time'] = startTime
    if headerBlock['end_time'] == '':
        headerBlock['end_time'] = endTime
    if headerBlock['start_date'] == '':
        headerBlock['start_date'] = startDate
    if headerBlock['end_date'] == '':
        headerBlock['end_date'] = endDate
    if headerBlock['north_latitude'] == '':
        headerBlock['north_latitude'] = northLat
    if headerBlock['south_latitude'] == '':
        headerBlock['south_latitude'] = southLat
    if headerBlock['east_longitude'] == '':
        headerBlock['east_longitude'] = eastLon
    if headerBlock['west_longitude'] == '':
        headerBlock['west_longitude'] = westLon
    if level == '2':
        headerBlock['wind_speed'] = aveWind # wind_speed will not be written to l1e

    return headerBlock
def formatData2(dataset,dsDelta,dtype, units):
    '''
    Flatten an L2 dataset and its uncertainty into SeaBASS text rows.

    dataset: HDF dataset whose .data is a numpy structured array holding
        Datetag/Timetag2, ancillary fields (LATITUDE, LONGITUDE, AOD, CLOUD,
        SZA, REL_AZ, HEADING, SOLAR_AZ, WIND) and radiometric band columns.
    dsDelta: uncertainty dataset; its .columns are mutated in place
        (Datetag/Timetag2 deleted) and re-converted to .data here.
    dtype: 'rrs' or 'es'; selects the SeaBASS field name ('Rrs'/'Es') and
        the uncertainty suffix ('_unc' for rrs, '_sd' otherwise).
    units: units string used for every radiometric and uncertainty column.
    Returns (dataOut, fieldsLineStr, unitsLineStr): formatted data lines and
        the matching SeaBASS /fields and /units header lines.
    Raises ValueError for an unrecognized dtype (previously this fell
        through to an unbound-variable NameError when building the fields line).
    '''
    # Resolve SeaBASS naming up front so a bad dtype fails fast and clearly.
    if dtype == 'rrs':
        fieldName = 'Rrs'
        suffix = '_unc'
    elif dtype == 'es':
        fieldName = 'Es'
        suffix = '_sd'
    else:
        raise ValueError(f'formatData2: unrecognized dtype: {dtype}')

    dsCopy = dataset.data.copy() # By copying here, we leave the ancillary data tacked on to radiometry for later
    # dsDelta = dsDelta.data.copy()

    # Convert Dates and Times and remove from dataset
    newData = dsCopy
    dateDay = dsCopy['Datetag'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'Datetag')
    del dsDelta.columns['Datetag']
    dateDT = [Utilities.dateTagToDateTime(x) for x in dateDay]
    timeTag2 = dsCopy['Timetag2'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'Timetag2')
    del dsDelta.columns['Timetag2']
    dsDelta.columnsToDataset()

    timeDT = [Utilities.timeTag2ToDateTime(dateDT[i],timeTag2[i]) for i in range(len(dateDT))]

    # Retrieve ancillaries and remove from dataset (they are not on deltas)
    lat = dsCopy['LATITUDE'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'LATITUDE')
    lon = dsCopy['LONGITUDE'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'LONGITUDE')
    aod = dsCopy['AOD'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'AOD')
    cloud = dsCopy['CLOUD'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'CLOUD')
    sza = dsCopy['SZA'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'SZA')
    relAz = dsCopy['REL_AZ'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'REL_AZ')
    # HEADING and SOLAR_AZ are removed without being kept; they are not
    # written to the SeaBASS output.
    newData = SeaBASSWriter.removeColumns(newData,'HEADING')
    newData = SeaBASSWriter.removeColumns(newData,'SOLAR_AZ')
    wind = dsCopy['WIND'].tolist()
    newData = SeaBASSWriter.removeColumns(newData,'WIND')

    dsCopy = newData  # only the radiometric band columns remain

    # Change field names for SeaBASS compliance
    bands = list(dsCopy.dtype.names)
    ls = ['date','time','lat','lon','RelAz','SZA','AOT','cloud','wind']
    # One construction for both dtypes; only the uncertainty suffix differs.
    fieldsLineStr = ','.join(ls + [f'{fieldName}{band}' for band in bands] \
        + [f'{fieldName}{band}{suffix}' for band in bands])

    lenRad = (len(dsCopy.dtype.names))
    unitsLine = ['yyyymmdd']
    unitsLine.append('hh:mm:ss')
    unitsLine.extend(['degrees']*4) # lat, lon, relAz, sza
    unitsLine.append('unitless') # AOD
    unitsLine.append('%') # cloud
    unitsLine.append('m/s') # wind
    unitsLine.extend([units]*lenRad) # data
    unitsLine.extend([units]*lenRad) # data uncertainty
    unitsLineStr = ','.join(unitsLine)

    # Add data for each row
    dataOut = []
    formatStr = str('{:04d}{:02d}{:02d},{:02d}:{:02d}:{:02d},{:.4f},{:.4f},{:.1f},{:.1f}'\
        + ',{:.4f},{:.0f},{:.1f}'\
        + ',{:.6f}'*lenRad + ',{:.6f}'*lenRad)
    for i in range(dsCopy.shape[0]):
        subList = [lat[i],lon[i],relAz[i],sza[i],aod[i],cloud[i],wind[i]]
        lineList = [timeDT[i].year,timeDT[i].month,timeDT[i].day,timeDT[i].hour,timeDT[i].minute,timeDT[i].second] +\
            subList + list(dsCopy[i].tolist()) + list(dsDelta.data[i].tolist())

        # Replace NaNs with -9999.0, the SeaBASS missing-value sentinel
        lineList = [-9999.0 if np.isnan(element) else element for element in lineList]

        lineStr = formatStr.format(*lineList)
        dataOut.append(lineStr)
    return dataOut, fieldsLineStr, unitsLineStr
def includeModelDefaults(ancGroup, modRoot):
    '''
    Include model data or defaults for blank ancillary fields.

    ancGroup: ancillary HDF group; its WINDSPEED/AOD/SALINITY/SST datasets
        are created if missing and mutated in place, with per-record flag
        columns ('field', 'model', 'default', 'undetermined').
    modRoot: HDFRoot of model (e.g. reanalysis) data, or None; when present,
        NaN wind/AOD values are replaced by the nearest-in-time model value.
    Returns None; all results are written back into ancGroup.
    '''
    print('Filling blank ancillary data with models or defaults from Configuration')

    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    # radData = referenceGroup.getDataset("ES") # From node, the input file

    # Convert ancillary date time
    # LATITUDE is used only as a reference dataset for the shared
    # Datetag/Timetag2 columns.
    if ancGroup is not None:
        ancGroup.datasets['LATITUDE'].datasetToColumns()
        ancTime = ancGroup.datasets['LATITUDE'].columns['Timetag2']
        ancSeconds = []
        ancDatetime = []
        for i, ancDate in enumerate(ancGroup.datasets['LATITUDE'].columns['Datetag']):
            ancDatetime.append(
                Utilities.timeTag2ToDateTime(
                    Utilities.dateTagToDateTime(ancDate), ancTime[i]))
            # Seconds since the Unix epoch, for nearest-neighbor matching below
            ancSeconds.append((ancDatetime[i] - epoch).total_seconds())
    # Convert model data date and time to datetime and then to seconds for interpolation
    if modRoot is not None:
        modTime = modRoot.groups[0].datasets["Timetag2"].tolist()
        modSeconds = []
        modDatetime = []
        for i, modDate in enumerate(modRoot.groups[0].datasets["Datetag"].tolist()):
            modDatetime.append(
                Utilities.timeTag2ToDateTime(
                    Utilities.dateTagToDateTime(modDate), modTime[i]))
            modSeconds.append((modDatetime[i] - epoch).total_seconds())

    # Model or default fills
    # For each field: reuse the existing dataset's values if present,
    # otherwise create the dataset and start from an all-NaN list.
    if 'WINDSPEED' in ancGroup.datasets:
        ancGroup.datasets['WINDSPEED'].datasetToColumns()
        windDataset = ancGroup.datasets['WINDSPEED']
        wind = windDataset.columns['NONE']
    else:
        windDataset = ancGroup.addDataset('WINDSPEED')
        wind = np.empty((1, len(ancSeconds)))
        wind[:] = np.nan
        wind = wind[0].tolist()
    if 'AOD' in ancGroup.datasets:
        ancGroup.datasets['AOD'].datasetToColumns()
        aodDataset = ancGroup.datasets['AOD']
        aod = aodDataset.columns['NONE']
    else:
        aodDataset = ancGroup.addDataset('AOD')
        aod = np.empty((1, len(ancSeconds)))
        aod[:] = np.nan
        aod = aod[0].tolist()
    # Default fills (salinity and SST have no model source, only defaults)
    if 'SALINITY' in ancGroup.datasets:
        ancGroup.datasets['SALINITY'].datasetToColumns()
        saltDataset = ancGroup.datasets['SALINITY']
        salt = saltDataset.columns['NONE']
    else:
        saltDataset = ancGroup.addDataset('SALINITY')
        salt = np.empty((1, len(ancSeconds)))
        salt[:] = np.nan
        salt = salt[0].tolist()
    if 'SST' in ancGroup.datasets:
        ancGroup.datasets['SST'].datasetToColumns()
        sstDataset = ancGroup.datasets['SST']
        sst = sstDataset.columns['NONE']
    else:
        sstDataset = ancGroup.addDataset('SST')
        sst = np.empty((1, len(ancSeconds)))
        sst[:] = np.nan
        sst = sst[0].tolist()

    # Initialize flags: 'field' where a measured value exists,
    # 'undetermined' where it is still NaN (resolved to model/default below).
    windFlag = []
    aodFlag = []
    for i, ancSec in enumerate(ancSeconds):
        if np.isnan(wind[i]):
            windFlag.append('undetermined')
        else:
            windFlag.append('field')
        if np.isnan(aod[i]):
            aodFlag.append('undetermined')
        else:
            aodFlag.append('field')

    # Replace Wind, AOD NaNs with modeled data where possible.
    # These will be within one hour of the field data.
    if modRoot is not None:
        msg = 'Filling in field data with model data where needed.'
        print(msg)
        Utilities.writeLogFile(msg)

        for i, ancSec in enumerate(ancSeconds):
            if np.isnan(wind[i]):
                # msg = 'Replacing wind with model data'
                # print(msg)
                # Utilities.writeLogFile(msg)
                idx = Utilities.find_nearest(modSeconds, ancSec)
                wind[i] = modRoot.groups[0].datasets['Wind'][idx]
                windFlag[i] = 'model'
            if np.isnan(aod[i]):
                # msg = 'Replacing AOD with model data'
                # print(msg)
                # Utilities.writeLogFile(msg)
                idx = Utilities.find_nearest(modSeconds, ancSec)
                aod[i] = modRoot.groups[0].datasets['AOD'][idx]
                aodFlag[i] = 'model'

    # Replace Wind, AOD, SST, and Sal with defaults where still nan
    msg = 'Filling in ancillary data with default values where still needed.'
    print(msg)
    Utilities.writeLogFile(msg)

    saltFlag = []
    sstFlag = []
    for i, value in enumerate(wind):
        if np.isnan(value):
            wind[i] = ConfigFile.settings["fL1bqcDefaultWindSpeed"]
            windFlag[i] = 'default'
    for i, value in enumerate(aod):
        if np.isnan(value):
            aod[i] = ConfigFile.settings["fL1bqcDefaultAOD"]
            aodFlag[i] = 'default'
    for i, value in enumerate(salt):
        if np.isnan(value):
            salt[i] = ConfigFile.settings["fL1bqcDefaultSalt"]
            saltFlag.append('default')
        else:
            saltFlag.append('field')
    for i, value in enumerate(sst):
        if np.isnan(value):
            sst[i] = ConfigFile.settings["fL1bqcDefaultSST"]
            sstFlag.append('default')
        else:
            sstFlag.append('field')

    # Populate the datasets and flags with the InRad variables
    windDataset.columns["NONE"] = wind
    windDataset.columns["WINDFLAG"] = windFlag
    windDataset.columnsToDataset()
    # NOTE(review): AOD values are stored under the "AOD" key while wind,
    # salinity, and SST use "NONE" — confirm downstream readers expect this
    # asymmetry.
    aodDataset.columns["AOD"] = aod
    aodDataset.columns["AODFLAG"] = aodFlag
    aodDataset.columnsToDataset()
    saltDataset.columns["NONE"] = salt
    saltDataset.columns["SALTFLAG"] = saltFlag
    saltDataset.columnsToDataset()
    sstDataset.columns["NONE"] = sst
    sstDataset.columns["SSTFLAG"] = sstFlag
    sstDataset.columnsToDataset()

    # Convert ancillary seconds back to date/timetags ...
    ancDateTag = []
    ancTimeTag2 = []
    ancDT = []
    for i, sec in enumerate(ancSeconds):
        ancDT.append(
            datetime.datetime.utcfromtimestamp(sec).replace(
                tzinfo=datetime.timezone.utc))
        # Datetag as float YYYYDDD (year + day-of-year)
        ancDateTag.append(
            float(
                f'{int(ancDT[i].timetuple()[0]):04}{int(ancDT[i].timetuple()[7]):03}'
            ))
        # Timetag2 as float HHMMSSmmm
        ancTimeTag2.append(float( \
            f'{int(ancDT[i].timetuple()[3]):02}{int(ancDT[i].timetuple()[4]):02}{int(ancDT[i].timetuple()[5]):02}{int(ancDT[i].microsecond/1000):03}'))

    # Move the Timetag2 and Datetag into the arrays and remove the datasets
    for ds in ancGroup.datasets:
        ancGroup.datasets[ds].columns["Datetag"] = ancDateTag
        ancGroup.datasets[ds].columns["Timetag2"] = ancTimeTag2
        ancGroup.datasets[ds].columns["Datetime"] = ancDT
        # Order the leading columns as Datetime, Datetag, Timetag2
        ancGroup.datasets[ds].columns.move_to_end('Timetag2', last=False)
        ancGroup.datasets[ds].columns.move_to_end('Datetag', last=False)
        ancGroup.datasets[ds].columns.move_to_end('Datetime', last=False)
        ancGroup.datasets[ds].columnsToDataset()