def file_length(input_file, input_subsetter, HDF5=False, QFIT=False):
    """
    Count the number of data records that will be read from an input file

    Parameters
    ----------
    input_file: str
        path to the input file
    input_subsetter: sequence of int or None
        indices used to subset the data.  When it contains at least one
        index, its length is returned and the file is not opened
    HDF5: str or bool, default False
        name of the variable within an HDF5 file whose one-dimensional
        shape gives the number of records
    QFIT: bool, default False
        input file is a QFIT binary file

    Returns
    -------
    file_lines: int
        number of records
    """
    #-- number of requested indices (0 if no subsetter was supplied)
    #-- len() is used instead of a truth test because numpy index arrays
    #-- with more than one element cannot be evaluated as a boolean
    try:
        n_subset = len(input_subsetter)
    except TypeError:
        n_subset = 0

    if n_subset:
        #-- subset the data to indices if specified
        file_lines = n_subset
    elif HDF5:
        #-- read the size of an input variable within a HDF5 file
        with h5py.File(input_file, 'r') as fileID:
            file_lines, = fileID[HDF5].shape
    elif QFIT:
        #-- read the size of a QFIT binary file
        #-- NOTE(review): read_ATM_qfit_file unpacks this reader's return
        #-- value as (data, header) and indexes data['latitude'], so the
        #-- record count is taken from that variable -- confirm against
        #-- the reader's definition
        fid, _ = read_ATM1b_QFIT_binary(input_file)
        file_lines = len(fid['latitude'])
    else:
        #-- read the input file, split at lines and count all lines
        #-- that do not start with the comment character
        with open(input_file, 'r') as f:
            file_lines = sum(
                1 for line in f.read().splitlines()
                if not line.startswith('#')
            )
    #-- return the number of lines
    return file_lines
def _unpack_hhmmss(time_hhmmss):
    """Split packed HHMMSS.sss time values into hour, minute and second arrays"""
    hour = np.zeros_like(time_hhmmss, dtype=np.float64)
    minute = np.zeros_like(time_hhmmss, dtype=np.float64)
    second = np.zeros_like(time_hhmmss, dtype=np.float64)
    #-- for each record within the file
    for i, packed_time in enumerate(time_hhmmss):
        #-- convert to zero-padded string with 3 decimal points
        line_contents = '{0:010.3f}'.format(packed_time)
        hour[i] = float(line_contents[:2])
        minute[i] = float(line_contents[2:4])
        second[i] = float(line_contents[4:])
    return hour, minute, second


def read_ATM_qfit_file(input_file, input_subsetter):
    """
    Read an ATM Level-1b QFIT file (ascii TXT, binary qi or HDF5)

    The mission, date and file type are parsed from the file name, which
    must contain ``(BLATM1B|ILATM1B|ILNSA1B)_<date>_<time>...(qi|TXT|h5)``

    Parameters
    ----------
    input_file: str
        path to the input file
    input_subsetter: sequence of int or None
        indices used to subset every output variable.  Nothing is subset
        when it is None or empty

    Returns
    -------
    ATM_L1b_input: dict
        ``lat``, ``lon``, ``data`` (elevation) and ``time`` variables.
        ``time`` is computed by ``pyTMD.time.convert_calendar_dates``
        relative to the epoch 2000-01-01T12:00:00 with ``scale=86400.0``,
        after subtracting the leap seconds counted by
        ``pyTMD.time.count_leap_seconds``
    file_lines: int
        number of records as returned by ``file_length``
    HEM: str
        ``'N'`` or ``'S'``: the hemisphere containing the shots

    Raises
    ------
    IndexError
        if the file name does not match the expected pattern
    ValueError
        if the date string in the file name is not 6 or 8 digits long, or
        if the shots are not all within a single hemisphere
    """
    #-- regular expression pattern for extracting parameters
    #-- the dot before the suffix is escaped so that it only matches a
    #-- literal file extension separator
    mission_flag = '(BLATM1B|ILATM1B|ILNSA1B)'
    regex_pattern = r'{0}_(\d+)_(\d+)(.*?)\.(qi|TXT|h5)'.format(mission_flag)
    #-- extract mission and other parameters from filename
    MISSION, YYMMDD, HHMMSS, AUX, SFX = re.findall(regex_pattern, input_file).pop()
    #-- early date strings omitted century and millenia (e.g. 93 for 1993)
    if (len(YYMMDD) == 6):
        ypre, month, day = np.array(
            [YYMMDD[:2], YYMMDD[2:4], YYMMDD[4:]], dtype='i')
        year = (ypre + 1900.0) if (ypre >= 90) else (ypre + 2000.0)
    elif (len(YYMMDD) == 8):
        year, month, day = np.array(
            [YYMMDD[:4], YYMMDD[4:6], YYMMDD[6:]], dtype='i')
    else:
        #-- fail here rather than with a NameError on year further down
        raise ValueError(
            'Unexpected date string {0!r} in file name'.format(YYMMDD))

    #-- output python dictionary with variables
    ATM_L1b_input = {}
    #-- Version 1 of ATM QFIT files (ascii)
    #-- output text file from qi2txt with proper filename format
    #-- do not use the shortened output format from qi2txt
    if (SFX == 'TXT'):
        #-- compile regular expression operator for reading lines
        regex_pattern = r'[-+]?(?:(?:\d*\.\d+)|(?:\d+\.?))(?:[Ee][+-]?\d+)?'
        rx = re.compile(regex_pattern, re.VERBOSE)
        #-- read the input file, split at lines and remove all commented lines
        with open(input_file, 'r') as f:
            file_contents = [
                i for i in f.read().splitlines() if not i.startswith('#')
            ]
        #-- number of lines of data within file
        file_lines = file_length(input_file, input_subsetter)
        #-- create output variables with length equal to the number of lines
        n_records = len(file_contents)
        ATM_L1b_input['lat'] = np.zeros(n_records, dtype=np.float64)
        ATM_L1b_input['lon'] = np.zeros(n_records, dtype=np.float64)
        ATM_L1b_input['data'] = np.zeros(n_records, dtype=np.float64)
        hour = np.zeros(n_records, dtype=np.float64)
        minute = np.zeros(n_records, dtype=np.float64)
        second = np.zeros(n_records, dtype=np.float64)
        #-- for each line within the file
        for i, line in enumerate(file_contents):
            #-- find numerical instances within the line
            line_contents = rx.findall(line)
            ATM_L1b_input['lat'][i] = float(line_contents[1])
            ATM_L1b_input['lon'][i] = float(line_contents[2])
            ATM_L1b_input['data'][i] = float(line_contents[3])
            #-- the last number on the line is the packed HHMMSS time
            hour[i] = float(line_contents[-1][:2])
            minute[i] = float(line_contents[-1][2:4])
            second[i] = float(line_contents[-1][4:])
    #-- Version 1 of ATM QFIT files (binary)
    elif (SFX == 'qi'):
        #-- read input QFIT data file and subset if specified
        fid, h = read_ATM1b_QFIT_binary(input_file)
        #-- number of lines of data within file
        file_lines = file_length(input_file, input_subsetter, QFIT=True)
        ATM_L1b_input['lat'] = fid['latitude'][:]
        ATM_L1b_input['lon'] = fid['longitude'][:]
        ATM_L1b_input['data'] = fid['elevation'][:]
        time_hhmmss = fid['time_hhmmss'][:]
        #-- extract hour, minute and second from time_hhmmss
        hour, minute, second = _unpack_hhmmss(time_hhmmss)
    #-- Version 2 of ATM QFIT files (HDF5)
    elif (SFX == 'h5'):
        #-- expand the path once so both readers open the same file
        hdf5_file = os.path.expanduser(input_file)
        #-- number of lines of data within file
        file_lines = file_length(hdf5_file, input_subsetter, HDF5='elevation')
        #-- Open the HDF5 file for reading
        #-- the context manager closes the file even if a read fails
        with h5py.File(hdf5_file, 'r') as fileID:
            #-- create output variables with length equal to input elevation
            ATM_L1b_input['lat'] = fileID['latitude'][:]
            ATM_L1b_input['lon'] = fileID['longitude'][:]
            ATM_L1b_input['data'] = fileID['elevation'][:]
            time_hhmmss = fileID['instrument_parameters']['time_hhmmss'][:]
        #-- extract hour, minute and second from time_hhmmss
        hour, minute, second = _unpack_hhmmss(time_hhmmss)

    #-- calculate the number of leap seconds between GPS time (seconds
    #-- since Jan 6, 1980 00:00:00) and UTC
    gps_seconds = pyTMD.time.convert_calendar_dates(year, month, day,
        hour=hour, minute=minute, second=second,
        epoch=(1980, 1, 6, 0, 0, 0), scale=86400.0)
    leap_seconds = pyTMD.time.count_leap_seconds(gps_seconds)
    #-- calculation of Julian day taking into account leap seconds
    #-- converting to J2000 seconds
    ATM_L1b_input['time'] = pyTMD.time.convert_calendar_dates(
        year, month, day, hour=hour, minute=minute,
        second=second - leap_seconds,
        epoch=(2000, 1, 1, 12, 0, 0, 0), scale=86400.0)

    #-- subset the data to indices if specified
    #-- len() is used instead of a truth test because numpy index arrays
    #-- with more than one element cannot be evaluated as a boolean
    try:
        has_subset = len(input_subsetter) > 0
    except TypeError:
        has_subset = False
    if has_subset:
        for key, val in ATM_L1b_input.items():
            ATM_L1b_input[key] = val[input_subsetter]

    #-- hemispheric shot count
    count = {}
    count['N'] = np.count_nonzero(ATM_L1b_input['lat'] >= 0.0)
    count['S'] = np.count_nonzero(ATM_L1b_input['lat'] < 0.0)
    #-- determine hemisphere with containing shots in file
    #-- unpacking raises ValueError unless exactly one hemisphere has shots
    HEM, = [key for key, val in count.items() if val]
    #-- return the output variables
    return ATM_L1b_input, file_lines, HEM