def readdzt(infile, gps=False, spm=None, epsr=None, verbose=False):
    """
    Unpack the header of a GSSI DZT file and read the radar data behind it.

    This is the BirsView-modified reader: the per-channel antenna lookup is
    bypassed and the antenna fields are hardcoded to a single 2300 MHz
    ``62300XT`` antenna so the library works with the GSSI Mini XT.

    :param str infile: The DZT file location
    :param bool gps: If truthy, GPS data is read from the ``.DZG`` file that shares :code:`infile`'s base name; when that read raises :py:class:`IOError`, ``.csv`` and then ``.CSV`` are tried. Defaults to False (no GPS read).
    :param float spm: User override for the scans-per-meter value read from the header (the file value is kept in ``dzt_spm``). Defaults to None.
    :param float epsr: User override for the relative permittivity read from the header (the file value is kept in ``dzt_epsr``). Defaults to None.
    :param bool verbose: Verbose, defaults to False
    :rtype: list of header (:py:class:`dict`), radar array (:py:class:`numpy.ndarray`, transposed so each column is one scan), and gps (the :code:`readdzg` result, ``[]`` if every GPS source failed, or the :code:`gps` argument unchanged when it is falsy)
    """
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    header = {}

    # the context manager guarantees the handle is released even when a
    # truncated or malformed file makes one of the reads below raise
    with open(infile, 'rb') as dzt:

        def _int16():
            # next little-endian signed 16-bit header field
            return struct.unpack('<h', dzt.read(2))[0]

        def _float32():
            # next little-endian 32-bit float header field
            return struct.unpack('<f', dzt.read(4))[0]

        def _timestamp():
            # 4-byte packed date/time; fall back to 1980-01-01 if undecodable
            raw = dzt.read(4)
            try:
                return readtime(raw)
            except Exception:
                return datetime(1980, 1, 1)

        header['infile'] = dzt.name

        # This is BirsView modification to enable the library to work with
        # GSSI Mini XT: the antenna is not read from the file.
        header['known_ant'] = True
        header['rh_ant'] = None
        header['rh_antname'] = '62300XT'
        header['antfreq'] = 2300

        # begin read
        header['rh_tag'] = _int16()  # 0x00ff if header, 0xfnff if old file format
        header['rh_data'] = _int16()  # offset to data from beginning of file
        header['rh_nsamp'] = _int16()  # samples per scan
        header['rh_bits'] = _int16()  # bits per data word
        header['rh_zero'] = _int16()  # if sir-30 or utilityscan df, then repeats per sample; otherwise 0x80 for 8bit and 0x8000 for 16bit
        header['rhf_sps'] = _float32()  # scans per second
        header['rhf_spm'] = _float32()  # scans per meter
        header['dzt_spm'] = header['rhf_spm']
        if spm:
            header['rhf_spm'] = spm
        header['rhf_mpm'] = _float32()  # meters per mark
        header['rhf_position'] = _float32()  # position (ns)
        header['rhf_range'] = _float32()  # range (ns)
        header['rh_npass'] = _int16()  # number of passes for 2-D files

        # bytes 32-36 and 36-40: creation and modification date and time in bits
        # structured as little endian u5u6u5u5u4u7
        dzt.seek(32)
        header['rhb_cdt'] = _timestamp()
        header['rhb_mdt'] = _timestamp()

        header['rh_rgain'] = _int16()  # offset to range gain function
        header['rh_nrgain'] = _int16()  # size of range gain function
        header['rh_text'] = _int16()  # offset to text
        header['rh_ntext'] = _int16()  # size of text
        header['rh_proc'] = _int16()  # offset to processing history
        header['rh_nproc'] = _int16()  # size of processing history
        header['rh_nchan'] = _int16()  # number of channels

        # epsr (sometimes referred to as "dielectric permittivity")
        file_epsr = _float32()
        if epsr is not None:
            # the user value wins; the file value is kept for reference
            header['dzt_epsr'] = file_epsr
            header['rhf_epsr'] = epsr
        else:
            header['rhf_epsr'] = file_epsr
            header['dzt_epsr'] = file_epsr

        header['rhf_top'] = _float32()  # position in meters (useless?)
        header['dzt_depth'] = _float32()  # range in meters based on DZT rhf_epsr
        # range based on user epsr
        header['rhf_depth'] = header['dzt_depth'] * (
            math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']))

        # byte 113 packs two bit fields: version in the low 3 bits and the
        # system type in the high 5 bits
        dzt.seek(113)
        vsbyte = ord(dzt.read(1))
        # whether or not the system is GPS-capable, 1=no 2=yes (does not mean
        # GPS is in file). The former `>> 5` returned the top three bits,
        # which belong to the system field.
        header['rh_version'] = vsbyte & 0x07
        header['rh_system'] = vsbyte >> 3  # the system type (values in UNIT={...} dictionary)

        # raw range gain function; best effort, absence is not fatal
        dzt.seek(header['rh_rgain'])
        try:
            header['rgain_bytes'] = dzt.read(header['rh_nrgain'])
        except Exception:
            pass

        # whether or not the header is normal or big-->determines offset to data array
        if header['rh_data'] < MINHEADSIZE:
            header['data_offset'] = MINHEADSIZE * header['rh_data']
        else:
            header['data_offset'] = MINHEADSIZE * header['rh_nchan']
        dzt.seek(header['data_offset'])

        # 8-bit unsigned, 16-bit unsigned, anything else 32-bit signed
        dtype = {8: np.uint8, 16: np.uint16}.get(header['rh_bits'], np.int32)

        # read in and transpose data
        data = np.fromfile(dzt, dtype).reshape(
            -1, (header['rh_nsamp'] * header['rh_nchan'])).T

    header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr'])
    header['ns_per_zsample'] = header['rhf_depth'] / (data.shape[0] * header['cr'])
    try:
        header['sec'] = data.shape[1] / float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.
    header['traces'] = int(data.shape[1] / header['rh_nchan'])

    if gps:
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as dzg_error:
            fx.printmsg('WARNING: no DZG file found')
            # fall back to a CSV with the same base name, either extension case
            errors = [dzg_error]
            gps_base = os.path.splitext(infile_gps)[0]
            for extension in ('.csv', '.CSV'):
                try:
                    gps = readdzg(gps_base + extension, 'csv', header,
                                  verbose=verbose)
                    break
                except Exception as csv_error:
                    errors.append(csv_error)
            else:
                fx.printmsg(
                    'ERROR reading GPS. distance normalization will not be possible.'
                )
                fx.printmsg(' details: %s' % errors[0])
                fx.printmsg(' %s' % errors[1])
                fx.printmsg(' %s' % errors[2])
                gps = []

    return [header, data, gps]
def readdzt(infile, gps=DataFrame(), spm=None, start_scan=0, num_scans=-1,
            epsr=None, antfreq=[None, None, None, None], verbose=False,
            zero=[None, None, None, None]):
    """
    Function to unpack and return things the program needs from the file header, and the data itself.

    :param str infile: The DZT file location
    :param pandas.DataFrame gps: Not read; it is always replaced by the result of reading the ``.DZG`` (or fallback ``.csv`` / ``.CSV``) file that shares :code:`infile`'s base name, or by an empty :py:class:`pandas.DataFrame` when no GPS could be read.
    :param float spm: User override for the scans-per-meter value read from the header (the file value is kept in ``dzt_spm``). Defaults to None.
    :param int start_scan: Number of scans to skip before reading data. Defaults to 0.
    :param int num_scans: Number of scans to read, or -1 for all remaining. Defaults to -1.
    :param float epsr: User value of relative permittivity, if specified. Defaults to None.
    :param list[int,int,int,int] antfreq: Per-channel antenna frequencies, used only for channels whose antenna name is neither in :code:`ANT` nor starts with digits. Defaults to a list of :code:`None` values.
    :param bool verbose: Verbose, defaults to False
    :param list[int,int,int,int] zero: List of time-zero values per channel. Defaults to a list of :code:`None` values, which resolves to :code:`rh_zero`.
    :rtype: list of header (:py:class:`dict`), radar data as returned by :code:`arraylist(header, data)`, gps (:py:class:`pandas.DataFrame` or the :code:`readdzg` result)
    """
    # NOTE: the mutable defaults in the signature are kept for interface
    # compatibility; they are only read here, never mutated.
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    infile_dzx = os.path.splitext(infile)[0] + ".DZX"
    header = {}

    # the context manager guarantees the handle is released even when a
    # truncated or malformed file makes one of the reads below raise
    with open(infile, 'rb') as dzt:

        def _int16():
            # next little-endian signed 16-bit header field
            return struct.unpack('<h', dzt.read(2))[0]

        def _float32():
            # next little-endian 32-bit float header field
            return struct.unpack('<f', dzt.read(4))[0]

        def _timestamp():
            # 4-byte packed date/time; fall back to 1980-01-01 if undecodable
            raw = dzt.read(4)
            try:
                return readtime(raw)
            except Exception:
                return datetime(1980, 1, 1)

        header['infile'] = dzt.name
        header['known_ant'] = [None, None, None, None]
        header['dzt_ant'] = [None, None, None, None]
        header['rh_ant'] = [None, None, None, None]
        header['rh_antname'] = [None, None, None, None]
        header['antfreq'] = [None, None, None, None]
        header['timezero'] = [None, None, None, None]

        # begin read
        header['rh_tag'] = _int16()  # 0x00ff if header, 0xfnff if old file format
        header['rh_data'] = _int16()  # offset to data from beginning of file
        header['rh_nsamp'] = _int16()  # samples per scan
        header['rh_bits'] = _int16()  # bits per data word
        header['rh_zero'] = _int16()  # if sir-30 or utilityscan df, then repeats per sample; otherwise 0x80 for 8bit and 0x8000 for 16bit
        header['rhf_sps'] = _float32()  # scans per second
        header['dzt_sps'] = header['rhf_sps']
        header['rhf_spm'] = _float32()  # scans per meter
        header['dzt_spm'] = header['rhf_spm']
        if spm:
            header['rhf_spm'] = spm
        header['rhf_mpm'] = _float32()  # meters per mark
        header['rhf_position'] = _float32()  # position (ns)
        header['rhf_range'] = _float32()  # range (ns)
        header['rh_npass'] = _int16()  # number of passes for 2-D files

        # bytes 32-36 and 36-40: creation and modification date and time in bits
        # structured as little endian u5u6u5u5u4u7
        dzt.seek(32)
        header['rhb_cdt'] = _timestamp()
        header['rhb_mdt'] = _timestamp()

        header['rh_rgain'] = _int16()  # offset to range gain function
        header['rh_nrgain'] = _int16()  # size of range gain function
        # jump to the range gain function, grab it, then return to byte 44
        dzt.seek(header['rh_rgain'])
        try:
            header['rgain_bytes'] = dzt.read(header['rh_nrgain'])
        except Exception:
            fx.printmsg('WARNING: Could not read range gain function')
        dzt.seek(44)

        header['rh_text'] = _int16()  # offset to text
        header['rh_ntext'] = _int16()  # size of text
        header['rh_proc'] = _int16()  # offset to processing history
        header['rh_nproc'] = _int16()  # size of processing history
        header['rh_nchan'] = _int16()  # number of channels

        # epsr (sometimes referred to as "dielectric permittivity")
        file_epsr = _float32()
        if epsr is not None:
            # in this case the user has specified an epsr value
            header['dzt_epsr'] = file_epsr
            header['rhf_epsr'] = epsr
        else:
            header['rhf_epsr'] = file_epsr
            header['dzt_epsr'] = file_epsr

        # calculate relative wave celerity given epsr value(s)
        header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr'])
        header['cr_true'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['dzt_epsr'])

        header['rhf_top'] = _float32()  # from experimentation, it seems this is the data top position in meters
        header['dzt_depth'] = _float32()  # range in meters based on DZT rhf_epsr, before subtracting rhf_top
        if header['dzt_depth'] == 0:
            # if dzt depth is 0, we need to calculate it using cr and rhf_range (converted to seconds)
            header['dzt_depth'] = header['cr'] * (header['rhf_range'] * (10**(-10)))
        # range based on user epsr, before subtracting rhf_top
        header['rhf_depth'] = header['dzt_depth'] * (
            math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']))

        # getting into largely useless territory (under "normal" operation)
        header['rh_xstart'] = _float32()  # starting x grid coordinate? part of rh_coordx
        header['rh_xend'] = _float32()  # ending x grid coordinate? part of rh_coordx
        header['rhf_servo_level'] = _float32()  # gain servo level
        # 3 "reserved" bytes
        dzt.seek(81)
        header['rh_accomp'] = struct.unpack('B', dzt.read(1))[0]  # Ant Conf component
        header['rh_sconfig'] = _int16()  # setup config number
        header['rh_spp'] = _int16()  # scans per pass
        header['rh_linenum'] = _int16()  # line number
        header['rh_ystart'] = _float32()  # starting y grid coordinate? part of rh_coordx
        header['rh_yend'] = _float32()  # ending y grid coordinate? part of rh_coordx
        header['rh_96'] = dzt.read(1)
        # bit-reversed nibbles of byte 96; both keys are overwritten from
        # byte 112 further down
        bits_96 = '{0:08b}'.format(ord(header['rh_96']))[::-1]
        header['rh_lineorder'] = int(bits_96[4:], 2)
        header['rh_slicetype'] = int(bits_96[:4], 2)
        header['rh_dtype'] = dzt.read(1)  # no description of dtype

        # user-supplied fallback frequencies, one slot per channel
        freq = [None, None, None, None]
        user_freq_given = (antfreq is not None) and (antfreq != [None, None, None, None])
        for i in range(header['rh_nchan']):
            if user_freq_given:
                try:
                    freq[i] = antfreq[i]
                except (TypeError, IndexError) as e:
                    freq[i] = 200
                    print(
                        'WARNING: due to an error, antenna %s frequency was set to 200 MHz'
                        % (i))
                    print('Error detail: %s' % (e))

        curpos = dzt.tell()  # byte 98, start of the first antenna name

        # read frequencies for multiple antennae
        for chan in range(header['rh_nchan']):
            # antenna bytes for channel n start at byte 98 of that channel's header
            dzt.seek(98 + (MINHEADSIZE * chan))
            header['dzt_ant'][chan] = dzt.read(14)
            header['rh_ant'][chan] = header['dzt_ant'][chan].decode('utf-8').split(
                '\x00')[0]
            header['rh_antname'][chan] = header['rh_ant'][chan].rsplit('x')[0]
            try:
                header['antfreq'][chan] = ANT[header['rh_antname'][chan]]
                header['known_ant'][chan] = True
            except KeyError:
                header['known_ant'][chan] = False
                # unknown antenna: try the leading digits of its name
                leading_digits = "".join(
                    takewhile(
                        str.isdigit,
                        header['rh_ant'][chan].replace('D5', '').replace('D6', '')))
                try:
                    header['antfreq'][chan] = int(leading_digits)  # hoping this works
                except ValueError:
                    # fall back to the user value for THIS channel only;
                    # replacing the whole list would discard frequencies
                    # already resolved for earlier channels
                    header['antfreq'][chan] = freq[chan]

        dzt.seek(curpos + 14)
        header['rh_112'] = dzt.read(1)
        byte_112 = ord(header['rh_112'])
        header['rh_lineorder'] = byte_112 & 0x0F  # low nibble
        header['rh_slicetype'] = byte_112 >> 4  # high nibble
        # byte 113
        header['vsbyte'] = dzt.read(1)  # byte containing versioning bits
        byte_113 = ord(header['vsbyte'])
        header['rh_version'] = byte_113 & 0x07  # low 3 bits; whether or not the system is GPS-capable, 1=no 2=yes (does not mean GPS is in file)
        header['rh_system'] = byte_113 >> 3  # high 5 bits; the system type (values in UNIT={...} dictionary in constants.py)
        header['rh_name'] = dzt.read(12)
        header['rh_chksum'] = dzt.read(2)
        header['INFOAREA'] = dzt.read(MINHEADSIZE - PAREASIZE - GPSAREASIZE)
        header['rh_RGPS0'] = dzt.read(RGPSSIZE)
        header['rh_RGPS1'] = dzt.read(RGPSSIZE)

        if header['rh_system'] == 14:
            # hardcoded because this is so frustrating. assuming no other
            # antennas can be paired with SS Mini XT
            header['rh_antname'] = ['SSMINIXT', None, None, None]
            header['antfreq'] = [2700, None, None, None]
            header['known_ant'] = [True, False, False, False]

        # whether or not the header is normal or big-->determines offset to data array
        if header['rh_data'] < MINHEADSIZE:
            header['data_offset'] = MINHEADSIZE * header['rh_data']
        else:
            header['data_offset'] = MINHEADSIZE * header['rh_nchan']

        # whatever sits between the per-channel headers and the data
        dzt.seek(MINHEADSIZE * header['rh_nchan'])
        header['header_extra'] = dzt.read(header['data_offset'] -
                                          (MINHEADSIZE * header['rh_nchan']))

        # 8-bit unsigned, 16-bit unsigned, anything else 32-bit signed
        dtype = {8: np.uint8, 16: np.uint16}.get(header['rh_bits'], np.int32)
        header['dtype'] = dtype

        if start_scan != 0:
            try:
                # calculate start offset in bytes:
                start_offset = int(start_scan * header['rh_nchan'] *
                                   header['rh_nsamp'] * header['rh_bits'] / 8)
            except ValueError:
                # if this fails, then fall back to 0 offset.
                start_offset = 0
                fx.printmsg(
                    'WARNING: ValueError for scan offset: %s (reading from start of data)'
                    % (start_scan, ))
                # consider returning No Data?
        else:
            start_offset = 0

        if num_scans != -1:
            try:
                num_items = int(num_scans * header['rh_nsamp'] * header['rh_nchan'])
            except ValueError:
                # if this fails then get all scans...
                fx.printmsg(
                    'WARNING: ValueError for number of scans: %s (reading all items from %s scans)'
                    % (num_scans, start_scan))
                num_items = -1
        else:
            num_items = -1

        # honor start_scan: it was computed but never applied, so the
        # argument was silently ignored. A relative seek is used so this works
        # on numpy versions without np.fromfile's `offset` keyword.
        if start_offset:
            dzt.seek(start_offset, os.SEEK_CUR)

        # read in and transpose data
        data = np.fromfile(dzt, dtype, count=num_items)
        data = data.reshape(-1, (header['rh_nsamp'] * header['rh_nchan']))
        data = data.T

    header['shape'] = data.shape
    header['ns_per_zsample'] = ((header['rhf_depth'] - header['rhf_top']) *
                                2) / (header['rh_nsamp'] * header['cr'])
    header['samp_freq'] = 1 / ((header['dzt_depth'] * 2) /
                               (header['rh_nsamp'] * header['cr_true']))
    try:
        header['sec'] = data.shape[1] / float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.

    # per-channel time zero: user value if usable, else the header's rh_zero
    for i in range(header['rh_nchan']):
        try:
            header['timezero'][i] = int(list(zero)[i])
        except (TypeError, IndexError):
            fx.printmsg(
                'WARNING: no time zero specified for channel %s, defaulting to rh_zero value (%s)'
                % (i, header['rh_zero']))
            header['timezero'][i] = header['rh_zero']

    if os.path.isfile(infile_gps):
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as dzg_error:
            fx.printmsg('WARNING: cannot read DZG file')
            # fall back to a CSV with the same base name, either extension case
            errors = [dzg_error]
            gps_base = os.path.splitext(infile_gps)[0]
            for extension in ('.csv', '.CSV'):
                try:
                    gps = readdzg(gps_base + extension, 'csv', header,
                                  verbose=verbose)
                    break
                except Exception as csv_error:
                    errors.append(csv_error)
            else:
                fx.printmsg(
                    'ERROR reading GPS. distance normalization will not be possible.'
                )
                fx.printmsg(' details: %s' % errors[0])
                fx.printmsg(' %s' % errors[1])
                fx.printmsg(' %s' % errors[2])
                gps = DataFrame()
    else:
        fx.printmsg('WARNING: no DZG file found for GPS input')
        gps = DataFrame()

    header['marks'] = []
    header['picks'] = {}

    if os.path.isfile(infile_dzx):
        header['marks'] = get_user_marks(infile_dzx, verbose=verbose)
        header['picks'] = get_picks(infile_dzx, verbose=verbose)
    else:
        fx.printmsg(
            'WARNING: could not find DZX file to read metadata. Trying to read array for marks...'
        )
        # when the system type is SIR3000, the second row should be user marks
        # (otherwise these are in the DZX)
        usr_marks = np.ndarray.tolist(data[1])
        header['marks'] = [
            trace for trace, mark in enumerate(usr_marks) if mark > 0
        ]
        if len(header['marks']) == header['shape'][1]:
            fx.printmsg(
                'number of marks matches the number of traces (%s). this is probably wrong, so throwing out the mark list.'
                % (len(header['marks'])))
            header['marks'] = []
        else:
            fx.printmsg('DZT marks read successfully. marks: %s' %
                        len(header['marks']))
            fx.printmsg(' traces: %s' % header['marks'])

    # make a list of data by channel
    data = arraylist(header, data)

    return [header, data, gps]
def readdzt(infile, gps=False, spm=None, start_scan=0, num_scans=-1,
            epsr=None, verbose=False):
    """
    Function to unpack and return things the program needs from the file header, and the data itself.

    :param str infile: The DZT file location
    :param bool gps: Returned unchanged when no ``.DZG`` file shares :code:`infile`'s base name; otherwise replaced by the :code:`readdzg` result (or ``[]`` if the DZG and the ``.csv`` / ``.CSV`` fallbacks all fail). Defaults to False.
    :param float spm: User override for the scans-per-meter value read from the header (the file value is kept in ``dzt_spm``). Defaults to None.
    :param int start_scan: Number of scans to skip before reading data. Defaults to 0.
    :param int num_scans: Number of scans to read, or -1 for all remaining. Defaults to -1.
    :param float epsr: User value of relative permittivity, if specified. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    :rtype: list of header (:py:class:`dict`), radar array (:py:class:`numpy.ndarray`, transposed so each column is one scan), gps (see :code:`gps` above)
    """
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    header = {}

    # the context manager guarantees the handle is released even when a
    # truncated or malformed file makes one of the reads below raise
    with open(infile, 'rb') as dzt:

        def _int16():
            # next little-endian signed 16-bit header field
            return struct.unpack('<h', dzt.read(2))[0]

        def _float32():
            # next little-endian 32-bit float header field
            return struct.unpack('<f', dzt.read(4))[0]

        def _timestamp():
            # 4-byte packed date/time; fall back to 1980-01-01 if undecodable
            raw = dzt.read(4)
            try:
                return readtime(raw)
            except Exception:
                return datetime(1980, 1, 1)

        header['infile'] = dzt.name
        header['known_ant'] = [None, None, None, None]
        header['rh_ant'] = [None, None, None, None]
        header['rh_antname'] = [None, None, None, None]
        header['antfreq'] = [None, None, None, None]

        # begin read
        header['rh_tag'] = _int16()  # 0x00ff if header, 0xfnff if old file format
        header['rh_data'] = _int16()  # offset to data from beginning of file
        header['rh_nsamp'] = _int16()  # samples per scan
        header['rh_bits'] = _int16()  # bits per data word
        header['rh_zero'] = _int16()  # if sir-30 or utilityscan df, then repeats per sample; otherwise 0x80 for 8bit and 0x8000 for 16bit
        header['rhf_sps'] = _float32()  # scans per second
        header['rhf_spm'] = _float32()  # scans per meter
        header['dzt_spm'] = header['rhf_spm']
        if spm:
            header['rhf_spm'] = spm
        header['rhf_mpm'] = _float32()  # meters per mark
        header['rhf_position'] = _float32()  # position (ns)
        header['rhf_range'] = _float32()  # range (ns)
        header['rh_npass'] = _int16()  # number of passes for 2-D files

        # bytes 32-36 and 36-40: creation and modification date and time in bits
        # structured as little endian u5u6u5u5u4u7
        dzt.seek(32)
        header['rhb_cdt'] = _timestamp()
        header['rhb_mdt'] = _timestamp()

        header['rh_rgain'] = _int16()  # offset to range gain function
        header['rh_nrgain'] = _int16()  # size of range gain function
        header['rh_text'] = _int16()  # offset to text
        header['rh_ntext'] = _int16()  # size of text
        header['rh_proc'] = _int16()  # offset to processing history
        header['rh_nproc'] = _int16()  # size of processing history
        header['rh_nchan'] = _int16()  # number of channels

        # epsr (sometimes referred to as "dielectric permittivity")
        file_epsr = _float32()
        if epsr is not None:
            # in this case the user has specified an epsr value
            header['dzt_epsr'] = file_epsr
            header['rhf_epsr'] = epsr
        else:
            header['rhf_epsr'] = file_epsr
            header['dzt_epsr'] = file_epsr

        header['rhf_top'] = _float32()  # position in meters (useless?)
        header['dzt_depth'] = _float32()  # range in meters based on DZT rhf_epsr
        # range based on user epsr
        header['rhf_depth'] = header['dzt_depth'] * (
            math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']))

        # read frequencies for multiple antennae
        for chan in range(header['rh_nchan']):
            # antenna bytes for channel n start at byte 98 of that channel's header
            dzt.seek(98 + (MINHEADSIZE * chan))
            header['rh_ant'][chan] = dzt.read(14).decode('utf-8').split(
                '\x00')[0]
            header['rh_antname'][chan] = header['rh_ant'][chan].rsplit('x')[0]
            try:
                header['antfreq'][chan] = ANT[header['rh_antname'][chan]]
                header['known_ant'][chan] = True
            except KeyError:
                header['known_ant'][chan] = False
                # unknown antenna: use the leading digits of its name
                # (raises ValueError when the name has none)
                leading_digits = "".join(
                    takewhile(
                        str.isdigit,
                        header['rh_ant'][chan].replace('D5', '').replace('D6', '')))
                header['antfreq'][chan] = int(leading_digits)  # hoping this works

        # byte 113 packs two bit fields: version in the low 3 bits and the
        # system type in the high 5 bits
        dzt.seek(113)
        vsbyte = ord(dzt.read(1))
        # whether or not the system is GPS-capable, 1=no 2=yes (does not mean
        # GPS is in file). The former `>> 5` returned the top three bits,
        # which belong to the system field.
        header['rh_version'] = vsbyte & 0x07
        header['rh_system'] = vsbyte >> 3  # the system type (values in UNIT={...} dictionary)

        # raw range gain function; best effort, absence is not fatal
        dzt.seek(header['rh_rgain'])
        try:
            header['rgain_bytes'] = dzt.read(header['rh_nrgain'])
        except Exception:
            pass

        # whether or not the header is normal or big-->determines offset to data array
        if header['rh_data'] < MINHEADSIZE:
            header['data_offset'] = MINHEADSIZE * header['rh_data']
        else:
            header['data_offset'] = MINHEADSIZE * header['rh_nchan']
        dzt.seek(header['data_offset'])

        # 8-bit unsigned, 16-bit unsigned, anything else 32-bit signed
        dtype = {8: np.uint8, 16: np.uint16}.get(header['rh_bits'], np.int32)

        if start_scan != 0:
            try:
                # calculate start offset in bytes:
                start_offset = int(start_scan * header['rh_nchan'] *
                                   header['rh_nsamp'] * header['rh_bits'] / 8)
            except ValueError:
                # if this fails, then fall back to 0 offset.
                start_offset = 0
                fx.printmsg(
                    'WARNING: ValueError for scan offset: %s (reading from start of data)'
                    % (start_scan, ))
                # consider returning No Data?
        else:
            start_offset = 0

        if num_scans != -1:
            try:
                num_items = int(num_scans * header['rh_nsamp'] * header['rh_nchan'])
            except ValueError:
                # if this fails then get all scans...
                fx.printmsg(
                    'WARNING: ValueError for number of scans: %s (reading all items from %s scans)'
                    % (num_scans, start_scan))
                num_items = -1
        else:
            num_items = -1

        # read in and transpose data; `offset` is relative to the current
        # file position, i.e. the start of the data area
        data = np.fromfile(dzt, dtype, offset=start_offset,
                           count=num_items).reshape(
                               -1, (header['rh_nsamp'] * header['rh_nchan'])).T

    header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr'])
    header['cr_true'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['dzt_epsr'])
    header['ns_per_zsample'] = (header['rhf_depth'] * 2) / (header['rh_nsamp'] *
                                                            header['cr'])
    header['samp_freq'] = 1 / ((header['dzt_depth'] * 2) /
                               (header['rh_nsamp'] * header['cr_true']))
    try:
        header['sec'] = data.shape[1] / float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.

    if os.path.isfile(infile_gps):
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as dzg_error:
            fx.printmsg('WARNING: cannot read DZG file')
            # fall back to a CSV with the same base name, either extension case
            errors = [dzg_error]
            gps_base = os.path.splitext(infile_gps)[0]
            for extension in ('.csv', '.CSV'):
                try:
                    gps = readdzg(gps_base + extension, 'csv', header,
                                  verbose=verbose)
                    break
                except Exception as csv_error:
                    errors.append(csv_error)
            else:
                fx.printmsg(
                    'ERROR reading GPS. distance normalization will not be possible.'
                )
                fx.printmsg(' details: %s' % errors[0])
                fx.printmsg(' %s' % errors[1])
                fx.printmsg(' %s' % errors[2])
                gps = []
    else:
        fx.printmsg('WARNING: no DZG file found for GPS input')

    return [header, data, gps]
def h5(ar, infile_basename, outfile_abspath, header, verbose=False):
    """
    .. warning:: HDF5 output is not yet available.

    In the future, this function will output to HDF5 format. As written it
    creates ``<outfile_abspath>.h5`` and, for every column of :code:`ar`,
    stores one ``line_0/location_<n>/datacapture_0/echogram_0`` dataset with
    four XML/string attributes built from the GPS record and the header.

    :param numpy.ndarray ar: Radar array; each column is written as one echogram
    :param str infile_basename: Input file basename; ``<basename>.DZG`` is read for GPS if it exists
    :param str outfile_abspath: Output file path, without the ``.h5`` extension
    :param dict header: File header dictionary (``rhf_sps``, ``rhf_depth`` and ``rh_nsamp`` are read)
    :param bool verbose: Verbose, defaults to False
    """
    # Assumptions:
    # - constant velocity between marks (may be possible to add a check)
    # - marks are made at same time on GPS and SIR
    # - gps and gpr are in same location when mark is made
    # - good quality horizontal solution
    #
    # single-channel IceRadar h5 structure is
    # /line_x/location_n/datacapture_0/echogram_0 (/group/group/group/dataset)
    # each dataset has an 'attributes' item attached, formatted in
    # 'collections.defaultdict' style:
    # [('PCSavetimestamp', str), ('GPS Cluster- MetaData_xml', str),
    #  ('Digitizer-MetaData_xml', str), ('GPS Cluster_UTM-MetaData_xml', str)]

    if verbose:
        fx.printmsg('output format is IceRadar HDF5. writing file to: %s' %
                    outfile_abspath)

    # setup formattable strings
    svts = 'PCSavetimestamp'
    gpsx = 'GPS Cluster- MetaData_xml'
    # main gps string. 8 formattable values: gps_sec, lat, lon, qual, num_sats, hdop, altitude, geoid_ht
    gpsclstr = '<Cluster>\r\n<Name>GPS Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>GPS_timestamp_UTC</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Lat_N</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Long_ W</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Fix_Quality</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Num _Sat</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Dilution</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Alt_asl_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Geoid_Heigh_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix valid</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n</Cluster>\r\n'
    dimx = 'Digitizer-MetaData_xml'
    # digitizer string. 4 formattable values: vertical range (rhf_depth),
    # maximum input frequency, record length (rh_nsamp), stacking
    dimxstr = '<Cluster>\r\n<Name>Digitizer MetaData</Name>\r\n<NumElts>3</NumElts>\r\n<Cluster>\r\n<Name>Digitizer settings</Name>\r\n<NumElts>5</NumElts>\r\n<Cluster>\r\n<Name>Vertical</Name>\r\n<NumElts>3</NumElts>\r\n<DBL>\r\n<Name>vertical range</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>Vertical Offset</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>vertical coupling</Name>\r\n<Val>1</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Channel</Name>\r\n<NumElts>1</NumElts>\r\n<DBL>\r\n<Name>maximum input frequency</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Horizontal</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name> Sample Rate</Name>\r\n<Val>250000000.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>Record Length</Name>\r\n<Val>%i</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Trigger</Name>\r\n<NumElts>12</NumElts>\r\n<U16>\r\n<Name>trigger type</Name>\r\n<Val>0</Val>\r\n</U16>\r\n<DBL>\r\n<Name>trigger delay</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>reference position</Name>\r\n<Val>10.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>trigger level</Name>\r\n<Val>2.00000000000000E-2</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>hysteresis</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>low level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>high level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<U16>\r\n<Name>trigger coupling</Name>\r\n<Val>1</Val>\r\n</U16>\r\n<I32>\r\n<Name>trigger window mode</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<I32>\r\n<Name>trigger slope</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<String>\r\n<Name>trigger source</Name>\r\n<Val>0</Val>\r\n</String>\r\n<I32>\r\n<Name>Trigger Modifier</Name>\r\n<Val>2</Val>\r\n</I32>\r\n</Cluster>\r\n<String>\r\n<Name>channel name</Name>\r\n<Val>0</Val>\r\n</String>\r\n</Cluster>\r\n<U16>\r\n<Name>Stacking</Name>\r\n<Val>%i</Val>\r\n</U16>\r\n<Cluster>\r\n<Name>Radargram extra info</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name>relativeInitialX</Name>\r\n<Val>-1.51999998365682E-7</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>xIncrement</Name>\r\n<Val>3.99999988687227E-9</Val>\r\n</DBL>\r\n</Cluster>\r\n</Cluster>\r\n'
    gutx = 'GPS Cluster_UTM-MetaData_xml'
    # gps UTM string. 1 formattable value: num_sats
    gpsutmstr = '<Cluster>\r\n<Name>GPS_UTM Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>Datum</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Easting_m</Name>\r\n<Val></Val>\r\n</String>\r\n<String>\r\n<Name>Northing_m</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Elevation</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Zone</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Satellites (dup)</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix Valid (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_1</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_2</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n</Cluster>\r\n'

    if os.path.exists(infile_basename + '.DZG'):
        # NOTE(review): other readdzg call sites in this file pass the whole
        # header (readdzg(path, 'dzg', header, verbose=verbose)); this call
        # uses a different argument list, and the loop below indexes the
        # result like a record array (gps[n]['lat']). Confirm which readdzg
        # signature/return type is current before enabling this exporter.
        gps = readdzg(infile_basename + '.DZG', 'dzg', header['rhf_sps'],
                      ar.shape[1], verbose)
    else:
        # if there's no DZG file...need a way to parse another gps source if
        # possible. NOTE(review): with this placeholder the loop below fails
        # on the first trace (gps[0] on an empty string).
        gps = ''

    # NOTE(review): the digitizer line used to read from undefined names
    # (`r[0][...]` and `freq`) and raised NameError. The header values now come
    # from the `header` argument. The frequency and stacking sources below are
    # assumptions -- confirm the intended values and units:
    #   - frequency: first entry of header['antfreq'] (or the scalar itself),
    #     NaN when unknown
    #   - stacking: header['stack'] if present, else 1
    antfreq = header.get('antfreq')
    if isinstance(antfreq, (list, tuple)):
        antfreq = antfreq[0] if antfreq else None
    freq = float('nan') if antfreq is None else antfreq
    stack = header.get('stack', 1)

    # digitizer attribute is identical for every trace
    dimx_str = dimxstr % (header['rhf_depth'], freq, header['rh_nsamp'], stack)

    # make data structure; mode 'w' overwrites an existing file, and the
    # context manager closes it even if a trace fails part-way through
    with h5py.File('%s.h5' % (outfile_abspath), 'w') as f:
        if verbose:
            fx.printmsg('exporting to %s.h5' % outfile_abspath)

        try:
            li = f.create_group('line_0')  # create line zero
        except ValueError:  # the line already exists in the file
            li = f['line_0']

        for n, sample in enumerate(ar.T):
            fix = gps[n]
            # create strings
            # pcsavetimestamp
            # formatting: m/d/yyyy_h:m:ss PM
            svts_str = fix['timestamp'].astype(datetime).strftime(
                '%m/%d/%Y_%H:%M:%S %p')

            # gpscluster
            # order we need: (len(list), tracetime, y, x, q, sats, dil, z, gh, 1, 1)
            # rows in gps: tracenum, lat, lon, altitude, geoid_ht, qual, num_sats, hdop, timestamp
            gpsx_str = gpsclstr % (fix['gps_sec'], fix['lat'], fix['lon'],
                                   fix['qual'], fix['num_sats'], fix['hdop'],
                                   fix['altitude'], fix['geoid_ht'])

            # utm gpscluster
            gutx_str = gpsutmstr % (fix['num_sats'])

            lo = li.create_group('location_' + str(n))  # create a location for each trace
            dc = lo.create_group('datacapture_0')
            eg = dc.create_dataset('echogram_0', (ar.shape[0], ), data=sample)
            eg.attrs.create(svts, svts_str)  # store pcsavetimestamp attribute
            eg.attrs.create(gpsx, gpsx_str)  # store gpscluster attribute
            eg.attrs.create(dimx, dimx_str)  # store digitizer attribute
            eg.attrs.create(gutx, gutx_str)  # store utm gpscluster attribute