Beispiel #1
0
def readdzt(infile, gps=False, spm=None, epsr=None, verbose=False):
    """
    Unpack the header values this library needs from a DZT file and read the radar data.

    This variant does not detect the antenna from the file. Following the
    BirsView modification it always reports a GSSI Mini XT
    (:code:`rh_antname='62300XT'`, :code:`antfreq=2300`).

    :param str infile: The DZT file location
    :param gps: If truthy, GPS is read from the DZG file named like :code:`infile`, falling back to a ".csv" and then a ".CSV" file when the DZG read raises IOError. If falsy, the value is returned unchanged.
    :param float spm: User value of scans per meter. If truthy, it replaces the file value in :code:`header['rhf_spm']` (the file value is kept in :code:`header['dzt_spm']`).
    :param float epsr: User value of relative permittivity. If not None, it replaces the file value in :code:`header['rhf_epsr']` (the file value is kept in :code:`header['dzt_epsr']`).
    :param bool verbose: Verbose, defaults to False
    :return: list :code:`[header, data, gps]`: the header :py:class:`dict`, the transposed :py:class:`numpy.ndarray` read from the file, and the GPS result (:code:`[]` if every GPS read attempt failed)
    """
    # currently unused but potentially useful lines:
    # headerstruct = '<5h 5f h 4s 4s 7h 3I d I 3c x 3h d 2x 2c s s 14s s s 12s h 816s 76s' # the structure of the bytewise header and "gps data" as I understand it - 1024 bytes
    # readsize = (2,2,2,2,2,4,4,4,4,4,2,4,4,4,2,2,2,2,2,4,4,4,8,4,3,1,2,2,2,8,1,1,14,1,1,12,2) # the variable size of bytes in the header (most of the time) - 128 bytes
    # fx.printmsg('total header structure size: '+str(calcsize(headerstruct)))
    # packed_size = 0
    # for i in range(len(readsize)): packed_size = packed_size+readsize[i]
    # fx.printmsg('fixed header size: '+str(packed_size)+'\n')
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    header = {}

    # the context manager closes the file even when a header read or the
    # reshape below raises
    with open(infile, 'rb') as fh:

        def read_short():
            """Read the next 2 bytes as a little-endian signed short."""
            return struct.unpack('<h', fh.read(2))[0]

        def read_float():
            """Read the next 4 bytes as a little-endian float."""
            return struct.unpack('<f', fh.read(4))[0]

        header['infile'] = fh.name
        header['known_ant'] = [None, None, None, None]
        header['rh_ant'] = [None, None, None, None]
        header['rh_antname'] = [None, None, None, None]
        header['antfreq'] = [None, None, None, None]

        # begin read (the reads below are sequential; their order is the
        # byte layout of the header)
        header['rh_tag'] = read_short()  # 0x00ff if header, 0xfnff if old file format
        header['rh_data'] = read_short()  # offset to data from beginning of file
        header['rh_nsamp'] = read_short()  # samples per scan
        header['rh_bits'] = read_short()  # bits per data word
        # if sir-30 or utilityscan df, then repeats per sample; otherwise
        # 0x80 for 8bit and 0x8000 for 16bit
        header['rh_zero'] = read_short()
        header['rhf_sps'] = read_float()  # scans per second
        header['rhf_spm'] = read_float()  # scans per meter
        header['dzt_spm'] = header['rhf_spm']
        if spm:
            header['rhf_spm'] = spm

        header['rhf_mpm'] = read_float()  # meters per mark
        header['rhf_position'] = read_float()  # position (ns)
        header['rhf_range'] = read_float()  # range (ns)
        header['rh_npass'] = read_short()  # number of passes for 2-D files

        # bytes 32-36 and 36-40: creation and modification date and time in bits
        # structured as little endian u5u6u5u5u4u7
        fh.seek(32)
        try:
            header['rhb_cdt'] = readtime(fh.read(4))
        except Exception:
            # unreadable timestamp: fall back to a fixed placeholder date
            header['rhb_cdt'] = datetime(1980, 1, 1)
        try:
            header['rhb_mdt'] = readtime(fh.read(4))
        except Exception:
            header['rhb_mdt'] = datetime(1980, 1, 1)

        header['rh_rgain'] = read_short()  # offset to range gain function
        header['rh_nrgain'] = read_short()  # size of range gain function
        header['rh_text'] = read_short()  # offset to text
        header['rh_ntext'] = read_short()  # size of text
        header['rh_proc'] = read_short()  # offset to processing history
        header['rh_nproc'] = read_short()  # size of processing history
        header['rh_nchan'] = read_short()  # number of channels

        # epsr (sometimes referred to as "dielectric permittivity"); the
        # file value is always read so the file position advances
        file_epsr = read_float()
        if epsr is not None:  # the user has specified an epsr value
            header['dzt_epsr'] = file_epsr
            header['rhf_epsr'] = epsr
        else:
            header['rhf_epsr'] = file_epsr
            header['dzt_epsr'] = header['rhf_epsr']

        header['rhf_top'] = read_float()  # position in meters (useless?)
        header['dzt_depth'] = read_float()  # range in meters based on DZT rhf_epsr
        # range based on user epsr
        header['rhf_depth'] = header['dzt_depth'] * (
            math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']))

        # This is BirsView modification to enable the library to work with
        # GSSI Mini XT: the per-channel antenna lookup (14 bytes at offset
        # 98 + MINHEADSIZE * chan, resolved through ANT) is disabled and the
        # antenna is hardcoded instead.
        header['rh_ant'] = None
        header['rh_antname'] = '62300XT'
        header['antfreq'] = 2300
        header['known_ant'] = True

        fh.seek(113)  # skip to something that matters
        vsbyte = fh.read(1)  # byte containing versioning bits
        # whether or not the system is GPS-capable, 1=no 2=yes (does not
        # mean GPS is in file)
        # NOTE(review): another variant of this reader takes the version from
        # the low 3 bits of this byte instead of the high 3 - confirm which
        # is correct.
        header['rh_version'] = ord(vsbyte) >> 5
        # the system type (values in UNIT={...} dictionary)
        header['rh_system'] = ord(vsbyte) >> 3

        fh.seek(header['rh_rgain'])
        try:
            header['rgain_bytes'] = fh.read(header['rh_nrgain'])
        except Exception:
            # best effort: the range gain bytes are optional for callers
            pass

        # whether or not the header is normal or big-->determines offset to
        # data array
        if header['rh_data'] < MINHEADSIZE:
            header['data_offset'] = MINHEADSIZE * header['rh_data']
        else:
            header['data_offset'] = MINHEADSIZE * header['rh_nchan']
        fh.seek(header['data_offset'])

        if header['rh_bits'] == 8:
            dtype = np.uint8  # 8-bit unsigned
        elif header['rh_bits'] == 16:
            dtype = np.uint16  # 16-bit unsigned
        else:
            dtype = np.int32  # 32-bit signed

        # read in and transpose data
        data = np.fromfile(fh, dtype).reshape(
            -1, (header['rh_nsamp'] * header['rh_nchan'])).T

    header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr'])
    header['ns_per_zsample'] = header['rhf_depth'] / (data.shape[0] *
                                                      header['cr'])

    try:
        header['sec'] = data.shape[1] / float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.
    header['traces'] = int(data.shape[1] / header['rh_nchan'])

    if gps:
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as e0:
            fx.printmsg('WARNING: no DZG file found')
            try:
                infile_gps = os.path.splitext(infile_gps)[0] + ".csv"
                gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
            except Exception as e1:
                try:
                    infile_gps = os.path.splitext(infile_gps)[0] + ".CSV"
                    gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
                except Exception as e2:
                    fx.printmsg(
                        'ERROR reading GPS. distance normalization will not be possible.'
                    )
                    fx.printmsg('   details: %s' % e0)
                    fx.printmsg('            %s' % e1)
                    fx.printmsg('            %s' % e2)
                    gps = []

    return [header, data, gps]
Beispiel #2
0
def readdzt(infile,
            gps=DataFrame(),
            spm=None,
            start_scan=0,
            num_scans=-1,
            epsr=None,
            antfreq=[None, None, None, None],
            verbose=False,
            zero=[None, None, None, None]):
    """
    Function to unpack and return things the program needs from the file header, and the data itself.

    :param str infile: The DZT file location
    :param gps: Ignored on input (kept for backward compatibility). The returned value is read from the DZG file with the same name as :code:`infile` if it exists, otherwise it is an empty :py:class:`pandas.DataFrame`.
    :param float spm: User value of scans per meter. If truthy, it replaces the file value in :code:`header['rhf_spm']`. Defaults to None.
    :param int start_scan: Scan to start from. Only used to compute a byte offset that is currently not applied to the read (see the note in the body). Defaults to 0.
    :param int num_scans: Number of scans to read; -1 reads everything. Defaults to -1.
    :param float epsr: User value of relative permittivity, if specified. Defaults to None.
    :param list antfreq: User antenna frequencies per channel. Used as the whole :code:`header['antfreq']` list when a channel's frequency cannot be parsed from its antenna name.
    :param bool verbose: Verbose, defaults to False
    :param list[int,int,int,int] zero: List of time-zero values per channel. Defaults to a list of :code:`None` values, which resolves to :code:`rh_zero`.
    :rtype: header (:py:class:`dict`), radar data as returned by :code:`arraylist` (list of data by channel), gps (:py:class:`pandas.DataFrame` or the result of :code:`readdzg`)
    """
    # currently unused but potentially useful lines:
    # headerstruct = '<5h 5f h 4s 4s 7h 3I d I 3c x 3h d 2x 2c s s 14s s s 12s h 816s 76s' # the structure of the bytewise header and "gps data" as I understand it - 1024 bytes
    # readsize = (2,2,2,2,2,4,4,4,4,4,2,4,4,4,2,2,2,2,2,4,4,4,8,4,3,1,2,2,2,8,1,1,14,1,1,12,2) # the variable size of bytes in the header (most of the time) - 128 bytes
    # fx.printmsg('total header structure size: '+str(calcsize(headerstruct)))
    # packed_size = 0
    # for i in range(len(readsize)): packed_size = packed_size+readsize[i]
    # fx.printmsg('fixed header size: '+str(packed_size)+'\n')
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    infile_dzx = os.path.splitext(infile)[0] + ".DZX"
    header = {}

    # the context manager closes the file even when a header read or the
    # reshape below raises
    with open(infile, 'rb') as fh:

        def read_short():
            """Read the next 2 bytes as a little-endian signed short."""
            return struct.unpack('<h', fh.read(2))[0]

        def read_float():
            """Read the next 4 bytes as a little-endian float."""
            return struct.unpack('<f', fh.read(4))[0]

        header['infile'] = fh.name
        header['known_ant'] = [None, None, None, None]
        header['dzt_ant'] = [None, None, None, None]
        header['rh_ant'] = [None, None, None, None]
        header['rh_antname'] = [None, None, None, None]
        header['antfreq'] = [None, None, None, None]
        header['timezero'] = [None, None, None, None]

        # begin read (the reads below are sequential; their order is the
        # byte layout of the header)
        header['rh_tag'] = read_short()  # 0x00ff if header, 0xfnff if old file format
        header['rh_data'] = read_short()  # offset to data from beginning of file
        header['rh_nsamp'] = read_short()  # samples per scan
        header['rh_bits'] = read_short()  # bits per data word
        # if sir-30 or utilityscan df, then repeats per sample; otherwise
        # 0x80 for 8bit and 0x8000 for 16bit
        header['rh_zero'] = read_short()
        header['rhf_sps'] = read_float()  # scans per second
        header['dzt_sps'] = header['rhf_sps']
        header['rhf_spm'] = read_float()  # scans per meter
        header['dzt_spm'] = header['rhf_spm']
        if spm:
            header['rhf_spm'] = spm

        header['rhf_mpm'] = read_float()  # meters per mark
        header['rhf_position'] = read_float()  # position (ns)
        header['rhf_range'] = read_float()  # range (ns)
        header['rh_npass'] = read_short()  # number of passes for 2-D files

        # bytes 32-36 and 36-40: creation and modification date and time in bits
        # structured as little endian u5u6u5u5u4u7
        fh.seek(32)
        try:
            header['rhb_cdt'] = readtime(fh.read(4))
        except Exception:
            # unreadable timestamp: fall back to a fixed placeholder date
            header['rhb_cdt'] = datetime(1980, 1, 1)
        try:
            header['rhb_mdt'] = readtime(fh.read(4))
        except Exception:
            header['rhb_mdt'] = datetime(1980, 1, 1)

        header['rh_rgain'] = read_short()  # offset to range gain function
        header['rh_nrgain'] = read_short()  # size of range gain function
        fh.seek(header['rh_rgain'])
        try:
            header['rgain_bytes'] = fh.read(header['rh_nrgain'])
        except Exception:
            fx.printmsg('WARNING: Could not read range gain function')
        fh.seek(44)  # return to the header field after rh_nrgain

        header['rh_text'] = read_short()  # offset to text
        header['rh_ntext'] = read_short()  # size of text
        header['rh_proc'] = read_short()  # offset to processing history
        header['rh_nproc'] = read_short()  # size of processing history
        header['rh_nchan'] = read_short()  # number of channels

        # epsr (sometimes referred to as "dielectric permittivity"); the
        # file value is always read so the file position advances
        file_epsr = read_float()
        if epsr is not None:  # the user has specified an epsr value
            header['dzt_epsr'] = file_epsr
            header['rhf_epsr'] = epsr
        else:
            header['rhf_epsr'] = file_epsr
            header['dzt_epsr'] = header['rhf_epsr']

        # calculate relative wave celerity given epsr value(s)
        header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr'])
        header['cr_true'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['dzt_epsr'])

        # from experimentation, it seems this is the data top position in meters
        header['rhf_top'] = read_float()
        # range in meters based on DZT rhf_epsr, before subtracting rhf_top
        header['dzt_depth'] = read_float()
        if (header['dzt_depth'] == 0):
            # if dzt depth is 0, we need to calculate it using cr and
            # rhf_range (converted to seconds)
            # NOTE(review): rhf_range is commented as ns above, but the
            # factor used here is 1e-10 - confirm the intended conversion.
            header['dzt_depth'] = header['cr'] * (header['rhf_range'] *
                                                  (10**(-10)))

        # range based on user epsr, before subtracting rhf_top
        header['rhf_depth'] = header['dzt_depth'] * (
            math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']))

        # getting into largely useless territory (under "normal" operation)
        header['rh_xstart'] = read_float()  # starting x grid coordinate? part of rh_coordx
        header['rh_xend'] = read_float()  # ending x grid coordinate? part of rh_coordx
        header['rhf_servo_level'] = read_float()  # gain servo level
        # 3 "reserved" bytes
        fh.seek(81)
        header['rh_accomp'] = struct.unpack('B', fh.read(1))[0]  # Ant Conf component
        header['rh_sconfig'] = read_short()  # setup config number
        header['rh_spp'] = read_short()  # scans per pass
        header['rh_linenum'] = read_short()  # line number
        header['rh_ystart'] = read_float()  # starting y grid coordinate? part of rh_coordx
        header['rh_yend'] = read_float()  # ending y grid coordinate? part of rh_coordx

        header['rh_96'] = fh.read(1)
        # bit string of the byte, reversed; these two values are overwritten
        # further down from header['rh_112']
        bits_96 = '{0:08b}'.format(ord(header['rh_96']))[::-1]
        header['rh_lineorder'] = int(bits_96[4:], 2)
        header['rh_slicetype'] = int(bits_96[:4], 2)
        header['rh_dtype'] = fh.read(1)  # no description of dtype

        # user-specified frequencies; only used if parsing a channel's
        # frequency from its antenna name fails below
        freq = [None, None, None, None]
        for i in range(header['rh_nchan']):
            if (antfreq is not None) and (antfreq != [None, None, None, None]):
                try:
                    freq[i] = antfreq[i]
                except (TypeError, IndexError) as e:
                    freq[i] = 200
                    print(
                        'WARNING: due to an error, antenna %s frequency was set to 200 MHz'
                        % (i))
                    print('Error detail: %s' % (e))

        curpos = fh.tell()
        # read frequencies for multiple antennae
        for chan in range(header['rh_nchan']):
            # start of antenna bytes for channel n (98 for the first channel)
            fh.seek(98 + (MINHEADSIZE * chan))
            header['dzt_ant'][chan] = fh.read(14)
            header['rh_ant'][chan] = header['dzt_ant'][chan].decode(
                'utf-8').split('\x00')[0]
            header['rh_antname'][chan] = header['rh_ant'][chan].rsplit('x')[0]
            try:
                header['antfreq'][chan] = ANT[header['rh_antname'][chan]]
                header['known_ant'][chan] = True
            except KeyError:
                header['known_ant'][chan] = False
                try:
                    # unknown antenna: take the leading digits of its name
                    header['antfreq'][chan] = int("".join(
                        takewhile(
                            str.isdigit,
                            header['rh_ant'][chan].replace('D5', '').replace(
                                'D6', ''))))  # hoping this works
                except ValueError:
                    # no leading digits: use the user-specified list instead
                    header['antfreq'] = freq

        fh.seek(curpos + 14)  # first byte after the 14 antenna bytes
        header['rh_112'] = fh.read(1)
        bits_112 = '{0:08b}'.format(ord(header['rh_112']))
        header['rh_lineorder'] = int(bits_112[4:], 2)
        header['rh_slicetype'] = int(bits_112[:4], 2)

        header['vsbyte'] = fh.read(1)  # byte containing versioning bits
        bits_vs = '{0:08b}'.format(ord(header['vsbyte']))
        # whether or not the system is GPS-capable, 1=no 2=yes (does not
        # mean GPS is in file)
        header['rh_version'] = int(bits_vs[5:], 2)
        # the system type (values in UNIT={...} dictionary in constants.py)
        header['rh_system'] = int(bits_vs[:5], 2)
        header['rh_name'] = fh.read(12)
        header['rh_chksum'] = fh.read(2)
        header['INFOAREA'] = fh.read(MINHEADSIZE - PAREASIZE - GPSAREASIZE)
        header['rh_RGPS0'] = fh.read(RGPSSIZE)
        header['rh_RGPS1'] = fh.read(RGPSSIZE)

        # hardcoded because this is so frustrating. assuming no other
        # antennas can be paired with SS Mini XT
        if header['rh_system'] == 14:
            header['rh_antname'] = ['SSMINIXT', None, None, None]
            header['antfreq'] = [2700, None, None, None]
            header['known_ant'] = [True, False, False, False]

        # whether or not the header is normal or big-->determines offset to
        # data array
        if header['rh_data'] < MINHEADSIZE:
            header['data_offset'] = MINHEADSIZE * header['rh_data']
        else:
            header['data_offset'] = MINHEADSIZE * header['rh_nchan']

        # anything between the per-channel headers and the data is kept raw;
        # this read also leaves the file positioned for the data read
        fh.seek(MINHEADSIZE * header['rh_nchan'])
        header['header_extra'] = fh.read(header['data_offset'] -
                                         (MINHEADSIZE * header['rh_nchan']))

        if header['rh_bits'] == 8:
            dtype = np.uint8  # 8-bit unsigned
        elif header['rh_bits'] == 16:
            dtype = np.uint16  # 16-bit unsigned
        else:
            dtype = np.int32  # 32-bit signed
        header['dtype'] = dtype

        # NOTE(review): start_offset is computed but never applied to the
        # read below, so start_scan currently has no effect on the data
        # returned - confirm whether that is intended.
        if start_scan != 0:
            try:
                # calculate start offset in bytes:
                start_offset = int(start_scan * header['rh_nchan'] *
                                   header['rh_nsamp'] * header['rh_bits'] / 8)
            except ValueError:
                # if this fails, then fall back to 0 offset.
                start_offset = 0
                fx.printmsg(
                    'WARNING: ValueError for scan offset: %s (reading from start of data)'
                    % (start_scan, ))
                # consider returning No Data?
        else:
            start_offset = 0

        if num_scans != -1:
            try:
                num_items = int(num_scans * header['rh_nsamp'] *
                                header['rh_nchan'])
            except ValueError:
                # if this fails then get all scans...
                fx.printmsg(
                    'WARNING: ValueError for number of scans: %s (reading all items from %s scans)'
                    % (num_scans, start_scan))
                num_items = -1
        else:
            num_items = -1

        # read in and transpose data
        data = np.fromfile(fh, dtype, count=num_items)
        data = data.reshape(-1, (header['rh_nsamp'] * header['rh_nchan']))
        data = data.T

    header['shape'] = data.shape

    header['ns_per_zsample'] = ((header['rhf_depth'] - header['rhf_top']) *
                                2) / (header['rh_nsamp'] * header['cr'])
    header['samp_freq'] = 1 / ((header['dzt_depth'] * 2) /
                               (header['rh_nsamp'] * header['cr_true']))

    try:
        header['sec'] = data.shape[1] / float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.

    for i in range(header['rh_nchan']):
        try:
            header['timezero'][i] = int(list(zero)[i])
        except (TypeError, IndexError):
            fx.printmsg(
                'WARNING: no time zero specified for channel %s, defaulting to rh_zero value (%s)'
                % (i, header['rh_zero']))
            header['timezero'][i] = header['rh_zero']

    if os.path.isfile(infile_gps):
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as e0:
            fx.printmsg('WARNING: cannot read DZG file')
            try:
                infile_gps = os.path.splitext(infile_gps)[0] + ".csv"
                gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
            except Exception as e1:
                try:
                    infile_gps = os.path.splitext(infile_gps)[0] + ".CSV"
                    gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
                except Exception as e2:
                    fx.printmsg(
                        'ERROR reading GPS. distance normalization will not be possible.'
                    )
                    fx.printmsg('   details: %s' % e0)
                    fx.printmsg('            %s' % e1)
                    fx.printmsg('            %s' % e2)
                    gps = DataFrame()
    else:
        fx.printmsg('WARNING: no DZG file found for GPS input')
        gps = DataFrame()

    header['marks'] = []
    header['picks'] = {}

    if os.path.isfile(infile_dzx):
        header['marks'] = get_user_marks(infile_dzx, verbose=verbose)
        header['picks'] = get_picks(infile_dzx, verbose=verbose)
    else:
        fx.printmsg(
            'WARNING: could not find DZX file to read metadata. Trying to read array for marks...'
        )

        # when the system type is SIR3000, the second row should be user
        # marks (otherwise these are in the DZX)
        usr_marks = np.ndarray.tolist(data[1])
        # a mark is any trace whose value in that row is positive
        header['marks'] = [i for i, m in enumerate(usr_marks) if m > 0]
        if len(header['marks']) == header['shape'][1]:
            fx.printmsg(
                'number of marks matches the number of traces (%s). this is probably wrong, so throwing out the mark list.'
                % (len(header['marks'])))
            header['marks'] = []
        else:
            fx.printmsg('DZT marks read successfully. marks: %s' %
                        len(header['marks']))
            fx.printmsg('                            traces: %s' %
                        header['marks'])

    # make a list of data by channel
    data = arraylist(header, data)

    return [header, data, gps]
Beispiel #3
0
def readdzt(infile,
            gps=False,
            spm=None,
            start_scan=0,
            num_scans=-1,
            epsr=None,
            verbose=False):
    """
    Function to unpack and return things the program needs from the file header, and the data itself.

    :param str infile: The DZT file location
    :param gps: Returned unchanged when no DZG file with the same name as :code:`infile` exists. Otherwise replaced by the result of reading that file (or :code:`[]` if every GPS read attempt failed). Defaults to False.
    :param float spm: User value of scans per meter. If truthy, it replaces the file value in :code:`header['rhf_spm']`. Defaults to None.
    :param int start_scan: Scan to start reading from; converted to a byte offset passed to :py:func:`numpy.fromfile`. Defaults to 0.
    :param int num_scans: Number of scans to read; -1 reads everything. Defaults to -1.
    :param float epsr: User value of relative permittivity, if specified. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    :rtype: header (:py:class:`dict`), radar array (:py:class:`numpy.ndarray`), gps
    """
    # currently unused but potentially useful lines:
    # headerstruct = '<5h 5f h 4s 4s 7h 3I d I 3c x 3h d 2x 2c s s 14s s s 12s h 816s 76s' # the structure of the bytewise header and "gps data" as I understand it - 1024 bytes
    # readsize = (2,2,2,2,2,4,4,4,4,4,2,4,4,4,2,2,2,2,2,4,4,4,8,4,3,1,2,2,2,8,1,1,14,1,1,12,2) # the variable size of bytes in the header (most of the time) - 128 bytes
    # fx.printmsg('total header structure size: '+str(calcsize(headerstruct)))
    # packed_size = 0
    # for i in range(len(readsize)): packed_size = packed_size+readsize[i]
    # fx.printmsg('fixed header size: '+str(packed_size)+'\n')
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    header = {}

    # the context manager closes the file even when a header read or the
    # reshape below raises
    with open(infile, 'rb') as fh:

        def read_short():
            """Read the next 2 bytes as a little-endian signed short."""
            return struct.unpack('<h', fh.read(2))[0]

        def read_float():
            """Read the next 4 bytes as a little-endian float."""
            return struct.unpack('<f', fh.read(4))[0]

        header['infile'] = fh.name
        header['known_ant'] = [None, None, None, None]
        header['rh_ant'] = [None, None, None, None]
        header['rh_antname'] = [None, None, None, None]
        header['antfreq'] = [None, None, None, None]

        # begin read (the reads below are sequential; their order is the
        # byte layout of the header)
        header['rh_tag'] = read_short()  # 0x00ff if header, 0xfnff if old file format
        header['rh_data'] = read_short()  # offset to data from beginning of file
        header['rh_nsamp'] = read_short()  # samples per scan
        header['rh_bits'] = read_short()  # bits per data word
        # if sir-30 or utilityscan df, then repeats per sample; otherwise
        # 0x80 for 8bit and 0x8000 for 16bit
        header['rh_zero'] = read_short()
        header['rhf_sps'] = read_float()  # scans per second
        header['rhf_spm'] = read_float()  # scans per meter
        header['dzt_spm'] = header['rhf_spm']
        if spm:
            header['rhf_spm'] = spm

        header['rhf_mpm'] = read_float()  # meters per mark
        header['rhf_position'] = read_float()  # position (ns)
        header['rhf_range'] = read_float()  # range (ns)
        header['rh_npass'] = read_short()  # number of passes for 2-D files

        # bytes 32-36 and 36-40: creation and modification date and time in bits
        # structured as little endian u5u6u5u5u4u7
        fh.seek(32)
        try:
            header['rhb_cdt'] = readtime(fh.read(4))
        except Exception:
            # unreadable timestamp: fall back to a fixed placeholder date
            header['rhb_cdt'] = datetime(1980, 1, 1)
        try:
            header['rhb_mdt'] = readtime(fh.read(4))
        except Exception:
            header['rhb_mdt'] = datetime(1980, 1, 1)

        header['rh_rgain'] = read_short()  # offset to range gain function
        header['rh_nrgain'] = read_short()  # size of range gain function
        header['rh_text'] = read_short()  # offset to text
        header['rh_ntext'] = read_short()  # size of text
        header['rh_proc'] = read_short()  # offset to processing history
        header['rh_nproc'] = read_short()  # size of processing history
        header['rh_nchan'] = read_short()  # number of channels

        # epsr (sometimes referred to as "dielectric permittivity"); the
        # file value is always read so the file position advances
        file_epsr = read_float()
        if epsr is not None:  # the user has specified an epsr value
            header['dzt_epsr'] = file_epsr
            header['rhf_epsr'] = epsr
        else:
            header['rhf_epsr'] = file_epsr
            header['dzt_epsr'] = header['rhf_epsr']

        header['rhf_top'] = read_float()  # position in meters (useless?)
        header['dzt_depth'] = read_float()  # range in meters based on DZT rhf_epsr
        # range based on user epsr
        header['rhf_depth'] = header['dzt_depth'] * (
            math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']))

        # read frequencies for multiple antennae
        for chan in range(header['rh_nchan']):
            # start of antenna bytes for channel n (98 for the first channel)
            fh.seek(98 + (MINHEADSIZE * chan))
            header['rh_ant'][chan] = fh.read(14).decode('utf-8').split(
                '\x00')[0]
            header['rh_antname'][chan] = header['rh_ant'][chan].rsplit('x')[0]
            try:
                header['antfreq'][chan] = ANT[header['rh_antname'][chan]]
                header['known_ant'][chan] = True
            except KeyError:
                header['known_ant'][chan] = False
                # unknown antenna: take the leading digits of its name
                header['antfreq'][chan] = int("".join(
                    takewhile(
                        str.isdigit, header['rh_ant'][chan].replace(
                            'D5', '').replace('D6', ''))))  # hoping this works

        fh.seek(113)  # skip to something that matters
        vsbyte = fh.read(1)  # byte containing versioning bits
        # whether or not the system is GPS-capable, 1=no 2=yes (does not
        # mean GPS is in file)
        # NOTE(review): another variant of this reader takes the version from
        # the low 3 bits of this byte instead of the high 3 - confirm which
        # is correct.
        header['rh_version'] = ord(vsbyte) >> 5
        # the system type (values in UNIT={...} dictionary)
        header['rh_system'] = ord(vsbyte) >> 3

        fh.seek(header['rh_rgain'])
        try:
            header['rgain_bytes'] = fh.read(header['rh_nrgain'])
        except Exception:
            # best effort: the range gain bytes are optional for callers
            pass

        # whether or not the header is normal or big-->determines offset to
        # data array
        if header['rh_data'] < MINHEADSIZE:
            header['data_offset'] = MINHEADSIZE * header['rh_data']
        else:
            header['data_offset'] = MINHEADSIZE * header['rh_nchan']
        fh.seek(header['data_offset'])

        if header['rh_bits'] == 8:
            dtype = np.uint8  # 8-bit unsigned
        elif header['rh_bits'] == 16:
            dtype = np.uint16  # 16-bit unsigned
        else:
            dtype = np.int32  # 32-bit signed

        if start_scan != 0:
            try:
                # calculate start offset in bytes:
                start_offset = int(start_scan * header['rh_nchan'] *
                                   header['rh_nsamp'] * header['rh_bits'] / 8)
            except ValueError:
                # if this fails, then fall back to 0 offset.
                start_offset = 0
                fx.printmsg(
                    'WARNING: ValueError for scan offset: %s (reading from start of data)'
                    % (start_scan, ))
                # consider returning No Data?
        else:
            start_offset = 0

        if num_scans != -1:
            try:
                num_items = int(num_scans * header['rh_nsamp'] *
                                header['rh_nchan'])
            except ValueError:
                # if this fails then get all scans...
                fx.printmsg(
                    'WARNING: ValueError for number of scans: %s (reading all items from %s scans)'
                    % (num_scans, start_scan))
                num_items = -1
        else:
            num_items = -1

        # read in and transpose data; offset is relative to the current
        # file position, which is the start of the data array
        data = np.fromfile(fh, dtype, offset=start_offset,
                           count=num_items).reshape(
                               -1, (header['rh_nsamp'] * header['rh_nchan'])).T

    header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr'])
    header['cr_true'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['dzt_epsr'])
    header['ns_per_zsample'] = (header['rhf_depth'] *
                                2) / (header['rh_nsamp'] * header['cr'])
    header['samp_freq'] = 1 / ((header['dzt_depth'] * 2) /
                               (header['rh_nsamp'] * header['cr_true']))

    try:
        header['sec'] = data.shape[1] / float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.

    if os.path.isfile(infile_gps):
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as e0:
            fx.printmsg('WARNING: cannot read DZG file')
            try:
                infile_gps = os.path.splitext(infile_gps)[0] + ".csv"
                gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
            except Exception as e1:
                try:
                    infile_gps = os.path.splitext(infile_gps)[0] + ".CSV"
                    gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
                except Exception as e2:
                    fx.printmsg(
                        'ERROR reading GPS. distance normalization will not be possible.'
                    )
                    fx.printmsg('   details: %s' % e0)
                    fx.printmsg('            %s' % e1)
                    fx.printmsg('            %s' % e2)
                    gps = []
    else:
        fx.printmsg('WARNING: no DZG file found for GPS input')

    return [header, data, gps]
Beispiel #4
0
def h5(ar, infile_basename, outfile_abspath, header, verbose=False):
    """
    .. warning:: HDF5 output is experimental and has not been validated.

    Write a radar array to ``<outfile_abspath>.h5`` using the single-channel
    IceRadar layout ``/line_0/location_n/datacapture_0/echogram_0``
    (group/group/group/dataset). One ``location_n`` group is written per trace
    (column of ``ar``), and each ``echogram_0`` dataset carries four string
    attributes: ``PCSavetimestamp``, ``GPS Cluster- MetaData_xml``,
    ``Digitizer-MetaData_xml`` and ``GPS Cluster_UTM-MetaData_xml``.

    Assumptions behind the output:

    - constant velocity between marks (may be possible to add a check)
    - marks are made at same time on GPS and SIR
    - gps and gpr are in same location when mark is made
    - good quality horizontal solution

    :param numpy.ndarray ar: Radar array; one trace per column
    :param str infile_basename: Input file basename; ``<basename>.DZG`` is read for GPS
    :param str outfile_abspath: Output file path, without the ``.h5`` extension
    :param dict header: File header dictionary; ``rhf_depth`` and ``rh_nsamp`` are read, ``antfreq`` and ``stack`` are used if present
    :param bool verbose: Verbose, defaults to False
    :raises ValueError: if there are traces to write but fewer GPS records than traces (including when no DZG file exists)
    """
    if verbose:
        fx.printmsg('output format is IceRadar HDF5. writing file to: %s' %
                    outfile_abspath)

    # attribute names and their formattable XML templates
    svts = 'PCSavetimestamp'
    gpsx = 'GPS Cluster- MetaData_xml'
    # main gps string. 8 formattable values: gps_sec, lat, lon, qual, num_sats, hdop, altitude, geoid_ht
    gpsclstr = '<Cluster>\r\n<Name>GPS Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>GPS_timestamp_UTC</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Lat_N</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Long_ W</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Fix_Quality</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Num _Sat</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Dilution</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Alt_asl_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Geoid_Heigh_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix valid</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n</Cluster>\r\n'
    dimx = 'Digitizer-MetaData_xml'
    # digitizer string. 4 formattable values: rhf_depth, freq, rh_nsamp, stack
    dimxstr = '<Cluster>\r\n<Name>Digitizer MetaData</Name>\r\n<NumElts>3</NumElts>\r\n<Cluster>\r\n<Name>Digitizer settings</Name>\r\n<NumElts>5</NumElts>\r\n<Cluster>\r\n<Name>Vertical</Name>\r\n<NumElts>3</NumElts>\r\n<DBL>\r\n<Name>vertical range</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>Vertical Offset</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>vertical coupling</Name>\r\n<Val>1</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Channel</Name>\r\n<NumElts>1</NumElts>\r\n<DBL>\r\n<Name>maximum input frequency</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Horizontal</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name> Sample Rate</Name>\r\n<Val>250000000.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>Record Length</Name>\r\n<Val>%i</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Trigger</Name>\r\n<NumElts>12</NumElts>\r\n<U16>\r\n<Name>trigger type</Name>\r\n<Val>0</Val>\r\n</U16>\r\n<DBL>\r\n<Name>trigger delay</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>reference position</Name>\r\n<Val>10.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>trigger level</Name>\r\n<Val>2.00000000000000E-2</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>hysteresis</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>low level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>high level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<U16>\r\n<Name>trigger coupling</Name>\r\n<Val>1</Val>\r\n</U16>\r\n<I32>\r\n<Name>trigger window mode</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<I32>\r\n<Name>trigger slope</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<String>\r\n<Name>trigger source</Name>\r\n<Val>0</Val>\r\n</String>\r\n<I32>\r\n<Name>Trigger Modifier</Name>\r\n<Val>2</Val>\r\n</I32>\r\n</Cluster>\r\n<String>\r\n<Name>channel name</Name>\r\n<Val>0</Val>\r\n</String>\r\n</Cluster>\r\n<U16>\r\n<Name>Stacking</Name>\r\n<Val>%i</Val>\r\n</U16>\r\n<Cluster>\r\n<Name>Radargram extra info</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name>relativeInitialX</Name>\r\n<Val>-1.51999998365682E-7</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>xIncrement</Name>\r\n<Val>3.99999988687227E-9</Val>\r\n</DBL>\r\n</Cluster>\r\n</Cluster>\r\n'
    gutx = 'GPS Cluster_UTM-MetaData_xml'
    # gps UTM string. 1 formattable value: num_sats
    gpsutmstr = '<Cluster>\r\n<Name>GPS_UTM Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>Datum</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Easting_m</Name>\r\n<Val></Val>\r\n</String>\r\n<String>\r\n<Name>Northing_m</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Elevation</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Zone</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Satellites (dup)</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix Valid (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_1</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_2</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n</Cluster>\r\n'

    dzg_path = infile_basename + '.DZG'
    if os.path.exists(dzg_path):
        # same call shape as the DZT reader uses: (path, format, header, verbose=...)
        gps = readdzg(dzg_path, 'dzg', header, verbose=verbose)
    else:
        gps = ''  # if there's no DZG file...need a way to parse another gps source if possible

    # Every trace needs its own GPS record. Fail before the output file is
    # opened (mode 'w' truncates), rather than part-way through writing it.
    num_traces = ar.shape[1]
    if num_traces and len(gps) < num_traces:
        raise ValueError(
            'cannot write IceRadar HDF5: %i GPS records available for %i traces (is %s missing?)'
            % (len(gps), num_traces, dzg_path))

    # The digitizer block depends only on the header, so build it once.
    # NOTE(review): using the first non-empty entry of header['antfreq'] as
    # "maximum input frequency" and a default stacking factor of 1 when the
    # header has no 'stack' key are assumptions -- confirm units and source.
    antenna_freqs = header.get('antfreq') or []
    freq = next((af for af in antenna_freqs if af), 0)
    dimx_str = dimxstr % (header['rhf_depth'], freq, header['rh_nsamp'],
                          header.get('stack', 1))

    if verbose:
        fx.printmsg('exporting to %s.h5' % outfile_abspath)

    # the context manager closes the file even if a trace fails to write
    with h5py.File('%s.h5' % (outfile_abspath), 'w') as h5file:  # overwrite existing file
        try:
            line = h5file.create_group('line_0')  # create line zero
        except ValueError:  # the line already exists in the file
            line = h5file['line_0']

        for n, sample in enumerate(ar.T):
            fix = gps[n]

            # pcsavetimestamp
            # intended formatting: m/d/yyyy_h:m:ss PM
            # NOTE(review): %H is the 24-hour clock, so it is paired oddly with
            # %p here; kept as-is pending confirmation of what IceRadar expects.
            svts_str = fix['timestamp'].astype(datetime).strftime(
                '%m/%d/%Y_%H:%M:%S %p')

            # gpscluster
            # order we need: (tracetime, y, x, q, sats, dil, z, gh)
            gpsx_str = gpsclstr % (fix['gps_sec'], fix['lat'], fix['lon'],
                                   fix['qual'], fix['num_sats'], fix['hdop'],
                                   fix['altitude'], fix['geoid_ht'])

            # utm gpscluster
            gutx_str = gpsutmstr % (fix['num_sats'])

            # create a location for each trace
            location = line.create_group('location_' + str(n))
            capture = location.create_group('datacapture_0')
            echogram = capture.create_dataset('echogram_0', (ar.shape[0], ),
                                              data=sample)
            echogram.attrs.create(svts, svts_str)  # store pcsavetimestamp attribute
            echogram.attrs.create(gpsx, gpsx_str)  # store gpscluster attribute
            echogram.attrs.create(dimx, dimx_str)  # store digitizer attribute
            echogram.attrs.create(gutx, gutx_str)  # store utm gpscluster attribute