Exemple #1
0
 def test_strict_mode(self):
     """Strict mode is expected to raise ValueError for this metadata; non-strict mode keeps the raw strings."""
     md = '''GUANO|Version: 1.0
     TE: no
     Loc Position: 10N 567288E 4584472N
     '''
     # Assert the failure explicitly: a bare try/except would also pass when
     # strict parsing silently accepts the malformed values.
     with self.assertRaises(ValueError):
         GuanoFile.from_string(md, strict=True)

     # In lenient mode the same fields must come back as their original text.
     g = GuanoFile.from_string(md, strict=False)
     self.assertEqual(g.get('TE', None), 'no')
     self.assertEqual(g.get('Loc Position', None), '10N 567288E 4584472N')
Exemple #2
0
    def test_file_roundtrip(self):
        """Write a GUANO .WAV file containing Unicode data, re-read it and confirm value is identical"""
        import os  # local import: only needed to clean up the file this test creates

        fname = 'test_guano.wav'

        def _remove_test_file():
            # Tolerate a missing file so a failed write doesn't add a second error.
            if os.path.exists(fname):
                os.remove(fname)

        # Registered before writing so the working directory is left clean
        # even when a later step of the test fails.
        self.addCleanup(_remove_test_file)

        # write a fake .WAV file
        g = GuanoFile.from_string(self.MD)
        g.filename = fname
        g.wav_params = wavparams(1, 2, 500000, 2, 'NONE', None)
        g._wav_data = b'\01\02'  # faking it, don't try this at home!
        g._wav_data_size = 2
        g.write()

        # read it back in
        g2 = GuanoFile(fname)

        self.assertEqual(self.NOTE, g2['Note'])
Exemple #3
0
 def setUp(self):
     """Register `int` for ('User', 'Answer') with GuanoFile, then parse `self.MD` into `self.md`."""
     GuanoFile.register('User', 'Answer', int)
     parsed = GuanoFile.from_string(self.MD)
     self.md = parsed
Exemple #4
0
 def test_sb42_bad_encoding(self):
     """SonoBat 4.2 doesn't actually encode as UTF-8. At least try not to blow up when reading."""
     # SonoBat *probably* uses mac-roman on OS X and windows-1252 on Windows... in the US at least.
     # The \xd5 byte below is not valid UTF-8; the test passes as long as parsing does not raise.
     raw_block = b'GUANO|Version:  1.0\nNote:  Mobile transect with mic 4\xd5 above roof.\n\x00\x00'
     GuanoFile.from_string(raw_block)
Exemple #5
0
 def test_sb42_bad_guano_version(self):
     """Some version of SonoBat 4.2 writes a GUANO|Version of "1.0:" by accident."""
     # Passing means only that parsing the malformed version line does not raise.
     raw_block = b'GUANO|Version:  1.0:\n1.0:\n'
     GuanoFile.from_string(raw_block)
Exemple #6
0
 def test_sb42_bad_timestamp(self):
     """SonoBat 4.2 blank timestamp"""
     # The Timestamp field is present but has no value; parsing must not raise.
     blank_timestamp_md = '''GUANO|Version: 1.0
     Timestamp:
     '''
     GuanoFile.from_string(blank_timestamp_md)
Exemple #7
0
 def test_sb41_bad_key(self):
     """SonoBat 4.1 disembodied colon"""
     colon_only_md = '''GUANO|Version: 1.0
     :
     '''
     parsed = GuanoFile.from_string(colon_only_md)
     fields = list(parsed.items())
     # Exactly one field is expected to come out of this block.
     self.assertEqual(1, len(fields))
Exemple #8
0
 def test_sb41_bad_te(self):
     """SonoBat 4.1 "optional" TE value"""
     # The TE field is present but empty; parsing must not raise.
     empty_te_md = '''GUANO|Version: 1.0
     TE:
     '''
     GuanoFile.from_string(empty_te_md)
Exemple #9
0
 def test_from_string(self):
     """Parse a GUANO metadata block containing Unicode data"""
     parsed = GuanoFile.from_string(self.MD)
     note_value = parsed['Note']
     self.assertEqual(self.NOTE, note_value)
Exemple #10
0
def extract_anabat(fname, hpfilter_khz=8.0, **kwargs):
    """Extract (times, frequencies, amplitudes, metadata) from Anabat sequence file

    The file is memory-mapped read-only, its header and data-info table are
    unpacked, an optional embedded GUANO metadata block is parsed (file types
    >= 132 only), and the variable-length interval stream is decoded into
    zero-crossing times and frequencies.

    :param fname: path of the Anabat sequence file to read
    :param hpfilter_khz: high-pass cutoff in kHz; converted to Hz and passed
        to ``hpf_zc`` together with the decoded data
    :param kwargs: accepted for call compatibility; not used by this function
    :return: tuple ``(times_s, freqs_hz, amplitudes, metadata)`` as returned by
        ``hpf_zc`` plus the metadata dict; ``amplitudes`` stays ``None`` unless
        a GUANO block supplied a ``ZCANT|Amplitudes`` value
    :raises ValueError: if the file's RES1 value is not 25000
    """
    amplitudes = None
    with open(fname, 'rb') as f, contextlib.closing(
            mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as m:
        size = len(m)

        # parse header
        (data_info_pointer, file_type, tape, date, loc, species, spec, note1,
         note2) = struct.unpack_from(ANABAT_129_HEAD_FMT, m)
        data_pointer, res1, divratio, vres = struct.unpack_from(
            ANABAT_129_DATA_INFO_FMT, m, data_info_pointer)

        # Convert once and test the converted value: the raw unpacked field may
        # be bytes, for which a str membership test raises TypeError.
        species_str = _s(species)
        if '(' in species_str:
            species = [species_str.split('(', 1)[0]]  # remove KPro junk
        else:
            species = [s.strip() for s in species_str.split(',')]

        metadata = dict(date=date,
                        loc=_s(loc),
                        species=species,
                        spec=_s(spec),
                        note1=_s(note1),
                        note2=_s(note2),
                        divratio=divratio)
        if file_type >= 132:
            (year, month, day, hour, minute, second, second_hundredths,
             microseconds, id_code, gps_data) = struct.unpack_from(
                 ANABAT_132_ADDL_DATA_INFO_FMT, m, 0x120)
            try:
                timestamp = datetime(year, month, day, hour, minute, second,
                                     second_hundredths * 10000 + microseconds)
            except ValueError:
                # best effort: an invalid date/time in the file is not fatal
                log.exception('Failed extracting timestamp')
                timestamp = None
            metadata.update(
                dict(timestamp=timestamp, id=_s(id_code), gps=_s(gps_data)))
            if data_pointer - 0x150 > 12:  # and m[pos:pos+5] == 'GUANO':
                try:
                    guano = GuanoFile.from_string(m[0x150:data_pointer])
                    log.debug(guano.to_string())
                    amplitudes = guano.get('ZCANT|Amplitudes', None)
                except Exception:
                    # best effort: bad GUANO metadata must not prevent reading
                    # the sequence, but don't swallow KeyboardInterrupt/SystemExit
                    log.exception('Failed parsing GUANO metadata block')
            else:
                log.debug('No GUANO metadata found')
        log.debug(
            'file_type: %d\tdata_info_pointer: 0x%3x\tdata_pointer: 0x%3x',
            file_type, data_info_pointer, data_pointer)
        log.debug(metadata)
        if res1 != 25000:
            raise ValueError(
                'Anabat files with non-standard RES1 (%s) not yet supported!' %
                res1)

        # parse actual sequence data
        i = data_pointer  # byte index as we scan through the file (data starts at 0x150 for v132, 0x120 for older files)
        intervals_us = np.empty(2**14, np.dtype('uint32'))
        offdots = OrderedDict()  # dot index -> number of subsequent dots
        int_i = 0  # interval index

        while i < size:

            if int_i >= len(intervals_us):
                # Anabat files were formerly capped at 16384 dots, but may now be larger; grow
                intervals_us = np.concatenate(
                    (intervals_us, np.empty(2**14, np.dtype('uint32'))))

            byte = Byte.unpack_from(m, i)[0]

            if byte <= 0x7F:
                # Single byte is a 7-bit signed two's complement offset from previous interval
                offset = byte if byte < 2**6 else byte - 2**7  # clever two's complement unroll
                if int_i > 0:
                    # Do the arithmetic on a Python int: adding a negative
                    # Python int to a uint32 scalar raises OverflowError under
                    # NumPy 2 promotion rules.
                    intervals_us[int_i] = int(intervals_us[int_i - 1]) + offset
                    int_i += 1
                else:
                    log.warning(
                        'Sequence file starts with a one-byte interval diff! Skipping byte %x',
                        byte)

            elif 0x80 <= byte <= 0x9F:
                # time interval is contained in 13 bits, upper 5 from the remainder of this byte, lower 8 bits from the next byte
                accumulator = (byte & 0b00011111) << 8
                i += 1
                accumulator |= Byte.unpack_from(m, i)[0]
                intervals_us[int_i] = accumulator
                int_i += 1

            elif 0xA0 <= byte <= 0xBF:
                # interval is contained in 21 bits, upper 5 from the remainder of this byte, next 8 from the next byte and the lower 8 from the byte after that
                accumulator = (byte & 0b00011111) << 16
                i += 1
                accumulator |= Byte.unpack_from(m, i)[0] << 8
                i += 1
                accumulator |= Byte.unpack_from(m, i)[0]
                intervals_us[int_i] = accumulator
                int_i += 1

            elif 0xC0 <= byte <= 0xDF:
                # interval is contained in 29 bits, the upper 5 from the remainder of this byte, the next 8 from the following byte etc.
                accumulator = (byte & 0b00011111) << 24
                i += 1
                accumulator |= Byte.unpack_from(m, i)[0] << 16
                i += 1
                accumulator |= Byte.unpack_from(m, i)[0] << 8
                i += 1
                accumulator |= Byte.unpack_from(m, i)[0]
                intervals_us[int_i] = accumulator
                int_i += 1

            elif 0xE0 <= byte <= 0xFF:
                # status byte which applies to the next n dots
                status = byte & 0b00011111
                i += 1
                dotcount = Byte.unpack_from(m, i)[0]
                if status == DotStatus.OFF:
                    offdots[int_i] = dotcount
                else:
                    log.debug(
                        'UNSUPPORTED: Status %X for %d dots at dot %d (file offset 0x%X)',
                        status, dotcount, int_i, i)

            else:
                raise Exception('Unknown byte %X at offset 0x%X' % (byte, i))

            i += 1

    intervals_us = intervals_us[:int_i]  # TODO: should we free unused memory?

    intervals_s = intervals_us * 1e-6
    times_s = np.cumsum(intervals_s)
    # Each frequency is derived from the time spanned by two consecutive
    # intervals, so freqs_hz is two elements shorter than times_s.
    with np.errstate(divide='ignore'):
        # a zero-length span yields inf, which is zeroed out just below
        freqs_hz = 1 / (times_s[2:] - times_s[:-2]) * divratio
    freqs_hz[freqs_hz == np.inf] = 0
    freqs_hz[freqs_hz < 4000] = 0
    freqs_hz[freqs_hz > 250000] = 0

    if offdots:
        n_offdots = sum(offdots.values())
        n_dots = len(times_s)
        # guard the percentage against a file with status bytes but no dots
        off_pct = float(n_offdots) / n_dots * 100 if n_dots else 0.0
        log.debug('Throwing out %d off-dots of %d (%.1f%%)', n_offdots,
                  n_dots, off_pct)
        off_mask = np.zeros(len(intervals_us), dtype=bool)
        for start, dotcount in offdots.items():
            off_mask[start:start + dotcount] = True
        times_s = masked_array(times_s, mask=off_mask).compressed()
        # NOTE(review): off_mask has len(intervals_us) elements while freqs_hz
        # has two fewer (see the [2:] - [:-2] difference above); confirm the
        # intended alignment of the mask with the frequency array.
        freqs_hz = masked_array(freqs_hz, mask=off_mask).compressed()

    has_signal = freqs_hz.any()
    min_ = freqs_hz.min() if has_signal else 0
    max_ = freqs_hz.max() if has_signal else 0
    log.debug('%s\tDots: %d\tMinF: %.1f\tMaxF: %.1f', basename(fname),
              len(freqs_hz), min_ / 1000.0, max_ / 1000.0)

    times_s, freqs_hz, amplitudes = hpf_zc(times_s, freqs_hz, amplitudes,
                                           hpfilter_khz * 1000)

    # Compare against None explicitly: truth-testing a multi-element array
    # (as `amplitudes or freqs_hz` would) raises ValueError.
    expected_len = len(freqs_hz) if amplitudes is None else len(amplitudes)
    assert len(times_s) == len(freqs_hz) == expected_len
    return times_s, freqs_hz, amplitudes, metadata