Exemple #1
0
 def test_readjsondictrequiredkeypresent(self):
     """A dict written to disk is read back intact when all required keys exist."""
     written = {'a': 1, 'b': [1, 2, 3], 'c': 'k'}
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_jsondict('test1.json', written)
         # both required keys ('a' and 'c') are present in the stored dict
         loaded = datadir.read_jsondict('test1.json',
                                        requiredkeys=('a', 'c'))
         self.assertDictEqual(written, loaded)
Exemple #2
0
def create_testbasedatadir(filename='test.json', datadict=None):
    """Yield a DataDir rooted in a temporary 'data.bd' directory.

    The directory is created inside a temporary directory and is seeded with
    one JSON file named `filename` holding `datadict` (``{'a': 1}`` when no
    dict is given). The temporary directory context stays open while the
    DataDir is yielded.

    NOTE(review): presumably wrapped by a context-manager decorator outside
    this view -- confirm at the definition site.
    """
    content = {'a': 1} if datadict is None else datadict
    with tempdir() as tmproot:
        target = Path(tmproot) / 'data.bd'
        target.mkdir()
        datadir = DataDir(target)
        datadir._write_jsondict(filename, content)
        yield datadir
Exemple #3
0
 def test_readjsondictrequiredkeynotpresent(self):
     """Reading raises ValueError when a required key is not in the stored dict."""
     stored = {'a': 1, 'b': [1, 2, 3], 'c': 'k'}
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_jsondict('test1.json', stored)
         # 'd' was never written, so the read must be rejected
         with self.assertRaises(ValueError):
             datadir.read_jsondict('test1.json', requiredkeys=('a', 'd'))
Exemple #4
0
 def __init__(self, path, tablekey=None):
     """Open an existing sound sequence stored in the directory `path`.

     Parameters
     ----------
     path
         Directory holding the sequence table (csv) and info (json) files.
     tablekey
         Optional key; when given, the file names become
         ``<stem>_<tablekey>.csv`` and ``<stem>_<tablekey>.json``.

     Raises
     ------
     IOError
         If the table file or the info file does not exist in `path`.
     """
     self.datadir = DataDir(path=path)
     if tablekey is not None:
         # instance attributes shadow the class-level default file names
         seqstem = self._seqfile.rsplit(".", 1)[0]
         infostem = self._infofile.rsplit(".", 1)[0]
         self._seqfile = f'{seqstem}_{tablekey}.csv'
         self._infofile = f'{infostem}_{tablekey}.json'
     if not (self.datadir.path / self._seqfile).exists():
         raise IOError(f"cannot find table file '{self._seqfile}'")
     if not (self.datadir.path / self._infofile).exists():
         # this is the json info file, not the table file
         raise IOError(f"cannot find info file '{self._infofile}'")
     d = self.datadir.read_jsondict(self._infofile)
     self.fs = d['fs']
     self.nframes = d['nframes']
     self.duration = self.nframes / self.fs
     self.nsnds = d['nsnds']
     self.tablekey = tablekey
Exemple #5
0
 def test_writejsondictincorrectinput(self):
     """write_jsondict rejects an int and a str payload with TypeError."""
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         for notadict in (3, 'a'):
             with self.assertRaises(TypeError):
                 datadir.write_jsondict('test1.json', notadict)
Exemple #6
0
 def test_writetxt(self):
     """Text written with write_txt is returned unchanged by read_txt."""
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_txt('test1.txt', 'hello')
         content = datadir.read_txt('test1.txt')
         self.assertEqual(content, 'hello')
Exemple #7
0
 def test_readjsondictnotdict(self):
     """read_jsondict raises TypeError when the JSON file holds a list."""
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_jsonfile('test1.json', [1, 2, 3])
         with self.assertRaises(TypeError):
             datadir.read_jsondict('test1.json')
Exemple #8
0
 def test_updatejsondictcorrect(self):
     """update_jsondict on an existing JSON dict completes without raising.

     The test makes no assertion on the resulting content.
     """
     initial = {'a': 1}
     changes = {'a': 2, 'b': 3}
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_jsondict('test1.json', initial)
         datadir.update_jsondict('test1.json', changes)
Exemple #9
0
class SndDict:
    """Disk-based dictionary of sounds.

    Essentially all wav files in a directory. They need to have the lowercase '.wav'
    extension and we work with float32 files only. The keys are simply the filenames
    without the .wav extension, the items are Snd objects. The directory contains a
    json file that provides some info on the sounds for efficiency.

    """
    _classid = 'SndDict'
    _infofile = 'snddict.json'

    def __init__(self, path):
        """Open the sound directory `path`, creating the info file if absent."""
        self.datadir = DataDir(path=path)
        if not (self.datadir.path / self._infofile).exists():
            self._updateinfofile()

    def __getitem__(self, item):
        """Read and return the sound stored as ``<item>.wav``."""
        return wavread(self.datadir.path / f'{item}.wav')

    def __str__(self):
        return f'{self._classid}: {self.datadir.path.name} {self.keys()}'

    def __len__(self):
        """Number of sounds listed in the info file."""
        return len(self.info())

    __repr__ = __str__

    @staticmethod
    def _sndinfo(snd):
        """Return the info-file entry (fs, nchannels, nframes, duration) for `snd`."""
        return {
            'fs': snd.fs,
            'nchannels': snd.nchannels,
            'nframes': snd.nframes,
            'duration': snd.duration,
        }

    def _updateinfofile(self):
        """Rebuild the info file from the '.wav' files found in the directory."""
        keys = sorted(fname[:-4] for fname in os.listdir(self.datadir.path)
                      if fname.endswith(".wav"))
        d = {key: self._sndinfo(self[key]) for key in keys}
        self.datadir.write_jsondict(self._infofile, d, overwrite=True)

    def info(self):
        """Return the content of the info file: a dict mapping key to sound info."""
        return self.datadir.read_jsondict(self._infofile)

    def add(self, key, snd, overwrite=False):
        """Store `snd` as ``<key>.wav`` (float32) and register it in the info file.

        Raises
        ------
        TypeError
            If `snd` is not a Snd or DarrSnd.
        ValueError
            If `key` already exists and `overwrite` is False.
        """
        if not isinstance(snd, (Snd, DarrSnd)):
            raise TypeError(f'cannot add object of type {type(snd)} to {self._classid}')
        # read the info file once; it serves both the key check and the update
        d = self.info()
        if key in d and not overwrite:
            raise ValueError(f'SndDict already contains a Snd with key {key}, use the pop method to remove first')
        snd.to_wav(self.datadir.path / f'{key}.wav', dtype='float32', overwrite=overwrite)
        d[key] = self._sndinfo(snd)
        self.datadir.write_jsondict(self._infofile, d, overwrite=True)

    def keys(self):
        """Return the sorted list of sound keys listed in the info file."""
        return sorted(self.info().keys())

    def items(self):
        """Yield ``(key, sound)`` pairs in key order, reading each sound from disk."""
        for key in self.keys():
            yield key, self[key]

    def pop(self, key):
        """Delete ``<key>.wav`` and remove `key` from the info file.

        Raises
        ------
        ValueError
            If `key` is not listed in the info file.
        """
        d = self.info()
        if key not in d:
            raise ValueError(f"Snd {key} does not exist in SndDict")
        (self.datadir.path / f'{key}.wav').unlink()
        del d[key]
        self.datadir.write_jsondict(self._infofile, d, overwrite=True)

    def allsame(self):
        """Tests if attributes of all sounds are the same (fs, nchannels, nframes, duration).

        Returns a dict mapping each attribute name to a bool. The values come
        from the info file; with no sounds every attribute maps to True.

        """
        entries = list(self.info().values())
        same = {}
        for attr in ('fs', 'nchannels', 'nframes', 'duration'):
            values = [entry[attr] for entry in entries]
            same[attr] = all(val == values[0] for val in values)
        return same

    def nframes(self):
        """Return a dict mapping key to nframes, reading every sound from disk."""
        return {key: snd.nframes for key, snd in self.items()}

    def read(self):
        """Read every sound into memory and return a dict of key to sound object."""
        return dict(self.items())
Exemple #10
0
 def test_writejsondictcorrectinput(self):
     """write_jsondict accepts a plain dict without raising."""
     payload = {'a': 1}
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_jsondict('test1.json', payload)
Exemple #11
0
 def test_nonexistingpath(self):
     """Constructing a DataDir on a path that is assumed not to exist raises OSError."""
     missing = "lkjhlkihlkblhhhgdhg"  # assumed not to exist on disk
     self.assertRaises(OSError, DataDir, missing)
Exemple #12
0
 def test_deleteprotectedfile(self):
     """delete_files raises OSError when asked to delete a protected path."""
     protected = ('test.dat', )
     with tempdir() as tmp:
         datadir = DataDir(tmp, protectedpaths=protected)
         with self.assertRaises(OSError):
             datadir.delete_files(('test.dat', ))
Exemple #13
0
 def test_protectedfiles(self):
     """protectedfiles is the set of paths passed in as protectedpaths."""
     with tempdir() as tmp:
         datadir = DataDir(tmp, protectedpaths=('test.dat', ))
         expected = {'test.dat'}
         self.assertEqual(datadir.protectedfiles, expected)
Exemple #14
0
 def test_deletefiles(self):
     """delete_files completes without raising, even for a file never written here."""
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         for name in ('test1.txt', 'test2.txt'):
             datadir.write_txt(name, 'hello')
         # 'test3.txt' is not created by this test
         datadir.delete_files(('test1.txt', 'test2.txt', 'test3.txt'))
Exemple #15
0
class SndSeq:
    """Sequence of sounds stored in a data directory.

    The sequence is described by a csv table (default 'sndseq.csv') with the
    columns snd, startframe, endframe, starttime and endtime, plus a json info
    file (default 'sndseq.json') holding 'fs', 'nframes' and 'nsnds'. The
    sounds themselves are wav files in the same directory.

    """

    _classid = 'SndSeq'
    _seqfile = 'sndseq.csv'
    _infofile = 'sndseq.json'

    def __init__(self, path, tablekey=None):
        """Open an existing sound sequence stored in the directory `path`.

        When `tablekey` is given, the table and info file names become
        ``<stem>_<tablekey>.csv`` and ``<stem>_<tablekey>.json``.

        Raises
        ------
        IOError
            If the table file or the info file does not exist in `path`.
        """
        self.datadir = DataDir(path=path)
        if tablekey is not None:
            # instance attributes shadow the class-level default file names
            self._seqfile = f'{self._seqfile.rsplit(".", 1)[0]}_{tablekey}.csv'
            self._infofile = f'{self._infofile.rsplit(".", 1)[0]}_{tablekey}.json'
        if not (self.datadir.path / self._seqfile).exists():
            raise IOError(f"cannot find table file '{self._seqfile}'")
        if not (self.datadir.path / self._infofile).exists():
            # this is the json info file, not the table file
            raise IOError(f"cannot find info file '{self._infofile}'")
        d = self.datadir.read_jsondict(self._infofile)
        self.fs = d['fs']
        self.nframes = d['nframes']
        self.duration = self.nframes / self.fs
        self.nsnds = d['nsnds']
        self.tablekey = tablekey

    def __str__(self):
        return f'{self._classid}: {self.datadir.path.name} <{self.nsnds} snds, {duration_string(self.duration)} >'

    __repr__ = __str__

    def seqtable(self):
        """Read the csv table from disk and return it as a SndSeqTable."""
        dtypes = {
            'snd': 'str',
            'startframe': 'int64',
            'endframe': 'int64',
            'starttime': 'float64',
            'endtime': 'float64'
        }
        return SndSeqTable(
            pd.read_csv(self.datadir.path / self._seqfile).astype(dtypes))

    def add_calibmarks_3sweeps(self,
                               startfreq=500.,
                               endfreq=1000.,
                               chirpduration=0.2,
                               rampduration=1e-3,
                               silentinterval=30.,
                               rms=0.2,
                               overwrite=False):
        """Put a 'calibmark' sound before and after the sequence.

        The calibration sound is created with `calibmark_3sweeps`, stored as
        'calibmark' in the sound directory, and entered as first and last row
        of the sequence table, separated from the existing sounds by
        `silentinterval` seconds. The table file, the info file and the
        attributes `nframes`, `duration` and `nsnds` are updated.
        """
        c = calibmark_3sweeps(startfreq=startfreq,
                              endfreq=endfreq,
                              chirpduration=chirpduration,
                              silenceduration=0.1,
                              rampduration=rampduration,
                              fs=self.fs,
                              rmsamp=rms)
        snddict = SndDict(self.datadir.path)
        snddict.add('calibmark', c, overwrite=overwrite)
        seqtable = self.seqtable()
        c1 = pd.DataFrame({
            'startframe': [0],
            'snd': ['calibmark'],
            'endframe': [c.nframes]
        })
        nframes_silence = int(round(silentinterval * self.fs))
        # shift existing sounds to make room for leading calibmark + silence
        shift = c.nframes + nframes_silence
        seqtable['startframe'] += shift
        seqtable['endframe'] += shift
        startframe = int(seqtable['endframe'].iloc[-1]) + nframes_silence
        endframe = int(startframe + c.nframes)
        c2 = pd.DataFrame({
            'startframe': [startframe],
            'snd': ['calibmark'],
            'endframe': [endframe]
        })
        seqtable = pd.concat([c1, seqtable, c2])
        seqtable['starttime'] = seqtable['startframe'] / float(self.fs)
        seqtable['endtime'] = seqtable['endframe'] / float(self.fs)
        nsnds = len(seqtable)
        # The info dict belongs in the json info file. It used to be written
        # to the csv table file, which is deleted and rewritten below, so the
        # info on disk was never updated.
        self.datadir.write_jsondict(filename=self._infofile,
                                    d={
                                        'fs': self.fs,
                                        'nframes': endframe,
                                        'nsnds': nsnds
                                    },
                                    overwrite=True)
        seqtablepath = self.datadir.path / self._seqfile
        time.sleep(2.)  # needed for windows
        seqtablepath.unlink()
        cols = ['snd', 'startframe', 'endframe', 'starttime', 'endtime']
        additionalcols = [
            col for col in list(seqtable.columns) if col not in cols
        ]
        seqtable.to_csv(seqtablepath,
                        index=False,
                        columns=cols + additionalcols)
        # keep the in-memory attributes in agreement with the files on disk
        self.nframes = endframe
        self.duration = self.nframes / self.fs
        self.nsnds = nsnds

    def timetransform_fromrecording(self, snd, lookduration=None):
        """Find a linear function that transforms the timing of sounds in the sequence
        to timing of the sounds in a recording of the sequence.

        We assume that there is an offset (i.e. sequence starts somewhere in the
        recording) and that there may be small differences between the provided sampling
        rates, and the actual ones in playback and recording devices. This method uses
        crosscorrelation and finds the first and the last sounds in the recording.

        Only the first channel of the recording and of the target sounds is
        used. `lookduration` is the duration (s) at the start and at the end
        of the recording that is searched; it defaults to 20% of the sequence
        duration. Returns the tuple ``(slope, offset)``.

        Raises
        ------
        ValueError
            If `snd` has a different fs than the sequence, or if the look
            window is shorter than the first or last sound.

        """
        if not self.fs == snd.fs:
            raise ValueError("snd does not have same fs as sndseq")
        origtable = self.seqtable()
        s1name = origtable.iloc[0]['snd']
        s2name = origtable.iloc[-1]['snd']
        s1 = wavread(self.datadir.path / f'{s1name}.wav')
        s2 = wavread(self.datadir.path / f'{s2name}.wav')
        if lookduration is None:
            lookduration = 0.2 * self.duration
        looknframes = int(round(lookduration * snd.fs))
        for nframes in (s1.nframes, s2.nframes):
            if not looknframes >= nframes:
                raise ValueError(
                    "lookduration should be longer than duration target sound")
        # first sound: search the start of the recording
        cc = np.correlate(snd.frames[:looknframes, 0],
                          s1.frames[:, 0],
                          mode='valid')
        # NOTE(review): np.absolute is applied to the argmax index, where it
        # has no effect; possibly np.absolute(cc).argmax() was intended --
        # confirm before changing.
        i1 = np.absolute(cc.argmax())
        t1 = i1 / float(snd.fs)
        # last sound: search the end of the recording
        cc = np.correlate(snd.frames[-looknframes:, 0],
                          s2.frames[:, 0],
                          mode='valid')
        i2 = (np.absolute(cc.argmax()) + snd.nframes - looknframes)
        t2 = i2 / float(snd.fs)
        ot1 = origtable.iloc[0]['starttime']
        ot2 = origtable.iloc[-1]['starttime']
        slope = (t2 - t1) / (ot2 - ot1)
        offset = t1 - slope * ot1
        return slope, offset

    def seqtable_fromrecording(self, snd, lookduration=None):
        """Return the sequence table with times and frames mapped onto recording `snd`.

        The linear transform of `timetransform_fromrecording` is applied to
        the start and end times; the frame columns are recomputed from the
        transformed times with the fs of the sequence.
        """
        slope, offset = self.timetransform_fromrecording(
            snd=snd, lookduration=lookduration)
        seqtable = self.seqtable()
        seqtable['starttime'] = slope * seqtable['starttime'] + offset
        seqtable['endtime'] = slope * seqtable['endtime'] + offset
        seqtable['startframe'] = np.round(seqtable['starttime'] *
                                          self.fs).astype('int64')
        seqtable['endframe'] = (np.round(seqtable['endtime'] *
                                         self.fs)).astype('int64')
        return seqtable

    def to_snd(self, dtype='float32'):
        """Render the whole sequence as a single Snd.

        All sounds of the directory are read into memory and copied to their
        frame positions in a zero-filled array, so frames not covered by a
        sound stay zero. The number of channels and fs are taken from the
        first sound of the table.
        """
        snddict = SndDict(self.datadir.path).read()
        seqtable = self.seqtable()
        nframes = int(seqtable['endframe'].iloc[-1])
        # positional lookup: does not depend on the index labels of the table
        firstsnd = snddict[seqtable['snd'].iloc[0]]
        ar = np.zeros((nframes, firstsnd.nchannels), dtype=dtype)
        for _, row in seqtable.iterrows():
            ar[row['startframe']:row['endframe']] = snddict[row['snd']].frames
        return Snd(frames=ar, fs=firstsnd.fs)

    def to_textgrid(self, filepath):
        """Write the sequence table as a textgrid to `filepath` (delegates to the table)."""
        return self.seqtable().to_textgrid(filepath=filepath)
Exemple #16
0
 def test_writetxtdonotoverwrite(self):
     """Writing to an existing text file through _write_txt raises OSError."""
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_txt('test1.txt', 'hello')
         # second write targets the same file name
         with self.assertRaises(OSError):
             datadir._write_txt('test1.txt', 'hello')
Exemple #17
0
 def test_writetxtoverwrite(self):
     """Rewriting an existing text file with overwrite=True does not raise."""
     name, text = 'test1.txt', 'hello'
     with tempdir() as tmp:
         datadir = DataDir(tmp)
         datadir.write_txt(name, text)
         datadir.write_txt(name, text, overwrite=True)
Exemple #18
0
 def __init__(self, path):
     """Open the sound directory `path`; build the info file when it is missing."""
     self.datadir = DataDir(path=path)
     infopath = self.datadir.path / self._infofile
     if not infopath.exists():
         self._updateinfofile()