def test_readjsondictrequiredkeypresent(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        wd = {'a': 1, 'b': [1, 2, 3], 'c': 'k'}
        bd.write_jsondict('test1.json', wd)
        rd = bd.read_jsondict('test1.json', requiredkeys=('a', 'c'))
        self.assertDictEqual(wd, rd)
@contextmanager  # from contextlib; this helper is used as a context manager
def create_testbasedatadir(filename='test.json', datadict=None):
    if datadict is None:
        datadict = {'a': 1}
    with tempdir() as dirname:
        bdddirname = Path(dirname) / 'data.bd'
        bdddirname.mkdir()
        bdd = DataDir(bdddirname)
        bdd._write_jsondict(filename, datadict)
        yield bdd
def test_readjsondictrequiredkeynotpresent(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        wd = {'a': 1, 'b': [1, 2, 3], 'c': 'k'}
        bd.write_jsondict('test1.json', wd)
        self.assertRaises(ValueError, bd.read_jsondict, 'test1.json',
                          requiredkeys=('a', 'd'))
def test_writejsondictincorrectinput(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        with self.assertRaises(TypeError):
            bd.write_jsondict('test1.json', 3)
        with self.assertRaises(TypeError):
            bd.write_jsondict('test1.json', 'a')
def test_writetxt(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_txt('test1.txt', 'hello')
        self.assertEqual(bd.read_txt('test1.txt'), 'hello')
def test_readjsondictnotdict(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_jsonfile('test1.json', [1, 2, 3])
        self.assertRaises(TypeError, bd.read_jsondict, 'test1.json')
def test_updatejsondictcorrect(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_jsondict('test1.json', {'a': 1})
        bd.update_jsondict('test1.json', {'a': 2, 'b': 3})
class SndDict:
    """Disk-based dictionary of sounds.

    Essentially all wav files in a directory. They need to have the
    lowercase '.wav' extension, and we work with float32 files only. The
    keys are simply the filenames without the '.wav' extension; the items
    are Snd objects. The directory contains a json file that provides some
    info on the sounds for efficiency.

    """

    _classid = 'SndDict'
    _infofile = 'snddict.json'

    def __init__(self, path):
        self.datadir = DataDir(path=path)
        if not (self.datadir.path / self._infofile).exists():
            self._updateinfofile()

    def __getitem__(self, item):
        return wavread(self.datadir.path / f'{item}.wav')

    def __str__(self):
        return f'{self._classid}: {self.datadir.path.name} {self.keys()}'

    def __len__(self):
        return len(self.keys())

    __repr__ = __str__

    def _updateinfofile(self):
        keys = sorted(file[:-4] for file in os.listdir(self.datadir.path)
                      if file.endswith('.wav'))
        d = {}
        for key in keys:
            snd = self[key]
            d[key] = {}
            d[key]['fs'] = snd.fs
            d[key]['nchannels'] = snd.nchannels
            d[key]['nframes'] = snd.nframes
            d[key]['duration'] = snd.duration
        self.datadir.write_jsondict(self._infofile, d, overwrite=True)

    def info(self):
        return self.datadir.read_jsondict(self._infofile)

    def add(self, key, snd, overwrite=False):
        if not isinstance(snd, (Snd, DarrSnd)):
            raise TypeError(f'cannot add object of type {type(snd)} to '
                            f'{self._classid}')
        if key in self.keys() and not overwrite:
            raise ValueError(f'SndDict already contains a Snd with key '
                             f'{key}, use the pop method to remove it first')
        snd.to_wav(self.datadir.path / f'{key}.wav', dtype='float32',
                   overwrite=overwrite)
        d = self.datadir.read_jsondict(self._infofile)
        d[key] = {}
        d[key]['fs'] = snd.fs
        d[key]['nchannels'] = snd.nchannels
        d[key]['nframes'] = snd.nframes
        d[key]['duration'] = snd.duration
        self.datadir.write_jsondict(self._infofile, d, overwrite=True)

    def keys(self):
        return sorted(self.info().keys())

    def items(self):
        for key in self.keys():
            yield key, self[key]

    def pop(self, key):
        if key not in self.keys():
            raise ValueError(f"Snd {key} does not exist in SndDict")
        (self.datadir.path / f'{key}.wav').unlink()
        d = self.datadir.read_jsondict(self._infofile)
        d.pop(key)
        self.datadir.write_jsondict(self._infofile, d, overwrite=True)

    def allsame(self):
        """Tests whether the attributes of all sounds are the same (fs,
        nchannels, nframes, duration).

        """
        d = {'fs': [], 'nchannels': [], 'nframes': [], 'duration': []}
        for sndkey, values in self.info().items():
            d['fs'].append(values['fs'])
            d['nchannels'].append(values['nchannels'])
            d['nframes'].append(values['nframes'])
            d['duration'].append(values['duration'])
        s = {}
        for key, values in d.items():
            s[key] = all(val == values[0] for val in values)
        return s

    def nframes(self):
        return {key: snd.nframes for key, snd in self.items()}

    def read(self):
        """Reads every sound into memory and returns a dictionary of Snd
        objects."""
        return {key: snd for (key, snd) in self.items()}
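# Usage sketch (not part of the original module): a minimal round trip with
# SndDict. It assumes the Snd constructor used in SndSeq.to_snd
# (Snd(frames=..., fs=...)) and a hypothetical, already existing directory
# 'examplesounds'. Shapes and values are illustrative only.
def _snddict_example():
    frames = np.zeros((4410, 1), dtype='float32')  # 0.1 s of silence at 44.1 kHz
    snd = Snd(frames=frames, fs=44100)
    sd = SndDict('examplesounds')           # builds snddict.json if missing
    sd.add('silence', snd, overwrite=True)  # writes silence.wav, updates info
    assert 'silence' in sd.keys()
    return sd['silence']                    # read back from disk via wavread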
def test_writejsondictcorrectinput(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_jsondict('test1.json', {'a': 1})
def test_nonexistingpath(self):
    with self.assertRaises(OSError):
        DataDir("lkjhlkihlkblhhhgdhg")  # assume that this path doesn't exist
def test_deleteprotectedfile(self):
    with tempdir() as dirname:
        bd = DataDir(dirname, protectedpaths=('test.dat',))
        self.assertRaises(OSError, bd.delete_files, ('test.dat',))
def test_protectedfiles(self):
    with tempdir() as dirname:
        bd = DataDir(dirname, protectedpaths=('test.dat',))
        self.assertEqual(bd.protectedfiles, {'test.dat'})
def test_deletefiles(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_txt('test1.txt', 'hello')
        bd.write_txt('test2.txt', 'hello')
        bd.delete_files(('test1.txt', 'test2.txt', 'test3.txt'))
class SndSeq:

    _classid = 'SndSeq'
    _seqfile = 'sndseq.csv'
    _infofile = 'sndseq.json'

    def __init__(self, path, tablekey=None):
        self.datadir = DataDir(path=path)
        if tablekey is not None:
            self._seqfile = f'{self._seqfile.rsplit(".", 1)[0]}_{tablekey}.csv'
            self._infofile = f'{self._infofile.rsplit(".", 1)[0]}_{tablekey}.json'
        if not (self.datadir.path / self._seqfile).exists():
            raise IOError(f"cannot find table file '{self._seqfile}'")
        if not (self.datadir.path / self._infofile).exists():
            raise IOError(f"cannot find info file '{self._infofile}'")
        d = self.datadir.read_jsondict(self._infofile)
        self.fs = d['fs']
        self.nframes = d['nframes']
        self.duration = self.nframes / self.fs
        self.nsnds = d['nsnds']
        self.tablekey = tablekey

    def __str__(self):
        return (f'{self._classid}: {self.datadir.path.name} '
                f'<{self.nsnds} snds, {duration_string(self.duration)}>')

    __repr__ = __str__

    def seqtable(self):
        dtypes = {'snd': 'str',
                  'startframe': 'int64',
                  'endframe': 'int64',
                  'starttime': 'float64',
                  'endtime': 'float64'}
        return SndSeqTable(
            pd.read_csv(self.datadir.path / self._seqfile).astype(dtypes))

    def add_calibmarks_3sweeps(self, startfreq=500., endfreq=1000.,
                               chirpduration=0.2, rampduration=1e-3,
                               silentinterval=30., rms=0.2, overwrite=False):
        c = calibmark_3sweeps(startfreq=startfreq, endfreq=endfreq,
                              chirpduration=chirpduration,
                              silenceduration=0.1,
                              rampduration=rampduration, fs=self.fs,
                              rmsamp=rms)
        snddict = SndDict(self.datadir.path)
        snddict.add('calibmark', c, overwrite=overwrite)
        seqtable = self.seqtable()
        # calibration mark at the start of the sequence
        c1 = pd.DataFrame({'startframe': [0],
                           'snd': ['calibmark'],
                           'endframe': [c.nframes]})
        nframes_silence = int(round(silentinterval * self.fs))
        # shift the existing sounds to make room for the first calibration mark
        seqtable['startframe'] += c.nframes + nframes_silence
        seqtable['endframe'] += c.nframes + nframes_silence
        # calibration mark at the end of the sequence
        startframe = int(seqtable['endframe'].iloc[-1]) + nframes_silence
        endframe = startframe + c.nframes
        c2 = pd.DataFrame({'startframe': [startframe],
                           'snd': ['calibmark'],
                           'endframe': [endframe]})
        seqtable = pd.concat([c1, seqtable, c2])
        seqtable['starttime'] = seqtable['startframe'] / float(self.fs)
        seqtable['endtime'] = seqtable['endframe'] / float(self.fs)
        # record the new total length and number of sounds in the info file
        snddict.datadir.write_jsondict(filename=self._infofile,
                                       d={'fs': self.fs,
                                          'nframes': endframe,
                                          'nsnds': len(seqtable)},
                                       overwrite=True)
        seqtablepath = snddict.datadir.path / self._seqfile
        time.sleep(2.)  # needed for windows
        seqtablepath.unlink()
        cols = ['snd', 'startframe', 'endframe', 'starttime', 'endtime']
        additionalcols = [col for col in list(seqtable.columns)
                          if col not in cols]
        seqtable.to_csv(seqtablepath, index=False,
                        columns=cols + additionalcols)

    def timetransform_fromrecording(self, snd, lookduration=None):
        """Find a linear function that transforms the timing of sounds in
        the sequence to the timing of the sounds in a recording of the
        sequence.

        We assume that there is an offset (i.e. the sequence starts
        somewhere in the recording) and that there may be small differences
        between the provided sampling rates and the actual ones of the
        playback and recording devices. This method uses cross-correlation
        to find the first and the last sounds in the recording.

        """
        if not self.fs == snd.fs:
            raise ValueError("snd does not have the same fs as sndseq")
        origtable = self.seqtable()
        s1name = origtable.iloc[0]['snd']
        s2name = origtable.iloc[-1]['snd']
        s1 = wavread(self.datadir.path / f'{s1name}.wav')
        s2 = wavread(self.datadir.path / f'{s2name}.wav')
        if lookduration is None:
            lookduration = 0.2 * self.duration
        looknframes = int(round(lookduration * snd.fs))
        for nframes in (s1.nframes, s2.nframes):
            if not looknframes >= nframes:
                raise ValueError("lookduration should be longer than the "
                                 "duration of the target sound")
        # first sound near the beginning of the recording
        cc = np.correlate(snd.frames[:looknframes, 0], s1.frames[:, 0],
                          mode='valid')
        i1 = np.absolute(cc.argmax())
        t1 = i1 / float(snd.fs)
        # calibmark at end
        cc = np.correlate(snd.frames[-looknframes:, 0], s2.frames[:, 0],
                          mode='valid')
        i2 = np.absolute(cc.argmax()) + snd.nframes - looknframes
        t2 = i2 / float(snd.fs)
        ot1 = origtable.iloc[0]['starttime']
        ot2 = origtable.iloc[-1]['starttime']
        slope = (t2 - t1) / (ot2 - ot1)
        offset = t1 - slope * ot1
        return slope, offset

    def seqtable_fromrecording(self, snd, lookduration=None):
        slope, offset = self.timetransform_fromrecording(
            snd=snd, lookduration=lookduration)
        seqtable = self.seqtable()
        seqtable['starttime'] = slope * seqtable['starttime'] + offset
        seqtable['endtime'] = slope * seqtable['endtime'] + offset
        seqtable['startframe'] = np.round(seqtable['starttime']
                                          * self.fs).astype('int64')
        seqtable['endframe'] = np.round(seqtable['endtime']
                                        * self.fs).astype('int64')
        return seqtable

    def to_snd(self, dtype='float32'):
        snddict = SndDict(self.datadir.path).read()
        seqtable = self.seqtable()
        nframes = seqtable['endframe'].iloc[-1]
        nchannels = snddict[seqtable['snd'][0]].nchannels
        fs = snddict[seqtable['snd'][0]].fs
        ar = np.zeros((nframes, nchannels), dtype=dtype)
        # Note: the Tier/Interval annotation objects built below are not
        # used further here; TextGrid export is handled by to_textgrid.
        tier = Tier()
        for index, row in seqtable.iterrows():
            snd = snddict[row['snd']]
            startframe = row['startframe']
            endframe = row['endframe']
            ar[startframe:endframe] = snd.frames
            interval = Interval()
            interval.xmin = row['starttime']
            interval.xmax = row['endtime']
        return Snd(frames=ar, fs=fs)

    def to_textgrid(self, filepath):
        return self.seqtable().to_textgrid(filepath=filepath)
def test_writetxtdonotoverwrite(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_txt('test1.txt', 'hello')
        self.assertRaises(OSError, bd._write_txt, 'test1.txt', 'hello')
def test_writetxtoverwrite(self):
    with tempdir() as dirname:
        bd = DataDir(dirname)
        bd.write_txt('test1.txt', 'hello')
        bd.write_txt('test1.txt', 'hello', overwrite=True)