Example No. 1
    def run(self):
        speech_dir = 'E:\\alon_emanuel_drive\\Downloads' \
            '\\Audio_Speech_Actors_01-24'
        song_dir = 'E:\\alon_emanuel_drive\\Downloads\\Audio_Song_Actors_01-24'
        good = 0
        bad = 0

        for dir in os.listdir(speech_dir):
            for file in tqdm(os.listdir(os.path.join(speech_dir, dir))):
                file = os.path.join(speech_dir, dir, file)
                try:
                    wavo = wavio.read(file)
                except:
                    bad += 1
                else:
                    good += 1

        for dir in os.listdir(song_dir):
            for file in tqdm(os.listdir(os.path.join(song_dir, dir))):
                file = os.path.join(song_dir, dir, file)
                try:
                    wavo = wavio.read(file)
                except:
                    bad += 1
                else:
                    good += 1

        print(f'Good: {good}')
        print(f'Bad: {bad}')
Example No. 2
 def __init__(self, file, wind=400):
     self.directory = file
     self.sound = sa.WaveObject.from_wave_file(file)
     wav = wavio.read(file)  # read once and reuse for rate and data
     self.rate = wav.rate
     self._raw = np.ravel(wav.data)
     self.data = normalize_level(self._raw)
     # print(type(self.data))
     self.envelope = windowed_avg(self.data, wind)
     self.spectrum = np.abs(np.fft.rfft(self.data, n=21654))
Example No. 3
def get_input_processed_pair(model, file_info, batch, total_batches):
    # Get the clean file path by searching through the file's parent directory.
    clean_path = None
    for possibly_clean_file in os.listdir(file_info.directory):
        if possibly_clean_file.endswith("_clean.wav"):
            clean_path = os.path.join(file_info.directory, possibly_clean_file)

    # Load both the clean and distorted files.
    file_object = wavio.read(clean_path)

    #Calculate bit depth for normalization
    bitdepth_divisor = float(2**(file_object.sampwidth*8 - 1))
    x = file_object.data
    y = wavio.read(file_info.global_path).data

    assert (x.shape[1] == y.shape[1]), "Clean file has different number of channels than non-clean file."
    num_channels = x.shape[1]

    # Pad the data so the length becomes a whole multiple of the frame size.
    x_pad = (model.frame_size - x.shape[0] % model.frame_size) % model.frame_size
    y_pad = (model.frame_size - y.shape[0] % model.frame_size) % model.frame_size
    x = np.pad(x, ((0, x_pad), (0, 0)), 'constant', constant_values=(0, 0))
    y = np.pad(y, ((0, y_pad), (0, 0)), 'constant', constant_values=(0, 0))
    # Pad clean beginning to match receptive field.
    x = np.pad(x, ((model.R, 0), (0, 0)), 'constant', constant_values=(0, 0))

    # Take the batch. Use the distorted data shape since it hasn't been
    # padded with the receptive field.
    start = int((batch / total_batches) * y.shape[0])
    end = int(((batch + 1.0) / total_batches) * y.shape[0])
    y = y[start:end,...]
    x = x[start:end+model.R,...]

    # Normalize to [-1.0, 1.0]
    y = y.astype(np.float32, order='C') / bitdepth_divisor
    x = x.astype(np.float32, order='C') / bitdepth_divisor

    # Dimension 0 is the number of frame_sizes that fit in x[0] - receptive field samples
    # Dimension 1 is framesize + receptive field (what is necessary for lookback on a given sample)
    # Dimension 2 is channels
    new_x_shape = (int((x.shape[0]-model.R)/model.frame_size), model.frame_size + model.R, num_channels)

    # stride specifies how many bytes we have to move in each dimension for the new view
    # (4 bytes per float32 sample)
    xstride = (model.frame_size*4*num_channels, 4*num_channels, 4)
    x = np.lib.stride_tricks.as_strided(x, new_x_shape, xstride)


    # Dimension 0 is the number of frame_sizes that fit in y's samples
    # Dimension 1 is frame_size (the target needs no receptive-field lookback)
    # Dimension 2 is channels
    new_y_shape = (int((y.shape[0])/model.frame_size), model.frame_size, num_channels)
    ystride = (model.frame_size*4*num_channels, 4*num_channels, 4)
    y = np.lib.stride_tricks.as_strided(y, new_y_shape, ystride)

    return x, y
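The byte-stride arithmetic above is easy to get wrong; below is a minimal 1-D sketch (with made-up frame_size and R values) of the same framing done with numpy's sliding_window_view (numpy 1.20+), which computes the strides for you. Each row holds frame_size new samples plus R samples of left context, and consecutive rows advance by frame_size samples.

import numpy as np

frame_size, R = 4, 2                      # toy values, for illustration only
x = np.arange(20, dtype=np.float32)       # stand-in for one channel of audio
x = np.pad(x, (R, 0))                     # left-pad with the receptive field

windows = np.lib.stride_tricks.sliding_window_view(x, frame_size + R)[::frame_size]
print(windows.shape)   # (5, 6): 20/4 frames, each frame_size + R samples long
print(windows[0])      # [0. 0. 0. 1. 2. 3.]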
Example No. 4
def extract(filename):
    """ 返回给定文件的 mfcc 特征 """
    wavobj = wavio.read(filename)
    fs = wavobj.rate
    signal = wavobj.data
    mfcc = MFCC.extract(fs, signal)
    return mfcc
def get_audio(audio_path, is_crop=True):
    try:
        # pdb.set_trace()
        wave_obj = wavio.read(audio_path)
        rate = wave_obj.rate
        sig = np.squeeze(wave_obj.data)
        # (rate,sig) = wav.read(ad)
    except TypeError:
        # print(ad)
        (rate,sig) = wav.read(audio_path)
    # only short than 10 seconds
    # if np.shape(sig)[0]/float(rate) > 10:
    #     sig = sig[0:rate*10]

    # Mel-filter bank
    sig = sig - np.mean(sig)
    fbank_feat = logfbank(sig, rate, winlen=0.025,\
        winstep=0.01,nfilt=40,nfft=512,lowfreq=0,highfreq=None,preemph=0.97)
    
    if is_crop:
        if fbank_feat.shape[0] < 1024:
            # pdb.set_trace()
            zero_pad = np.zeros((1024-fbank_feat.shape[0], 40))
            fbank_feat = np.concatenate([fbank_feat, zero_pad], 0)
        else:
            fbank_feat = fbank_feat[:1024]

    return fbank_feat
Example No. 6
def showNoiseEnergies():
    import pylab as pl
    import SignalProc
    #sp = SignalProc.SignalProc(data5, sampleRate)
    pl.ion()
    tbd = [0, 1, 3, 7, 15, 31]
    #tbd = np.concatenate([np.arange(30),np.arange(50,63)])
    #tbd = np.arange(50)
    listnodes = np.arange(63)
    listnodes = np.delete(listnodes, tbd)

    for root, dirs, files in os.walk(
            str('Sound Files/Noise examples/Noise_10s')):
        for filename in files:
            if filename.endswith('.wav'):
                filename = root + '/' + filename
                wavobj = wavio.read(filename)
                sampleRate = wavobj.rate
                data = wavobj.data
                if data.dtype != 'float':
                    data = data.astype('float')  # / 32768.0
                if np.shape(np.shape(data))[0] > 1:
                    data = np.squeeze(data[:, 0])

                pl.figure()
                e1 = WaveletSegment.computeWaveletEnergy_1s(data, 'dmey2')
                pl.plot(e1[listnodes])
                pl.title(filename)
Example No. 7
def testFeatures():
    import wavio
    wavobj = wavio.read('Sound Files/tril1.wav')
    fs = wavobj.rate
    data = wavobj.data

    if data.dtype != 'float':
        data = data.astype('float') # / 32768.0

    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(sampleRate=fs, window_width=256, incr=128)
    # The next lines are to get a spectrogram that *should* precisely match the Raven one
    #sg = sp.spectrogram(data, multitaper=False, window_width=256, incr=128, window='Ones')
    #sg = sg ** 2
    sg = sp.spectrogram(data, multitaper=False, window_width=256, incr=128, window='Hann')

    f = Features(data,fs,256,128)

    features = []
    # Loop over the segments (and time slices within?)
    features.append([
        f.get_Raven_spectrogram_measurements(sg=sg, fs=fs, window_width=256, f1=0, f2=np.shape(sg)[1], t1=0, t2=np.shape(sg)[0]),
        f.get_Raven_robust_measurements(sg, fs, 0, np.shape(sg)[1], 0, np.shape(sg)[0]),
        f.get_Raven_waveform_measurements(data, fs, 0, len(data)),
        f.wiener_entropy(sg),
    ])

    # Will need to think about feature vector length for the librosa features, since they are on fixed windows
    f.get_chroma()
    f.get_mfcc()
    f.get_tonnetz()
    f.get_spectral_features()
    f.get_lpc(data,order=44)
Example No. 8
def read(file, dtype='float'):
    wave = wavio.read(file)
    assert wave.sampwidth == 2 and wave.rate == constants.FRAMERATE

    left = wave.data[:, 0].astype(dtype)
    right = wave.data[:, 1].astype(dtype) if wave.data.shape[1] > 1 else None
    return left, right
Example No. 9
    def loadData(self,fName,trainTest=True):
        # Load data
        filename = fName+'.wav' #'train/kiwi/train1.wav'
        filenameAnnotation = fName+'-sec.txt'#'train/kiwi/train1-sec.xlsx'
        try:
            wavobj = wavio.read(filename)
        except Exception:
            print("unsupported file: ", filename)
            return
        self.sampleRate = wavobj.rate
        self.data = wavobj.data
        if self.data.dtype != 'float':
            self.data = self.data.astype('float') #/ 32768.0
        if np.shape(np.shape(self.data))[0]>1:
            self.data = np.squeeze(self.data[:,0])
        n=int(np.ceil(len(self.data)/self.sampleRate))

        if trainTest==True:     #survey data don't have annotations
            # Get the segmentation from the txt file
            import csv
            self.annotation = []
            count = 0
            with open(filenameAnnotation) as f:
                reader = csv.reader(f, delimiter="\t")
                d = list(reader)
            for row in range(0,n):
                self.annotation.append(int(d[row][1]))
Example No. 10
def testFeatures():
    wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Denoising_paper_data\Primary_dataset\kiwi\male\male1.wav')
    fs = wavobj.rate
    data = wavobj.data

    if data.dtype != 'float':
        data = data.astype('float')         # / 32768.0

    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(sampleRate=fs, window_width=256, incr=128)
    # The next lines are to get a spectrogram that *should* precisely match the Raven one
    #sg = sp.spectrogram(data, multitaper=False, window_width=256, incr=128, window='Ones')
    #sg = sg ** 2
    sg = sp.spectrogram(data, sgType='Standard',window_width=256, incr=128, window='Hann')

    f = Features(data, fs, 256, 128)

    features = []
    # Loop over the segments (and time slices within?)
    mfcc = f.get_mfcc().tolist()
    # features.append(mfcc.tolist())
    we = f.get_WE()
    we = we.transpose().tolist()
    # how to combine features with different resolution?

    features.append([
        f.get_Raven_spectrogram_measurements(sg=sg, fs=fs, window_width=256, f1=0, f2=np.shape(sg)[1], t1=0, t2=np.shape(sg)[0]),
        f.get_Raven_robust_measurements(sg, fs, 0, np.shape(sg)[1], 0, np.shape(sg)[0]),
        f.get_Raven_waveform_measurements(data, fs, 0, len(data)),
        f.wiener_entropy(sg),
    ])

    # Will need to think about feature vector length for the librosa features, since they are on fixed windows
    f.get_chroma()
    f.get_mfcc()
    f.get_tonnetz()
    f.get_spectral_features()
    f.get_lpc(data,order=44)
Example No. 11
def read(filename, limit=None):
    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.frombuffer(audiofile._data, np.int16)

        channels = []
        for chn in range(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        frame_rate = audiofile.frame_rate

    except audioop.error:

        wav = wavio.read(filename)
        audiofile = wav.data

        if limit:
            # limit is given in seconds, so convert it to a sample count
            audiofile = audiofile[:limit * wav.rate]

        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)

        channels = []
        for chn in audiofile:
            channels.append(chn)

        frame_rate = wav.rate

    return channels, frame_rate, unique_hash(filename)
Example No. 12
def read_wav(filename):
    """
    Reads a wav file and returns the associated data

    Parameters
    ----------
    filename : string
        The name of the wav file you want to read

    Returns
    -------
    rate
        The sampling rate of the wav
    length
        The number of samples in each channel
    resolution
        The number of bytes per sample
    nc
        The number of sound channels
    x
        [length, nc]-dimension array containing the wav data
    """

    wave = wavio.read(filename)
    rate = wave.rate
    length = wave.data.shape[0]
    resolution = wave.sampwidth
    nc = wave.data.shape[1]
    x = wave.data

    return [rate, length, resolution, nc, x]
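A possible usage sketch for read_wav, assuming wavio is available; the tone written here (and the file name "tone.wav") is invented purely so there is something to read back.

import numpy as np
import wavio

rate_in = 22050
t = np.linspace(0, 1, rate_in, endpoint=False)
tone = (10000 * np.sin(2 * np.pi * 440 * t)).astype(np.int16)
wavio.write("tone.wav", tone, rate_in, sampwidth=2)    # hypothetical file name

rate, length, resolution, nc, x = read_wav("tone.wav")
print(rate, length, resolution, nc, x.shape)           # 22050 22050 2 1 (22050, 1)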
Example No. 13
    def test4(self):
        path = tempfile.mkdtemp()
        filename = os.path.join(path, "test4data.wav")
        data = np.zeros(32, dtype=np.int16)
        data[1::4] = 10000
        data[3::4] = -10000

        wavio.write(filename, data, 44100, sampwidth=1)
        try:
            f = wave.open(filename, 'r')
            self.assertEqual(f.getnchannels(), 1)
            self.assertEqual(f.getsampwidth(), 1)
            self.assertEqual(f.getframerate(), 44100)
            f.close()

            w = wavio.read(filename)
            self.assertEqual(w.rate, 44100)
            self.assertEqual(w.sampwidth, 1)
            self.assertEqual(w.data.dtype, np.uint8)
            self.assertEqual(w.data.shape, (32, 1))
            expected = 128*np.ones_like(data, dtype=np.uint8).reshape(-1, 1)
            expected[1::4, 0] = 255
            expected[3::4, 0] = 0
            np.testing.assert_equal(w.data, expected)
        finally:
            os.remove(filename)
            os.removedirs(path)
Example No. 14
def open_wavefile(filename, target_rms=.01):
    print "READING: " + filename
    try:
        wav = wavio.read(filename)
        fs, wavefile = wav.rate, wav.data
        # [fs, width, wavefile] = readwav(filename)
    except:
        print(traceback.format_exc())
        print("ERROR: could not read file")
        sys.exit(1)

    x = np.array(wavefile, dtype=float)

    x = x * (2**-15)  # normalizing to match MATLAB double representation
    # print "\nfirst few samples of x:\n", x[0:5,:]
    dim = x.shape
    num_chan = 1

    # normalize
    if len(dim) > 1:
        num_chan = dim[1]
        for c in range(0, num_chan):
            rms = np.sqrt(np.mean(np.square(x[:, c])))
            x[:, c] = 1. * x[:, c] / rms * target_rms + np.random.rand(x.shape[
                0]) * 1e-20  # adding noise for files with fake zero data
    else:
        # x = x[0:140000] # debug: comment out
        rms = np.sqrt(np.mean(np.square(x)))
        x = 1. * x / rms * target_rms + np.random.rand(
            x.shape[0]) * 1e-20  # adding noise for files with fake zero data
    num_frames = x.shape[0]

    print "\tsample rate: ", fs, "\n\t# samples: ", num_frames, "\n\t# channels: ", num_chan

    return x, fs, num_frames
Example No. 16
    def __init__(self, file=""):
        """Read the audio file and save all the important data"""

        self.fileName = file

        if file != "":
            self.wav = wavio.read(file)

            # framerate
            self.fs = self.wav.rate

            # number of bytes per sample
            self.bytes = self.wav.sampwidth

            # track data
            self.data = np.array(self.wav.data)

            #track data as float
            self.floatData = int_to_float(self.data, self.bytes)

            # dimensions of the data which is (nSamples, nChannels)
            # nSamples: number of samples in a file
            # nChannels: number of channels, 1 - mono, 2 - stereo
            (self.nSamples, self.nChannels) = np.shape(self.data)

            # length of a file in seconds
            self.length = self.nSamples / self.fs
Example No. 17
def ExtractCompressedAudioNTS(fileName):
    # Not Thread Safe!!

    os.system('mkdir -p temp')
    tempWav = 'temp/temp.wav'
    tempRaw = 'temp/temp.raw'

    sampleRate = 48000

    if True:
        cmd = '%s -y -i "%s" -ac 1 -ar %d %s' % (
            ffmpegEXE, fileName, sampleRate, tempWav)
        print(cmd)
        sys.stdout.flush()
        os.system(cmd)
        dw = wavio.read(tempWav)
        # sampleRate=dw.rate

    cmd = '%s -y -i "%s" -f f32le -c:a pcm_f32le -ac 1 -ar %d %s' % (
        ffmpegEXE, fileName, sampleRate, tempRaw)
    print(cmd)
    sys.stdout.flush()
    os.system(cmd)

    data = numpy.fromfile(tempRaw, dtype=numpy.dtype('<f'))
    return (data, sampleRate)
Example No. 18
def create_dataset(src_path, esc50_dst_path, esc10_dst_path):
    print('* {} -> {}'.format(src_path, esc50_dst_path))
    print('* {} -> {}'.format(src_path, esc10_dst_path))
    esc10_classes = [0, 10, 11, 20, 38, 21, 40, 41, 1, 12]  # ESC-10 is a subset of ESC-50
    esc50_dataset = {}
    esc10_dataset = {}

    for fold in range(1, 6):
        esc50_dataset['fold{}'.format(fold)] = {}
        esc50_sounds = []
        esc50_labels = []
        esc10_dataset['fold{}'.format(fold)] = {}
        esc10_sounds = []
        esc10_labels = []

        # Load the wav files
        for wav_file in sorted(glob.glob(os.path.join(src_path, '{}-*.wav'.format(fold)))):
            sound = wavio.read(wav_file).data.T[0]
            start = sound.nonzero()[0].min()
            end = sound.nonzero()[0].max()
            sound = sound[start: end + 1]  # Remove silent sections
            label = int(os.path.splitext(wav_file)[0].split('-')[-1])
            esc50_sounds.append(sound)
            esc50_labels.append(label)
            if label in esc10_classes:
                esc10_sounds.append(sound)
                esc10_labels.append(esc10_classes.index(label))

        esc50_dataset['fold{}'.format(fold)]['sounds'] = esc50_sounds
        esc50_dataset['fold{}'.format(fold)]['labels'] = esc50_labels
        esc10_dataset['fold{}'.format(fold)]['sounds'] = esc10_sounds
        esc10_dataset['fold{}'.format(fold)]['labels'] = esc10_labels
    # Saved format: dict -> array -> dict -> array
    np.savez(esc50_dst_path, **esc50_dataset)
    np.savez(esc10_dst_path, **esc10_dataset)
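Because each fold is stored as a Python dict inside the .npz archive, reading it back needs allow_pickle and .item(); a small sketch (the path is illustrative):

import numpy as np

dataset = np.load('esc50.npz', allow_pickle=True)
fold1 = dataset['fold1'].item()
print(len(fold1['sounds']), len(fold1['labels']))   # number of clips and labels in fold 1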
Example No. 19
    def get_new_map(self, filename):

        self.wave = wavio.read(filename)
        self.left_channel = self.wave.data[:, 0]
        self.right_channel = self.wave.data[:, 1]

        return
    def load_data(wave_path):
        def normalize(x):
            m = np.max(np.abs(x))
            return x / m

        def tomono(x):
            return (x[:, 0] + x[:, 1]) / 2

        def downsample3(sig, Nwin=32):
            win = firwin(numtaps=Nwin, cutoff=0.55)
            new_sig = sig.copy()
            new_sig = np.convolve(new_sig, win, 'same')
            new_sig = new_sig[2::3]
            return new_sig

        def toint16(x):
            return np.int16(x * (2 ** 15))

        # Load data
        wavobj = read(wave_path)

        fs = wavobj.rate
        # Preprocess data
        waveform = wavobj.data.copy()
        waveform = normalize(waveform)
        if type == 'solo':
            waveform = waveform[:, 0]
        else:
            waveform = tomono(waveform)
        waveform = downsample3(waveform)
        waveform = toint16(waveform)
        if len(waveform.shape) == 1:
            waveform = np.reshape(waveform, [1, len(waveform)])
        return waveform, fs
Example No. 21
def loadFile(filename):

    # Load any previous segments stored
    if os.path.isfile(filename + '.data'):
        file = open(filename + '.data', 'r')
        segments = json.load(file)
        file.close()
        if len(segments) > 0:
            if segments[0][0] == -1:
                del segments[0]
        else:
            return None, None, 0, 0, 0, 0
    else:
        return None, None, 0, 0, 0, 0

    if os.stat(filename).st_size != 0: # avoid files with no data (Tier 1 has 0Kb .wavs)
        wavobj = wavio.read(filename)

        # Parse wav format details based on file header:
        sampleRate = wavobj.rate
        audiodata = wavobj.data
        minFreq = 0
        maxFreq = sampleRate / 2.
        fileLength = wavobj.nframes

        if audiodata.dtype != 'float':
            audiodata = audiodata.astype('float')  # / 32768.0

        if np.shape(np.shape(audiodata))[0] > 1:
            audiodata = audiodata[:, 0]
        datalength = np.shape(audiodata)[0]
        datalengthSec = datalength / sampleRate
        #print("Length of file is ", datalengthSec, " seconds (", datalength, "samples) loaded from ", fileLength / sampleRate, "seconds (", fileLength, " samples) with sample rate ",sampleRate, " Hz.")

        return segments, audiodata, sampleRate, minFreq, maxFreq, datalengthSec
Example No. 22
def read_audio(filename):
    wav = wavio.read(filename)
    input_audio = wav.data[:, 0]
    input_audio = input_audio / np.max(np.abs(input_audio), axis=0)
    fs = wav.rate
    print("Frecuencia de muestreo", fs)
    return input_audio, fs
Example No. 23
def main(tempo, pattern):

    wav = wavio.read('metronome.wav')
    sample_rate = wav.rate

    beat_duration = 60 / tempo
    quarter_note = int(sample_rate * beat_duration)

    pattern = pattern.split('.')
    audio = wav.data
    audio = audio[:quarter_note]

    x = np.linspace(0, 2 * beat_duration * np.pi,
                    int(beat_duration * sample_rate))
    notes = []
    for p in pattern:
        if p == '0':
            n = np.zeros_like(audio)
            n = n.astype(np.int16)

        elif p == '1':

            n = audio.astype(np.int16)

        notes.append(n)

    measure = np.concatenate([n for n in notes])

    sd.play(measure, sample_rate)
Example No. 24
    def get_status(self, seconds=20, pps=100):
        status = {}
        if os.path.exists(self.file_name):

            wav = wavio.read(self.file_name)
            rate = wav.rate
            data = wav.data

            status.update({
                "record_length": (len(data) / rate) * 1000,
                "name": self.name,
                "filename": self.file_name
            })

            waveform_points = []
            waveform_times = []

            points = seconds * pps

            samples = int(seconds * rate)
            data0 = data[:, 0][-samples:]
            parts = np.array_split(data0, points)
            waveform_points = np.average(parts, 1)
            waveform_points = list(waveform_points / ((2**self.bitdepth) / 2))
            waveform_times = list(np.arange(seconds, 0, -(seconds / points)))

            status.update({
                "waveform": waveform_points,
                "times": waveform_times
            })

        else:
            status.update({"error": "file doesn't exist (yet)"})

        return status
Example No. 25
def readwav(path):
    Struct = wavio.read(path)
    # Read a WAV file and return an object holding the sample rate, the sample width (in bytes) and a numpy array containing the data.
    wav = Struct.data.astype(float) / np.power(2, Struct.sampwidth*8-1)  # np.power(a, b) is a raised to the power b
#    print(Struct.sampwidth)
    fs = Struct.rate
    return wav, fs
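The divisor used above follows from the sample width: signed PCM samples of sampwidth bytes lie in [-2**(8*sampwidth-1), 2**(8*sampwidth-1)-1], so dividing by 2**(sampwidth*8-1) maps them into roughly [-1.0, 1.0). (Per the wavio tests elsewhere on this page, 8-bit files come back as unsigned uint8, so the formula is really aimed at 16-, 24- and 32-bit files.) A quick check of the divisors:

for sampwidth in (2, 3, 4):
    divisor = 2 ** (sampwidth * 8 - 1)
    print(sampwidth, divisor)   # 2 32768, 3 8388608, 4 2147483648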
Example No. 26
    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]
        y = self.labels[index]
        assert y <= self.labels.max()
        # Load data and get label
        if y == 0:
            main_path = '/vol/hinkelstn/data/FILTERED/atrial_fibrillation_8k/'
#            main_path = '/data/bhosseini/hinkelstn/FILTERED/atrial_fibrillation_8k/'
        else:
            main_path = '/vol/hinkelstn/data/FILTERED/sinus_rhythm_8k/'
#            main_path = '/data/bhosseini/hinkelstn/FILTERED/sinus_rhythm_8k/'

#        list_f = os.listdir(main_path)
        path = main_path + ID
        w = wavio.read(path)
        w_zm = stats.zscore(w.data, axis=0, ddof=1)
        #        X = w.data.transpose(1,0)
        if self.t_range:
            X = torch.tensor(w_zm[self.t_range, :].transpose(1, 0)).float()
        else:
            X = torch.tensor(w_zm.transpose(1, 0)).float()

#        X = torch.tensor(w.data.transpose(1,0)).view(1,2,X.shape[1])

        y = torch.tensor(y).long()
        #        y = torch.tensor(y).view(1,1,1)

        #        data_tensor = TensorDataset(X.float(),y.long())

        return X, y
Example No. 28
def testMC():
    import wavio
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui

    #wavobj = wavio.read('Sound Files/kiwi_1min.wav')
    wavobj = wavio.read('Sound Files/tril1.wav')
    fs = wavobj.rate
    data = wavobj.data#[:20*fs]

    if data.dtype != 'float':
        data = data.astype('float')  #/ 32768.0

    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    import SignalProc
    sp = SignalProc.SignalProc(data,fs,256,128)
    sg = sp.spectrogram(data=data,window_width=256,incr=128,window='Hann',mean_normalise=True,onesided=True,multitaper=False,need_even=False)
    s = Segment(data,sg,sp,fs)

    #print np.shape(sg)

    #s1 = s.medianClip()
    s1,p,t = s.yin(returnSegs=True)
    app = QtGui.QApplication([])

    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)

    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)
    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10.*np.log10(sg))

    # vb2 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    # im2 = pg.ImageItem(enableMouse=False)
    # vb2.addItem(im2)
    # im2.setImage(c)

    vb3 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    im3 = pg.ImageItem(enableMouse=False)
    vb3.addItem(im3)
    im3.setImage(10.*np.log10(sg))

    vb4 = win.addViewBox(enableMouse=False, enableMenu=False, row=2, col=0)
    im4 = pg.PlotDataItem(enableMouse=False)
    vb4.addItem(im4)
    im4.setData(data)

    for seg in s1:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0],fs,128), convertAmpltoSpec(seg[1],fs,128)])
        #a.setRegion([seg[0],seg[1]])
        vb3.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
Example No. 29
def wav2features(wav_file: str,
                 parameters: dict,
                 normalize: bool = True,
                 allow_zero_padding: bool = False):
    '''
    Extract audio features from a wave file.

    Keyword Parameters:
    ===================
    - wave_file (str) -- wave file path
    - parameters (dict) -- audio and feature parameters used to extract features
    - normalize (bool) -- normalize audio before processing (default True)
    - allow_zero_padding (bool) -- add zeros if input audio features are too short (default False)

    Returns
    =======
    features (np.array) -- return an np.array of size parameters['shape']

    ### Raises
    - ValueError -- Wrong parameters

    '''
    output_shape = parameters['shape']
    features_param = parameters['features_param']

    if features_param['feature_type'] not in supported_features:
        raise ValueError(
            "Unsupported feature type {}, supported features are: {}".format(
                features_param['feature_type'], supported_features))
    try:
        with open(wav_file, 'rb') as fp:
            wav = wavio.read(fp)
    except Exception:
        raise FileNotFoundError("File not found: " + wav_file)
    if wav.data.dtype != np.int16:
        raise ValueError('Unsupported data type: ' + str(wav.data.dtype))
    if wav.rate != parameters['sample_rate']:
        raise ValueError('Unsupported sample rate: ' + str(wav.rate))

    signal = np.squeeze(wav.data)

    if 'emphasis' in parameters and parameters['emphasis'] is not None:
        signal = pre_emphasis(signal, parameters['emphasis'])

    if normalize:
        signal = signal.astype(np.float32) / abs(float(np.iinfo(np.int16).min))

    features = signal2features(signal, parameters, features_param)

    # Check output length
    if len(features) < output_shape[0]:
        if allow_zero_padding:
            features = np.vstack((np.zeros((output_shape[0] - len(features),
                                            len(features[0]))), features))
        else:
            return None
    if len(features) > output_shape[0]:
        features = features[:output_shape[0]]

    return features
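For reference, a hypothetical parameters dict covering only the keys wav2features itself reads; the contents of 'features_param' beyond 'feature_type', and the members of supported_features, depend on the surrounding module and are assumptions here.

parameters = {
    'shape': (98, 40),            # expected (frames, coefficients) output size
    'sample_rate': 16000,         # must match the wav file's rate
    'emphasis': 0.97,             # optional pre-emphasis coefficient
    'features_param': {
        'feature_type': 'mfcc',   # must appear in supported_features
        # ...plus whatever signal2features() expects...
    },
}
features = wav2features('speech.wav', parameters)   # 'speech.wav' is illustrative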
Example No. 31
 def check_wavio_read(self, filename, rate, sampwidth, dtype, shape, data):
     w = wavio.read(filename)
     self.assertEqual(w.rate, rate)
     self.assertEqual(w.sampwidth, sampwidth)
     self.assertEqual(w.data.dtype, dtype)
     self.assertEqual(w.data.shape, shape)
     np.testing.assert_equal(w.data, data)
Example No. 32
    def __getitem__(self, index):
        sample = self.df.iloc[[index]]
        sound = wavio.read(self.audio_path+str(sample.iloc[0, 0])).data.T[0]
        start = sound.nonzero()[0].min()
        end = sound.nonzero()[0].max()
        sound = sound[start: end + 1]

        sound = self.random_crop(sound)

        sound = self.random_gain(sound)
        
        if not self.istest:
            sound = self.pitch_change(sound)
            sound = self.speed_change(sound)
        sound = self.noiser(sound)
        sound = self.pad(sound)

        sound = self.normalize(sound)

        label = np.zeros(self.classes)
        label[sample.iloc[0, 1]] = 1.0
        #label = np.array([sample.iloc[0,1]], np.int32)
        lab = np.array(sample.iloc[0, 1])
        sound, label = self.tensor(sound, label)

        return (sound, lab)
Example No. 33
    def readWav(self, file, len=None, off=0, silent=False):
        """ Args the same as for wavio.read: filename, length in seconds, offset in seconds. """
        wavobj = wavio.read(file, len, off)
        self.data = wavobj.data

        # take only left channel
        if np.shape(np.shape(self.data))[0] > 1:
            self.data = self.data[:, 0]
        self.audioFormat.setChannelCount(1)

        # force float type
        if self.data.dtype != 'float':
            self.data = self.data.astype('float')
        self.audioFormat.setSampleSize(wavobj.sampwidth * 8)

        # total file length in s read from header (useful for paging)
        self.fileLength = wavobj.nframes

        self.sampleRate = wavobj.rate
        self.audioFormat.setSampleRate(self.sampleRate)

        # *Freq sets hard bounds, *Show can limit the spec display
        self.minFreq = 0
        self.maxFreq = self.sampleRate // 2
        self.minFreqShow = max(self.minFreq, self.minFreqShow)
        self.maxFreqShow = min(self.maxFreq, self.maxFreqShow)

        if not silent:
            print("Detected format: %d channels, %d Hz, %d bit samples" % (self.audioFormat.channelCount(), self.audioFormat.sampleRate(), self.audioFormat.sampleSize()))
Example No. 34
 def check_assertions(self, expectation: Expectation) -> None:
     """Simplify assertion checks for wave method."""
     self.hash.wave(
         filename=expectation['filename'],
         key=expectation['key'],
         note_duration=expectation['note_duration'],
         sample_rate=expectation['sample_rate'])
     self.assertTrue(
         os.path.isfile(expectation['filename']), 'File not created.')
     wave_file = wavio.read(expectation['filename'])
     self.assertEqual(
         wave_file.rate,
         expectation['sample_rate'],
         'Sample rate of wave file does not match input rate.')
     self.assertEqual(
         len(wave_file.data),
         len(expectation['notes']) * int(
             expectation['note_duration'] * expectation['sample_rate']),
         'Output tune is not the correct length.')
     samples = numpy.squeeze(wave_file.data)
     for note, frequency in enumerate(expectation['notes']):
         spectrum = numpy.fft.fft(
             samples[note * int(
                 expectation['sample_rate'] *
                 expectation['note_duration']):(
                     note + 1) * int(
                         expectation['sample_rate'] *
                         expectation['note_duration'])])
         spectral_density = 10 * numpy.log10(
             numpy.absolute(numpy.square(spectrum[:int(spectrum.size/2)])))
         self.assertGreater(
             spectral_density[int(
                 frequency * expectation['note_duration'])],
             numpy.average(spectral_density),
             'Output pitch is not present within the expected interval.')
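The index used in the assertion above relies on the FFT bin spacing: for a note of duration d seconds sampled at rate r, the slice has r*d points and bins are 1/d Hz apart, so a tone at frequency f falls in bin f*d. A standalone check with arbitrary numbers:

import numpy as np

rate, duration, freq = 8000, 0.5, 440.0
t = np.arange(int(rate * duration)) / rate
tone = np.sin(2 * np.pi * freq * t)
spectrum = np.abs(np.fft.fft(tone))[: int(rate * duration) // 2]
print(np.argmax(spectrum), int(freq * duration))   # both 220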
Example No. 35
def main(path="../../data/8bit-C4.wav"):
    wav_control = wavio.read(path)
    print(wav_control)
    # create the signal
    signals = to_signal(wav_control)
    signal = signals[0]
    signal.plot()
Example No. 36
def feature_extract(path, feature_extraction, window, stride, feat_size, nfilt,
                    nfft, lowfreq, preemph, samplerate):
    print(path)
    tmp = []
    highfreq = samplerate / 2
    appendEnergy = True
    ceplifter = nfilt
    w = wavio.read(path)
    normalized_w = w.data / (2**16. / 2)
    if ('Hamming' in feature_extraction):
        winfunc = lambda x: np.hamming(x)
    else:
        winfunc = lambda x: np.ones((x, ))
#    take abs of the FFT
    if ('mfcc' in feature_extraction):
        print('mfcc')
        tmp = demfcc.mfcc_(signal=normalized_w,
                           samplerate=samplerate,
                           winlen=window * 0.001,
                           winstep=stride * 0.001,
                           numcep=feat_size,
                           nfilt=nfilt,
                           nfft=nfft,
                           lowfreq=lowfreq,
                           highfreq=highfreq,
                           preemph=preemph,
                           ceplifter=ceplifter,
                           appendEnergy=appendEnergy,
                           winfunc=winfunc)
    elif ('fft_sam' in feature_extraction):
        print('fft_sam')
        tmp = demfcc.fft_sam(signal=normalized_w,
                             samplerate=samplerate,
                             winlen=window * 0.001,
                             winstep=stride * 0.001,
                             nfft=nfft,
                             preemph=preemph,
                             winfunc=winfunc)
    elif ('log_abs_mel' in feature_extraction):
        print('log_abs_mel')
        tmp = demfcc.log_abs_mel(signal=normalized_w,
                                 samplerate=samplerate,
                                 winlen=window * 0.001,
                                 winstep=stride * 0.001,
                                 nfilt=nfilt,
                                 nfft=nfft,
                                 lowfreq=lowfreq,
                                 highfreq=highfreq,
                                 preemph=preemph,
                                 winfunc=winfunc)
    if ('recurr' in feature_extraction):
        pair_len = int(re.search('@(.*?)_', feature_extraction).group(1))
        pair_step = int(re.search('~(.*?)_', feature_extraction).group(1))
        numpairs = math.floor((tmp.shape[0] - pair_len) / pair_step) + 1
        indices = np.tile(np.arange(0, pair_len), (numpairs, 1)) + np.tile(
            np.arange(0, numpairs * pair_step, pair_step), (pair_len, 1)).T
        indices = np.array(indices, dtype=np.int32)
        tmp = tmp[indices]
    return np.float32(tmp)
 def Rawdata_loading(self,filename,N):
     wav = wavio.read(filename)
     bit = 8*wav.sampwidth
     data = wav.data / float( 2**(bit-1) ) # -1.0 to 1.0(normalize)
     g = data[:,0]
     print(len(g))
     fs = wav.rate
     return g,fs
Example No. 38
def detectClicks():
    import SignalProc
    reload(SignalProc)
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui
    import wavio
    from scipy.signal import medfilt

    #wavobj = wavio.read('Sound Files/tril1.wav')
    #wavobj = wavio.read('Sound Files/010816_202935_p1.wav')
    #wavobj = wavio.read('Sound Files/20170515_223004 piping.wav')
    wavobj = wavio.read('Sound Files/test/DE66_BIRD_141011_005829.wav')
    #wavobj = wavio.read('/Users/srmarsla/DE66_BIRD_141011_005829_wb.wav')
    #wavobj = wavio.read('/Users/srmarsla/ex1.wav')
    #wavobj = wavio.read('/Users/srmarsla/ex2.wav')
    fs = wavobj.rate
    data = wavobj.data #[:20*fs]

    if data.dtype != 'float':
        data = data.astype('float') # / 32768.0

    if np.shape(np.shape(data))[0] > 1:
        data = data[:, 0]

    import SignalProc
    sp = SignalProc.SignalProc(data,fs,256,128)
    sg = sp.spectrogram(data,multitaper=False)
    s = Segment(data, sg, sp, fs, 50)

    energy = np.sum(sg,axis=1)
    energy = medfilt(energy,15)
    e2 = np.percentile(energy,95)*2
    # Step 1: clicks have high energy
    clicks = np.squeeze(np.where(energy>e2))
    clicks = s.identifySegments(clicks, minlength=1)

    app = QtGui.QApplication([])

    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)

    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)
    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10.*np.log10(sg))

    for seg in clicks:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0],fs,128), convertAmpltoSpec(seg[1],fs,128)])
        vb1.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
Example No. 39
def fFrq(dirName):
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if file.endswith('.wav'):
                fileName = root + '/' + file
                wavobj = wavio.read(fileName)
                sampleRate = wavobj.rate
                data = wavobj.data

                # None of the following should be necessary for librosa
                if data.dtype != 'float':
                    data = data.astype('float')  # / 32768.0
                if np.shape(np.shape(data))[0] > 1:
                    data = data[:, 0]

                sp = SignalProc.SignalProc([], 0, 512, 256)
                sgRaw = sp.spectrogram(data, 512, 256, mean_normalise=True, onesided=True, multitaper=False)
                segment = Segment.Segment(data, sgRaw, sp, sampleRate, 512, 256)
                pitch, y, minfreq, W = segment.yin(minfreq=100)
                ind = np.squeeze(np.where(pitch > minfreq))
                pitch = pitch[ind]
                if pitch.size == 0:
                    print(file, ' *++ no fundamental freq detected, could be faded kiwi or noise')
                    continue
                ind = ind * W / 512
                x = (pitch * 2. / sampleRate * np.shape(sgRaw)[1]).astype('int')

                from scipy.signal import medfilt
                x = medfilt(pitch, 15)

                if ind.size <2:
                    if pitch>850 and pitch<4500:
                        print(file, round(pitch), ' *##kiwi found')
                    else:
                        print(file, round(pitch), ' *-- fundamental freq is out of kiwi region, could be noise')
                else:
                    # Get the individual pieces
                    segs = segment.identifySegments(ind, maxgap=10, minlength=5)
                    count = 0
                    if segs == []:
                        if np.mean(pitch)>850 and np.mean(pitch)<4500:
                            print(file, round(np.mean(pitch)), ' *## kiwi found ')
                        else:
                            print(file, round(np.mean(pitch)), ' *-- fundamental freq is out of kiwi region, could be noise')
                    for s in segs:
                        count += 1
                        s[0] = s[0] * sampleRate / float(256)
                        s[1] = s[1] * sampleRate / float(256)
                        i = np.where((ind > s[0]) & (ind < s[1]))
                        if np.mean(x[i])>850 and np.mean(x[i])<4500:
                            print(file, round(np.mean(x[i])), ' *## kiwi found ##')
                        else:
                            print(file, round(np.mean(x[i])), ' *-- fundamental freq is out of kiwi region, could be noise')
Example No. 40
def eRatio(dirName):
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if file.endswith('.wav'):
                wavobj = wavio.read(root + '\\' + file)
                sampleRate = wavobj.rate
                data = wavobj.data
                if data.dtype != 'float':
                    data = data.astype('float')  # data / 32768.0
                if np.shape(np.shape(data))[0] > 1:
                    data = data[:, 0]
                post = SupportClasses.postProcess(data, sampleRate, [])
                print(file, post.eRatioConfd(seg=None))
Example No. 41
    def loadData(self,fName, trainPerFile=False, wavOnly=False, savedetections=False):
        # Load data
        filename = fName+'.wav' #'train/kiwi/train1.wav'
        filenameAnnotation = fName+'-sec.txt'#'train/kiwi/train1-sec.txt'
        try:
            wavobj = wavio.read(filename)
        except Exception:
            print("unsupported file: ", filename)
            return
        self.sampleRate = wavobj.rate
        self.data = wavobj.data
        if self.data.dtype != 'float':
            self.data = self.data.astype('float') #/ 32768.0
        if np.shape(np.shape(self.data))[0]>1:
            self.data = np.squeeze(self.data[:,0])
        n=math.ceil(len(self.data)/self.sampleRate)

        if not wavOnly:
            fileAnnotations = []
            # Get the segmentation from the txt file
            with open(filenameAnnotation) as f:
                reader = csv.reader(f, delimiter="\t")
                d = list(reader)
            if d[-1]==[]:
                d = d[:-1]
            if len(d) != n:
                print("ERROR: annotation length %d does not match file duration %d!" %(len(d), n))
                self.annotation = None
                return

            # for each second, store 0/1 presence:
            sum = 0
            for row in d:
                fileAnnotations.append(int(row[1]))
                sum += int(row[1])

            # TWO VERSIONS FOR COMPATIBILITY WITH BOTH TRAINING LOOPS:
            if trainPerFile:
                self.annotation = np.array(fileAnnotations)
            else:
                self.annotation.extend(fileAnnotations)
                self.filelengths.append(n)
            if savedetections:
                self.filenames.append(filename)
            print("%d blocks read, %d presence blocks found. %d blocks stored so far.\n" % (n, sum, len(self.annotation)))
Example No. 42
def length(dirName):
    """

    """
    durations = []
    for root, dirs, files in os.walk(str(dirName)):
        for filename in files:
            if filename.endswith('.wav'):
                filename = root + '/' + filename
                wavobj = wavio.read(filename)
                sampleRate = wavobj.rate
                data = wavobj.data
                duration = len(data) / sampleRate  # number of secs
                durations.append(duration)
    print("min duration: ", min(durations), " secs")
    print("max duration: ", max(durations), " secs")
    print("mean duration: ", np.mean(durations), " secs")
    print("median duration: ", np.median(durations), " secs")
    print("total duration: ", sum(durations), " secs")
Example No. 43
def read_aifs_or_wavs(in_dir,
                      exts=['aif','wav'],
                      module='G0',
                      mix=False,
                      trim=False,
                      norm=False,
                      phase=False,
                      rev=False,
                      fade=256,
                      target={'G0':500000,'S0':200000,'W0':4000,'C0':12000}):
    audio_files = []
    for ext in exts:
        audio_files += glob.glob(in_dir+'/*.'+ext) #load the extensions that we want
        
    data,err,ns = [],[],[]
    for audio_file in audio_files:
        try:
            print('processing %s'%audio_file) #search for aif style file extension
            is_aif = audio_file.rsplit('.')[-1].upper().find('AIF')>-1
            is_wav = audio_file.rsplit('.')[-1].upper().find('WAV')>-1
            if not is_aif and not is_wav: #extension not supported
                ns += [audio_file]
            else:
                if   is_aif: mono,rate = dsp.multi_to_mono(aifcio.read(audio_file),mix)    #convert to mono
                elif is_wav: mono,rate = dsp.multi_to_mono(wavio.read(audio_file),mix)     #convert to mono
            if trim:  mono = dsp.trim(mono)
            if phase: mono = dsp.phase_vocoder(mono,rate,1024,1.0*target[module]/rate)     #timestretching via PV
            resampled = dsp.resample(mono,target,module)                                   #up/down sample
            if norm: resampled = dsp.normalize(resampled)                                  #normalize and clean final result
            if fade > 0: resampled = dsp.fade_out(resampled,fade)                          #exp fade out
            if rev: resampled = dsp.reverse(resampled)                                     #option reverse
            data += [resampled]
            print('---------------------------------------------------')
        except Exception:
            err += [audio_file]
            pass
    if len(err)>0:
        print('Conversion errors with the following supported files:')
        for i in err: print(i)
    if len(ns)>0:
        print('The following files have unsupported file types:')
        for i in ns: print(i)
    return data
Example No. 44
def loadFile(filename):
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    audiodata = wavobj.data

    # None of the following should be necessary for librosa
    if audiodata.dtype != 'float':
        audiodata = audiodata.astype('float') #/ 32768.0
    if np.shape(np.shape(audiodata))[0]>1:
        audiodata = audiodata[:,0]

    # if sampleRate != 16000:
    #     audiodata = librosa.core.audio.resample(audiodata, sampleRate, 16000)
    #     sampleRate=16000

    # pre-process
    sc = SupportClasses.preProcess(audioData=audiodata, sampleRate=sampleRate, species='Kiwi', df=False)
    audiodata,sampleRate = sc.denoise_filter()
    return audiodata,sampleRate
Example No. 45
    def test1(self):
        path = tempfile.mkdtemp()
        filename = os.path.join(path, "test1data.wav")
        wavio.write(filename, data1, 44100, sampwidth=3)
        try:
            f = wave.open(filename, 'r')
            self.assertEqual(f.getnchannels(), 1)
            self.assertEqual(f.getsampwidth(), 3)
            self.assertEqual(f.getframerate(), 44100)
            f.close()

            w = wavio.read(filename)
            self.assertEqual(w.rate, 44100)
            self.assertEqual(w.sampwidth, 3)
            self.assertEqual(w.data.dtype, np.int32)
            self.assertEqual(w.data.shape, (len(data1), 1))
            np.testing.assert_equal(w.data[:, 0], data1)
        finally:
            os.remove(filename)
            os.removedirs(path)
def main(args):
    # setup gui
    app = wx.App()
    fr = wx.Frame(None, title='RADAR Spectrogram')
    fr.SetSize((SIZE,SIZE))
    panel = SpectroPanel(fr)

    # turn on sound card
    p = pyaudio.PyAudio()
    # stream = p.open(format=pyaudio.paInt16,
    #                 channels=1,
    #                 rate=TS,
    #                 output=False,
    #                 input=True,
    #                 frames_per_buffer=CHUNK,
    #                 stream_callback=panel.callback)

    # start stuff
#    stream.start_stream()

    wav = wavio.read(args.filename)
    data = wav.data
    rate = wav.rate
    if data.shape[1] > 1:
        data = data[:,1]
    else:
        data = data.transpose()

    if rate != TS:
        data = sig.decimate(data, int(rate / TS), ftype='fir')

    stopFlag = threading.Event()
    thread = WavThread(stopFlag,data,panel)
    thread.start()

    fr.Show()
    app.MainLoop()

#    stream.stop_stream()
#    stream.close()
    stopFlag.set()
Example No. 47
def extractSegments(wavFile, destination, copyName, species):
    """
    This extracts the sound segments given the annotation and the corresponding wav file. (Isabel's experiment data extraction)
    """
    datFile=wavFile+'.data'
    try:
        wavobj = wavio.read(wavFile)
        sampleRate = wavobj.rate
        data = wavobj.data
        if os.path.isfile(datFile):
            with open(datFile) as f:
                segments = json.load(f)
            cnt = 1
            for seg in segments:
                if seg[0] == -1:
                    continue
                if copyName:    # extract all - extracted sounds are saved with the same name as the corresponding segment in the annotation (e.g. Rawhiti exp.)
                    filename = destination + '\\' + seg[4] + '.wav'
                    s = int(seg[0] * sampleRate)
                    e = int(seg[1] * sampleRate)
                    temp = data[s:e]
                    wavio.write(filename, temp.astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)
                elif not species:   # extract all - extracted sounds are saved with the original file name followed by an index starting 1
                    ind = wavFile.rindex('/')
                    filename = destination + '\\' + str(wavFile[ind + 1:-4]) + '-' + str(cnt) + '.wav'
                    cnt += 1
                    s = int(seg[0] * sampleRate)
                    e = int(seg[1] * sampleRate)
                    temp = data[s:e]
                    wavio.write(filename, temp.astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)
                elif species == seg[4]:   # extract only specific calls - extracted sounds are saved with with the original file name followed by an index starting 1
                    ind = wavFile.rindex('/')
                    ind2 = wavFile.rindex('\\')
                    filename = destination + '\\' + str(wavFile[ind2+1:ind]) + '-' + str(wavFile[ind + 1:-4]) + '-' + str(seg[4]) + '-' + str(cnt) + '.wav'
                    cnt += 1
                    s = int((seg[0]-1) * sampleRate)
                    e = int((seg[1]+1) * sampleRate)
                    temp = data[s:e]
                    wavio.write(filename, temp.astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)
    except:
        print ("unsupported file: ", wavFile)
Example No. 48
def resample(dirName):
    """
    Resample to avoid high frq noise
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if file.endswith('.wav'):
                # go through each segment
                file = root + '/' + file
                wavobj = wavio.read(file)
                audioData = wavobj.data
                if audioData.dtype != 'float':
                    audioData = audioData.astype('float')  # / 32768.0
                if np.shape(np.shape(audioData))[0] > 1:
                    audioData = np.squeeze(audioData[:, 0])
                sampleRate = wavobj.rate
                import librosa
                if sampleRate != 16000:
                    audioData = librosa.core.audio.resample(audioData, sampleRate, 16000)
                    sampleRate = 16000
                    wavio.write(file[:-4] + '_down.wav', audioData.astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)
Example No. 49
def create_dataset(src_path, esc50_dst_path):
    print('* {} -> {}'.format(src_path, esc50_dst_path))

    esc50_dataset = {}

    for fold in range(1, 6):
        esc50_dataset['fold{}'.format(fold)] = {}
        esc50_sounds = []
        esc50_labels = []

        for wav_file in sorted(glob.glob(os.path.join(src_path, '{}-*.wav'.format(fold)))):
            sound = wavio.read(wav_file).data.T[0]
            #start = sound.nonzero()[0].min()
            #end = sound.nonzero()[0].max()
            # sound = sound[start: end + 1]  # Remove silent sections
            label = int(os.path.splitext(wav_file)[0].split('-')[-1])
            esc50_sounds.append(sound)
            esc50_labels.append(label)

        esc50_dataset['fold{}'.format(fold)]['sounds'] = esc50_sounds
        esc50_dataset['fold{}'.format(fold)]['labels'] = esc50_labels

    np.savez(esc50_dst_path, **esc50_dataset)
Example No. 50
    def test_clip(self):
        path = tempfile.mkdtemp()
        filename = os.path.join(path, "testdata.wav")
        data = np.array([-100, 0, 100, 200, 300, 325])

        wavio.write(filename, data, 44100, sampwidth=1, scale='none')
        try:
            f = wave.open(filename, 'r')
            self.assertEqual(f.getnchannels(), 1)
            self.assertEqual(f.getsampwidth(), 1)
            self.assertEqual(f.getframerate(), 44100)
            f.close()

            w = wavio.read(filename)
            self.assertEqual(w.rate, 44100)
            self.assertEqual(w.sampwidth, 1)
            self.assertEqual(w.data.dtype, np.uint8)
            self.assertEqual(w.data.shape, (len(data), 1))
            expected = np.array([0, 0, 100, 200, 255, 255],
                                dtype=np.uint8).reshape(-1, 1)
            np.testing.assert_equal(w.data, expected)
        finally:
            os.remove(filename)
            os.removedirs(path)
Example No. 51
    def test5(self):
        path = tempfile.mkdtemp()
        filename = os.path.join(path, "test5data.wav")
        data = np.zeros(32, dtype=np.int16)
        data[1::4] = 10000
        data[3::4] = -10000

        wavio.write(filename, data, 44100, sampwidth=2, scale='none')
        try:
            f = wave.open(filename, 'r')
            self.assertEqual(f.getnchannels(), 1)
            self.assertEqual(f.getsampwidth(), 2)
            self.assertEqual(f.getframerate(), 44100)
            f.close()

            w = wavio.read(filename)
            self.assertEqual(w.rate, 44100)
            self.assertEqual(w.sampwidth, 2)
            self.assertEqual(w.data.dtype, np.int16)
            self.assertEqual(w.data.shape, (32, 1))
            np.testing.assert_equal(w.data, data.reshape(-1, 1))
        finally:
            os.remove(filename)
            os.removedirs(path)
def readwav( path ):
    Struct = wavio.read( path )
    wav = Struct.data.astype(float) / np.power(2, Struct.sampwidth*8-1)
    fs = Struct.rate
    return wav, fs
Example No. 53
def showEnergies():
    import pylab as pl
    pl.ion()

    #filename = 'Sound Files/tril1_d1.wav'
    filename = 'Sound Files/tril1.wav'
    #filename = 'Sound Files/090811_184501.wav'
    #filename = 'Sound Files/kiwi_1min.wav'
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    data = wavobj.data
    if data.dtype.kind != 'f':
        data = data.astype('float')  # / 32768.0
    if data.ndim > 1:
        data = np.squeeze(data[:, 0])

    if os.path.isfile(filename + '.data'):
        file = open(filename + '.data', 'r')
        segments = json.load(file)
        file.close()
        if len(segments) > 0:
            if segments[0][0] == -1:
                del segments[0]

    data1 = data[int(segments[0][0]*sampleRate):int(segments[0][1]*sampleRate)]
    data2 = data[int(segments[1][0]*sampleRate):int(segments[1][1]*sampleRate)]
    data3 = data[int(segments[2][0]*sampleRate):int(segments[2][1]*sampleRate)]
    data4 = data[int(segments[3][0]*sampleRate):int(segments[3][1]*sampleRate)]
    data5 = data[int(segments[4][0]*sampleRate):int(segments[4][1]*sampleRate)]

    import SignalProc
    sp = SignalProc.SignalProc(data5, sampleRate)
    pl.figure()
    pl.subplot(5, 1, 1)
    sg = sp.spectrogram(data1,sampleRate)
    pl.imshow(10.*np.log10(sg))
    pl.subplot(5, 1, 2)
    sg = sp.spectrogram(data2,sampleRate)
    pl.imshow(10.*np.log10(sg))
    pl.subplot(5, 1, 3)
    sg = sp.spectrogram(data3,sampleRate)
    pl.imshow(10.*np.log10(sg))
    pl.subplot(5, 1, 4)
    sg = sp.spectrogram(data4,sampleRate)
    pl.imshow(10.*np.log10(sg))
    pl.subplot(5, 1, 5)
    sg = sp.spectrogram(data5,sampleRate)
    pl.imshow(10.*np.log10(sg))

    pl.figure()

    e1 = WaveletSegment.computeWaveletEnergy_1s(data1,'dmey2')
    pl.subplot(5,1,1)
    pl.plot(e1)
    e2 = WaveletSegment.computeWaveletEnergy_1s(data2,'dmey2')
    pl.subplot(5,1,2)
    pl.plot(e2)
    e3 = WaveletSegment.computeWaveletEnergy_1s(data3,'dmey2')
    pl.subplot(5,1,3)
    pl.plot(e3)
    e4 = WaveletSegment.computeWaveletEnergy_1s(data4,'dmey2')
    pl.subplot(5,1,4)
    pl.plot(e4)
    e5 = WaveletSegment.computeWaveletEnergy_1s(data5,'dmey2')
    pl.subplot(5,1,5)
    pl.plot(e5)

    pl.figure()
    pl.plot(e1)
    pl.plot(e2)
    pl.plot(e3)
    pl.plot(e4)
    pl.plot(e5)

    #return e2
    pl.show()
Example No. 54
0
    wavelet = pywt.Wavelet(filter_bank=[lowd, highd, lowr, highr])
    wavelet.orthogonal=True
    for t in range(totalTime):
        E = []
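        # Levels 1-5 contribute 2 + 4 + 8 + 16 + 32 = 62 packet energies per second, which matches
        # the (1, 62) feature vectors fed to the classifier below.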
        for level in range(1, 6):
            wp = pywt.WaveletPacket(data=fwData[t * sampleRate:(t + 1) * sampleRate], wavelet=wavelet, maxlevel=level)
            e = np.array([np.sum(n.data ** 2) for n in wp.get_level(level, "natural")])
            if np.sum(e) > 0:
                e = 100.0 * e / np.sum(e)
            E = np.concatenate((E, e), axis=0)
        coefs[:, t] = E
    return coefs


import wavio
wavobj = wavio.read('Sound Files/tril1.wav')
sampleRate = wavobj.rate
data = np.squeeze(wavobj.data)
if data.dtype.kind != 'f':
    data = data.astype('float') #/ 32768.0
if data.ndim > 1:
    data = np.squeeze(data[:,0])

coefs = computeWaveletEnergy(data, sampleRate)

clf = joblib.load('ruruClassifier.pkl')
out=[]
for i in range(int(np.shape(coefs)[1])):
    E = np.ones((1,62)) * coefs[:,i]
    p = clf.predict(E)
    # if p==0.0:
Example No. 55
0
def main():

    files = [
        # "/home/nhilton/development/nsound/src/examples/california.wav",
        # "/home/nhilton/development/nsound/src/examples/mynameis.wav",
        # "/home/nhilton/development/nsound/src/examples/Temperature_in.wav",
        # "/home/nhilton/development/nsound/src/examples/walle.wav",
        # "/home/nhilton/development/nsound/src/examples/example1",
        # "empty.bin",
        "chirp1.wav",
    ]

    for i, f in enumerate(files):

        print('-------------------------------------------------------')
        print('Reading file')
        print('    in: %s'  % f)

        try:
            chunks = wavio.read_chunks(f)
        except wavio.InvalidRiffWave:
            print("    Not a RIFF WAVE!")
            continue

        s = json.dumps(chunks, indent = 4, separators = (', ', ' : '), sort_keys = True)

        for line in s.split('\n'):
            print('    %s' % line)

        x, sr = wavio.read(f)
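        # Note: the wavio module used in this example returns a (data, rate) tuple from read() and
        # exposes read_chunks()/InvalidRiffWave, unlike the wavio package used in the other examples.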

        if x.ndim > 1:
            x = x[:,0]

        plt.figure()
        plt.plot(x, 'b-')
        plt.grid(True)
        plt.xlabel('sample bin')
        plt.ylabel('amplitude')
        plt.title('wav = %s' % f)

        # write the data back out, then re-read and re-plot it to verify

        fout = 'fwd-%02d.wav' % i

        wavio.write(fout, x, sr, dtype = np.float32)

        print('Wrote %s' % fout)

        f = fout

        chunks = wavio.read_chunks(f)

        s = json.dumps(chunks, indent = 4, separators = (', ', ' : '), sort_keys = True)

        for line in s.split('\n'):
            print('    %s' % line)

        x, sr = wavio.read(f)

        if x.ndim > 1:
            x = x[:,0]

        plt.figure()
        plt.plot(x, 'b-')
        plt.grid(True)
        plt.xlabel('sample bin')
        plt.ylabel('amplitude')
        plt.title('wav = %s' % f)


    plt.show()
Example No. 56
0
def deleteClick2(dirName):
    """
    Given the directory of sounds this deletes the annotation segments with rain corrupted.
    Check to make sure the segment to delete has no sign of kiwi - use fundamental frq rather than eRatio in 'deleteClick'
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if file.endswith('.data') and file[:-5] in files:
                # go through each segment
                file = root + '/' + file
                with open(file) as f:
                    segments = json.load(f)
                    newSegments = copy.deepcopy(segments)
                    wavobj = wavio.read(file[:-5])
                    audioData = wavobj.data
                    if audioData.dtype.kind != 'f':
                        audioData = audioData / 32768.0
                    audioData = audioData[:, 0].squeeze()
                    sampleRate = wavobj.rate
                    if sampleRate != 16000:
                        audioData = librosa.core.audio.resample(audioData, sampleRate, 16000)
                        sampleRate = 16000
                    # Find T_ERatio based on first 5 secs as it varies across the recordings
                    post = SupportClasses.postProcess(audioData, sampleRate, [])
                    # T_ERatio = post.eRatioConfd([1, 6, "", ""])
                    print(file)
                    if len(segments)>2:
                        ff = Features.Features(audioData, sampleRate)
                        mfcc = ff.get_mfcc()
                        mean = np.mean(mfcc[1, :])
                        std = np.std(mfcc[1, :])
                        thr = mean - 2 * std  # mfcc1 thr for the file
                    else:
                        thr = 0

                    chg = False
                    for seg in segments:
                        if seg[0] == -1:
                            continue
                        else:
                            # read the sound segment and check for wind
                            secs = seg[1] - seg[0]
                            wavobj = wavio.read(file[:-5], nseconds=secs, offset=seg[0])
                            data = wavobj.data
                            sampleRate = wavobj.rate
                            if data.dtype.kind != 'f':
                                data = data / 32768.0
                            data = data[:, 0].squeeze()

                            # check for clicks
                            ff = Features.Features(data, sampleRate)
                            mfcc = ff.get_mfcc()
                            mfcc1 = mfcc[1, :]  # mfcc1 of the segment

                            if thr == 0:
                                ff = Features.Features(audioData, sampleRate)
                                mfcc = ff.get_mfcc()
                                mean = np.mean(mfcc[1,:])
                                std = np.std(mfcc[1,:])
                                thr = mean - 2 * std    # mfcc1 thr for the file

                            if np.min(mfcc1) < thr:
                                # # now check eRatio
                                # eRatio = post.eRatioConfd(seg)  # eRatio(file[:-5], seg, thr=T_ERatio)
                                # eRatioBefore = post.eRatioConfd([seg[0] - 10, seg[0], "", ""])
                                # eRatioAfter = post.eRatioConfd([seg[1], seg[1] + 10, "", ""])
                                # if eRatio > eRatioBefore*1.05 or eRatio > eRatioAfter*1.05:
                                #     continue

                                # now check f. frq.
                                # down sample to avoid higher frq noise
                                if sampleRate != 16000:
                                    data = librosa.core.audio.resample(data, sampleRate, 16000)
                                    sampleRate = 16000
                                # denoise prior to f frq detection
                                waveletDenoiser = WaveletFunctions.WaveletFunctions(data=data, wavelet=None, maxLevel=12)
                                data = waveletDenoiser.waveletDenoise(data, thresholdType='soft', wavelet='dmey2', maxLevel=12)
                                sp = SignalProc.SignalProc([], 0, 512, 256)
                                sgRaw = sp.spectrogram(data, 512, 256, mean_normalise=True, onesided=True, multitaper=False)
                                segment = Segment.Segment(data, sgRaw, sp, sampleRate, 512, 256)
                                pitch, y, minfreq, W = segment.yin()
                                ind = np.squeeze(np.where(pitch > minfreq))
                                pitch = pitch[ind]
                                ff = np.mean(pitch)
                                if ff>500 and ff<5000:
                                    continue
                                else:
                                    print(seg)
                                    newSegments.remove(seg)
                                    chg = True
                    if chg:
                        file = open(file, 'w')
                        json.dump(newSegments, file)
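A minimal, self-contained sketch of the per-file MFCC threshold used above, with librosa.feature.mfcc standing in for Features.get_mfcc (the file name, segment slice, and MFCC parameters are assumptions):

import numpy as np
import librosa

audio, sr = librosa.load('recording.wav', sr=16000)    # hypothetical recording
mfcc = librosa.feature.mfcc(y=audio, sr=sr)
thr = np.mean(mfcc[1, :]) - 2 * np.std(mfcc[1, :])     # per-file threshold on MFCC coefficient 1
segment_mfcc1 = mfcc[1, :100]                          # coefficient 1 over some candidate segment
has_click = bool(np.min(segment_mfcc1) < thr)          # a dip below the threshold suggests a click
print(has_click)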
Example No. 57
0
def deleteClick(dirName):
    """
    Given the directory of sounds this deletes the annotation segments with wind/rain corrupted files.
    Targeting moderate wind and above. Check to make sure the segment to delete has no sign of kiwi
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if file.endswith('.data') and file[:-5] in files:
                # go through each segment
                file = root + '/' + file
                with open(file) as f:
                    segments = json.load(f)
                    newSegments = copy.deepcopy(segments)
                    wavobj = wavio.read(file[:-5])
                    audioData = wavobj.data
                    if audioData.dtype.kind != 'f':
                        audioData = audioData / 32768.0
                    audioData = audioData[:, 0].squeeze()
                    sampleRate = wavobj.rate
                    # Find T_ERatio based on first 5 secs as it varies across the recordings
                    post = SupportClasses.postProcess(audioData, sampleRate, [])
                    # T_ERatio = post.eRatioConfd([1, 6, "", ""])
                    print(file)
                    chg = False
                    for seg in segments:
                        if seg[0] == -1:
                            continue
                        else:
                            # read the sound segment and check for wind
                            secs = seg[1] - seg[0]
                            wavobj = wavio.read(file[:-5], nseconds=secs, offset=seg[0])
                            data = wavobj.data
                            if data.dtype.kind != 'f':
                                data = data / 32768.0
                            data = data[:, 0].squeeze()

                            # check for clicks
                            ff = Features.Features(data, sampleRate)
                            mfcc = ff.get_mfcc()
                            mfcc1 = mfcc[1, :]  # mfcc1 of the segment

                            ff = Features.Features(audioData, sampleRate)
                            mfcc = ff.get_mfcc()
                            mean = np.mean(mfcc[1,:])
                            std = np.std(mfcc[1,:])
                            thr = mean - 2 * std    # mfcc1 thr for the file

                            if np.min(mfcc1) < thr:
                                # # # now check eRatio
                                # eRatio = post.eRatioConfdV2(seg)
                                # if eRatio > 1.0:
                                #     continue

                                # just check duration>10 sec
                                if secs > 10:
                                    continue
                                else:
                                    print(seg)
                                    newSegments.remove(seg)
                                    chg = True
                    if chg:
                        file = open(file, 'w')
                        json.dump(newSegments, file)
Example No. 58
0
def deleteWindRain(dirName, windTest=True, rainTest=False, Tmean_wind = 1e-8):
    """
    Given the directory of sounds this deletes the annotation segments with wind/rain corrupted files.
    Targeting moderate wind and above. Check to make sure the segment to delete has no sign of kiwi
    Automatic Identification of Rainfall in Acoustic Recordings by Carol Bedoya, Claudia Isaza, Juan M.Daza, and Jose D.Lopez
    """
    # TODO: find thresholds
    Tmean_rain = 1e-8   # Mean threshold
    Tsnr_rain = 3.5     # SNR threshold

    # Tmean_wind = 1e-9   # Mean threshold
    # Tsnr_wind = 0.5     # SNR threshold

    cnt = 0
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if file.endswith('.data') and file[:-5] in files:
                # go through each segment
                file = root + '/' + file
                with open(file) as f:
                    segments = json.load(f)
                    newSegments=copy.deepcopy(segments)
                    wavobj = wavio.read(file[:-5])
                    audioData = wavobj.data
                    # # ***
                    # if audioData.dtype is not 'float':
                    #     audioData = audioData.astype('float')  # / 32768.0
                    # if np.shape(np.shape(audioData))[0] > 1:
                    #     audioData = np.squeeze(audioData[:, 0])
                    # import librosa
                    # if wavobj.rate != 16000:
                    #     audioData = librosa.core.audio.resample(audioData, wavobj.rate, 16000)
                    #     sampleRate = 16000
                    # # ****
                    if audioData.dtype.kind != 'f':
                        audioData = audioData / 32768.0
                    audioData = audioData[:, 0].squeeze()
                    sampleRate = wavobj.rate

                    # Find T_ERatio based on first 5 secs as it varies across the recordings
                    post = SupportClasses.postProcess(audioData, sampleRate, [])
                    # T_ERatio = post.eRatioConfd([1, 6, "", ""])

                    chg = False
                    for seg in segments:
                        if seg[0] == -1:
                            continue
                        else:
                            # read the sound segment and check for wind
                            secs = seg[1]-seg[0]
                            wavobj = wavio.read(file[:-5], nseconds=secs, offset=seg[0])
                            data = wavobj.data
                            # # ***
                            # if data.dtype is not 'float':
                            #     data = data.astype('float')  # / 32768.0
                            # if np.shape(np.shape(data))[0] > 1:
                            #     data = np.squeeze(data[:, 0])
                            # if wavobj.rate != 16000:
                            #     data = librosa.core.audio.resample(data, wavobj.rate, 16000)

                            if data.dtype.kind != 'f':
                                data = data / 32768.0
                            data = data[:,0].squeeze()

                            wind_lower = 2.0 * 100 / sampleRate
                            wind_upper = 2.0 * 250 / sampleRate
                            rain_lower = 2.0 * 600 / sampleRate
                            rain_upper = 2.0 * 1200 / sampleRate

                            f, p = signal.welch(data, fs=sampleRate, window='hamming', nperseg=512, detrend=False)

                            if windTest:
                                limite_inf = int(round(len(p) * wind_lower))  # lower index of the wind band (100 Hz in normalized frequency)
                                limite_sup = int(round(len(p) * wind_upper))  # upper index of the wind band (250 Hz in normalized frequency)
                                a_wind = p[limite_inf:limite_sup]  # section of interest of the power spectral density (step 2 in Algorithm 2.1)

                                mean_a_wind = np.mean(a_wind)  # mean of the PSD in the band of interest (upper part of step 3 in Algorithm 2.1)
                                std_a_wind = np.std(a_wind)  # standard deviation of the PSD in the band of interest (lower part of step 3 in Algorithm 2.1)

                                # c_wind = mean_a_wind / std_a_wind  # signal to noise ratio of the analysed recording. step 3 in Algorithm 2.1

                                if mean_a_wind > Tmean_wind:
                                    # eRatio = post.eRatioConfd(seg) #eRatio(file[:-5], seg, thr=T_ERatio)
                                    # eRatioBefore = post.eRatioConfd([seg[0]-10, seg[0], "", ""])
                                    # if eRatio > eRatioBefore*1.05: # or eRatio > eRatioAfter:  #it was 10 secs Before eratio
                                    # #version2
                                    # eRatio = post.eRatioConfdV2(seg)
                                    # if eRatio > 1.0:
                                    #     potentialCall = True

                                    # # now check f. frq.
                                    # # down sample will helkp to avoid higher frq noise
                                    # if sampleRate != 16000:
                                    #     data = librosa.core.audio.resample(data, sampleRate, 16000)
                                    #     sampleRate = 16000
                                    # # denoise prior to f. frq. detection
                                    # waveletDenoiser = WaveletFunctions.WaveletFunctions(data=data, wavelet=None,
                                    #                                                     maxLevel=12)
                                    # data = waveletDenoiser.waveletDenoise(data, thresholdType='soft', wavelet='dmey2',
                                    #                                       maxLevel=12)
                                    # sp = SignalProc.SignalProc([], 0, 512, 256)
                                    # sgRaw = sp.spectrogram(data, 512, 256, mean_normalise=True, onesided=True,
                                    #                        multitaper=False)
                                    # segment = Segment.Segment(data, sgRaw, sp, sampleRate, 512, 256)
                                    # pitch, y, minfreq, W = segment.yin(minfreq=600)
                                    # ind = np.squeeze(np.where(pitch > minfreq))
                                    # pitch = pitch[ind]
                                    # ff = np.mean(pitch)
                                    # if ff > 500 and ff < 5000:
                                    #     potentialCall = True

                                    # else:
                                    #     potentialCall = False

                                    # just check duration>10 sec
                                    if secs>10:
                                        potentialCall = True
                                    else:
                                        potentialCall = False
                                    if not potentialCall:
                                        print(file, seg, "--> windy")
                                        newSegments.remove(seg)
                                        chg = True
                                else:
                                    print(file, seg, "--> not windy")
                            if rainTest:
                                limite_inf = int(round(len(p) * rain_lower))  # lower index of the rainfall band (600 Hz in normalized frequency)
                                limite_sup = int(round(len(p) * rain_upper))  # upper index of the rainfall band (1200 Hz in normalized frequency)
                                a_rain = p[limite_inf:limite_sup]   # section of interest of the power spectral density (step 2 in Algorithm 2.1)

                                mean_a_rain = np.mean(a_rain)   # mean of the PSD in the band of interest (upper part of step 3 in Algorithm 2.1)
                                std_a_rain = np.std(a_rain)     # standard deviation of the PSD in the band of interest (lower part of step 3 in Algorithm 2.1)

                                c_rain = mean_a_rain / std_a_rain   # signal to noise ratio of the analysed recording. step 3 in Algorithm 2.1

                                if c_rain > Tsnr_rain:
                                    # check if it is not kiwi
                                    eRatio = post.eRatioConfd(seg)  # eRatio(file[:-5], seg, thr=T_ERatio)
                                    eRatioBefore = post.eRatioConfd([seg[0] - 10, seg[0], "", ""])
                                    # eRatioAfter = post.eRatioConfd([seg[1], seg[1] + 5, "", ""])
                                    # T_ERatio = (eRatioBefore + eRatioAfter) / 2
                                    if eRatio > eRatioBefore:  # or eRatio > eRatioAfter:  #it was 10 secs Before eratio
                                        potentialCall = True
                                    else:
                                        potentialCall = False
                                    if not potentialCall:
                                        print(file, seg, "--> rainy")
                                        newSegments.remove(seg)
                                        chg = True
                                else:
                                    # rainy.append(0)
                                    print(file, "--> not rainy")

                    if chg:
                        file = open(file, 'w')
                        json.dump(newSegments, file)
                cnt += 1
                print(file, cnt)
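A minimal sketch of the Welch-PSD wind/rain statistics used above, selecting the band by frequency in Hz rather than by normalized index (the helper name is illustrative, and returning both statistics from one call is an assumption):

import numpy as np
from scipy import signal

def band_stats(data, fs, lo, hi):
    # Returns the mean and the mean/std ratio of the PSD restricted to [lo, hi] Hz.
    f, p = signal.welch(data, fs=fs, window='hamming', nperseg=512, detrend=False)
    band = p[(f >= lo) & (f <= hi)]
    return np.mean(band), np.mean(band) / np.std(band)

# wind test: compare the band mean for 100-250 Hz against Tmean_wind
# rain test: compare the mean/std ratio for 600-1200 Hz against Tsnr_rain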
Example No. 59
0
def annotation2GT(wavFile,species,duration=0):
    """
    This generates the ground truth for a given sound file (currently for kiwi and bittern).
    Given the AviaNZ annotation, returns the ground truth as a txt file
    """
    # wavFile=datFile[:-5]
    datFile=wavFile+'.data'
    eFile = datFile[:-9]+'-sec.txt'
    if duration ==0:
        wavobj = wavio.read(wavFile)
        sampleRate = wavobj.rate
        data = wavobj.data
        duration = int(len(data) / sampleRate)   # number of secs
    GT = np.zeros((duration, 4)).tolist()
    for row in GT:
        row[1] = '0'
        row[2] = ''
        row[3] = ''
    if os.path.isfile(datFile):
        print (datFile)
        with open(datFile) as f:
            segments = json.load(f)
        for seg in segments:
            if seg[0]==-1:
                continue
            # x = re.search(species, str(seg[4]))
            # print x
            if not re.search(species, seg[4]):
                continue
            elif species == 'Kiwi' or species == 'Gsk':
                # check M/F
                if '(M)' in str(seg[4]):        # if re.search('(M)', seg[4]):
                    type = 'M'
                elif '(F)' in str(seg[4]):      #if re.search('(F)', seg[4]):
                    type='F'
                elif '(D)' in str(seg[4]):
                    type='D'
                else:
                    type='K'
            elif species=='Bittern':
                # check boom/inhalation
                if '(B)' in str(seg[4]):
                    type = 'B'
                elif '(I)' in str(seg[4]):
                    type='I'
                else:
                    type=''

            # check quality
            if re.search('1', seg[4]):
                quality = '*****'   # v close
            elif re.search('2', seg[4]):
                quality = '****'    # close
            elif re.search('3', seg[4]):
                quality = '***' # fade
            elif re.search('4', seg[4]):
                quality = '**'  # v fade
            elif re.search('5', seg[4]):
                quality = '*'   # v v fade
            else:
                quality = ''

            s=int(math.floor(seg[0]))
            e=int(math.ceil(seg[1]))
            for i in range(s,e):
                GT[i][1] = str(1)
                GT[i][2] = type
                GT[i][3] = quality
    for line in GT:
        if line[1]==0.0:
            line[1]='0'
        if line[2]==0.0:
            line[2]=''
        if line[3]==0.0:
            line[3]=''
    # now save GT as a .txt file
    for i in range(1, duration + 1):
        GT[i-1][0]=str(i)   # add time as the first column to make GT readable
    out = open(eFile, "w")
    for line in GT:
        out.write("\t".join(line) + "\n")
    out.close()
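For reference, the ground-truth file written above has one tab-separated line per second: time index, presence flag, call type (e.g. M/F/D/K for kiwi, B/I for bittern) and quality stars. An illustrative fragment (values invented for the example):

1	0		
2	1	M	****
3	1	M	****
4	0		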