def calc_potential_max(stim_folder, noise_filepath, out_dir):
    max_wav_samp = 0
    max_wav_rms = 0
    wavs = globDir(stim_folder, '*.wav')
    n_files = len(wavs)
    for ind, wav in enumerate(wavs):
        x, fs, enc = sndio.read(wav)
        max_wav_samp = np.max([max_wav_samp, np.max(np.abs(x))])
        # Use the ITU-T P.56 active speech level rather than the plain RMS
        # alternative below
        # max_wav_rms = np.max([max_wav_rms, np.sqrt(np.mean(x**2))])
        level = asl_P56(x, fs, 16.)[0]
        max_wav_rms = np.max([max_wav_rms, level])
        print(
            f"Calculated level of {Path(wav).name} ({ind+1}/{n_files}): {level}"
        )
    x, fs, enc = sndio.read(noise_filepath)
    # noise_rms = np.sqrt(np.mean(x**2))
    print(f"Calculating level of {Path(noise_filepath).name}")
    noise_rms, _, _ = asl_P56(x, fs, 16.)
    print(f"Calculated level of {Path(noise_filepath).name}: {noise_rms}")
    max_noise_samp = np.max(np.abs(x))
    snr = -15.0
    snr_fs = 10**(-snr / 20)
    # Scale the noise peak to what it becomes after RMS-matching to the speech
    max_noise_samp *= max_wav_rms / noise_rms
    max_sampl = max_wav_samp + (max_noise_samp * snr_fs)
    reduction_coef = 1.0 / max_sampl
    np.save(os.path.join(out_dir, "reduction_coef.npy"), reduction_coef)
def gen_rms_peak(files, OutRMSDir, OutPeakDir):
    rmsFiles = []
    peakFiles = []
    for file in files:
        head, tail = os.path.split(file)
        tail = os.path.splitext(tail)[0]
        tail = tail + "_rms.npy"
        dir_must_exist(OutRMSDir)
        rmsFilepath = os.path.join(OutRMSDir, tail)
        print("Generating: " + rmsFilepath)
        y, fs, _ = sndio.read(file)
        # RMS envelope computed over a 20 ms sliding window
        y_rms = window_rms(y, round(0.02 * fs))
        np.save(rmsFilepath, y_rms)
        rmsFiles.append(rmsFilepath)
        head, tail = os.path.split(file)
        tail = os.path.splitext(tail)[0]
        tail = tail + "_peak.npy"
        dir_must_exist(OutPeakDir)
        peakFilepath = os.path.join(OutPeakDir, tail)
        print("Generating: " + peakFilepath)
        peak = np.abs(y).max()
        np.save(peakFilepath, peak)
        peakFiles.append(peakFilepath)
    return rmsFiles, peakFiles
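# window_rms is used throughout this section but not defined in it. A minimal
# sketch, assuming the usual sliding-window RMS computed by convolving the
# squared signal with a normalised rectangular window:
import numpy as np

def window_rms(x, window_size):
    # Mean of x**2 over each window, then square root ('valid' mode, so the
    # result is len(x) - window_size + 1 samples long)
    window = np.ones(int(window_size)) / float(window_size)
    return np.sqrt(np.convolve(np.power(x, 2), window, 'valid'))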
def calc_spectrum(files, silences, fs=44100, plot=False):
    window = 4096
    sentenceLen = []
    sentenceFFT = []
    print("Calculating LTASS...")
    for ind, sentenceList in enumerate(files):
        for ind2, file in enumerate(sentenceList):
            x, fs, _ = sndio.read(file)
            f, t, Zxx = sgnl.stft(x, window=np.ones(window), nperseg=window,
                                  noverlap=0)
            sil = silences[ind * 10 + ind2]
            sTemp = np.zeros((sil.shape[0], t.size), dtype=bool)
            for ind3, s in enumerate(sil):
                sTemp[ind3, :] = np.logical_and(t > s[0], t < s[1])
            # Discard STFT frames that fall within any marked silence
            invalidFFT = np.any(sTemp, axis=0)
            sentenceFFT.append(np.abs(Zxx[:, ~invalidFFT]))
            sentenceLen.append(x.size)
    # Weight each sentence's spectrum by its relative duration
    sentenceLen = np.array([sentenceLen]).T
    sentenceLen = sentenceLen / sentenceLen.max()
    sentenceFFT = [x * sentenceLen[i] for i, x in enumerate(sentenceFFT)]
    sentenceFFT = np.concatenate([x.T for x in sentenceFFT])
    grandAvgFFT = np.mean(sentenceFFT, axis=0)
    grandAvgFFT = grandAvgFFT / grandAvgFFT.max()
    print("Fitting filter to LTASS...")
    b = sgnl.firls(2049, np.linspace(0, 1, 2049)[1:], grandAvgFFT[1:])
    if plot:
        plt.semilogy(np.abs(sgnl.freqz(b)[1]))
        plt.plot(np.linspace(0, 512, 2049), grandAvgFFT)
        plt.show()
    return b
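# scipy.signal.firls reads its frequency and gain arguments as band-edge
# pairs: the 2048 frequencies passed above form 1024 bands, with the desired
# gain given at each edge. A toy version of the same call, on an assumed
# 8-point target rather than the LTASS:
import numpy as np
from scipy import signal as sgnl

edges = np.linspace(0, 1, 9)[1:]    # 8 band edges (fs=2 convention) -> 4 bands
target = np.linspace(1.0, 0.1, 8)   # desired gain at each band edge
b = sgnl.firls(17, edges, target)   # numtaps must be odd for firls
w, h = sgnl.freqz(b)                # inspect the fitted response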
def calc_potential_max(wavs, noise_filepath, out_dir, out_name):
    max_wav_samp = 0
    max_wav_rms = 0
    for wav in wavs:
        x, fs, enc = sndio.read(wav)
        max_wav_samp = np.max([max_wav_samp, np.max(np.abs(x))])
        max_wav_rms = np.max([max_wav_rms, np.sqrt(np.mean(x**2))])
    x, fs, enc = sndio.read(noise_filepath)
    noise_rms = np.sqrt(np.mean(x**2))
    max_noise_samp = np.max(np.abs(x))
    snr = -5.
    snr_fs = 10**(-snr / 20)
    max_noise_samp *= max_wav_rms / noise_rms
    max_sampl = max_wav_samp + (max_noise_samp * snr_fs)
    reduction_coef = 1.0 / max_sampl
    np.save(os.path.join(out_dir, "{}.npy".format(out_name)), reduction_coef)
    return reduction_coef
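# The headroom arithmetic above in isolation: snr_fs is the linear gain
# applied to the RMS-matched noise, so at -5 dB SNR the noise is boosted by
# 10**(5/20) ~= 1.778. Scaling everything by 1 / max_sampl then keeps even the
# worst-case alignment of speech and noise peaks within [-1, 1]. The peak
# values below are assumed example numbers:
snr = -5.0
snr_fs = 10 ** (-snr / 20)                # ~= 1.778
max_wav_samp, max_noise_samp = 0.9, 0.5   # assumed example peaks
max_sampl = max_wav_samp + max_noise_samp * snr_fs
reduction_coef = 1.0 / max_sampl          # ~= 0.559 for these values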
def main():
    wavs = globDir('./', 'stim.wav')
    for wav in wavs:
        x, fs, enc, fmt = sndio.read(wav, return_format=True)
        # Keep only the first two channels; the original file is renamed with
        # an _old suffix before the trimmed version is written in its place
        y = x[:, :2]
        root, ext = os.path.splitext(wav)
        out_filepath = "{0}_old{1}".format(root, ext)
        os.rename(wav, out_filepath)
        sndio.write(wav, y, rate=fs, format=fmt, enc=enc)
def level_calc(args):
    # Worker for parallel level calculation; relies on a module-level n_files
    # for the progress message
    ind, wavfile = args
    x, fs, _ = sndio.read(wavfile)
    # level = asl_P56(x, fs, 16.)[0]
    level = rms_no_silences(x, fs, -30.)
    print(
        f"Calculated level of {Path(wavfile).name} ({ind+1}/{n_files}): {level}"
    )
    return level
def gen_rms(file, OutDir):
    head, tail = os.path.split(file)
    tail = os.path.splitext(tail)[0]
    tail = tail + "_env.npy"
    dir_must_exist(OutDir)
    rmsFilepath = os.path.join(OutDir, tail)
    print("Generating: " + rmsFilepath)
    y, fs, _ = sndio.read(file)
    # Use the first channel only
    y = y[:, 0]
    y_rms = window_rms(y, round(0.02 * fs))
    np.save(rmsFilepath, y_rms)
    return rmsFilepath
def main():
    wavs = globDir("./stimulus", "*.wav")
    for wav in wavs:
        x, fs, enc, fmt = sndio.read(wav, return_format=True)
        idx = np.arange(x.shape[0])
        # Duplicate the mono audio to both channels and add a third channel
        # carrying the trigger signal
        y = np.vstack([x, x, np.zeros(x.shape[0])]).T
        trigger = gen_trigger(idx, 2., 0.01, fs)
        y[:, 2] = trigger
        wav_out = os.path.splitext(wav)[0] + "_trig.wav"
        sndio.write(wav_out, y, rate=fs, format=fmt, enc=enc)
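# gen_trigger is called throughout these scripts but never defined in this
# section. A hypothetical sketch consistent with its call sites
# (gen_trigger(idx, 2., 0.01, fs)): a pulse train at the given frequency with
# pulses of the given duration in seconds, evaluated at absolute sample
# indices so that chunks written sequentially stay phase-aligned.
import numpy as np

def gen_trigger(idx, freq, length, fs):
    period = fs / freq  # samples per trigger period
    return (np.mod(idx, period) < length * fs).astype(float)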
def loadAudio(filename):
    """
    loadAudio: loads audio data from file using pysndfile

    Note that, by default, pysndfile converts the samples into floating point
    numbers and rescales them in the range [-1, 1]. This is avoided by
    specifying the option dtype=np.int16, which keeps both the original data
    type and range of values.
    """
    sndobj = sndio.read(filename, dtype=np.int16)
    samplingrate = sndobj[1]
    samples = np.array(sndobj[0])
    return samples, samplingrate
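# A quick illustration of the scaling behaviour described in the docstring
# above ('utterance.wav' is an assumed example path): by default pysndfile
# returns floats in [-1, 1]; dtype=np.int16 preserves the raw sample values.
import numpy as np
from pysndfile import sndio

floats = sndio.read('utterance.wav')[0]                 # scaled to [-1.0, 1.0]
ints = sndio.read('utterance.wav', dtype=np.int16)[0]   # raw 16-bit values
# Rescaling the floats by the int16 full-scale value recovers the raw samples
# (up to rounding):
assert np.allclose(floats * np.iinfo(np.int16).max, ints, atol=1)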
def load_sphere(filepath):
    """
    Loads the utterance samples from a file.

    Source: lab3 of DT2119 Speech and Speaker Recognition at KTH, by
    prof. Giampiero Salvi (slightly modified)

    :param filepath: path to the utterance file (.wav)
    :return: (samples, sample rate), where samples is a numpy array of shape
        (n_samples,)
    """
    snd_obj = sndio.read(filepath, dtype=np.int16)
    samples = np.array(snd_obj[0])
    sample_rate = snd_obj[1]
    return samples, sample_rate
def load_audio(file_path):
    """Loads audio data from wav file using pysndfile.

    Args:
        file_path: Path to a wav file.

    Returns:
        A tuple containing the samples and the sampling rate of the wav file,
        in this order.
    """
    data = sndio.read(file_path)
    sampling_rate = data[1]
    samples = np.array(data[0], dtype=np.float32)
    return samples, sampling_rate
def gen_rms(files, OutDir):
    rmsFiles = []
    OutPeakDir = './stimulus/peak'
    for sentenceList in files:
        for file in sentenceList:
            head, tail = os.path.split(file)
            tail = os.path.splitext(tail)[0]
            tail_rms = tail + "_rms.npy"
            dir_must_exist(OutDir)
            rmsFilepath = os.path.join(OutDir, tail_rms)
            print("Generating: " + rmsFilepath)
            y, fs, _ = sndio.read(file)
            y_rms = calc_rms(y, round(0.02 * fs))
            np.save(rmsFilepath, y_rms)
            rmsFiles.append(rmsFilepath)
            # Reuse the samples already read to compute the peak
            tail_peak = tail + "_peak.npy"
            dir_must_exist(OutPeakDir)
            peakFilepath = os.path.join(OutPeakDir, tail_peak)
            print("Generating: " + peakFilepath)
            peak = np.abs(y).max()
            np.save(peakFilepath, peak)
    return rmsFiles
def main():
    # Generate a calibrated 5 minute 1 kHz tone
    fs = 44100
    f = 1000.0
    n = np.arange(fs * 60 * 5)
    y = np.sin(2 * np.pi * f * n / fs)
    coef = np.load('./out/calibration_coefficients/click_cal_coef.npy')
    y *= coef
    dir_must_exist('./out/calibrated_stim/')
    sndio.write("./out/calibrated_stim/1k_tone.wav", y, fs, format='wav',
                enc='pcm16')
    # Apply the pre-computed calibration coefficients to each stimulus
    coef = np.load('./out/calibration_coefficients/da_cal_coef.npy')
    y, fs, enc = sndio.read('./out/stimulus/da_cal_stim.wav')
    sndio.write('./out/calibrated_stim/da_cal_stim.wav', y * coef, fs,
                format='wav', enc='pcm16')
    coef = np.load('./out/calibration_coefficients/mat_cal_coef.npy')
    y, fs, enc = sndio.read('./out/stimulus/mat_cal_stim.wav')
    sndio.write('./out/calibrated_stim/mat_cal_stim.wav', y * coef, fs,
                format='wav', enc='pcm16')
    coef = np.load('./out/calibration_coefficients/story_cal_coef.npy')
    y, fs, enc = sndio.read('./out/stimulus/story_cal_stim.wav')
    sndio.write('./out/calibrated_stim/story_cal_stim.wav', y * coef, fs,
                format='wav', enc='pcm16')
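# The three read/scale/write blocks above follow one pattern; an equivalent
# loop over the stimulus names (same paths as above):
from pysndfile import sndio
import numpy as np

for name in ('da', 'mat', 'story'):
    coef = np.load(f'./out/calibration_coefficients/{name}_cal_coef.npy')
    y, fs, enc = sndio.read(f'./out/stimulus/{name}_cal_stim.wav')
    sndio.write(f'./out/calibrated_stim/{name}_cal_stim.wav', y * coef, fs,
                format='wav', enc='pcm16')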
def playStimulus(self, wav):
    '''
    Output audio stimulus from numpy array
    '''
    self.newResp = False
    self.socketio.emit("stim_playing", namespace="/main")
    x, fs, _ = sndio.read(wav)
    if self.participant.parameters['hl_sim_active']:
        y = apply_hearing_loss_sim(x, fs)
    else:
        y = x
    # Play audio
    if not self.dev_mode:
        sd.play(y, fs, blocking=True)
    else:
        self.play_wav('./da_stim/DA_170.wav', '')
    self.socketio.emit("stim_done", namespace="/main")
def loadStimulus(self):
    # Get folder path of all lists in the list directory
    lists = next(os.walk(self.listDir))[1]
    lists.pop(lists.index("demo"))
    # Don't reload any lists that have already been loaded
    pop = [lists.index(x) for x in self.loadedLists]
    for i in sorted(pop, reverse=True):
        del lists[i]
    # Randomly select n lists
    inds = self.inds
    # random.shuffle(inds)
    # Pick first n shuffled lists
    for ind in inds:
        # Get filepaths to the audio files and word csv file for the current
        # list
        listAudiofiles = globDir(os.path.join(self.listDir, lists[ind]),
                                 "*.wav")
        listCSV = globDir(os.path.join(self.listDir, lists[ind]), "*.csv")
        levels = globDir(os.path.join(self.listDir, lists[ind]), "*.mat")
        with open(listCSV[0]) as csv_file:
            csv_reader = csv.reader(csv_file)
            # Allocate empty lists to store audio samples, RMS and words of
            # each list sentence
            self.lists.append([])
            self.listsRMS.append([])
            self.listsString.append([])
            # Get data for each sentence
            for fp, words, level_file in zip(listAudiofiles, csv_reader,
                                             levels):
                # Read in audio file and calculate its RMS
                x, self.fs, _ = sndio.read(fp)
                logger.info(f"Calculating level for {Path(fp).name}")
                # x_rms, _, _ = asl_P56(x, self.fs, 16.)
                x_rms = rms_no_silences(x, self.fs, -30.)
                self.lists[-1].append(x)
                self.listsRMS[-1].append(x_rms)
                self.listsString[-1].append(words)
    # Number of trials to split between adaptive tracks
    n = len(self.lists[0]) * len(inds)
    # Number of active adaptive tracks
    tn = len(self.adaptiveTracks)
    self.trackOrder = list(np.repeat(np.arange(tn), np.floor(n / tn)))
    random.shuffle(self.trackOrder)
    # Shuffle order of sentence presentation
    self.availableSentenceInds = list(range(len(self.lists[0])))
    random.shuffle(self.availableSentenceInds)
def loadAudio(filename):
    """
    loadAudio: loads audio data from file using pysndfile

    Note that, by default, pysndfile converts the samples into floating point
    numbers and rescales them in the range [-1, 1]. This can be avoided by
    specifying the dtype argument in sndio.read(). However, when I imported
    the data in lab 1 and 2, computed features and trained the HMM models, I
    used the default behaviour in sndio.read() and rescaled the samples in
    the int16 range instead. In order to compute features that are compatible
    with the models, we have to follow the same procedure again. This will be
    simplified in future years.
    """
    sndobj = sndio.read(filename)
    samplingrate = sndobj[1]
    samples = np.array(sndobj[0]) * np.iinfo(np.int16).max
    return samples, samplingrate
def calc_speech_rms(files, silences, rmsDir, fs=44100, plot=False):
    '''
    Calculate the overall RMS across all files, excluding marked silences.
    '''
    sumsqrd = 0.0
    n = 0
    for wavfile, sil in zip(files, silences):
        y, fs, _ = sndio.read(wavfile)
        t = np.arange(y.size)
        # Build a boolean mask of all samples falling within a silence
        silentSamples = np.zeros(t.size, dtype=bool)
        for s in sil:
            silentSamples = np.logical_or(
                silentSamples, np.logical_and(t > s[0], t < s[1]))
        y_temp = y[~silentSamples]
        # Accumulate sum of squares and sample count so the final RMS is
        # pooled over all non-silent samples of all files
        sumsqrd += np.sum(y_temp**2)
        n += y_temp.size
    rms = np.sqrt(sumsqrd / n)
    np.save(os.path.join(rmsDir, 'overall_da_rms.npy'), rms)
    return rms
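# Sanity check for the pooled-RMS accumulation above: summing squares and
# sample counts across signals is equivalent to the RMS of their
# concatenation (toy data assumed):
import numpy as np

a = np.random.randn(1000)
b = np.random.randn(2000)
pooled = np.sqrt((np.sum(a**2) + np.sum(b**2)) / (a.size + b.size))
assert np.isclose(pooled, np.sqrt(np.mean(np.concatenate([a, b])**2)))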
def flattenRMS(AudioFile, AnnotationFile):
    with open(AnnotationFile, 'r') as f:
        csvData = pd.read_csv(f)
    data, fs, encStr, fmtStr = sndio.read(AudioFile, return_format=True)
    # Convert start/stop times from seconds to integer sample indices
    csvData['start'] *= fs
    csvData['start'] = csvData['start'].astype(int)
    csvData['stop'] *= fs
    csvData['stop'] = csvData['stop'].astype(int)
    zerox = np.where(np.diff(np.sign(data)))[0]
    # Get silent sections
    silences = csvData.loc[csvData['name'] == '#']
    audio = csvData.loc[csvData['name'] != '#']
    # Find nearest zero-crossing to start and stop times of silences
    nearestZerox = zerox[np.abs(
        zerox - csvData['start'].values[:, np.newaxis]).argmin(axis=1)]
    csvData['start'] = nearestZerox
    nearestZerox = zerox[np.abs(
        zerox - csvData['stop'].values[:, np.newaxis]).argmin(axis=1)]
    csvData['stop'] = nearestZerox
    csvData['rms'] = np.nan
    for ind, chunk in csvData.iterrows():
        if not chunk['name'] == '#':
            rms = np.sqrt(np.mean(data[chunk['start']:chunk['stop']]**2))
            csvData.iloc[ind, csvData.columns.get_loc('rms')] = rms
    avgRMS = csvData['rms'][csvData['rms'].notnull()].mean()
    silentData = np.zeros(int(0.3 * fs))
    out = np.array([])
    for ind, chunk in csvData.iterrows():
        if chunk['name'] == '#':
            out = np.append(out, silentData)
        else:
            rmsCorFactor = avgRMS / chunk['rms']
            out = np.append(out,
                            data[chunk['start']:chunk['stop']])  # *rmsCorFactor
            print(np.sqrt(np.mean(
                (data[chunk['start']:chunk['stop']] * rmsCorFactor)**2)))
    sndio.write('./out.wav', out, rate=fs, format=fmtStr, enc=encStr)
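# The zero-crossing snap above relies on broadcasting: subtracting each
# boundary from every zero-crossing index and taking the argmin per row picks
# the nearest crossing. In isolation, with assumed toy indices:
import numpy as np

zerox = np.array([3, 10, 25, 40])
starts = np.array([4, 27])
nearest = zerox[np.abs(zerox - starts[:, np.newaxis]).argmin(axis=1)]
# nearest -> array([ 3, 25])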
def gen_da_stim(n, outpath):
    da_file = './BioMAP_da-40ms.wav'
    da_stim, fs, enc, fmt = sndio.read(da_file, return_format=True)
    prestim_size = 0.0158
    # Repetition rate in Hz
    repetition_rate = 10.9
    full_stim_size = 1. / repetition_rate
    da_size = da_stim.size / fs
    prestim = np.zeros(int(fs * prestim_size))
    poststim = np.zeros(int(fs * ((full_stim_size - prestim_size) - da_size)))
    y_part = np.concatenate([prestim, da_stim, poststim])
    # Alternate the stimulus with its polarity-inverted copy
    y_part_inv = -y_part
    loc_part = np.zeros(y_part.size)
    loc_part[prestim.size + 1] = 1
    y_2part = np.concatenate([y_part, y_part_inv])
    loc = np.concatenate([loc_part, loc_part])
    y_r = np.tile(y_2part, n)
    loc = np.tile(loc, n)
    # Prepend a second of silence
    loc = np.insert(loc, 0, np.zeros(fs))
    loc = np.where(loc)[0]
    y_r = np.insert(y_r, 0, np.zeros(fs))
    y_r = resampy.resample(y_r, fs, 44100)
    rat = 44100 / fs
    fs = 44100
    y_l = np.zeros(y_r.size)
    # Rescale stimulus onset locations to the new sample rate
    loc = loc * rat
    loc = loc.round().astype(int)
    np.save('./stimulus/3000_da_locs.npy', loc)
    idx = np.arange(y_l.size)
    trigger = gen_trigger(idx, 2., 0.01, fs)
    y = np.vstack((y_l, y_r, trigger)).T
    sndio.write(outpath, y, rate=44100, format=fmt, enc=enc)
    return outpath
def getMUSDB_augmented(database_path):
    subsets = list()
    rate = None
    for subset in ['train', 'test']:
        samples = list()
        for root, _, files in os.walk(os.path.join(database_path, subset)):
            if "voice.wav" in files:
                bass_audio = drums_audio = other_audio = vocal_audio = \
                    mix_audio = None
                for file in files:
                    if file == "bass.wav":
                        bass_path = os.path.join(root, file)
                        bass_audio, sr, _ = sndio.read(bass_path)
                    elif file == "drums.wav":
                        drums_path = os.path.join(root, file)
                        drums_audio, sr, _ = sndio.read(drums_path)
                    elif file == "rest.wav":
                        other_path = os.path.join(root, file)
                        other_audio, sr, _ = sndio.read(other_path)
                    elif file == "mix.wav":
                        mix_path = os.path.join(root, file)
                        mix_audio, sr, _ = sndio.read(mix_path)
                    elif file == "voice.wav":
                        vocal_path = os.path.join(root, file)
                        vocal_audio, sr, _ = sndio.read(vocal_path)
                    if rate is None:
                        rate = sr
                    elif rate != sr:
                        raise RuntimeError(
                            "getMUSDB_augmented::error::inconsistent sample "
                            "rate in {} - {} != {}".format(root, rate, sr))
                # Add other instruments to form accompaniment.
                # local_path is assumed to be defined at module level as the
                # output directory for generated accompaniment/mix files.
                acc_audio = drums_audio + bass_audio + other_audio
                acc_path = os.path.join(local_path, os.path.basename(root),
                                        "accompaniment.wav")
                acc = write_wav_skip_existing(acc_path, acc_audio, rate)
                # Create mixture
                if mix_audio is None:
                    mix_path = os.path.join(local_path,
                                            os.path.basename(root), "mix.wav")
                    mix_audio = acc_audio + vocal_audio
                    mix = write_wav_skip_existing(mix_path, mix_audio, rate)
                else:
                    mix = Sample.from_array(mix_path, mix_audio, rate)
                # Wrap each source as a Sample, mirroring the mix handling
                bass = Sample.from_array(bass_path, bass_audio, rate)
                drums = Sample.from_array(drums_path, drums_audio, rate)
                other = Sample.from_array(other_path, other_audio, rate)
                vocal = Sample.from_array(vocal_path, vocal_audio, rate)
                # Check if acc + vocals = mix
                diff_signal = np.abs(mix_audio - bass_audio - drums_audio -
                                     other_audio - vocal_audio)
                print("Maximum absolute deviation from source additivity "
                      "constraint: " + str(np.max(diff_signal)))
                print("Mean absolute deviation from source additivity "
                      "constraint: " + str(np.mean(diff_signal)))
                # Collect all sources for now. Later on for
                # SVS: [mix, acc, vocal]
                # Multi-instrument: [mix, bass, drums, other, vocals]
                samples.append((mix, acc, bass, drums, other, vocal))
        subsets.append(samples)
    return subsets
def loadStimulus(self):
    '''
    '''
    self.participant.load('mat_test')
    try:
        srt_50 = self.participant.data['mat_test']['srt_50']
        s_50 = self.participant.data['mat_test']['s_50']
    except KeyError:
        raise KeyError("Behavioural matrix test results not available, make "
                       "sure the behavioural test has been run before "
                       "running this test.")
    save_dir = self.participant.data_paths['eeg_test/stimulus']
    '''
    # Estimate speech intelligibility thresholds using predicted
    # psychometric function
    s_50 *= 0.01
    x = logit(self.si * 0.01)
    snrs = (x/(4*s_50))+srt_50
    snrs = np.append(snrs, np.inf)
    snr_map = pd.DataFrame({"speech_intel": np.append(self.si, 0.0),
                            "snr": snrs})
    snr_map_path = os.path.join(save_dir, "snr_map.csv")
    snr_map.to_csv(snr_map_path)
    snrs = np.repeat(snrs[np.newaxis], 4, axis=0)
    snrs = roll_independant(snrs, np.array([0,-1,-2,-3]))
    stim_dirs = [x for x in os.listdir(self.listDir)
                 if os.path.isdir(os.path.join(self.listDir, x))]
    shuffle(stim_dirs)
    '''
    snrs = self.participant.data['parameters']['decoder_test_SNRs'] + srt_50
    stim_dirs = [
        x for x in os.listdir(self.listDir)
        if os.path.isdir(os.path.join(self.listDir, x))
    ]
    ordered_stim_dirs = []
    for ind in self.participant.data['parameters']['decoder_test_lists']:
        for folder in stim_dirs:
            if re.match(f'Stim_({int(ind)})', folder):
                ordered_stim_dirs.append(folder)
    # ordered_stim_dirs *= int(len(snrs))
    noise_file = PySndfile(self.noise_path, 'r')
    wav_files = []
    wav_metas = []
    question = []
    marker_files = []
    self.socketio.emit('test_stim_load', namespace='/main')
    for ind, dir_name in enumerate(ordered_stim_dirs[:snrs.shape[1]]):
        logger.debug(f"Processing list directory {ind+1} of {snrs.shape[1]}")
        stim_dir = os.path.join(self.listDir, dir_name)
        wav = globDir(stim_dir, "*.wav")[0]
        csv_files = natsorted(globDir(stim_dir, "*.csv"))
        marker_file = csv_files[0]
        question_files = csv_files[1:]
        # rms_file = globDir(stim_dir, "*.npy")[0]
        # speech_rms = float(np.load(rms_file))
        snr = snrs[:, ind]
        audio, fs, enc, fmt = sndio.read(wav, return_format=True)
        speech = audio[:, :2]
        triggers = audio[:, 2]
        # speech_rms, _, _ = asl_P56(speech, fs, 16.)
        speech_rms = rms_no_silences(speech, fs, -30.)
        wf = []
        wm = []
        for ind2, s in enumerate(snr):
            # Extract a random section of the noise file of matching length
            start = randint(0, noise_file.frames() - speech.shape[0])
            noise_file.seek(start)
            noise = noise_file.read_frames(speech.shape[0])
            noise_rms = np.sqrt(np.mean(noise**2))
            # noise_rms = asl_P56(noise, fs, 16)
            snr_fs = 10**(-s / 20)
            if snr_fs == np.inf:
                snr_fs = 0.
            elif snr_fs == -np.inf:
                raise ValueError(
                    "Noise infinitely louder than signal at snr: {}".format(s))
            noise = noise * (speech_rms / noise_rms)
            out_wav_path = os.path.join(save_dir,
                                        "Stim_{0}_{1}.wav".format(ind, ind2))
            out_meta_path = os.path.join(save_dir,
                                         "Stim_{0}_{1}.npy".format(ind, ind2))
            with np.errstate(divide='raise'):
                try:
                    out_wav = (speech + (np.stack([noise, noise], axis=1) *
                                         snr_fs)) * self.reduction_coef
                except FloatingPointError:
                    set_trace()
            out_wav = np.concatenate([out_wav, triggers[:, np.newaxis]],
                                     axis=1)
            sndio.write(out_wav_path, out_wav, fs, fmt, enc)
            np.save(out_meta_path, s)
            wf.append(out_wav_path)
            wm.append(out_meta_path)
        wav_metas.append(wm)
        wav_files.append(wf)
        out_marker_path = os.path.join(save_dir, "Marker_{0}.csv".format(ind))
        marker_files.append(out_marker_path)
        copyfile(marker_file, out_marker_path)
        for ind2, q_file in enumerate(question_files):
            out_q_path = os.path.join(
                save_dir, "Questions_{0}_{1}.csv".format(ind, ind2))
            self.question_files.append(out_q_path)
            copyfile(q_file, out_q_path)
        for q_file_path in question_files:
            q = []
            with open(q_file_path, 'r') as q_file:
                q_reader = csv.reader(q_file)
                for line in q_reader:
                    q.append(line)
            question.append(q)
    self.wav_files = [item for sublist in wav_files for item in sublist]
    self.wav_metas = [item for sublist in wav_metas for item in sublist]
    self.question.extend(question)
    for item in marker_files:
        self.marker_files.extend([item] * 4)
    self.answers = np.empty(np.shape(self.question)[:2])
    self.answers[:] = np.nan
def voss(nrows, ncols=16):
    # Function head and array setup reconstructed from Allen Downey's Think
    # DSP voss() (the standard form of this Voss-McCartney pink-noise
    # generator); the original snippet begins mid-function at "the total
    # number of changes" below, so the first four lines are an assumption.
    array = np.empty((nrows, ncols))
    array.fill(np.nan)
    array[0, :] = np.random.random(ncols)
    array[:, 0] = np.random.random(nrows)
    # the total number of changes is nrows
    n = nrows
    cols = np.random.geometric(0.5, n)
    cols[cols >= ncols] = 0
    rows = np.random.randint(nrows, size=n)
    array[rows, cols] = np.random.random(n)
    df = pd.DataFrame(array)
    df.fillna(method='ffill', axis=0, inplace=True)
    total = df.sum(axis=1)
    return total.values


da_x, da_fs, da_enc = sndio.read('./stimulus/wav/10min_da.wav')
sp_x, sp_fs, sp_enc = sndio.read('./noise_source/male_speech_resamp.wav')
# Generate pink noise matching the length of the speech noise source
pink_n = voss(sp_x.size, 1)
# Match the RMS of the da stimulus and pink noise to the speech noise
da_rms = np.sqrt(np.mean(da_x**2))
sp_rms = np.sqrt(np.mean(sp_x**2))
pink_n_rms = np.sqrt(np.mean(pink_n**2))
da_x *= sp_rms / da_rms
pink_n *= sp_rms / pink_n_rms
# Compare the power spectral densities of the three signals
f, Pxx_den = signal.welch(pink_n, sp_fs, nperseg=1024)
plt.semilogy(f, Pxx_den)
f, Pxx_den = signal.welch(sp_x, sp_fs, nperseg=1024)
plt.semilogy(f, Pxx_den)
f, Pxx_den = signal.welch(da_x[:, 1], da_fs, nperseg=1024)
plt.semilogy(f, Pxx_den)
def main():
    x, fs, enc = sndio.read(
        './matrix_test/behavioural_stim/stimulus/wav/sentence-lists/ukmatrix10.1/Trial_00001.wav'
    )
    rms = rms_no_silences(x, fs, -30)
    # Drop into the debugger to inspect the computed level
    breakpoint()
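# rms_no_silences is called throughout this section but never defined in it.
# A hypothetical sketch consistent with its call sites
# (rms_no_silences(x, fs, -30.)): pool the RMS over samples whose short-term
# envelope sits above a threshold given in dB relative to the envelope peak.
# Assumes the window_rms helper sketched earlier, and a 1-D input signal.
import numpy as np

def rms_no_silences(x, fs, thresh_db):
    env = window_rms(x, round(0.02 * fs))          # 20 ms RMS envelope
    thresh = env.max() * 10 ** (thresh_db / 20.0)  # threshold relative to peak
    active = env > thresh
    # RMS over the samples whose local envelope exceeds the threshold
    return np.sqrt(np.mean(x[:env.size][active] ** 2))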
def testLoop(self):
    '''
    Main loop for iteratively finding the SRT
    '''
    self.waitForPageLoad()
    self.displayInstructions()
    self.waitForPartReady()
    while (not self.finishTest and not self._stopevent.isSet()
           and len(self.availableSentenceInds) and len(self.trackOrder)):
        # Plot SNR of current trial to the clinician screen
        plt.clf()
        for at in self.adaptiveTracks:
            at.plotSNR()
        self.renderSNRPlot()
        # Get the index of the sentence to be played for the current trial
        currentSentenceInd = self.availableSentenceInds.pop(0)
        # Get the index of the current adaptive track to use
        self.adTrInd = self.trackOrder.pop(0)
        # Generate trial audio
        self.y = self.adaptiveTracks[self.adTrInd].generateTrial(
            self.lists[0][currentSentenceInd],
            self.listsRMS[0][currentSentenceInd]
        )
        if self.participant.parameters['hl_sim_active']:
            self.y = apply_hearing_loss_sim(self.y, self.fs, channels=[0])
        # Define words presented in the current trial
        self.currentWords = self.listsString[0][currentSentenceInd]
        logger.info("-" * 78)
        logger.info("{0:<25}".format("Current trial:") +
                    f"{' '.join(self.currentWords)}")
        logger.info("{0:<25}".format("Current track index:") +
                    f"{self.adTrInd}")
        logger.info("{0:<25}".format("Current trial number:") +
                    f"{self.trialN}")
        logger.info("{0:<25}".format("Current SNR:") +
                    f"{self.adaptiveTracks[self.adTrInd].snr}")
        if self.audio_cal:
            y, fs, _ = sndio.read(
                './calibration/out/stimulus/mat_cal_stim.wav')
            self.playStimulus(y, fs)
        else:
            self.playStimulus(self.y, self.fs)
        self.waitForResponse()
        self.checkSentencesAvailable()
        if self.finishTest:
            break
        if self._stopevent.isSet():
            return
        logger.info("{0:<25}".format("N correct responses:") +
                    f"{int(self.nCorrect*5)}")
        self.adaptiveTracks[self.adTrInd].calcSNR(self.nCorrect)
        self.checkSentencesAvailable()
        self.saveState(out=self.backupFilepath)
        self.trialN += 1
        self.adaptiveTracks[self.adTrInd].incrementTrialN()
        self.saveState(out=self.backupFilepath)
        logger.info("-" * 78)
    if not self._stopevent.isSet():
        self.unsetPageLoaded()
        logger.info("Behavioural test complete")
        self.socketio.emit('processing-complete', {'data': ''},
                           namespace='/main')
        self.waitForPageLoad()
        # Plot SNR of current trial to the clinician screen
        plt.clf()
        for at in self.adaptiveTracks:
            at.plotSNR()
        self.renderSNRPlot()
        self.fitLogistic()
        self.waitForFinalise()
    else:
        continue
raise NameError('TIDIGITS root directory not found on system')

genders = ["man", "woman"]
speakers = ["ae", "ac"]
digits = ["o", "z", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
repetitions = ["a", "b"]
tidigits = []
for idx in range(len(speakers)):
    for digit in digits:
        for repetition in repetitions:
            filename = os.path.join(tidigitsroot, genders[idx],
                                    speakers[idx],
                                    digit + repetition + '.wav')
            sndobj = sndio.read(filename)
            # libsndfile scales the values down to the -1.0 +1.0 range;
            # here we convert back to the range of 16 bit linear PCM
            # to get similar results as from Kaldi or HTK
            samples = np.array(sndobj[0]) * np.iinfo(np.int16).max
            samplingrate = sndobj[1]
            tidigits.append({"filename": filename,
                             "samplingrate": samplingrate,
                             "gender": genders[idx],
                             "speaker": speakers[idx],
                             "digit": digit,
                             "repetition": repetition,
                             "samples": samples})
if sys.version_info.major == 3:
    np.savez('tidigits_python3.npz', tidigits=tidigits)
def concatenateStimuli(MatrixDir, OutDir, Length, n):
    # Get matrix wav file paths
    wavFiles = globDir(MatrixDir, '*.wav')
    stim_parts = os.path.join(MatrixDir, "stim_parts.csv")
    stim_words = os.path.join(MatrixDir, "stim_words.csv")
    stim_part_rows = []
    with open(stim_parts, 'r') as csvfile:
        stim_part_rows = [line for line in csv.reader(csvfile)]
    with open(stim_words, 'r') as csvfile:
        stim_word_rows = [line for line in csv.reader(csvfile)]
    wavFiles = natsorted(wavFiles)
    totalSize = 0
    y = []
    parts = []
    questions = []
    i = 0
    # Inter-stimulus gaps drawn uniformly between 0.8 and 1.2 seconds
    gapSize = np.random.uniform(0.8, 1.2, len(wavFiles))
    for wav, gap in zip(wavFiles, gapSize):
        if i == n:
            break
        wavObj = PySndfile(wav)
        fs = wavObj.samplerate()
        size = wavObj.frames()
        totalSize += size
        totalSize += int(gap * fs)
        if (totalSize / fs) > Length:
            # Total size + 2 second silence at start
            y.append(np.zeros((totalSize + 2 * fs, 3)))
            parts.append([])
            questions.append([])
            i += 1
            totalSize = 0
    writePtr = 2 * fs
    idx = np.arange(0, writePtr)
    chunk = np.zeros(idx.size)
    chunk = np.vstack([chunk, chunk, chunk]).T
    trigger = gen_trigger(idx, 2., 0.01, fs)
    chunk[:, 2] = trigger
    for i, _ in enumerate(y):
        y[i][0:writePtr, :] = chunk
    i = 0
    for wav, word, part in zip(wavFiles, stim_word_rows, stim_part_rows):
        if writePtr >= y[i].shape[0]:
            i += 1
            writePtr = fs * 2
        if i == n:
            break
        x, fs, encStr, fmtStr = sndio.read(wav, return_format=True)
        # Append 100 ms of silence after each sentence
        silence = np.zeros(int(0.1 * fs))
        chunk = np.append(x, silence)
        idx = np.arange(writePtr, writePtr + chunk.shape[0])
        chunk = np.vstack([chunk, chunk, np.zeros(chunk.shape[0])]).T
        trigger = gen_trigger(idx, 2., 0.01, fs)
        chunk[:, 2] = trigger
        y[i][writePtr:writePtr + chunk.shape[0], :] = chunk
        questions[i].append(word)
        parts[i].append(part)
        writePtr += chunk.shape[0]
    for ind, (data, q, p) in enumerate(zip(y, questions, parts)):
        pysndfile.sndio.write(
            os.path.join(OutDir, 'stim_{}.wav'.format(ind)), data,
            format=fmtStr, enc=encStr)
        with open('./out/stim/stim_words_{}.csv'.format(ind), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(q)
        with open('./out/stim/stim_parts_{}.csv'.format(ind), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(p)
def serialize(train=True):
    """
    Serialize the TIMIT dataset to TFRecords

    :param train:
    :return:
    """
    base_data_path = FLAGS.input_train_dir if train else FLAGS.input_test_dir
    output_path = os.path.join(FLAGS.data_dir,
                               _FILENAME_TRAIN if train else _FILENAME_TEST)
    num_frames = int(TRN_NUM_FRAMES / FrameSize if train
                     else TST_NUM_FRAMES / FrameSize)
    timit = []
    print('Parsing .wav files...')
    for region in REGIONS:
        # Iterate over all speakers for that region
        region_path = os.path.join(base_data_path, region)
        for speaker_id in os.listdir(region_path):
            speaker_path = os.path.join(region_path, speaker_id)
            # Iterate over all utterances for that speaker
            speaker_wavs = glob.glob(speaker_path + '/*.wav')
            for wav in speaker_wavs:
                if "sa" not in wav:
                    # Get the sound samples and sampling rate
                    sndobj = sndio.read(wav)
                    samplingrate = sndobj[1]
                    samples = np.array(sndobj[0]) * np.iinfo(np.int16).max
                    # Parse the phoneme file
                    phonemes = _get_phonemes(wav.replace('.wav', '.phn'))
                    # Get sentence words
                    words = _get_words(wav.replace('.wav', '.wrd'))
                    timit.append({
                        'filename': wav,
                        'samplingrate': samplingrate,
                        'phonemes': phonemes,
                        'words': words,
                        'gender': speaker_id[0],
                        'speaker': speaker_id,
                        'samples': samples
                    })
    frame_ctn = 0
    frames = np.ndarray(shape=(num_frames, NUM_FILTERS, 1, Total_FEATURES))
    labels = np.ndarray(shape=(num_frames))
    # Transform the samples into MFSC features
    print('Parsing frames from utterances...')
    # Counter for stacking fixed-size (9-frame) inputs
    count = 0
    input_sample = np.ndarray(shape=(NUM_FILTERS, 1, Total_FEATURES),
                              dtype=np.float32)
    label_list = []
    for utt in timit:
        samples = utt['samples']
        phonemes = utt['phonemes']
        # Extract MFSC, delta and delta-delta for each phoneme
        for pho in phonemes:
            # Extract the frames for this phoneme only
            pho_idx = class2pho[pho['phoneme']]['idx']
            pho_samples = samples[pho['start']:pho['end']]
            # Get the filterbanks
            mfscs = ft.mfsc(pho_samples, samplerate=utt['samplingrate'],
                            nfilt=NUM_FILTERS)
            for mfsc in mfscs:
                # Add the deltas and delta-deltas for each static frame
                delta = _get_delta(mfsc)
                delta2 = _get_delta(delta)
                # Create the new frame representation
                frame = np.ndarray(shape=(NUM_FILTERS, 1, NUM_FEATURES),
                                   dtype=np.float32)
                frame[:, :, 0] = mfsc[:, None]
                frame[:, :, 1] = delta[:, None]
                frame[:, :, 2] = delta2[:, None]
                input_sample[:, :, 3 * count:3 * (count + 1)] = frame
                label_list.append(pho_idx)
                count += 1
                if count == 9:
                    count = 0
                    frames[frame_ctn, :, :, :] = input_sample
                    # Label the stacked input with its most common phoneme
                    labels[frame_ctn] = \
                        Counter(label_list).most_common()[0][0]
                    # labels[frame_ctn] = label_list[4]
                    frame_ctn += 1
                    input_sample = np.ndarray(
                        shape=(NUM_FILTERS, 1, Total_FEATURES),
                        dtype=np.float32)
                    label_list.clear()
                    if frame_ctn % 1000 == 0:
                        print('- {0} frames processed...'.format(frame_ctn))
    frames = frames[0:frame_ctn, :, :, :]
    labels = labels[0:frame_ctn]
    print('Finished processing {0} frames!'.format(frame_ctn))
    means = frames.mean(axis=0)
    std = frames.std(axis=0)
    # Normalize to zero mean and unit variance
    frames = frames - means
    frames = frames / std
    # Shuffle the frames
    frames_shuf = np.ndarray(shape=(frame_ctn, NUM_FILTERS, 1,
                                    Total_FEATURES))
    labels_shuf = np.ndarray(shape=(frame_ctn))
    index_shuf = list(range(len(frames)))
    shuffle(index_shuf)
    index = 0
    for i in index_shuf:
        frames_shuf[index, :, :, :] = frames[i]
        labels_shuf[index] = labels[i]
        index += 1
    print('Finished shuffling...')
    filename = output_path
    num_of_train = math.ceil(frame_ctn * 0.75)
    print('Total number of train frames: ', num_of_train)
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)
    for i in range(frames_shuf.shape[0]):
        frame = np.ndarray(shape=(1, NUM_FILTERS, 1, Total_FEATURES),
                           dtype=np.float32)
        label = labels_shuf[i]
        frame[0, :, :, :] = frames_shuf[i]
        _convert_to_record(frame, label, writer)
        if i % 1000 == 0:
            print('- Wrote {0}/{1} frames...'.format(i,
                                                     frames_shuf.shape[0]))
        # After the first 75%, switch to writing the validation file
        if i == (num_of_train - 1):
            writer.close()
            filename = os.path.join(FLAGS.data_dir, _FILENAME_VAL)
            print('Writing', filename, '\t from frame', i)
            writer = tf.python_io.TFRecordWriter(filename)
    print('Finished writing', i)
    writer.close()
    # Save the phoneme mapping file
    with open(os.path.join(FLAGS.data_dir,
                           'phon_tr.json' if train else 'phon_tst.json'),
              'w') as f:
        json.dump(class2pho, f, indent=4, sort_keys=True)
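# _get_delta is assumed above but not defined in this section. A hypothetical
# sketch consistent with its per-vector usage (delta of an MFSC vector, then
# delta of that delta): a central difference with edge padding.
import numpy as np

def _get_delta(features):
    padded = np.pad(features, 1, mode='edge')
    # Central difference: (x[i+1] - x[i-1]) / 2, same length as the input
    return (padded[2:] - padded[:-2]) / 2.0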
        continue
raise NameError('TIDIGITS root directory not found on system')

genders = ["man", "woman"]
speakers = ["bm", "ew"]
# speakers = ["ae", "ac"]
digits = ["o", "z", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
repetitions = ["a", "b"]
data = []
for idx in range(len(speakers)):
    for digit in digits:
        for repetition in repetitions:
            filename = os.path.join(tidigitsroot, genders[idx],
                                    speakers[idx],
                                    digit + repetition + '.wav')
            sndobj = sndio.read(filename, dtype=np.int16)
            # The following is not necessary any longer, but I need to check
            # that the feature extraction still works with int16 numbers.
            # Also I need to change all occurrences in lab 2 and 3!!!!
            # libsndfile scales the values down to the -1.0 +1.0 range;
            # here we convert back to the range of 16 bit linear PCM
            # to get similar results as from Kaldi or HTK
            # samples = np.array(sndobj[0]) * np.iinfo(np.int16).max
            samplingrate = sndobj[1]
            data.append({"filename": filename,
                         "samplingrate": samplingrate,
                         "gender": genders[idx],
                         "speaker": speakers[idx],
                         "digit": digit,
                         "repetition": repetition,
                         "samples": sndobj[0]})
def main():
    stim_dir = "../behavioural_stim/stimulus"
    wav_dir = "../behavioural_stim/stimulus/wav"
    base_dir = "../behavioural_stim/stimulus/wav/sentence-lists/"
    noise_dir = "../behavioural_stim/stimulus/wav/noise/"
    out_dir = "./out"
    dir_must_exist(base_dir)
    dir_must_exist(out_dir)
    dir_must_exist(wav_dir)
    dir_must_exist(noise_dir)
    noise_filepath = "../behavioural_stim/stimulus/wav/noise/noise_norm.wav"
    # Pair up consecutive sentence lists
    folders = os.listdir(base_dir)
    folders = natsorted(folders)[1:15]
    folders = list(zip(folders[::2], folders[1::2]))
    calc_potential_max(base_dir, noise_filepath, out_dir)
    n_questions = 4
    fs = 44100
    for ind, (list_folder_1, list_folder_2) in enumerate(folders):
        out_folder_name = 'Stim_{}'.format(ind)
        out_folder = os.path.join(out_dir, out_folder_name)
        delete_if_exists(out_folder)
        dir_must_exist(out_folder)
        out_wav_path = os.path.join(out_folder, "stim.wav")
        out_csv_path = os.path.join(out_folder, "markers.csv")
        out_rms_path = os.path.join(out_folder, "rms.npy")
        out_q_path = [
            os.path.join(out_folder, "questions_{}.csv".format(x))
            for x in range(n_questions)
        ]
        out_wav = PySndfile(out_wav_path, 'w',
                            construct_format('wav', 'pcm16'), 3, 44100)
        list_1_wav = globDir(os.path.join(base_dir, list_folder_1), '*.wav')
        list_2_wav = globDir(os.path.join(base_dir, list_folder_2), '*.wav')
        list_1_csv = globDir(os.path.join(base_dir, list_folder_1), '*.csv')
        list_2_csv = globDir(os.path.join(base_dir, list_folder_2), '*.csv')
        merged_wavs = list_1_wav + list_2_wav
        merged_csvs = list_1_csv + list_2_csv
        words = []
        for c in merged_csvs:
            with open(c, 'r') as csvfile:
                for line in csv.reader(csvfile):
                    words.append(line)
        # Shuffle wav files and their word transcriptions together
        c = list(zip(merged_wavs, words))
        shuffle(c)
        merged_wavs, words = zip(*c)
        sum_sqrd = 0.
        n = 0
        with open(out_csv_path, 'w') as csvfile, ExitStack() as stack:
            # Open all question files
            qfiles = [
                stack.enter_context(open(qfile, 'w'))
                for qfile in out_q_path
            ]
            writer = csv.writer(csvfile)
            qwriters = [csv.writer(qfile) for qfile in qfiles]
            counter = 0
            stim_count = len(merged_wavs)
            stim_count_half = stim_count // 2
            # Choose question sentences from each half of the stimulus
            q_inds = np.array([
                sample(range(0, stim_count_half), n_questions),
                sample(range(stim_count_half, stim_count - 1), n_questions)
            ]).T
            a = 0
            # 2 seconds of initial silence (with trigger channel)
            silence = np.zeros((88200, 3))
            idx = np.arange(0, silence.shape[0])
            trigger = gen_trigger(idx, 2., 0.01, fs)
            silence[:, 2] = trigger
            out_wav.write_frames(silence)
            for ind, (wav, txt) in enumerate(zip(merged_wavs, words)):
                csv_line = [counter]
                # Random inter-sentence gap of 0.3-0.4 seconds
                silence = np.zeros((int(
                    np.random.uniform(int(0.3 * 44100), int(0.4 * 44100),
                                      1)), 3))
                idx = np.arange(counter, counter + silence.shape[0])
                trigger = gen_trigger(idx, 2., 0.01, fs)
                silence[:, 2] = trigger
                out_wav.write_frames(silence)
                counter += silence.shape[0]
                csv_line.append(counter)
                csv_line.append("#")
                writer.writerow(csv_line)
                csv_line = [counter]
                x, fs, enc = sndio.read(wav)
                sum_sqrd += np.sum(x**2)
                n += x.size
                y = np.vstack([x, x, np.zeros(x.size)]).T
                idx = np.arange(counter, counter + y.shape[0])
                trigger = gen_trigger(idx, 2., 0.01, fs)
                y[:, 2] = trigger
                out_wav.write_frames(y)
                counter += y.shape[0]
                csv_line.append(counter)
                csv_line.append(" ".join(txt))
                writer.writerow(csv_line)
                if ind in q_inds:
                    # Blank out a random word to generate a question
                    writer_ind = int(np.where(ind == q_inds)[0])
                    blank_ind = randint(0, len(txt) - 1)
                    q_list = copy(txt)
                    q_list[blank_ind] = '_'
                    qwriters[writer_ind].writerow(
                        [" ".join(q_list), txt[blank_ind]])
                    a += 1
            if a != 8:
                # Sanity check: 2 halves * 4 questions should be generated
                pdb.set_trace()
            csv_line = [counter]
            silence = np.zeros((int(
                np.random.uniform(int(0.3 * 44100), int(0.4 * 44100), 1)),
                                3))
            idx = np.arange(counter, counter + silence.shape[0])
            trigger = gen_trigger(idx, 2., 0.01, fs)
            silence[:, 2] = trigger
            out_wav.write_frames(silence)
            counter += silence.shape[0]
            csv_line.append(counter)
            csv_line.append("#")
            writer.writerow(csv_line)
        rms = np.sqrt(sum_sqrd / n)
        np.save(out_rms_path, rms)
        x, fs, enc = sndio.read(out_wav_path)