def make_calibration_signal(REF_RMS_DB):
    """Build the calibration sequence used to bracket a processed signal.

    The leading sequence is laid out as: silence, tone burst, silence,
    noise burst, silence.

    Args:
        REF_RMS_DB (float): reference RMS level, forwarded unchanged as the
            ``level`` argument of both the noise and the tone generator

    Returns:
        tuple: ``(leading, trailing)`` where ``leading`` is the concatenated
            calibration sequence and ``trailing`` is a single block of
            silence
    """
    # NOTE(review): `fs` is not a parameter of this function; it is taken
    # from the enclosing scope. Confirm it is defined where this lives.

    # Calibration noise and tone with same RMS as original speech,
    # Tone at nearest channel centre frequency to 500 Hz
    # For testing, ref_rms_dB must be equal to -31.2
    noise = MSBG.gen_eh2008_speech_noise(duration=2, fs=fs, level=REF_RMS_DB)
    tone = MSBG.gen_tone(freq=520, duration=0.5, fs=fs, level=REF_RMS_DB)
    gap = np.zeros(int(0.05 * fs))  # 50 ms duration

    segments = (gap, tone, gap, noise, gap)
    leading = np.concatenate(segments)
    return leading, gap
def run_demo(input_filename, outputfilename):
    """Process one wav file through an ear for each standard audiogram.

    Each ear's output is written to its own wav file, named from the stem of
    ``outputfilename`` followed by ``_<index>.wav``.

    Arguments:
        input_filename {str} -- Name of the input wav file
        outputfilename {str} -- Stem name of output wav files
    """
    signal = ccs.read_signal(input_filename)
    logging.info(f"Signal shape is {signal.shape}")

    # One ear per standard audiogram, all with the same source position.
    ears = [
        MSBG.Ear(audiogram=audiogram, src_pos="ff")
        for audiogram in MSBG.audiogram.standard_audiograms()
    ]

    # Run every ear before anything is written out.
    outputs = [ear.process(signal, add_calibration=True) for ear in ears]

    # Only the first element of each ear's output is saved.
    outfile = Path(outputfilename)
    for i, output in enumerate(outputs):
        ccs.write_signal(
            f"{outfile.parent}/{outfile.stem}_{i}.wav",
            output[0],
            CONFIG.fs,
            floating_point=True,
        )
def __init__(self, audiogram, catch_up_level=105.0, fs=44100.0):
    """Cochlea constructor.

    Reads the audiogram's severity, loads the matching gammatone filterbank
    parameters, computes the recruitment parameters and, unless the severity
    is "NOTHING", creates a smearer.

    Args:
        audiogram (Audiogram): Audiogram characterising hearing loss
        catch_up_level (float, optional): loudness catch-up level in dB
            (default: {105})
        fs (float, optional): sampling frequency
    """
    self.fs = fs
    self.audiogram = audiogram  # Audiogram to employ
    # Level in dB at which catches up with NH
    self.catch_up_level = catch_up_level

    # The severity level selects the per-severity parameter set.
    severity_level = self.audiogram.severity
    severity_params = HL_PARAMS[severity_level]

    gtfbank_file = severity_params["gtfbank_file"]
    self.gtfbank_params = MSBG.file.read_gtf_file(f"data/{gtfbank_file}.json")

    recruitment = compute_recruitment_parameters(
        self.gtfbank_params["GTn_CentFrq"], audiogram, catch_up_level
    )
    self.cf_expansion, self.eq_loud_db_catch_up = recruitment

    # Set-up the smearer; there is none for the "NOTHING" severity.
    if severity_level == "NOTHING":
        self.smearer = None
    else:
        self.smearer = MSBG.Smearer(*severity_params["smear_params"], fs)

    logging.info(f"Severity level - {severity_level}")
def test_audfilt(filename, regtest):
    """Check MSBG.audfilt against the reference filter stored in a fixture.

    Args:
        filename (str): name of a fixture file in the ``data`` directory
            next to this module, loaded with ``scipy.io.loadmat``
        regtest: file-like object that the hash of the result is printed to
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data = scipy.io.loadmat(here + "/data/" + filename)

    audfiltered = MSBG.audfilt(
        data["rl"],
        data["ru"],
        data["sampfreq"],
        data["asize"],
    )

    npt.assert_allclose(audfiltered, data["filter"])
    print(utils.hash_numpy(audfiltered), file=regtest)
def filterbank_and_recruitment(
    coch_sig,
    SPL_equiv_0dB,
    nchans,
    erbn_cf,
    fs,
    ngamma,
    gtn_denoms,
    gtn_nums,
    gtn_delays,
    start2poleHP,
    hp_denoms,
    hp_nums,
    expnsn_ratios,
    eq_loud_db,
    recombination_dB,
):
    """Run the gammatone filterbank, envelope, recruitment and recombination.

    Only the first row of ``coch_sig`` is processed. The filterbank output is
    passed to the envelope computation and to the recruitment stage, then
    summed along axis 0 and scaled by ``10 ** (-0.05 * recombination_dB)``.

    ``nchans`` is accepted but not used by this function. ``start2poleHP``
    is converted with ``int()`` before being handed to the filterbank. All
    other arguments are forwarded unchanged to the MSBG routines.

    Returns:
        The recombined, scaled signal.
    """
    first_row = coch_sig[0, :]

    filtered = MSBG.gammatone_filterbank(
        first_row,
        ngamma,
        gtn_denoms,
        gtn_nums,
        gtn_delays,
        int(start2poleHP),
        hp_denoms,
        hp_nums,
    )
    envelope = MSBG.compute_envelope(filtered, erbn_cf, fs)
    recruited = MSBG.recruitment(
        filtered,
        envelope,
        SPL_equiv_0dB,
        expnsn_ratios,
        eq_loud_db,
    )

    # Recombine along axis 0, then apply the recombination gain.
    recombined = np.sum(recruited, axis=0)
    return recombined * np.power(10, (-0.05 * recombination_dB))
def test_generate_key_percent(filename, regtest):
    """Check MSBG.generate_key_percent against values stored in a fixture.

    Args:
        filename (str): name of a fixture file in the ``data`` directory
            next to this module, loaded with ``scipy.io.loadmat``
        regtest: file-like object that the key hash and the used threshold
            are printed to
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data = scipy.io.loadmat(here + "/data/" + filename, squeeze_me=True)

    key, used_thr_dB = MSBG.generate_key_percent(
        data["sig"],
        data["thr_dB"],
        data["winlen"],
    )

    # The stored key is compared after subtracting one (presumably because
    # the fixture holds 1-based indices -- confirm against the fixture).
    npt.assert_allclose(key, data["key"] - 1, rtol=RTOL)
    npt.assert_allclose(used_thr_dB, data["used_thr_dB"], rtol=RTOL)

    print(utils.hash_numpy(key), file=regtest)
    print(used_thr_dB, file=regtest)
def test_gen_eh2008_speech_noise(filename):
    """Check MSBG.gen_eh2008_speech_noise against a stored reference signal.

    The NumPy random generator is seeded with 0 before anything else. The
    generated noise must have the same length as the reference, score above
    ``MIN_SDR`` with ``utils.compute_siSDR``, and match it within
    ``ATOL``/``RTOL``.

    Args:
        filename (str): name of a fixture file in the ``data`` directory
            next to this module, loaded with ``scipy.io.loadmat``
    """
    np.random.seed(0)

    here = os.path.dirname(os.path.abspath(__file__))
    data = scipy.io.loadmat(here + "/data/" + filename, squeeze_me=True)
    expected = data["eh2008_nse"]

    eh2008_nse = MSBG.gen_eh2008_speech_noise(data["durn"], data["fs"])

    assert len(eh2008_nse) == len(expected)
    sdr = utils.compute_siSDR(eh2008_nse, expected)
    assert sdr > MIN_SDR
    npt.assert_allclose(eh2008_nse, expected, atol=ATOL, rtol=RTOL)
def test_measure_rms(filename, regtest):
    """Check MSBG.measure_rms against values stored in a fixture.

    The rms, the key and the relative threshold are compared with the stored
    values; the fourth return value is not checked.

    Args:
        filename (str): name of a fixture file in the ``data`` directory
            next to this module, loaded with ``scipy.io.loadmat``
        regtest: file-like object that the rms, the key hash and the
            relative threshold are printed to
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data = scipy.io.loadmat(here + "/data/" + filename, squeeze_me=True)

    rms, key, rel_dB_thresh, _active = MSBG.measure_rms(
        data["signal"],
        data["fs"],
        data["dB_rel_rms"],
    )

    npt.assert_allclose(rms, data["rms"], rtol=RTOL)
    # index so subtract 1 OK
    npt.assert_allclose(key, data["key"] - 1, rtol=RTOL)
    npt.assert_allclose(rel_dB_thresh, data["rel_dB_thresh"], rtol=RTOL)

    print(rms, file=regtest)
    print(utils.hash_numpy(key), file=regtest)
    print(rel_dB_thresh, file=regtest)
def test_gen_eh2008_speech_noise_supplied_b(filename, regtest):
    """Check MSBG.gen_eh2008_speech_noise when the filter is supplied.

    The NumPy random generator is seeded with 0 before anything else. The
    filter stored in the fixture under ``"b"`` is passed as ``supplied_b``.
    The result must have the same length as the reference, score above
    ``MIN_SDR`` with ``utils.compute_siSDR``, and match it within
    ``ATOL``/``RTOL``.

    Args:
        filename (str): name of a fixture file in the ``data`` directory
            next to this module, loaded with ``scipy.io.loadmat``
        regtest: file-like object that the hash of the result is printed to
    """
    np.random.seed(0)

    here = os.path.dirname(os.path.abspath(__file__))
    data = scipy.io.loadmat(here + "/data/" + filename, squeeze_me=True)
    expected = data["eh2008_nse"]

    # Use filter computed by MATLAB.
    eh2008_nse = MSBG.gen_eh2008_speech_noise(
        data["durn"],
        data["fs"],
        supplied_b=data["b"],
    )

    assert len(eh2008_nse) == len(expected)
    sdr = utils.compute_siSDR(eh2008_nse, expected)
    assert sdr > MIN_SDR
    npt.assert_allclose(eh2008_nse, expected, atol=ATOL, rtol=RTOL)

    print(utils.hash_numpy(eh2008_nse), file=regtest)
def __init__(self, src_pos, audiogram):
    """Constructor for the Ear class.

    Stores the audiogram, builds the cochlea model for it, warns when the
    largest audiogram level exceeds 80, and looks up the correction for the
    given source position.

    Args:
        src_pos (str): Position of the source
        audiogram (Audiogram): The ear's audiogram
    """
    self.audiogram = audiogram
    self.cochlea = MSBG.Cochlea(self.audiogram)
    # No calibration signal yet; it is filled in later when first needed.
    self.calibration_signal = None

    # `!= None` is deliberate: on an array it is an elementwise comparison
    # that yields a boolean mask, which `is not None` would not.
    # Assumes `audiogram.levels` is an ndarray -- TODO confirm.
    known_levels = audiogram.levels[audiogram.levels != None]  # noqa: E711
    if np.max(known_levels) > 80:
        # Implicit string concatenation instead of a backslash continuation
        # inside the literal, which embedded the source indentation in the
        # logged message.
        logging.warning(
            "Impairment too severe: Suggest you limit audiogram max to "
            "80-90 dB HL, otherwise things go wrong/unrealistic."
        )

    self.src_correction = Ear.get_src_correction(src_pos)
def measure_rms(signal, fs, dB_rel_rms, percent_to_track=None):
    """Measure rms in two stages.

    A first RMS is taken over the whole signal. That value, offset by
    ``dB_rel_rms`` and floored at -80, becomes the threshold handed to
    ``MSBG.generate_key_percent`` together with a window length of
    ``round(WIN_SECS * fs)`` samples. The RMS is then measured again using
    only the samples selected by the returned key.

    Args:
        signal (ndarray): the signal of which to measure the rms
        fs (float): sampling frequency (converted with ``int()``)
        dB_rel_rms (float): offset in dB added to the first-stage level to
            form the key threshold
        percent_to_track (float, optional): forwarded to
            ``MSBG.generate_key_percent`` (default: {None})

    Returns:
        (tuple): tuple containing

        - rms (float): rms of the samples selected by the key (linear)
        - key (ndarray): the key cast to int, used as sample indices
        - rel_dB_thresh (float): threshold used by the key generation minus
          ``20 * log10(rms)``
        - active (float): ``100 * len(key) / len(signal)``
    """
    sample_rate = int(fs)
    n_samples = len(signal)

    # first RMS is of all signal.
    coarse_rms = np.sqrt(np.sum(np.power(signal, 2) / n_samples))

    # use this RMS to generate key threshold to get more accurate RMS
    coarse_level_db = 20 * np.log10(coarse_rms)
    key_thr_dB = max(coarse_level_db + dB_rel_rms, -80)

    # move key_thr_dB to account for noise less peakier than signal
    window_len = round(WIN_SECS * sample_rate)
    key, used_thr_dB = MSBG.generate_key_percent(
        signal,
        key_thr_dB,
        window_len,
        percent_to_track=percent_to_track,
    )
    idx = key.astype(int)  # move into generate_key_percent
    n_key = len(key)

    # statistic to be reported later, BUT save for later
    # (for independent==1 loop where it sets a target for rms measure)
    active = 100 * n_key / n_samples

    selected_energy = np.sum(np.power(signal[idx], 2))
    rms = np.sqrt(selected_energy / n_key)
    rel_dB_thresh = used_thr_dB - 20 * np.log10(rms)

    return rms, idx, rel_dB_thresh, active
def process(
    self,
    chans,
    add_calibration=False,
):
    """Run the hearing loss simulation.

    The input is rescaled to its target level, optionally wrapped in the
    calibration signal, passed through the source-to-cochlea correction, the
    cochlea simulation and the reverse correction, and finally low-pass
    filtered.

    Args:
        chans (ndarray): signal to process, shape either N, Nx1, Nx2
        add_calibration (bool): wrap each channel in the calibration signal,
            element 0 before it and element 1 after it (default: False)

    Returns:
        list: the processed signal, one filtered array per channel
    """
    fs = 44100  # This is the only sampling frequency that can be used
    if fs != CONFIG.fs:
        logging.error(
            "Warning: only a sampling frequency of 44.1kHz can be used by MSBG."
        )

    logging.info(f"Processing {len(chans)} samples")

    # Get single channel array and convert to list
    chans = Ear.array_to_list(chans)

    # Need to know file RMS, and then call that a certain level in SPL:
    # needs some form of pre-measuring.
    levelreFS = 10 * np.log10(np.mean(np.array(chans) ** 2))
    equiv_0dB_SPL = CONFIG.equiv0dBSPL + CONFIG.ahr
    leveldBSPL = equiv_0dB_SPL + levelreFS
    CALIB_DB_SPL = leveldBSPL
    TARGET_SPL = leveldBSPL
    REF_RMS_DB = CALIB_DB_SPL - equiv_0dB_SPL

    # Measure RMS where 3rd arg is dB_rel_rms (how far below).
    # Only the rms and the sample indices are needed here.
    calculated_rms, idx, _rel_dB_thresh, _active = MSBG.measure_rms(
        chans[0], fs, -12
    )

    # Rescale input data and check level after rescaling
    # This is to ensure that the following processing steps are applied correctly
    change_dB = TARGET_SPL - (equiv_0dB_SPL + 20 * np.log10(calculated_rms))
    gain = np.power(10, 0.05 * change_dB)  # same gain for every channel
    chans = [x * gain for x in chans]
    new_rms_db = equiv_0dB_SPL + 10 * np.log10(
        np.mean(np.power(chans[0][idx], 2.0))
    )
    logging.info(
        f"Rescaling: leveldBSPL was {leveldBSPL:3.1f} dB SPL, now {new_rms_db:3.1f}dB SPL. Target SPL is {TARGET_SPL:3.1f} dB SPL."
    )

    # Add calibration signal at target SPL dB
    if add_calibration:
        # NOTE(review): the calibration signal is built once, at the level
        # of the first signal processed by this ear, and reused afterwards.
        # Confirm this is intended when later signals have another level.
        if self.calibration_signal is None:
            self.calibration_signal = Ear.make_calibration_signal(REF_RMS_DB)
        chans = [
            np.concatenate(
                (self.calibration_signal[0], x, self.calibration_signal[1])
            )
            for x in chans
        ]

    # Transform from src pos to cochlea, simulate cochlea, transform back to src pos
    chans = [Ear.src_to_cochlea_filt(x, self.src_correction, fs) for x in chans]
    chans = [self.cochlea.simulate(x, equiv_0dB_SPL) for x in chans]
    chans = [
        Ear.src_to_cochlea_filt(x, self.src_correction, fs, backward=True)
        for x in chans
    ]

    # Implement low-pass filter at top end of audio range: flat to Cutoff freq, tails
    # below -80 dB. Suitable lpf for signals later converted to MP3, flat to 15 kHz.
    # Small window to design low-pass FIR, to cut off high freq processing noise
    # low-pass to something sensible, prevents exaggeration of > 15 kHz
    winlen = 2 * math.floor(0.0015 * fs) + 1
    lpf44d1 = firwin(winlen, UPPER_CUTOFF_HZ / int(fs / 2), window=("kaiser", 8))
    chans = [lfilter(lpf44d1, 1, x) for x in chans]

    return chans
def run_HL_processing(scene, listener, input_path, output_path, fs):
    """Run baseline HL processing.

    Applies the MSBG model of hearing loss. The hearing-aid output is
    processed with a pair of flat 0 dB ears and with the listener's own
    ears; the mixed signal and an impulse signal are processed with the
    listener's ears only. Four wav files are written to ``output_path``.

    Args:
        scene (dict): dictionary defining the scene to be generated; its
            ``"scene"`` entry is used to build the file names
        listener (dict): dictionary containing listener data; the entries
            ``"name"``, ``"audiogram_cfs"``, ``"audiogram_levels_l"`` and
            ``"audiogram_levels_r"`` are read
        input_path (str): path to the input data
        output_path (str): path to the output data
        fs (float): sampling rate; used here to place the impulse at sample
            ``int(fs / 2)`` (output files are written with ``CONFIG.fs``)
    """
    listener_name = listener["name"]
    logging.debug(f"Running HL processing: Listener {listener_name}")
    logging.debug("Listener data")
    logging.debug(listener_name)

    # Get audiogram centre frequencies
    cfs = np.array(listener["audiogram_cfs"])

    # Read HA output and mixed signals
    scene_name = scene["scene"]
    signal = read_signal(
        f"{input_path}/{scene_name}_{listener_name}_HA-output.wav"
    )
    mixture_signal = read_signal(f"{input_path}/{scene_name}_mixed_CH0.wav")

    # Create discrete delta function (DDF) signal for time alignment.
    # The same impulse is placed in columns 0 and 1.
    ddf_signal = np.zeros(np.shape(signal))
    impulse = unit_impulse(len(signal), int(fs / 2))
    ddf_signal[:, 0] = impulse
    ddf_signal[:, 1] = impulse

    # Get flat-0dB ear audiograms
    flat0dB_audiogram = MSBG.Audiogram(cfs=cfs, levels=np.zeros(np.shape(cfs)))
    flat0dB_ear = MSBG.Ear(audiogram=flat0dB_audiogram, src_pos="ff")

    # For flat-0dB audiograms, the same ear is used on both sides
    flat0dB_HL_outputs = listen(signal, [flat0dB_ear, flat0dB_ear])

    # Get listener audiograms (left first, then right) and build a pair of ears
    audiograms = [
        MSBG.Audiogram(cfs=cfs, levels=np.array(listener[levels_key]))
        for levels_key in ("audiogram_levels_l", "audiogram_levels_r")
    ]
    ears = [
        MSBG.Ear(audiogram=audiogram, src_pos="ff") for audiogram in audiograms
    ]

    # Process the HA output signal, the raw mixed signal, and the ddf signal
    outputs = listen(signal, ears)
    mixture_outputs = listen(mixture_signal, ears)
    ddf_outputs = listen(ddf_signal, ears)

    # Write the outputs
    outfile_stem = f"{output_path}/{scene_name}_{listener_name}"
    signals_to_write = [
        (flat0dB_HL_outputs, f"{output_path}/{scene_name}_flat0dB_HL-output.wav"),
        (outputs, f"{outfile_stem}_HL-output.wav"),
        (ddf_outputs, f"{outfile_stem}_HLddf-output.wav"),
        (mixture_outputs, f"{outfile_stem}_HL-mixoutput.wav"),
    ]
    for processed, out_filename in signals_to_write:
        write_signal(out_filename, processed, CONFIG.fs, floating_point=True)
def test_smear3(filename, regtest):
    """Check MSBG.smear3 against the reference buffer stored in a fixture.

    Args:
        filename (str): name of a fixture file in the ``data`` directory
            next to this module, loaded with ``scipy.io.loadmat``
        regtest: file-like object that the hash of the result is printed to
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data = scipy.io.loadmat(here + "/data/" + filename, squeeze_me=True)

    outbuffer = MSBG.smear3(data["fsmear"], data["inbuffer"])

    npt.assert_allclose(outbuffer, data["outbuffer"], rtol=RTOL)
    print(utils.hash_numpy(outbuffer), file=regtest)