def main():
    """Dump per-frame NAP and strobe output for one wave file to a .mat file.

    Reads a hard-coded wave file, feeds it in frames of ``frame_period_ms``
    through a gammatone -> HCL -> local-max module chain, and saves the
    collected HCL output ("nap") and local-max output ("strobes") to
    ``src/Scripts/strobes_out.mat``.
    """
    wave_path = "/Users/Tom/Documents/Work/PhD/HTK-AIM/Sounds/"
    file_name = "ii/ii172.5p112.5s100.0t+000itd"
    wave_suffix = ".wav"
    frame_period_ms = 10

    wave_filename = wave_path + file_name + wave_suffix
    (sample_rate, input_wave) = wavfile.read(wave_filename)
    buffer_length = int(frame_period_ms * sample_rate / 1000)

    input_sig = aimc.SignalBank()
    input_sig.Initialize(1, buffer_length, sample_rate)

    parameters = aimc.Parameters()
    # NOTE(review): 480 is fixed while buffer_length is derived from the
    # file's sample rate; the two only agree at 48 kHz - confirm intent.
    parameters.SetInt("input.buffersize", 480)

    mod_gt = aimc.ModuleGammatone(parameters)
    mod_hl = aimc.ModuleHCL(parameters)
    mod_strobes = aimc.ModuleLocalMax(parameters)

    mod_gt.AddTarget(mod_hl)
    mod_hl.AddTarget(mod_strobes)
    mod_gt.Initialize(input_sig)

    # Scale samples by 2^15 - 1 (presumably 16-bit PCM input - confirm).
    full_scale = float(pow(2, 15) - 1)
    scaled_wave = [float(sample / full_scale) for sample in input_wave]

    out_nap = []
    out_strobes = []
    # grouper() is called with a fill value of 0, so a short final chunk is
    # presumably zero-padded to buffer_length.
    for chunk in grouper(buffer_length, scaled_wave, 0):
        for index, sample in enumerate(chunk):
            input_sig.set_sample(0, index, float(sample))
        mod_gt.Process(input_sig)
        out_nap.append(BankToArray(mod_hl.GetOutputBank()))
        out_strobes.append(StrobesToList(mod_strobes.GetOutputBank()))

    outmat = dict(nap=out_nap, strobes=out_strobes)
    io.savemat("src/Scripts/strobes_out.mat", outmat, oned_as='column')
def main():
    """Check ModuleGaussians output against stored MATLAB reference features.

    Loads the "Templates" (input profiles) and "feature" (expected output)
    arrays from the test data file, runs every profile through
    aimc.ModuleGaussians, and compares each output channel with the
    reference value. Prints the mismatching profiles, the percentage of
    matching values, and a PASSED/FAILED verdict.
    """
    data_file = "src/Modules/Features/testdata/aa153.0p108.1s100.0t+000itd.mat"
    data = io.loadmat(data_file)

    # The margin of error allowed between the returned values from AIM-C and
    # the stored MATLAB values.
    epsilon = 0.00001

    given_profiles = data["Templates"]
    matlab_features = data["feature"]

    (profile_count, channel_count) = given_profiles.shape

    profile_sig = aimc.SignalBank()
    profile_sig.Initialize(channel_count, 1, 44100)
    parameters = aimc.Parameters()
    mod_gauss = aimc.ModuleGaussians(parameters)
    mod_gauss.Initialize(profile_sig)

    correct_count = 0
    incorrect_count = 0
    for p in range(profile_count):
        profile = given_profiles[p]
        features = matlab_features[p]
        for i in range(channel_count):
            profile_sig.set_sample(i, 0, profile[i])
        mod_gauss.Process(profile_sig)
        out_sig = mod_gauss.GetOutputBank()

        error = False
        for j in range(out_sig.channel_count()):
            if abs(out_sig.sample(j, 0) - features[j]) > epsilon:
                error = True
                incorrect_count += 1
            else:
                correct_count += 1

        if error:
            # The report prints the first four values only, so it assumes
            # the module produces at least four output channels.
            print("Mismatch at profile %d" % (p))
            print("AIM-C values: %f %f %f %f" % (
                out_sig.sample(0, 0), out_sig.sample(1, 0),
                out_sig.sample(2, 0), out_sig.sample(3, 0)))
            print("MATLAB values: %f %f %f %f" % (
                features[0], features[1], features[2], features[3]))
            print("")

    total_count = correct_count + incorrect_count
    # Float arithmetic keeps the percentage exact on Python 2 (where int / int
    # truncates); an empty comparison set no longer divides by zero.
    if total_count:
        percent_correct = 100.0 * correct_count / total_count
    else:
        percent_correct = 0.0
    print("Total correct: %f percent" % (percent_correct))

    # Decide on the integer mismatch count rather than on a float equality;
    # a run that compared nothing is not a pass.
    if total_count and incorrect_count == 0:
        print("=== TEST PASSED ===")
    else:
        print("=== TEST FAILED! ===")
def main():
    """Run the gammatone test input through AIM-C and save the raw output.

    Loads "input_wave" and "expected_output" from the gammatone test data,
    processes one 20000-sample buffer with aimc.ModuleGammatone (configured
    from gammatone.cfg), and writes the filterbank output and the reported
    centre frequencies to ``src/Modules/BMM/testdata/out_v2.mat``.

    No comparison against the expected output is made here; only its shape
    is used to size the output array.
    """
    # numpy.zeros replaces the scipy.zeros alias, which SciPy deprecated in
    # its root namespace.
    import numpy

    data_file = "src/Modules/BMM/testdata/gammatone.mat"
    data = io.loadmat(data_file)

    input_wave = data["input_wave"]
    expected_output = data["expected_output"]

    (channel_count, _frame_count) = expected_output.shape
    buffer_length = 20000

    input_sig = aimc.SignalBank()
    # NOTE(review): the sample rate is hard-coded to 48000 rather than taken
    # from the data file - confirm this matches the test data.
    input_sig.Initialize(1, buffer_length, 48000)
    parameters = aimc.Parameters()
    parameters.Load("src/Modules/BMM/testdata/gammatone.cfg")
    mod_gt = aimc.ModuleGammatone(parameters)
    mod_gt.Initialize(input_sig)

    out = numpy.zeros((channel_count, buffer_length))
    cfs = numpy.zeros((channel_count))

    for i in range(buffer_length):
        input_sig.set_sample(0, i, input_wave[i][0])
    mod_gt.Process(input_sig)
    out_sig = mod_gt.GetOutputBank()

    # The arrays are sized from the expected output, so the module must not
    # report more channels than the reference data has.
    for ch in range(out_sig.channel_count()):
        cfs[ch] = out_sig.centre_frequency(ch)
        for i in range(buffer_length):
            out[ch, i] = out_sig.sample(ch, i)

    outmat = dict(filterbank_out=out, centre_frequencies_out=cfs)
    io.savemat("src/Modules/BMM/testdata/out_v2.mat", outmat)
def main():
    """Dump per-frame scaled profile output for one wave file to a .mat file.

    Reads a hard-coded wave file, feeds it in frames of ``frame_period_ms``
    through a gammatone -> HCL -> slice -> scaler module chain configured
    from ``src/Scripts/profile_features.cfg``, copies the scaler output of
    every frame into an array, and saves the list of arrays as
    "profile_out" in ``src/Scripts/profile_out.mat``.
    """
    # numpy.zeros replaces the scipy.zeros alias, which SciPy deprecated in
    # its root namespace.
    import numpy

    wave_path = "/Users/Tom/Documents/Work/PhD/HTK-AIM/Sounds/"
    file_name = "aa/aa161.1p119.4s100.0t+000itd"
    wave_suffix = ".wav"
    frame_period_ms = 10

    wave_filename = wave_path + file_name + wave_suffix
    (sample_rate, input_wave) = wavfile.read(wave_filename)
    buffer_length = int(frame_period_ms * sample_rate / 1000)

    input_sig = aimc.SignalBank()
    input_sig.Initialize(1, buffer_length, sample_rate)

    parameters = aimc.Parameters()
    parameters.Load("src/Scripts/profile_features.cfg")
    mod_gt = aimc.ModuleGammatone(parameters)
    mod_hl = aimc.ModuleHCL(parameters)
    mod_profile = aimc.ModuleSlice(parameters)
    mod_scaler = aimc.ModuleScaler(parameters)

    mod_gt.AddTarget(mod_hl)
    mod_hl.AddTarget(mod_profile)
    mod_profile.AddTarget(mod_scaler)
    mod_gt.Initialize(input_sig)

    # Scale samples by 2^15 - 1 (presumably 16-bit PCM input - confirm).
    full_scale = float(pow(2, 15) - 1)
    scaled_wave = [float(sample / full_scale) for sample in input_wave]

    out_frames = []
    # grouper() is called with a fill value of 0, so a short final chunk is
    # presumably zero-padded to buffer_length.
    for chunk in grouper(buffer_length, scaled_wave, 0):
        for index, sample in enumerate(chunk):
            input_sig.set_sample(0, index, float(sample))
        mod_gt.Process(input_sig)

        out_sig = mod_scaler.GetOutputBank()
        channel_count = out_sig.channel_count()
        out_buffer_length = out_sig.buffer_length()
        # A fresh array per frame: each one is stored in out_frames.
        out = numpy.zeros((channel_count, out_buffer_length))
        for ch in range(channel_count):
            for i in range(out_buffer_length):
                out[ch, i] = out_sig.sample(ch, i)
        out_frames.append(out)

    outmat = dict(profile_out=out_frames)
    io.savemat("src/Scripts/profile_out.mat", outmat)
# Two ways to deal with a trailing partial frame. Disabled alternative:
# zero-pad the input up to a whole number of frames.
#zero_pad = numpy.zeros((buffer_length-(nSamples%buffer_length),nChannels)).astype('float')
#x = numpy.vstack((x,zero_pad))
#OR
#Drop last incomplete frame (this is what happens in the C++ code)
# Floor division keeps nFrames an int on Python 3 as well; with "/" it
# becomes a float there and the slice below raises TypeError.
nFrames = x.shape[0] // buffer_length
x = x[:nFrames*buffer_length]

assert(x.shape[0]%buffer_length == 0)

sig = aimc.SignalBank()
sig.Initialize(nChannels,buffer_length,sr)

# Each module gets its own default-constructed Parameters object.
pzfc = aimc.ModulePZFC(aimc.Parameters())
hcl = aimc.ModuleHCL(aimc.Parameters())
local_max = aimc.ModuleLocalMax(aimc.Parameters())
sai = aimc.ModuleSAI(aimc.Parameters())

# Chain: PZFC -> HCL -> local max -> SAI.
pzfc.AddTarget(hcl)
hcl.AddTarget(local_max)
local_max.AddTarget(sai)

global_params = aimc.Parameters()
# Only the head of the chain is initialized explicitly.
pzfc.Initialize(sig,global_params)

output_list = []
strobe_list = []
centre_freq_list=[]
#!/usr/bin/env python
# Copyright 2010, Thomas Walters
#
# AIM-C: A C++ implementation of the Auditory Image Model
# http://www.acousticscale.org/AIMC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import aimc

# Minimal run of ModuleGaussians: initialize it on a freshly initialized
# signal bank and process that bank once. No samples are written to the bank
# and no output is read back.

# Input bank: 115 channels, 1 sample per channel, 44100 Hz sample rate.
sig = aimc.SignalBank()
sig.Initialize(115, 1, 44100)

# Separate parameter sets for the module itself and for initialization.
module_params = aimc.Parameters()
global_params = aimc.Parameters()

mod_gauss = aimc.ModuleGaussians(module_params)
mod_gauss.Initialize(sig, global_params)
mod_gauss.Process(sig)
def main():
    """Dump intermediate outputs of the full feature pipeline to a .mat file.

    Reads a hard-coded wave file and feeds it in frames of
    ``frame_period_ms`` through two branches that both start at the
    gammatone filterbank:

    * gammatone -> smoothing HCL -> slice (nap_profile) -> scaler
    * gammatone -> HCL -> local max -> SAI -> SSI -> five slices, each
      followed by its own ModuleGaussians

    For every frame the scaler, SSI, slice and Gaussians outputs are
    collected, and everything is saved together with the gammatone centre
    frequencies to ``src/Scripts/profile_out.mat``.
    """
    # numpy replaces the scipy.zeros alias, which SciPy deprecated in its
    # root namespace.
    import numpy

    wave_path = "/Users/Tom/Documents/Work/PhD/HTK-AIM/Sounds/"
    file_name = "ii/ii172.5p112.5s100.0t+000itd"
    wave_suffix = ".wav"
    frame_period_ms = 10

    wave_filename = wave_path + file_name + wave_suffix
    (sample_rate, input_wave) = wavfile.read(wave_filename)
    buffer_length = int(frame_period_ms * sample_rate / 1000)

    input_sig = aimc.SignalBank()
    input_sig.Initialize(1, buffer_length, sample_rate)

    # One Parameters object is shared and mutated between constructor calls,
    # so the order of Set*/constructor calls below is kept exactly as is.
    parameters = aimc.Parameters()
    parameters.SetFloat("sai.frame_period_ms", 10.0)
    # NOTE(review): 480 is fixed while buffer_length is derived from the
    # file's sample rate; the two only agree at 48 kHz - confirm intent.
    parameters.SetInt("input.buffersize", 480)

    mod_gt = aimc.ModuleGammatone(parameters)
    mod_hl = aimc.ModuleHCL(parameters)
    mod_strobes = aimc.ModuleLocalMax(parameters)
    mod_sai = aimc.ModuleSAI(parameters)

    parameters.SetBool("ssi.pitch_cutoff", True)
    parameters.SetBool("ssi.weight_by_cutoff", False)
    parameters.SetBool("ssi.weight_by_scaling", True)
    parameters.SetBool("ssi.log_cycles_axis", True)
    mod_ssi = aimc.ModuleSSI(parameters)

    parameters.SetFloat("nap.lowpass_cutoff", 100.0)
    mod_nap_smooth = aimc.ModuleHCL(parameters)
    mod_scaler = aimc.ModuleScaler(parameters)

    # Slices 1-4 cover fixed index ranges; slice 5 and the NAP profile slice
    # are created with "slice.all" switched on.
    parameters.SetBool("slice.all", False)
    slice_bounds = ((77, 150), (210, 240), (280, 304), (328, 352))
    slices = []
    for lower_index, upper_index in slice_bounds:
        parameters.SetInt("slice.lower_index", lower_index)
        parameters.SetInt("slice.upper_index", upper_index)
        slices.append(aimc.ModuleSlice(parameters))
    parameters.SetBool("slice.all", True)
    slices.append(aimc.ModuleSlice(parameters))
    nap_profile = aimc.ModuleSlice(parameters)

    # One Gaussians module per slice.
    features = [aimc.ModuleGaussians(parameters) for _ in slices]

    mod_gt.AddTarget(mod_hl)
    mod_gt.AddTarget(mod_nap_smooth)
    mod_nap_smooth.AddTarget(nap_profile)
    nap_profile.AddTarget(mod_scaler)
    mod_hl.AddTarget(mod_strobes)
    mod_strobes.AddTarget(mod_sai)
    mod_sai.AddTarget(mod_ssi)
    for slice_module in slices:
        mod_ssi.AddTarget(slice_module)
    for slice_module, feature_module in zip(slices, features):
        slice_module.AddTarget(feature_module)

    mod_gt.Initialize(input_sig)

    # Scale samples by 2^15 - 1 (presumably 16-bit PCM input - confirm).
    full_scale = float(pow(2, 15) - 1)
    scaled_wave = [float(sample / full_scale) for sample in input_wave]

    # bmm, nap and sais are not collected (see the disabled lines in the
    # loop) but are still written to the .mat file as empty lists so that
    # the set of saved variables stays the same.
    out_bmm = []
    out_nap = []
    out_sais = []
    out_smooth_nap_profile = []
    out_ssis = []
    out_slices = [[] for _ in slices]
    out_feats = [[] for _ in features]

    # grouper() is called with a fill value of 0, so a short final chunk is
    # presumably zero-padded to buffer_length.
    for chunk in grouper(buffer_length, scaled_wave, 0):
        for index, sample in enumerate(chunk):
            input_sig.set_sample(0, index, float(sample))
        mod_gt.Process(input_sig)

        #out_bmm.append(BankToArray(mod_gt.GetOutputBank()))
        #out_nap.append(BankToArray(mod_hl.GetOutputBank()))
        out_smooth_nap_profile.append(BankToArray(mod_scaler.GetOutputBank()))
        #out_strobes.append(BankToArray(mod_strobes.GetOutputBank()))
        #out_sais.append(BankToArray(mod_sai.GetOutputBank()))
        out_ssis.append(BankToArray(mod_ssi.GetOutputBank()))
        for frames, slice_module in zip(out_slices, slices):
            frames.append(BankToArray(slice_module.GetOutputBank()))
        for frames, feature_module in zip(out_feats, features):
            frames.append(BankToArray(feature_module.GetOutputBank()))

    out_bank = mod_gt.GetOutputBank()
    channel_count = out_bank.channel_count()
    cfs = numpy.zeros((channel_count))
    for ch in range(channel_count):
        cfs[ch] = out_bank.centre_frequency(ch)

    outmat = dict(bmm=out_bmm, nap=out_nap, sais=out_sais, ssis=out_ssis,
                  slice1=out_slices[0], slice2=out_slices[1],
                  slice3=out_slices[2], slice4=out_slices[3],
                  slice5=out_slices[4],
                  feat1=out_feats[0], feat2=out_feats[1],
                  feat3=out_feats[2], feat4=out_feats[3],
                  feat5=out_feats[4],
                  nap_smooth=out_smooth_nap_profile,
                  centre_freqs=cfs)
    io.savemat("src/Scripts/profile_out.mat", outmat, oned_as='column')