def test_local_files():
    """Convolve the hi-hat test sample with two impulse responses and save it.

    The first channel of the input is convolved with the first channel of
    ``spaceEchoIR.wav`` and the result is convolved again with the first
    channel of ``echo2IR.wav``.  The output is converted with
    ``utility.float2pcm`` and written to
    ``./audio_dataset/convolved_hihat.wav``.
    """
    sample_rate, input_channels = wavefile.load(
        filename="./audio_dataset/test/hi_hat/ALCHH36.WAV")
    # Per the original note, the loaded data is already floating point.
    impulse_response = wavefile.load(filename="./impulse_responses/spaceEchoIR.wav")
    second_IR = wavefile.load(filename="./impulse_responses/echo2IR.wav")

    # Only channel 0 is processed, so the input is no longer unpacked into a
    # (left, right) pair: that unpack was unused and raised ValueError for
    # any input that did not have exactly two channels.
    output_signal = signal.fftconvolve(input_channels[0], impulse_response[1][0])
    output_signal2 = signal.fftconvolve(output_signal, second_IR[1][0])

    # Write at the rate the input was recorded at rather than assuming 44100.
    wavfile.write("./audio_dataset/convolved_hihat.wav", sample_rate,
                  utility.float2pcm(output_signal2))
def test_save(self):
    """Audio written with wavefile.save must load back with the same data and rate."""
    expected_rate = 44100
    expected_audio = self.fourSinusoids(samples=400)
    wavefile.save("file.wav", expected_audio, samplerate=expected_rate)

    # Read the file back and compare against what was written.
    loaded = wavefile.load("file.wav")
    actual_rate, actual_audio = loaded
    np_assert_almost_equal(actual_audio, expected_audio, decimal=7)
    self.assertEqual(actual_rate, expected_rate)
def test_save(self):
    """Save four sinusoids to ``file.wav`` and verify the round trip."""
    rate_written = 44100
    audio_written = self.fourSinusoids(samples=400)
    wavefile.save("file.wav", audio_written, samplerate=rate_written)

    rate_read, audio_read = wavefile.load("file.wav")

    # Data is compared to 7 decimals; the sample rate must match exactly.
    np_assert_almost_equal(audio_read, audio_written, decimal=7)
    self.assertEqual(rate_read, rate_written)
def test_save_asCOrder(self):
    """Round-trip check for audio passed to wavefile.save as a C-contiguous array."""
    expected_rate = 44100
    # Force C (row-major) memory layout before handing the data to save().
    expected_audio = np.ascontiguousarray(self.fourSinusoids(samples=400))
    wavefile.save("file.wav", expected_audio, samplerate=expected_rate)

    loaded = wavefile.load("file.wav")
    actual_rate, actual_audio = loaded
    np_assert_almost_equal(actual_audio, expected_audio, decimal=7)
    self.assertEqual(actual_rate, expected_rate)
def test_save_asCOrder(self):
    """Save a C-contiguous copy of four sinusoids and verify it loads back intact."""
    rate_written = 44100
    sinusoids = self.fourSinusoids(samples=400)
    audio_written = np.ascontiguousarray(sinusoids)
    wavefile.save("file.wav", audio_written, samplerate=rate_written)

    rate_read, audio_read = wavefile.load("file.wav")

    # Data is compared to 7 decimals; the sample rate must match exactly.
    np_assert_almost_equal(audio_read, audio_written, decimal=7)
    self.assertEqual(rate_read, rate_written)
def __init__(self, filename: str):
    """Load ``filename`` and expose its channels as attributes.

    ``signal_vector`` always holds the loaded sample data.  When its first
    dimension is 2, ``left`` and ``right`` are set to rows 0 and 1; when it
    is 1, ``mono`` is set to row 0.  For any other size none of the
    per-channel attributes are created.
    """
    channels = wavefile.load(filename=filename)[1]
    self.signal_vector = channels

    channel_count = self.signal_vector.shape[0]
    if channel_count == 2:
        self.left, self.right = self.signal_vector[0], self.signal_vector[1]
    elif channel_count == 1:
        self.mono = self.signal_vector[0]
def reduce_noise(path):
    """Denoise the first channel of the audio file at ``path`` and overwrite it.

    The samples from index ``samplerate`` up to ``2 * samplerate`` (the
    second second of the recording) are passed to ``nr.reduce_noise`` as the
    noise profile.  The denoised signal is written back to ``path`` at the
    file's own sample rate.

    Args:
        path: Path of the audio file to load and overwrite.

    Returns:
        None.
    """
    samplerate, channels = wavefile.load(path)
    data = channels[0]
    nr_data = nr.reduce_noise(audio_clip=np.array(data),
                              noise_clip=np.array(data[samplerate:2 * samplerate]),
                              verbose=False)
    # np.float_ was removed in NumPy 2.0; np.float64 is the type it aliased.
    # Converting directly also avoids the element-by-element list round trip.
    sf.write(path, np.asarray(nr_data, dtype=np.float64), samplerate)
    return
def getMinMaxAmpl(filename):
    """Return the smallest and largest absolute amplitude of the first channel.

    Both values are multiplied by 100 and converted to strings.

    Args:
        filename: Path of the audio file to load with ``wavefile.load``.

    Returns:
        A two-element list ``[minAmpl, maxAmpl]`` of strings.

    Raises:
        ValueError: If the channel contains no samples.
    """
    w = wavefile.load(filename)
    # Take the magnitude once and let the array reduce itself, instead of
    # calling abs() twice and iterating every sample with builtin min/max.
    magnitudes = abs(w[1][0])
    minAmpl = str(magnitudes.min() * 100)
    maxAmpl = str(magnitudes.max() * 100)
    return [minAmpl, maxAmpl]
def assertLoadWav(self, filename, expectedData=None, expectedSamplerate=44100, expectedShape=None):
    """Load ``filename`` and assert on its shape, data and sample rate.

    Args:
        filename: Path of the wav file to load.
        expectedData: If given, the loaded data must match it to 7 decimals.
        expectedSamplerate: Sample rate the file must report.
        expectedShape: If given, the exact shape the loaded data must have.
    """
    # Load the file the caller asked for; previously the argument was ignored
    # and "file.wav" was always read.
    samplerate, data = wavefile.load(filename)
    if expectedShape is not None:
        self.assertEqual(data.shape, expectedShape)
    if expectedData is not None:
        np_assert_almost_equal(expectedData, data, decimal=7)
    self.assertEqual(expectedSamplerate, samplerate)
def take_action(self, parsed_args):
    """Prepare state from the parsed command-line arguments and start the audition.

    Loads the configuration, validates the TSL path, prepares the recording
    destination unless ``no_record`` is set, loads the live set and the audio
    sample, opens a session and finally calls ``self.audition()``.

    Raises:
        Exception: If the TSL path does not exist or is not a regular file.
    """
    self.conf = config.load_config()

    tsl_path = os.path.abspath(parsed_args.tsl)
    self.tsl = tsl_path
    if not (os.path.exists(tsl_path) and os.path.isfile(tsl_path)):
        raise Exception('TSL file not found: %s' % tsl_path)
    base_name = os.path.basename(tsl_path)
    self.tsl_name = os.path.splitext(base_name)[0]

    self.no_record = parsed_args.no_record
    if not parsed_args.no_record:
        # Default destination is the TSL argument with a ".d" suffix.
        dest = parsed_args.dest if parsed_args.dest else '%s.d' % parsed_args.tsl
        self.dest = self.prep_dest(dest)

    self.liveset = tsl.load_tsl_from_file(parsed_args.tsl, self.conf)

    sample_rate, samples = wavefile.load(parsed_args.sample)
    self.sample_rate = sample_rate
    # Stored transposed relative to what wavefile.load returned.
    self.play_data = samples.T

    self.session = io.Session(self.conf, fake=parsed_args.no_send)
    self.audition()
def reduce_noise(song_file):
    """Denoise the first channel of ``song_file`` and save it to ``reduced_noise_file.wav``.

    Samples 1500 to 1999 of the channel are used as the noise profile.

    Args:
        song_file: Path of the wav file to load.

    Returns:
        None.  The denoised audio is written to ``reduced_noise_file.wav`` in
        the current working directory.
    """
    # use wavefile module to convert wav from int to float
    samplerate, channels = wv.load(song_file)
    data = channels[0]
    # select section of data that is noise
    noisy_part = data[1500:2000]
    # perform noise reduction
    reduced_noise = nr.reduce_noise(audio_clip=data, noise_clip=noisy_part,
                                    n_std_thresh=1.5, prop_decrease=1, verbose=False)
    # Write the denoised signal at the file's own sample rate.  Previously the
    # untouched input was written at a hard-coded 44100 Hz, which discarded
    # the noise reduction entirely.
    write("reduced_noise_file.wav", samplerate, reduced_noise)  # remember to delete this file after
    return
def analyzeHarmonicRatios(grain):
    """Return ratios of the fundamental's energy to the energy at its harmonics.

    The first channel of ``grain["file"]`` is used.  Its fundamental is
    estimated with the "yin" pitch method over one window spanning the whole
    signal.  The Hann-windowed signal's periodogram is converted with
    ``10 * log10`` and sampled at the fundamental and at the next four
    multiples of it (2x to 5x).

    Args:
        grain: Mapping with a ``"file"`` entry naming the audio file.

    Returns:
        A list of four ratios, or ``None`` when the fundamental exceeds 4409
        or any ratio is NaN or infinite.
    """
    maxPermissableFreq = 4409  # Maximum to get 4 harmonics
    numHarmonics = 4

    w = wavefile.load(grain["file"])
    data = w[1][0]
    numFrames = len(data)
    s = source(grain["file"], w[0], numFrames)
    samplerate = s.samplerate

    # Compute the fundamental using the "yin" algorithm
    pitch_o = pitch("yin", numFrames, numFrames, samplerate)
    samples, _ = s()
    fundamental = pitch_o(samples)[0]
    if fundamental > maxPermissableFreq:
        return None

    # Get the periodogram to get energies at harmonics
    data = data * numpy.hanning(numFrames)
    f, Pxx_den = signal.periodogram(data, w[0])
    Pxx_den = 10 * numpy.log10(Pxx_den)

    # Set the current harmonic to be twice the fundamental
    fundEnergy = Pxx_den[freqToBin(f, fundamental)]
    curHarm = fundamental * 2
    ratios = []
    for curHarmCount in range(numHarmonics):
        # Computed once and reused, instead of repeating the bin lookup and
        # division when appending.
        ratio = fundEnergy / Pxx_den[freqToBin(f, curHarm)]
        # Do not allow infinites, probably caused by 0 energy
        if not math.isfinite(ratio):
            print("Ratio " + str(curHarmCount) + " is " + str(ratio))
            return None
        ratios.append(ratio)
        curHarm += fundamental
    return ratios
def analyzeHarmonicRatios(grain):
    """Compute fundamental-to-harmonic energy ratios for one grain.

    Loads the first channel of ``grain["file"]``, estimates its fundamental
    with the "yin" pitch method using a single window as long as the signal,
    and compares the ``10 * log10`` periodogram value at the fundamental with
    the values at 2x, 3x, 4x and 5x the fundamental.

    Args:
        grain: Mapping with a ``"file"`` entry naming the audio file.

    Returns:
        A list of four ratios, or ``None`` when the fundamental is above 4409
        or a ratio turns out NaN or infinite.
    """
    maxPermissableFreq = 4409  # Maximum to get 4 harmonics
    numHarmonics = 4

    w = wavefile.load(grain["file"])
    data = w[1][0]
    frameCount = len(data)
    s = source(grain["file"], w[0], frameCount)
    samplerate = s.samplerate

    # Compute the fundamental using the "yin" algorithm
    pitch_o = pitch("yin", frameCount, frameCount, samplerate)
    samples, _ = s()
    fundamental = pitch_o(samples)[0]
    if fundamental > maxPermissableFreq:
        return None

    # Get the periodogram to get energies at harmonics
    data = data * numpy.hanning(frameCount)
    f, Pxx_den = signal.periodogram(data, w[0])
    Pxx_den = 10 * numpy.log10(Pxx_den)

    # Set the current harmonic to be twice the fundamental
    fundEnergy = Pxx_den[freqToBin(f, fundamental)]
    curHarm = fundamental * 2
    ratios = []
    for curHarmCount in range(numHarmonics):
        # One lookup and division per harmonic; the value is reused for both
        # the validity check and the result list.
        ratio = fundEnergy / Pxx_den[freqToBin(f, curHarm)]
        # Do not allow infinites, probably caused by 0 energy
        if not math.isfinite(ratio):
            print("Ratio " + str(curHarmCount) + " is " + str(ratio))
            return None
        ratios.append(ratio)
        curHarm += fundamental
    return ratios
def normalization_factor(self) -> float: return self.step_response().max() def as_convolved_filter(self): # RENAME return self.byte_array def raw_signal_channels(self): return [self.byte_array] @convolve.register def _alias0(signal1: numpy.ndarray, signal2: numpy.ndarray) -> numpy.ndarray: return signal.fftconvolve(signal1, signal2) @convolve.register def _alias3(filter1: MonoFilter, filter: numpy.ndarray) -> numpy.ndarray: return convolve(filter1.byte_array, filter) input_signal = wavefile.load(filename="./audio_dataset/test/hi_hat/ALCHH36.WAV") impulse_response = wavefile.load(filename="./impulse_responses/spaceEchoIR.wav") # already floating point bytearray second_IR = wavefile.load(filename="./impulse_responses/echo2IR.wav") (left) = input_signal[1] wv = wavefile.load("/users/usuario/Desktop/bad.wav") (trackleft, trackright) = wv[1] (irleft) = impulse_response[1] mf = MonoFilter(irleft) # convolve(trackleft, mf) # convolve(mf, trackleft) [irrealleft] = irleft convolve(trackleft, irrealleft) convolve(MonoFilter(irrealleft), trackleft) convolve(trackleft, MonoFilter(irrealleft)) mf = MonoFilter(irrealleft)
help="Model file.") args = parser.parse_args() # =============================================== # Feature extraction # =============================================== # Get the type of the signal file file_name = args.signal.split("/")[-1] file_format = file_name.split(".")[1] # Load signal - for now, only works with wav or numpy files if file_format == "npy": signal = np.load(args.signal) else: (rate, sig) = wavefile.load(args.signal) signal = sig[0] # Frame and compute MFCCs S = np.transpose( frame(signal, int(args.frame_len * 16), int(args.hop_len * 16))) # For now, only 16kHz sampling rate can be used X = list(map(lambda s: feature_extractor(s, 16000), S)) X = np.array(np.swapaxes(X, 1, 2)) X = X.astype( np.float16 ) # Compression to save memory, 16-bit MFCCs have also been used in the training of the current_best.h5 num_timesteps = X.shape[1] # ===============================================
def wav_to_floats(filename):
    """Return the first channel of the audio file ``filename``."""
    loaded = wavefile.load(filename)
    channels = loaded[1]
    first_channel = channels[0]
    return first_channel
import numpy as np


# Lets setup some synthesis audio:
def sinusoid(samples, f, samplerate=44100):
    """Return ``samples`` frames of a sine wave at ``f`` Hz as a (samples, 1) column."""
    # endpoint=False makes frame k sit at phase 2*pi*f*k/samplerate.  With the
    # default endpoint=True the ramp is stretched so its *last* frame lands on
    # the end phase, which yields f*samples/(samples-1) Hz instead of f.
    phase = np.linspace(0, 2*np.pi*f*samples/samplerate, samples, endpoint=False)
    return np.sin(phase)[:, np.newaxis]


def channels(*args):
    """Stack column signals side by side and transpose to (channels, frames)."""
    return np.hstack(args).T


audio = channels(
    sinusoid(100000, 440),
    sinusoid(100000, 880),
    sinusoid(100000, 1760),
)

# This is how you save it
wavefile.save("sinusoid.wav", audio, 44100)

# And this is how you load it again
loadedsamplerate, loaded = wavefile.load("sinusoid.wav")
print("Loaded audio has shape", loaded.shape)
channel1, channel2, channel3 = loaded
def getSignal(utterance):
    """Load ``utterance`` and return ``(first_channel, samplerate)``.

    The full multi-channel array is printed before the first channel is
    selected.
    """
    loaded = wavefile.load(utterance)
    samplerate, all_channels = loaded
    print(all_channels)
    first_channel = all_channels[0]
    return first_channel, samplerate
def test_load(self):
    """A wav written by the test helper must load back with the same data at 44100."""
    expected_audio = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", expected_audio)

    loaded = wavefile.load("file.wav")
    actual_rate, actual_audio = loaded

    np_assert_almost_equal(actual_audio, expected_audio, decimal=7)
    self.assertEqual(actual_rate, 44100)
def transcript_label_generator(audio_file, paths):
    """Label every sample of a recording with a speaker id and drop unlabelled samples.

    Two label tracks are built from the XML files found in ``paths[1]`` and
    ``paths[2]`` (one column of ``tc`` each).  Within a track, a sample gets
    the 1-based index of the file that covers it, or -1 once a second range
    lands on it.  Only samples that are labelled consistently in both tracks
    (positive in both, or -1 in both) are kept.

    NOTE(review): the process working directory is changed with ``os.chdir``
    three times and never restored, so relative entries in ``paths`` are
    resolved against the previously entered directory.

    Args:
        audio_file: File name of the recording, looked up inside ``paths[0]``.
            The text before the first "." is used as the prefix for finding
            transcript files.
        paths: Sequence of three directories: audio, first transcript set,
            second transcript set.
            # presumably word-level and ASR transcripts, judging by the
            # variable names below -- confirm against the caller.

    Returns:
        Tuple ``(sig, transcript)``: the first audio channel restricted to
        the kept samples, and the label of each kept sample taken from the
        first track.
    """
    # Audio
    os.chdir(paths[0])
    (rate, sig) = wavefile.load(audio_file)
    # Words to be excluded
    # Index j selects the extra exclusions for track j; only the second track
    # excludes the filler words.
    bad_words = [[], ["uh", "huh", "uh-huh", "uh_huh"]]
    bad_commas = [None, ".", ",", "?"]
    # One row per sample of the first channel, one column per track.
    tc = np.zeros((len(sig[0]), 2))
    for j in np.arange(2):
        os.chdir(paths[j + 1])
        audio_id = audio_file.split(".")[0]
        tc_files = glob.glob(audio_id + "*")
        for i, file in enumerate(tc_files):
            tree = ET.parse(file)
            root = tree.getroot()
            # Speaker indexing
            # The speaker id is the file's position in the glob result, so it
            # depends on the order glob returns the files in.
            speaker = i + 1
            for child in root:
                v = child.attrib
                word = child.text
                # Determine if word is excluded
                # exclude_word is defined elsewhere; here it is expected to
                # return True for an excluded word, otherwise something
                # indexable as (word, start, end).
                excword = exclude_word(v, word, bad_commas + bad_words[j])
                if excword == True:
                    continue
                else:
                    # word is not read again in this function.
                    word = excword[0]
                    start = excword[1]
                    end = excword[2]
                # Mark indices with overlap
                # temp_sig is a view into tc, so the -1 write below is visible
                # through it; the later "== 0" test therefore only matches
                # samples that were still unlabelled.  Do not reorder.
                temp_sig = tc[start:end, j]
                ol_indices = np.where(temp_sig != 0)[0] + start
                tc[ol_indices, j] = -1
                # Individual speaker indices
                is_indices = np.where(temp_sig == 0)[0] + start
                tc[is_indices, j] = speaker
    sig = sig[0]
    # Initialize final transcriptions
    vad_tc = np.zeros((len(sig)))
    # Intersection of segments with one speaker
    os_indices_words = np.where(tc[:, 0] > 0)[0]
    os_indices_ASR = np.where(tc[:, 1] > 0)[0]
    os_indices = np.intersect1d(os_indices_words, os_indices_ASR)
    # Intersection of segments with multiple speakers
    ms_indices_words = np.where(tc[:, 0] == -1)[0]
    ms_indices_ASR = np.where(tc[:, 1] == -1)[0]
    ms_indices = np.intersect1d(ms_indices_words, ms_indices_ASR)
    # Concatenation + VAD
    # Labels are always copied from the first track (column 0).
    vad_tc[os_indices] = tc[os_indices, 0]
    vad_tc[ms_indices] = tc[ms_indices, 0]
    # Keep only samples that received a label; everything still 0 is dropped
    # from both the labels and the audio.
    vad_indices = np.where(vad_tc != 0)[0]
    vad_tc = vad_tc[vad_indices]
    sig = sig[vad_indices]
    transcript = vad_tc
    return sig, transcript
"5539381671692122744.mp4": [], "5542003749222140011.mp4": [], "5544574287152993687.mp4": [], "5544620672795594434.mp4": [], "5547193787702629969.mp4": [], "5549784941472309008.mp4": [], "5552368364300855101.mp4": [], "5555325449284154780.mp4": [], "5555360238519252381.mp4": [] } datapath = f"./data/{d}" wavs = [f.name for f in os.scandir(datapath) if f.name.endswith(".wav")] wavs.sort() for wavfile in wavs: print(f"Diarizing file {wavfile} now.") (rate, sig) = wavefile.load(f"{datapath}/{wavfile}") signal = sig[0] S = np.transpose(frame(signal, int(2000 * 16), int(500 * 16))) X = list(map(lambda s: fe(s, 16000), S)) X = np.array(np.swapaxes(X, 1, 2)) X = X.astype(np.float16) num_timesteps = X.shape[1] if num_timesteps != 201: emb_model.layers.pop(0) new_input = Input(batch_shape=(None, num_timesteps, 30)) new_output = emb_model(new_input) emb_model = Model(new_input, new_output) embs = emb_model.predict(X) try:
def test_load(self):
    """Write four sinusoids with the test helper and verify wavefile.load reads them back."""
    audio_written = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", audio_written)

    rate_read, audio_read = wavefile.load("file.wav")

    # Data is compared to 7 decimals; the rate must be exactly 44100.
    np_assert_almost_equal(audio_read, audio_written, decimal=7)
    self.assertEqual(rate_read, 44100)
maximum_length = 800000 # this is about the maximum length labels = {"snare": 1, "kick": 2, "hi_hat": 3} # load training data training_labels = [] training_values = [] file_directory = './audio_dataset/train/hi_hat' file_list = [ f for f in os.listdir(file_directory) if os.path.isfile(os.path.join(file_directory, f)) and (f != '.DS_Store') ] for fname in file_list: imported_wave = wavefile.load(filename=file_directory + "/" + fname) mono_channel = imported_wave[1][0] # we want the left channel, or mono #mono_channel = numpy.concatenate((hp_filter.convolve(mono_channel), lp_filter.convolve(mono_channel), bp_filter.convolve(mono_channel))) mono_channel = complex_coefficients(mono_channel) normalized_channel = ( numpy.array(mono_channel) ) / 50 # DO THIS IN ONE PASS AS PART OF COMPLEX COEFFICIENTS -- PASS NORMALIZATION FACTOR training_labels.append(labels["hi_hat"]) training_values.append(padded(normalized_channel, maximum_length)) print("done") file_directory = './audio_dataset/train/snare' file_list = [ f for f in os.listdir(file_directory) if os.path.isfile(os.path.join(file_directory, f)) and (f != '.DS_Store') ]