def writeWaveAtPitch(noteOffset, samplePath):
    # Relies on module-level samples, x, getWave, pitchShift and pydub's
    # AudioSegment being defined elsewhere in the script.
    fileName = samplePath + '/' + str(noteOffset + (12 * 6)) + ".wav"
    factor = 2.0**(1.0 * noteOffset / 12.0)
    snd = AudioSegment(data=pitchShift(getWave(samples, x['a'], x['b']),
                                       factor),
                       sample_width=1,
                       frame_rate=44100,
                       channels=1)
    snd = snd.set_channels(2)
    snd = snd.set_sample_width(2)
    snd.export(fileName, format="wav")

    # filthy hack for looping :(
    thing = wavfile.read(fileName)
    wavfile.write(fileName, thing[0], thing[1],
                  loops=[{
                      'cuepointid': 0,
                      'datatype': 0,
                      'start': 0,
                      'end': len(thing[1]),
                      'fraction': 0,
                      'playcount': 0
                  }])
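# The `loops` dict above mirrors one loop record of a RIFF `smpl` chunk:
# 'start' and 'end' are the loop points (here, offsets into the sample
# data), 'datatype' 0 selects a forward loop, and 'playcount' 0 means
# "repeat indefinitely" in the smpl convention.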
def parse_bin(input_filename, output_filename):
    with open(input_filename, "rb") as f:
        data = f.read()

    if data[0:4].decode('ascii') != "BMP\0":
        print("Not a BMP audio file")
        exit(1)

    # Note the mixed endianness: sizes/offsets and the sample rate are
    # big-endian, while the channel count and bit depth are little-endian.
    data_size, loop_start, loop_end = struct.unpack(">III", data[0x04:0x10])
    channels, bits = struct.unpack("<HH", data[0x10:0x14])
    rate, = struct.unpack(">I", data[0x14:0x18])

    is_looped = loop_start > 0 or loop_end > 0
    if is_looped:
        loops = [(loop_start, loop_end)]
        print("Found loop offsets: start = %d, end = %d" % (loop_start,
                                                            loop_end))
        # foobar2000 plugin (rename .wav to .wavloop):
        # http://slemanique.com/software/foo_input_wave_loop.html
        print("Loop information will be stored in a SMPL chunk for playback "
              "in players that have support for SMPL loops")
    else:
        loops = None

    data = bytearray(data[0x20:])
    output = adpcmwave.decode_data(data, rate, channels, bits)
    output = numpy.ndarray((int(len(output) // 2 // channels), channels),
                           numpy.int16, output, 0)
    wavfile.write(output_filename, rate, output, loops=loops)
def writeWaveAtPitch(snd, noteOffset, samplePath):
    fileName = samplePath + '/' + str(noteOffset + (12 * 6)) + ".wav"
    factor = 2.0**(1.0 * noteOffset / 12.0)
    # Pitch-shift by resampling: raise the frame rate, rounded to an
    # even value, then normalize back to 44100 Hz.
    new_sample_rate = (int(snd.frame_rate * factor) // 2) * 2
    shifted_sound = snd._spawn(snd.raw_data,
                               overrides={'frame_rate': new_sample_rate})
    shifted_sound = shifted_sound.set_frame_rate(44100)
    shifted_sound = shifted_sound.set_channels(2)
    shifted_sound = shifted_sound.set_sample_width(2)
    shifted_sound.export(fileName, format="wav")

    # filthy hack for looping :(
    thing = wavfile.read(fileName)
    wavfile.write(fileName, thing[0], thing[1],
                  loops=[{
                      'cuepointid': 0,
                      'datatype': 0,
                      'start': 0,
                      'end': len(thing[1]),
                      'fraction': 0,
                      'playcount': 0
                  }])
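# A minimal usage sketch for writeWaveAtPitch, assuming a 44100 Hz source
# sample named 'sample.wav' and an existing output directory 'bank' (both
# names hypothetical): one file per semitone, an octave in each direction,
# which the function names 60.wav through 84.wav.
from pydub import AudioSegment

snd = AudioSegment.from_wav('sample.wav')
for noteOffset in range(-12, 13):
    writeWaveAtPitch(snd, noteOffset, 'bank')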
def freqManip():
    # length of data to read.
    chunk = 1199520
    # open the file for reading.
    nh = wave.open("GameSong.wav")
    nwidth = nh.getsampwidth()
    # create an audio object
    p = pyaudio.PyAudio()
    # open stream based on the wave object which has been input.
    stream = p.open(format=p.get_format_from_width(nh.getsampwidth()),
                    channels=nh.getnchannels(),
                    rate=nh.getframerate(),
                    output=True)
    data = nh.readframes(chunk)

    # This is the block that manipulates audio.
    # Unpack the frames into samples and double the amplitude.
    data = np.array(struct.unpack("%dh" % (len(data) // nwidth), data)) * 2
    data = np.fft.fft(data)
    # Collapse each bin to the sum of its real and imaginary parts
    # (earlier experiments tried the magnitude, (re^2 + im^2)**0.5,
    # instead), leaving a purely real spectrum.
    data = data.real + data.imag
    data = np.fft.ifft(data)
    dataout = np.array(data.real, dtype='int16')
    # %dh = that many signed shorts; see
    # https://docs.python.org/2/library/stdtypes.html#string-formatting-operations
    chunkout = struct.pack("%dh" % len(dataout), *list(dataout))
    # writing to the stream is what *actually* plays the sound.
    stream.write(chunkout)
    wavfile.write("realPlusImag.wav", nh.getframerate() * 2, dataout)
    stream.stop_stream()
    stream.close()
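# What the real+imag collapse does, in miniature: a real-plus-imag spectrum
# is purely real, and the real part of its inverse FFT is the even part of
# the signal, i.e. a 50/50 mix of the signal and its circular time reversal.
# This is a DFT property, shown here as a self-contained sketch:
import numpy as np

x = np.sin(2 * np.pi * 5 * np.linspace(0, 1, 64, endpoint=False))
X = np.fft.fft(x)
y = np.fft.ifft(X.real + X.imag).real
mix = 0.5 * (x + np.roll(x[::-1], 1))  # (x[n] + x[-n mod N]) / 2
print(np.allclose(y, mix))  # True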
def convolve(args):
    sample_res = wavfile.read(args['input'], normalized=True,
                              forcestereo=True)
    impulse_res = wavfile.read(args['impulse'], normalized=True,
                               forcestereo=True)
    debug = args['debug']
    stereo = args['channels'] == 'stereo'

    if debug:
        logger.debug('sample data: \n{data}', data=sample_res[1])
        logger.debug('impulse_res: \n{data}', data=impulse_res[1])

    sr = sample_res[1]
    ir = impulse_res[1]

    if debug:
        logger.debug('sample data as float: \n{data}', data=sr)

    if args['output'] == 'convolve':
        # use numpy convolve
        logger.info('Using numpy.convolve')
        out_0 = numpy.convolve(sr[:, 0], ir[:, 0])
        if stereo:
            out_1 = numpy.convolve(sr[:, 1], ir[:, 1])
    else:
        # use scipy fftconvolve
        logger.info('Using scipy.signal.fftconvolve')
        out_0 = signal.fftconvolve(sr[:, 0], ir[:, 0])
        if stereo:
            out_1 = signal.fftconvolve(sr[:, 1], ir[:, 1])

    if stereo:
        # merge channels
        out = numpy.vstack((out_0, out_1)).T
    else:
        out = out_0.T

    # save output
    wavfile.write(args['output'], sample_res[0], out, normalized=True)

    if args['play']:
        playsound(args['output'])
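# The two branches above are interchangeable up to floating-point error;
# fftconvolve is just much faster for long impulse responses. A quick
# self-contained check:
import numpy
from scipy import signal

x = numpy.random.randn(1024)
k = numpy.random.randn(128)
print(numpy.allclose(numpy.convolve(x, k),
                     signal.fftconvolve(x, k)))  # True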
def fastSong():
    chunk = 599760
    # open the file for reading.
    nh = wave.open("GameSong.wav")
    nwidth = nh.getsampwidth()
    # create an audio object
    p = pyaudio.PyAudio()
    # open stream based on the wave object which has been input.
    stream = p.open(format=p.get_format_from_width(nh.getsampwidth()),
                    channels=nh.getnchannels(),
                    rate=nh.getframerate(),
                    output=True)
    data = nh.readframes(chunk)
    # Unpack to int16, double the amplitude, and write the same samples
    # back at four times the frame rate to speed the song up.
    data = np.array(struct.unpack("%dh" % (len(data) // nwidth), data)) * 2
    data = np.array(data, dtype='int16')
    wavfile.write("fast.wav", nh.getframerate() * 4, data)
    stream.close()
bank = logfbank(signal[:rate], rate, nfilt=26, nfft=1103).T
fbank[c] = bank
mel = mfcc(signal[:rate], rate, numcep=13, nfilt=26, nfft=1103).T
mfccs[c] = mel

# plot_signals(signals)
# plt.show()

# plot_fft(fft)
# plt.show()

# plot_fbank(fbank)
# plt.show()

# plot_mfccs(mfccs)
# plt.show()

if not os.path.exists('clean'):
    os.makedirs('clean')

if len(os.listdir('clean')) == 0:
    for f in tqdm(df.fname):
        try:
            wavdir = find_wavdir(f)
            # signal, rate = librosa.load(wavdir, sr=22050)
            _, signal, _ = wavfile.read(wavdir)
            rate = 22050
            # mask = envelope(signal, rate, 0.0005)
            wavfile.write('clean/' + f, rate, signal)
        except Exception:
            logging.exception("AudioWavWritingError")
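# `envelope` above is commented out and not defined in this snippet. A
# common implementation for this kind of noise-floor mask (an assumption,
# not necessarily the author's) gates on a rolling mean of the magnitude:
import numpy as np
import pandas as pd

def envelope(sig, rate, threshold):
    # True wherever the windowed mean amplitude exceeds the threshold.
    y = pd.Series(np.abs(sig))
    y_mean = y.rolling(window=int(rate / 10), min_periods=1,
                       center=True).mean()
    return (y_mean > threshold).to_numpy()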
def train(self, config, devices):
    """ Train the SEAE """
    print('Initializing optimizer...')
    # init optimizer
    g_opt = self.g_opt
    num_devices = len(devices)
    try:
        init = tf.global_variables_initializer()
    except AttributeError:
        # fall back to old implementation
        init = tf.initialize_all_variables()
    print('Initializing variables...')
    self.sess.run(init)
    self.saver = tf.train.Saver()
    self.g_sum = tf.summary.merge([
        self.g_loss_sum, self.gen_summ, self.rl_audio_summ,
        self.real_w_summ, self.gen_audio_summ
    ])

    if not os.path.exists(os.path.join(config.save_path, 'train')):
        os.makedirs(os.path.join(config.save_path, 'train'))
    self.writer = tf.summary.FileWriter(
        os.path.join(config.save_path, 'train'), self.sess.graph)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print('Sampling some wavs to store sample references...')
    # Hang onto a copy of wavs so we can feed the same one every time
    # we store samples to disk for hearing
    # pick a single batch
    sample_noisy, \
        sample_wav = self.sess.run([self.gtruth_noisy[0],
                                    self.gtruth_wavs[0]])
    print('sample noisy shape: ', sample_noisy.shape)
    print('sample wav shape: ', sample_wav.shape)

    save_path = config.save_path
    counter = 0
    # count number of samples
    num_examples = 0
    for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
        num_examples += 1
    print('total examples in TFRecords {}: {}'.format(
        self.e2e_dataset, num_examples))
    # last samples (those not filling a complete batch) are discarded
    num_batches = num_examples // self.batch_size
    print('Batches per epoch: ', num_batches)

    if self.load(self.save_path):
        print('[*] Load SUCCESS')
    else:
        print('[!] Load failed')

    batch_idx = 0
    curr_epoch = 0
    batch_timings = []
    g_losses = []
    try:
        while not coord.should_stop():
            start = timeit.default_timer()
            if counter % config.save_freq == 0:
                # now G iterations
                _g_opt, _g_sum, \
                    g_loss = self.sess.run([g_opt, self.g_sum,
                                            self.g_losses[0]])
            else:
                _g_opt, \
                    g_loss = self.sess.run([g_opt, self.g_losses[0]])
            end = timeit.default_timer()
            batch_timings.append(end - start)
            g_losses.append(g_loss)
            print('{}/{} (epoch {}), g_loss = {:.5f},'
                  ' time/batch = {:.5f}, '
                  'mtime/batch = {:.5f}'.format(counter,
                                                config.epoch * num_batches,
                                                curr_epoch,
                                                g_loss,
                                                end - start,
                                                np.mean(batch_timings)))
            batch_idx += num_devices
            counter += num_devices
            if (counter // num_devices) % config.save_freq == 0:
                self.save(config.save_path, counter)
                self.writer.add_summary(_g_sum, counter)
                fdict = {self.gtruth_noisy[0]: sample_noisy}
                canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
                swaves = sample_wav
                sample_dif = sample_wav - sample_noisy
                for m in range(min(20, canvas_w.shape[0])):
                    print('w{} max: {} min: {}'.format(m, np.max(canvas_w[m]),
                                                       np.min(canvas_w[m])))
                    wavfile.write(
                        os.path.join(save_path,
                                     'sample_{}-{}.wav'.format(counter, m)),
                        16e3, canvas_w[m])
                    if not os.path.exists(
                            os.path.join(save_path,
                                         'gtruth_{}.wav'.format(m))):
                        wavfile.write(
                            os.path.join(save_path,
                                         'gtruth_{}.wav'.format(m)),
                            16e3, swaves[m])
                        wavfile.write(
                            os.path.join(save_path,
                                         'noisy_{}.wav'.format(m)),
                            16e3, sample_noisy[m])
                        wavfile.write(
                            os.path.join(save_path,
                                         'dif_{}.wav'.format(m)),
                            16e3, sample_dif[m])
                np.savetxt(os.path.join(save_path, 'g_losses.txt'), g_losses)

            if batch_idx >= num_batches:
                curr_epoch += 1
                # re-set batch idx
                batch_idx = 0
                if curr_epoch >= config.epoch:
                    # done training
                    print('Done training; epoch limit {} '
                          'reached.'.format(self.epoch))
                    print('Saving last model at iteration {}'.format(counter))
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    break
    except tf.errors.OutOfRangeError:
        print('[!] Reached queues limits in training loop')
    finally:
        coord.request_stop()
        coord.join(threads)
def train(self, config, devices):
    """ Train the SEGAN """
    print('Initializing optimizers...')
    # init optimizers
    d_opt = self.d_opt
    g_opt = self.g_opt
    num_devices = len(devices)
    try:
        init = tf.global_variables_initializer()
    except AttributeError:
        # fall back to old implementation
        init = tf.initialize_all_variables()
    print('Initializing variables...')
    self.sess.run(init)

    g_summs = [
        self.d_fk_sum,
        # self.d_nfk_sum,
        self.d_fk_loss_sum,
        # self.d_nfk_loss_sum,
        self.g_loss_sum,
        self.g_loss_l1_sum,
        self.g_loss_adv_sum,
        self.gen_summ,
        self.gen_audio_summ
    ]
    # if we have prelus, add them to summary
    if hasattr(self, 'alpha_summ'):
        g_summs += self.alpha_summ
    self.g_sum = tf.summary.merge(g_summs)
    self.d_sum = tf.summary.merge([
        self.d_loss_sum, self.d_rl_sum, self.d_rl_loss_sum,
        self.rl_audio_summ, self.real_w_summ, self.disc_noise_std_summ
    ])

    if not os.path.exists(os.path.join(config.save_path, 'train')):
        os.makedirs(os.path.join(config.save_path, 'train'))
    self.writer = tf.summary.FileWriter(
        os.path.join(config.save_path, 'train'), self.sess.graph)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print('Sampling some wavs to store sample references...')
    # Hang onto a copy of wavs so we can feed the same one every time
    # we store samples to disk for hearing
    # pick a single batch
    sample_noisy, sample_wav, \
        sample_z = self.sess.run([self.gtruth_noisy[0],
                                  self.gtruth_wavs[0],
                                  self.zs[0]])
    print('sample noisy shape: ', sample_noisy.shape)
    print('sample wav shape: ', sample_wav.shape)
    print('sample z shape: ', sample_z.shape)

    save_path = config.save_path
    counter = 0
    # count number of samples
    num_examples = 0
    for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
        num_examples += 1
    print('total examples in TFRecords {}: {}'.format(
        self.e2e_dataset, num_examples))
    # last samples (those not filling a complete batch) are discarded
    num_batches = num_examples // self.batch_size
    print('Batches per epoch: ', num_batches)

    if self.load(self.save_path):
        print('[*] Load SUCCESS')
    else:
        print('[!] Load failed')

    batch_idx = 0
    curr_epoch = 0
    batch_timings = []
    d_fk_losses = []
    # d_nfk_losses = []
    d_rl_losses = []
    g_adv_losses = []
    g_l1_losses = []
    try:
        while not coord.should_stop():
            start = timeit.default_timer()
            if counter % config.save_freq == 0:
                for d_iter in range(self.disc_updates):
                    _d_opt, _d_sum, d_fk_loss, d_rl_loss = self.sess.run([
                        d_opt, self.d_sum, self.d_fk_losses[0],
                        self.d_rl_losses[0]
                    ])
                    if self.d_clip_weights:
                        self.sess.run(self.d_clip)
                # now G iterations
                _g_opt, _g_sum, g_adv_loss, g_l1_loss = self.sess.run([
                    g_opt, self.g_sum, self.g_adv_losses[0],
                    self.g_l1_losses[0]
                ])
            else:
                for d_iter in range(self.disc_updates):
                    _d_opt, \
                        d_fk_loss, \
                        d_rl_loss = self.sess.run([d_opt,
                                                   self.d_fk_losses[0],
                                                   # self.d_nfk_losses[0],
                                                   self.d_rl_losses[0]])
                    if self.d_clip_weights:
                        self.sess.run(self.d_clip)
                _g_opt, \
                    g_adv_loss, \
                    g_l1_loss = self.sess.run([g_opt,
                                               self.g_adv_losses[0],
                                               self.g_l1_losses[0]])
            end = timeit.default_timer()
            batch_timings.append(end - start)
            d_fk_losses.append(d_fk_loss)
            # d_nfk_losses.append(d_nfk_loss)
            d_rl_losses.append(d_rl_loss)
            g_adv_losses.append(g_adv_loss)
            g_l1_losses.append(g_l1_loss)
            print('{}/{} (epoch {}), d_rl_loss = {:.5f}, '
                  'd_fk_loss = {:.5f}, '
                  'g_adv_loss = {:.5f}, g_l1_loss = {:.5f},'
                  ' time/batch = {:.5f}, '
                  'mtime/batch = {:.5f}'.format(counter,
                                                config.epoch * num_batches,
                                                curr_epoch,
                                                d_rl_loss,
                                                d_fk_loss,
                                                g_adv_loss,
                                                g_l1_loss,
                                                end - start,
                                                np.mean(batch_timings)))
            batch_idx += num_devices
            counter += num_devices
            print("counter:", counter, " num_devices:", num_devices,
                  " config.save_freq:", config.save_freq)
            if (counter // num_devices) % config.save_freq == 0:
                self.save(config.save_path, counter)
                self.writer.add_summary(_g_sum, counter)
                self.writer.add_summary(_d_sum, counter)
                fdict = {
                    self.gtruth_noisy[0]: sample_noisy,
                    self.zs[0]: sample_z
                }
                canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
                swaves = sample_wav
                sample_dif = sample_wav - sample_noisy
                for m in range(min(20, canvas_w.shape[0])):
                    print('w{} max: {} min: {}'.format(m, np.max(canvas_w[m]),
                                                       np.min(canvas_w[m])))
                    wavfile.write(
                        os.path.join(save_path,
                                     'sample_{}-{}.wav'.format(counter, m)),
                        16e3, de_emph(canvas_w[m], self.preemph))
                    m_gtruth_path = os.path.join(save_path,
                                                 'gtruth_{}.wav'.format(m))
                    if not os.path.exists(m_gtruth_path):
                        print('save_path:', save_path)
                        wavfile.write(
                            os.path.join(save_path,
                                         'gtruth_{}.wav'.format(m)),
                            16e3, de_emph(swaves[m], self.preemph))
                        wavfile.write(
                            os.path.join(save_path,
                                         'noisy_{}.wav'.format(m)),
                            16e3, de_emph(sample_noisy[m], self.preemph))
                        wavfile.write(
                            os.path.join(save_path,
                                         'dif_{}.wav'.format(m)),
                            16e3, de_emph(sample_dif[m], self.preemph))
                np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
                           d_rl_losses)
                np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
                           d_fk_losses)
                np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
                           g_adv_losses)
                np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
                           g_l1_losses)

            if batch_idx >= num_batches:
                curr_epoch += 1
                # re-set batch idx
                batch_idx = 0
                # check if we have to deactivate L1
                if curr_epoch >= config.l1_remove_epoch and \
                        not self.deactivated_l1:
                    print('** Deactivating L1 factor! **')
                    self.sess.run(tf.assign(self.l1_lambda, 0.))
                    self.deactivated_l1 = True
                # check if we have to start decaying noise (if any)
                if curr_epoch >= config.denoise_epoch and \
                        not self.deactivated_noise:
                    # apply noise std decay rate
                    decay = config.noise_decay
                    if not hasattr(self, 'curr_noise_std'):
                        self.curr_noise_std = self.init_noise_std
                    new_noise_std = decay * self.curr_noise_std
                    if new_noise_std < config.denoise_lbound:
                        print('New noise std {} < lbound {}, setting 0.'
                              .format(new_noise_std, config.denoise_lbound))
                        print('** De-activating noise layer **')
                        # if it's lower than the lower bound, cancel out
                        # completely
                        new_noise_std = 0.
                        self.deactivated_noise = True
                    else:
                        print('Applying decay {} to noise std {}: {}'.format(
                            decay, self.curr_noise_std, new_noise_std))
                    self.sess.run(
                        tf.assign(self.disc_noise_std, new_noise_std))
                    self.curr_noise_std = new_noise_std
                if curr_epoch >= config.epoch:
                    # done training
                    print('Done training; epoch limit {} '
                          'reached.'.format(self.epoch))
                    print('Saving last model at iteration {}'.format(counter))
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    break
    except tf.errors.OutOfRangeError:
        print('Done training; epoch limit {} reached.'.format(self.epoch))
    finally:
        coord.request_stop()
        coord.join(threads)
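# `de_emph` above undoes a pre-emphasis filter applied to the training
# data. A sketch of the usual SEGAN-style pair (assumed, not copied from
# this codebase; `coeff` plays the role of self.preemph):
import numpy as np

def pre_emph(x, coeff=0.95):
    # y[n] = x[n] - coeff * x[n-1]
    return np.concatenate(([x[0]], x[1:] - coeff * x[:-1]))

def de_emph(y, coeff=0.95):
    # invert the filter: x[n] = y[n] + coeff * x[n-1]
    if coeff <= 0:
        return y
    x = np.zeros_like(y)
    x[0] = y[0]
    for n in range(1, len(y)):
        x[n] = y[n] + coeff * x[n - 1]
    return x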
def fastAudio(ffmpeg: str, theFile: str, outFile: str, chunks: list,
              speeds: list, audioBit, samplerate, needConvert: bool,
              temp: str, log, fps: float):

    if(len(chunks) == 1 and chunks[0][2] == 0):
        log.error('Trying to create empty audio.')

    if(not os.path.isfile(theFile)):
        log.error('fastAudio.py could not find file: ' + theFile)

    if(needConvert):
        cmd = [ffmpeg, '-y', '-i', theFile]
        if(audioBit is not None):
            cmd.extend(['-b:a', str(audioBit)])
        cmd.extend(['-ac', '2', '-ar', str(samplerate), '-vn',
                    f'{temp}/faAudio.wav'])
        if(log.is_ffmpeg):
            cmd.extend(['-hide_banner'])
        else:
            cmd.extend(['-nostats', '-loglevel', '8'])
        subprocess.call(cmd)

        theFile = f'{temp}/faAudio.wav'

    samplerate, audioData = read(theFile)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int(newL * samplerate * 1.5) + int(samplerate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0
    totalChunks = len(chunks)
    beginTime = time.time()

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * samplerate)
        audioSampleEnd = int(audioSampleStart + (samplerate / fps) *
                             (chunk[1] - chunk[0]))
        theSpeed = speeds[chunk[2]]

        if(theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]
            if(theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, samplerate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, samplerate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * samplerate
            newSamples = int(mySamples / theSpeed)
            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        progressBar(chunkNum, totalChunks, beginTime,
                    title='Creating new audio')

    log.debug('\n - Total Samples: ' + str(yPointer))
    log.debug(' - Samples per Frame: ' + str(samplerate / fps))
    log.debug(' - Expected video length: ' + str(yPointer / (samplerate / fps)))
    newAudio = newAudio[:yPointer]
    write(outFile, samplerate, newAudio)

    if(needConvert):
        conwrite('')
def fastAudio(theFile, outFile, chunks: list, speeds: list, log, fps: float,
              machineReadable, hideBar):
    from wavfile import read, write
    import os
    import numpy as np

    log.checkType(chunks, 'chunks', list)
    log.checkType(speeds, 'speeds', list)

    def speedsOtherThan1And99999(a: list) -> bool:
        return len([x for x in a if x != 1 and x != 99999]) > 0

    if(speedsOtherThan1And99999(speeds)):
        from audiotsm2 import phasevocoder
        from audiotsm2.io.array import ArrReader, ArrWriter

    if(len(chunks) == 1 and chunks[0][2] == 0):
        log.error('Trying to create an empty file.')

    if(not os.path.isfile(theFile)):
        log.error('fastAudio.py could not find file: ' + theFile)

    samplerate, audioData = read(theFile)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int(newL * samplerate * 1.5) + int(samplerate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0

    audioProgress = ProgressBar(len(chunks), 'Creating new audio',
                                machineReadable, hideBar)

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * samplerate)
        audioSampleEnd = int(audioSampleStart + (samplerate / fps) *
                             (chunk[1] - chunk[0]))
        theSpeed = speeds[chunk[2]]

        if(theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]

            if(theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, samplerate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, samplerate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * samplerate
            newSamples = int(mySamples / theSpeed)
            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        audioProgress.tick(chunkNum)

    log.debug('\n - Total Samples: ' + str(yPointer))
    log.debug(' - Samples per Frame: ' + str(samplerate / fps))
    log.debug(' - Expected video length: ' + str(yPointer / (samplerate / fps)))
    newAudio = newAudio[:yPointer]
    write(outFile, samplerate, newAudio)
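# The chunk format consumed above (inferred from the indexing, so treat it
# as an assumption): each chunk is [start_frame, end_frame, speed_index],
# where speed_index picks an entry of `speeds` and the sentinel speed
# 99999 drops the section entirely. For example:
#     chunks = [[0, 26, 0], [26, 34, 1]]  # frames 0-26, then 26-34
#     speeds = [99999, 1.0]               # cut the first, keep the second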
byte_length = raw_pcm.size
nframes_per_channel = byte_length // block_align
byte_per_frame = bitrate // 8
length_sec = nframes_per_channel // fs

assert chunk_length > overlap >= 0, \
    "Overlap must be non negative and smaller than length"

nsegs, segs = split_segments(length_sec, chunk_length, overlap,
                             incltail=True)

file, ext = os.path.splitext(input_file)
num_digits = get_number_of_digits(nsegs)
chunk_name_format = '{}__{:0' + str(num_digits) + 'd}{}'

for i, (start, end) in zip(list(range(nsegs)), segs):
    chunk_name = chunk_name_format.format(file, i + 1, ext)
    sample_start = start * fs * num_channels * byte_per_frame
    sample_end = end * fs * num_channels * byte_per_frame
    chunk_size = sample_end - sample_start
    chunk_size_per_channel = chunk_size // num_channels // byte_per_frame
    print('Chunk #{}/{} named {} is from {} sec to {} sec'.format(
        i + 1, nsegs, chunk_name, start, end))
    chunk_data = raw_pcm[sample_start:sample_end]
    uint8_data = chunk_data.reshape((chunk_size_per_channel, num_channels,
                                     byte_per_frame)).astype(np.uint8)
    if bitrate == 24:
        write_24b(chunk_name, fs, uint8_data)
    else:
        write(chunk_name, fs, uint8_data, bitrate=bitrate)
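# Worked example of the offset arithmetic above (hypothetical numbers):
# 16-bit stereo at fs = 44100, chunk_length = 30 s, overlap = 0. For the
# first chunk (start = 0, end = 30):
#     byte_per_frame = 16 // 8 = 2
#     sample_end = 30 * 44100 * 2 * 2 = 5,292,000 bytes into raw_pcm
#     chunk_size_per_channel = 5,292,000 // 2 // 2 = 1,323,000 frames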
def add_noise_and_filter(self, x, noise, play_sounds, files_prefix):
    t = np.arange(len(x)) / self.sample_rate

    # Plot original signal.
    figure()
    plot(t, x)
    title('Original signal')
    grid(True)

    #------------------------------------------------
    # Add noise to original signal
    #------------------------------------------------
    with_noise = x + noise

    # Plot the signal with noise.
    figure()
    plot(t, with_noise)
    title('Signal with noise')
    grid(True)

    # Save audio with noise.
    output_with_noise = ''.join(
        [OUTPUT_FOLDER, files_prefix, '_with_noise.wav'])
    wavfile.write(output_with_noise, self.sample_rate, with_noise,
                  normalized=True)

    # Override x signal.
    x = with_noise

    #------------------------------------------------
    # Create a FIR filter and apply it to x.
    #------------------------------------------------
    # The Nyquist rate of the signal.
    nyq_rate = self.sample_rate / 2.0

    # The desired width of the transition from pass to stop,
    # relative to the Nyquist rate.
    width = (self.cutoff_hz_2 - self.cutoff_hz_1) / nyq_rate

    # Compute the order and Kaiser parameter for the FIR filter.
    N, beta = kaiserord(self.ripple_db, width)
    N |= 1  # force an odd tap count so the delay (N - 1) / 2 is whole

    # Use firwin with a Kaiser window to create a FIR filter.
    # Note: with two band edges and pass_zero=True, firwin designs a
    # band-reject filter (the band between the cutoffs is attenuated).
    taps = firwin(
        N, [self.cutoff_hz_1 / nyq_rate, self.cutoff_hz_2 / nyq_rate],
        window=('kaiser', beta),
        pass_zero=True)

    # Use lfilter to filter x with the FIR filter.
    filtered_x = lfilter(taps, 1.0, x)

    #------------------------------------------------
    # Plot the magnitude response of the filter.
    #------------------------------------------------
    figure()
    w, h = freqz(taps, worN=8000)
    plot((w / np.pi) * nyq_rate, np.absolute(h))
    xlabel('Frequency (Hz)')
    ylabel('Gain')
    title('Frequency response')
    ylim(-0.05, 1.05)
    grid(True)

    #------------------------------------------------
    # Plot the filtered signal.
    #------------------------------------------------
    # The phase delay of the filtered signal.
    delay = 0.5 * (N - 1) / self.sample_rate

    # Plot the filtered signal, shifted to compensate for the phase delay.
    figure()
    # Plot just the "good" part of the filtered signal. The first N-1
    # samples are "corrupted" by the initial conditions.
    plot(t[N - 1:] - delay, filtered_x[N - 1:], 'g')
    title('Filtered signal')
    xlabel('t')
    grid(True)

    # Save filtered audio.
    output_filtered = "".join(
        [OUTPUT_FOLDER, files_prefix, '_filtered.wav'])
    wavfile.write(output_filtered, self.sample_rate, filtered_x,
                  normalized=True)

    if play_sounds:
        playsound(output_with_noise)
        playsound(output_filtered)

    # Show plotted figures.
    show()
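# Standalone sketch of the Kaiser-window FIR design used above, with
# made-up numbers (44100 Hz audio, a 1000-2000 Hz reject band, 60 dB of
# attenuation); every concrete value here is illustrative only.
import numpy as np
from scipy.signal import kaiserord, firwin, lfilter

sample_rate = 44100.0
nyq_rate = sample_rate / 2.0
width = (2000.0 - 1000.0) / nyq_rate      # transition width, normalized
N, beta = kaiserord(60.0, width)          # tap count and Kaiser beta
N |= 1                                    # odd length -> integer delay
taps = firwin(N, [1000.0 / nyq_rate, 2000.0 / nyq_rate],
              window=('kaiser', beta), pass_zero=True)  # band-reject
filtered = lfilter(taps, 1.0, np.random.randn(int(sample_rate)))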
def padarray(A, length, before=0):
    # Reconstructed signature (the original def was missing from this
    # fragment): pad or truncate A to exactly `length` samples, with
    # `before` zeros prepended.
    width = (before, max(length - before - len(A), 0))
    return np.pad(A[:length - before], pad_width=width, mode='constant')


def filter20_20k(x, sr):
    # filters everything outside of 20 - 20000 Hz
    nyq = 0.5 * sr
    sos = signal.butter(5, [20.0 / nyq, 20000.0 / nyq], btype='band',
                        output='sos')
    return signal.sosfilt(sos, x)


sr, a, br = wavfile.read(SWEEPFILE, normalized=True)
sr, b, br = wavfile.read(RECFILE, normalized=True)

a = padarray(a, sr * 50, before=sr * 10)
b = padarray(b, sr * 50, before=sr * 10)
h = np.zeros_like(b)

for chan in [0, 1]:
    b1 = b[:, chan]
    b1 = filter20_20k(b1, sr)
    # Deconvolve: divide the recording's spectrum by the sweep's spectrum.
    ffta = np.fft.rfft(a)
    fftb = np.fft.rfft(b1)
    ffth = fftb / ffta
    h1 = np.fft.irfft(ffth)
    h1 = filter20_20k(h1, sr)
    h[:, chan] = h1

h = h[:10 * sr, :]
h *= ratio(dB=40)
wavfile.write('IR.wav', sr, h, normalized=True, bitrate=24)
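# `ratio` is not defined in this fragment; it is assumed to convert a
# decibel value into a linear amplitude factor, i.e. something like:
def ratio(dB):
    return 10 ** (dB / 20)  # 40 dB -> a gain of 100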
def fastAudio(ffmpeg, theFile, outFile, chunks, speeds, audioBit, samplerate,
              debug, needConvert, log, fps=30):

    if(not os.path.isfile(theFile)):
        log.error('Could not find file ' + theFile)

    if(needConvert):
        # Only print this here so other scripts can use this function.
        print('Running from fastAudio.py')

        import tempfile
        from shutil import rmtree

        TEMP = tempfile.mkdtemp()

        cmd = [ffmpeg, '-i', theFile, '-b:a', audioBit, '-ac', '2', '-ar',
               str(samplerate), '-vn', f'{TEMP}/fastAud.wav']
        if(not debug):
            cmd.extend(['-nostats', '-loglevel', '0'])
        subprocess.call(cmd)

        theFile = f'{TEMP}/fastAud.wav'

    samplerate, audioData = read(theFile)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int(newL * samplerate * 1.5) + int(samplerate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0
    totalChunks = len(chunks)
    beginTime = time.time()

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * samplerate)
        audioSampleEnd = int(audioSampleStart + (samplerate / fps) *
                             (chunk[1] - chunk[0]))
        theSpeed = speeds[chunk[2]]

        if(theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]
            if(theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, samplerate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, samplerate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * samplerate
            newSamples = int(mySamples / theSpeed)
            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        progressBar(chunkNum, totalChunks, beginTime,
                    title='Creating new audio')

    log.debug('yPointer: ' + str(yPointer))
    log.debug('samples per frame: ' + str(samplerate / fps))
    log.debug('Expected video length: ' + str(yPointer / (samplerate / fps)))
    newAudio = newAudio[:yPointer]
    write(outFile, samplerate, newAudio)

    if('TEMP' in locals()):
        rmtree(TEMP)
    if(needConvert):
        conwrite('')
def read_vas3(input_filename, output_folder, force_hex=False,
              mix_audio=False, force_game=None):
    data = open(input_filename, "rb").read()

    if data[0:4].decode('ascii') != "VA3W":
        print("Not a valid VA3 file")
        exit(1)

    # v3 header is 1 0 0 2
    version_flag1, version_flag2, version_flag3, version_flag4, \
        entry_count, gdx_size, gdx_start, entry_start, \
        data_start = struct.unpack("<BBBBIIIII", data[0x04:0x1c])

    if entry_count <= 0:
        print("No files to extract")
        exit(1)

    gdx_magic = data[gdx_start:gdx_start + 4].decode('ascii')
    if gdx_magic != "GDXH" and gdx_magic != "GDXG":
        print("Not a valid GDXH header")
        exit(1)

    default_hihat, default_snare, default_bass, default_hightom, \
        default_lowtom, default_rightcymbal = struct.unpack(
            "<HHHHHH", data[gdx_start + 0x04:gdx_start + 0x10])

    if gdx_magic == "GDXH":
        default_leftcymbal = 0xfff0
        default_floortom = 0xfff1
        default_leftpedal = 0xfff2
        gdx_type_unk1 = data[gdx_start + 0x10]  # Not used anywhere?
        gdx_volume_flag = data[gdx_start + 0x11]  # How does this work with GDXG?
        filename_prefix = "g"
    elif gdx_magic == "GDXG":
        default_leftcymbal, default_floortom, \
            default_leftpedal = struct.unpack(
                "<HHH", data[gdx_start + 0x10:gdx_start + 0x16])
        gdx_type_unk1 = 0
        gdx_volume_flag = 1
        filename_prefix = "d"

    if force_game:
        filename_prefix = force_game[0]

    metadata = {
        'type': gdx_magic,
        'version': version_flag4,
        'defaults': {
            'default_hihat': default_hihat,
            'default_snare': default_snare,
            'default_bass': default_bass,
            'default_hightom': default_hightom,
            'default_lowtom': default_lowtom,
            'default_rightcymbal': default_rightcymbal,
            'default_leftcymbal': default_leftcymbal,
            'default_floortom': default_floortom,
            'default_leftpedal': default_leftpedal,
        },
        'gdx_type_unk1': gdx_type_unk1,
        'gdx_volume_flag': gdx_volume_flag,
        'entries': [],
    }

    entries = []
    prev_filesize = 0
    for i in range(entry_count):
        # sound_flag seems to be related to defaults. If something is set
        # to default, it is 0x02. Else it's 0x04 (for GDXG). Always 0 for
        # GDXH?
        # entry_unk4 seems to always be 255??
        offset, filesize, channels, bits, rate, entry_unk1, entry_unk2, \
            volume, pan, sound_id, sound_flag, entry_unk4 = struct.unpack(
                "<IIHHIIIBBHHH",
                data[entry_start + (i * 0x40):entry_start + (i * 0x40) + 0x20])
        filename = data[entry_start + (i * 0x40) + 0x20:entry_start +
                        (i * 0x40) + 0x40].decode("ascii").strip('\0')

        if filename_prefix == "g":
            if i + 1 == entry_count:
                filesize = len(data) - (data_start + offset)
            else:
                # Filesize is the difference between consecutive offsets.
                filesize = int.from_bytes(
                    data[entry_start + ((i + 1) * 0x40):entry_start +
                         ((i + 1) * 0x40) + 4], 'little')
                t = prev_filesize
                prev_filesize = filesize
                filesize -= t
        elif entry_unk1 != 0:
            filesize = entry_unk1

        volume = min(volume, 127)

        if sound_id >= 0xfff0:
            print("Verify when sound_id >= 0xfff0")
            exit(1)

        if sound_id == 0xfff0:
            sound_id = default_leftcymbal
        elif sound_id == 0xfff1:
            sound_id = default_floortom
        elif sound_id == 0xfff2:
            sound_id = default_leftpedal

        entries.append({
            'sound_id': sound_id,
            'filename': filename,
            'offset': offset,
            'filesize': filesize,
            'channels': channels,
            'bits': bits,
            'rate': rate,
            'volume': volume,
            'pan': pan,
        })

        metadata['entries'].append({
            'sound_id': sound_id,
            'filename': filename,
            'volume': volume,
            'pan': pan,
            'extra': entry_unk4,  # Unknown flag, most likely always 255
            'flags': [],
        })

        if version_flag4 < 2:
            if (sound_flag & 0x02) != 0:
                metadata['entries'][-1]['flags'].append(0x02)

        if (sound_flag & 0x0100) != 0:
            metadata['entries'][-1]['flags'].append("NoFilename")

    if output_folder:
        basepath = output_folder
    else:
        basepath = os.path.splitext(os.path.basename(input_filename))[0]

    os.makedirs(basepath, exist_ok=True)

    for entry in entries:
        # print("Extracting", entry['filename'])
        print(entry)

        wave_data = bytearray(data[data_start + entry['offset']:data_start +
                                   entry['offset'] + entry['filesize']])
        output = adpcmwave.decode_data(wave_data, entry['rate'],
                                       entry['channels'], entry['bits'])

        output_filename = os.path.join(
            basepath, "{}_{}.wav".format(filename_prefix, entry['filename']))
        if (sound_flag & 0x100) != 0 or force_hex:
            output_filename = os.path.join(
                basepath,
                "%s_%04x.wav" % (filename_prefix, entry['sound_id']))

        output = numpy.ndarray(
            (int(len(output) // 2 // entry['channels']), entry['channels']),
            numpy.int16, output, 0)
        wavfile.write(output_filename, entry['rate'], output)

        # If mixing is enabled, mix using AudioSegment
        if mix_audio:
            # audio_segment = pydub.AudioSegment(
            #     output_stream.getbuffer(),
            #     frame_rate=entry['rate'],
            #     sample_width=entry['bits'] // 8,
            #     channels=entry['channels']
            # )
            audio_segment = pydub.AudioSegment.from_file(output_filename)
            pan = (entry['pan'] - (128 / 2)) / (128 / 2)
            audio_segment = audio_segment.pan(pan)
            db = 20 * math.log10(entry['volume'] / 127)
            audio_segment += db
            audio_segment.export(output_filename, format="wav")
            entry['volume'] = 127
            entry['pan'] = 64

        entry['duration'] = (entry['filesize'] * (entry['bits'] // 8) *
                             entry['channels']) / entry['rate']

        for idx in range(len(metadata['entries'])):
            if metadata['entries'][idx]['sound_id'] == entry['sound_id']:
                metadata['entries'][idx]['volume'] = entry['volume']
                metadata['entries'][idx]['pan'] = entry['pan']
                metadata['entries'][idx]['duration'] = entry['duration']
                metadata['entries'][idx]['rate'] = entry['rate']
                metadata['entries'][idx]['channels'] = entry['channels']
                metadata['entries'][idx]['bits'] = entry['bits']
                metadata['entries'][idx]['raw_filesize'] = entry['filesize']
                break

    open(os.path.join(basepath, "%s_metadata.json" % filename_prefix),
         "w").write(json.dumps(metadata, indent=4))
import os
from wavfile import read, write

for root, dirs, files in os.walk('../data/raw'):
    for wav in files:
        rate, data, _, loops = read(f'{root}/{wav}', readloops=True)
        if loops is not None:
            print(wav, loops)
            start, end = loops[0]
            cut_data = data[start:end]
            assert (len(cut_data) == end - start)
            write(f'../data/cut/{wav}', rate, cut_data)
def fastAudio(ffmpeg, theFile, outFile, silentT, frameMargin, SAMPLE_RATE,
              audioBit, verbose, silentSpeed, soundedSpeed, needConvert,
              chunks=[], fps=30):

    if(not os.path.isfile(theFile)):
        print('Could not find file:', theFile)
        sys.exit(1)

    if(outFile == ''):
        fileName = theFile[:theFile.rfind('.')]
        outFile = f'{fileName}_ALTERED.wav'

    if(needConvert):
        # Only print this here so other scripts can use this function.
        print('Running from fastAudio.py')

        import tempfile
        from shutil import rmtree

        TEMP = tempfile.mkdtemp()

        cmd = [ffmpeg, '-i', theFile, '-b:a', audioBit, '-ac', '2', '-ar',
               str(SAMPLE_RATE), '-vn', f'{TEMP}/fastAud.wav']
        if(not verbose):
            cmd.extend(['-nostats', '-loglevel', '0'])
        subprocess.call(cmd)

        theFile = f'{TEMP}/fastAud.wav'

    speeds = [silentSpeed, soundedSpeed]

    sampleRate, audioData = read(theFile)
    if(chunks == []):
        print('Creating chunks')
        chunks = getAudioChunks(audioData, sampleRate, fps, silentT, 2,
                                frameMargin)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int((newL * sampleRate) * 1.5) + int(sampleRate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0
    totalChunks = len(chunks)
    beginTime = time.time()

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * sampleRate)
        audioSampleEnd = int(audioSampleStart + (sampleRate / fps) *
                             (chunk[1] - chunk[0]))
        theSpeed = speeds[chunk[2]]

        if(theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]
            if(theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, sampleRate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * sampleRate
            newSamples = int(mySamples / theSpeed)
            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        progressBar(chunkNum, totalChunks, beginTime,
                    title='Creating new audio')

    if(verbose):
        print('yPointer', yPointer)
        print('samples per frame', sampleRate / fps)
        print('Expected video length', yPointer / (sampleRate / fps))

    newAudio = newAudio[:yPointer]
    write(outFile, sampleRate, newAudio)

    if('TEMP' in locals()):
        rmtree(TEMP)
    if(needConvert):
        return outFile
def read_vas3(input_filename, output_folder, force_hex=False,
              mix_audio=False):
    data = open(input_filename, "rb").read()

    if data[0:4].decode('ascii') != "VA3W":
        print("Not a valid VA3 file")
        exit(1)

    # v3 header is 1 0 0 2
    version_flag1, version_flag2, version_flag3, version_flag4, \
        entry_count, gdx_size, gdx_start, entry_start, \
        data_start = struct.unpack("<BBBBIIIII", data[0x04:0x1c])

    if entry_count <= 0:
        print("No files to extract")
        exit(1)

    gdx_magic = data[gdx_start:gdx_start + 4].decode('ascii')
    if gdx_magic != "GDXH" and gdx_magic != "GDXG":
        print("Not a valid GDXH header")
        exit(1)

    default_hihat, default_snare, default_bass, default_hightom, \
        default_lowtom, default_rightcymbal = struct.unpack(
            "<HHHHHH", data[gdx_start + 0x04:gdx_start + 0x10])

    if gdx_magic == "GDXH":
        # Not used anywhere, can be ignored??
        # gdx_type_unk1 default is 0
        # gdx_type_unk2 default is 1
        default_leftcymbal = 0xfff0
        default_floortom = 0xfff1
        default_leftpedal = 0xfff2
        gdx_type_unk1 = data[gdx_start + 0x10]  # Not used anywhere?
        gdx_volume_flag = data[gdx_start + 0x11]  # How does this work with GDXG?
    elif gdx_magic == "GDXG":
        default_leftcymbal, default_floortom, \
            default_leftpedal = struct.unpack(
                "<HHH", data[gdx_start + 0x10:gdx_start + 0x16])
        gdx_type_unk1 = 0
        gdx_volume_flag = 1

    metadata = {
        'type': gdx_magic,
        'version': version_flag4,
        'defaults': {
            'default_hihat': default_hihat,
            'default_snare': default_snare,
            'default_bass': default_bass,
            'default_hightom': default_hightom,
            'default_lowtom': default_lowtom,
            'default_rightcymbal': default_rightcymbal,
            'default_leftcymbal': default_leftcymbal,
            'default_floortom': default_floortom,
            'default_leftpedal': default_leftpedal,
        },
        'gdx_type_unk1': gdx_type_unk1,
        'gdx_volume_flag': gdx_volume_flag,
        'entries': [],
    }

    entries = []
    for i in range(entry_count):
        # sound_flag seems to be related to defaults. If something is set
        # to default, it is 0x02. Else it's 0x04 (for GDXG). Always 0 for
        # GDXH?
        # entry_unk4 seems to always be 255??
        offset, filesize, channels, bits, rate, entry_unk1, entry_unk2, \
            volume, pan, sound_id, sound_flag, entry_unk4 = struct.unpack(
                "<IIHHIIIBBHHH",
                data[entry_start + (i * 0x40):entry_start + (i * 0x40) + 0x20])
        filename = data[entry_start + (i * 0x40) + 0x20:entry_start +
                        (i * 0x40) + 0x40].decode("ascii").strip('\0')

        if entry_unk1 != 0:
            filesize = entry_unk1

        # if entry_unk1 != 0 or entry_unk2 != 0:
        #     print("Unknown fields in entry: %08x %08x" % (entry_unk1, entry_unk2))
        #     exit(1)

        # Code for an older version of VA3 files?
        # I think there's some padding that it's trying to deal with here,
        # but I'm not sure exactly.
        # Need a sample to verify this functionality.
        # entry_unk1 and entry_unk2 should always be 0 for v3
        # if entry_unk2 != 0 and (entry_unk2 == filesize or entry_unk2 == filesize + 0x20 or entry_unk2 == filesize * 4):
        #     entry_unk2 = 0
        # if version_flag4 == 0:
        #     if entry_unk2 > 0 and entry_unk2 >= 0x20:
        #         entry_unk2 -= 0x20
        #     if entry_unk1 > 0 and entry_unk1 >= 0x20:
        #         entry_unk1 -= 0x20
        #     if entry_unk2 > filesize:
        #         entry_unk2 = filesize
        # if entry_unk1 != 0:
        #     valid_file = entry_unk1 == entry_unk2
        # else:
        #     valid_file = entry_unk2 == 0

        if gdx_volume_flag == 0:
            # ??
            # This code shouldn't be hit unless you're working
            # with some really old files I suspect
            volume = 3 * volume / 2
            print("Verify volume when gdx_volume_flag == 0")
            exit(1)
        else:
            volume = min(volume, 127)

        if version_flag1 == 1 and version_flag2 == 0 and version_flag3 == 0 \
                and (version_flag4 == 0 or version_flag4 == 1):
            # v1 and v2 use a table for volume?
            # Need to find a sample to verify
            # volume2 = VOLUME_TABLE[min(volume, 0x7f)]
            # print(volume, volume2)
            # print("Verify when volume table is used (percentages or not?)")
            # exit(1)
            pass

        if sound_id >= 0xfff0:
            print("Verify when sound_id >= 0xfff0")
            exit(1)

        if sound_id == 0xfff0:
            sound_id = default_leftcymbal
        elif sound_id == 0xfff1:
            sound_id = default_floortom
        elif sound_id == 0xfff2:
            sound_id = default_leftpedal

        entries.append({
            'sound_id': sound_id,
            'filename': filename,
            'offset': offset,
            'filesize': filesize,
            'channels': channels,
            'bits': bits,
            'rate': rate,
            'volume': volume,
            'pan': pan,
        })

        metadata['entries'].append({
            'sound_id': sound_id,
            'filename': filename,
            'volume': volume,
            'pan': pan,
            'extra': entry_unk4,  # Unknown flag, most likely always 255
            'flags': [],
        })

        if version_flag4 < 2:
            if (sound_flag & 0x02) != 0:
                metadata['entries'][-1]['flags'].append(0x02)

        # if (sound_flag & 0x04) != 0:
        #     metadata['entries'][-1]['flags'].append("DefaultSound")
        # Generate this by checking defaults in header
        # "DefaultSound" if (sound_flag & 0x04) != 0,

        if (sound_flag & 0x0100) != 0:
            metadata['entries'][-1]['flags'].append("NoFilename")

    if output_folder:
        basepath = output_folder
    else:
        basepath = os.path.splitext(os.path.basename(input_filename))[0]

    if not os.path.exists(basepath):
        os.makedirs(basepath)

    for entry in entries:
        # print("Extracting", entry['filename'])
        # print(entry)
        wave_data = bytearray(data[data_start + entry['offset']:data_start +
                                   entry['offset'] + entry['filesize']])
        output = adpcmwave.decode_data(wave_data, entry['rate'],
                                       entry['channels'], entry['bits'])

        output_filename = os.path.join(basepath,
                                       "{}.wav".format(entry['filename']))
        if (sound_flag & 0x100) != 0 or force_hex:
            output_filename = os.path.join(basepath,
                                           "%04x.wav" % entry['sound_id'])

        output = numpy.ndarray(
            (int(len(output) // 2 // entry['channels']), entry['channels']),
            numpy.int16, output, 0)
        wavfile.write(output_filename, entry['rate'], output)

        # If mixing is enabled, mix using AudioSegment
        if mix_audio:
            # audio_segment = pydub.AudioSegment(
            #     output_stream.getbuffer(),
            #     frame_rate=entry['rate'],
            #     sample_width=entry['bits'] // 8,
            #     channels=entry['channels']
            # )
            audio_segment = pydub.AudioSegment.from_file(output_filename)
            pan = (entry['pan'] - (128 / 2)) / (128 / 2)
            audio_segment = audio_segment.pan(pan)
            db = 20 * math.log10(entry['volume'] / 127)
            audio_segment += db
            audio_segment.export(output_filename, format="wav")
            entry['volume'] = 127
            entry['pan'] = 64

        entry['duration'] = len(
            pydub.AudioSegment.from_file(output_filename)) / 1000

        for idx in range(len(metadata['entries'])):
            if metadata['entries'][idx]['sound_id'] == entry['sound_id']:
                metadata['entries'][idx]['volume'] = entry['volume']
                metadata['entries'][idx]['pan'] = entry['pan']
                metadata['entries'][idx]['duration'] = entry['duration']
                break

    open(os.path.join(basepath, "metadata.json"),
         "w").write(json.dumps(metadata, indent=4))
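# Worked example of the mix math above (values hypothetical): a raw pan
# byte of 96 maps to (96 - 64) / 64 = +0.5 (half right), and a raw volume
# of 64 maps to 20 * log10(64 / 127) ~ -5.95 dB of attenuation.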
array = np.zeros((nsamples,), dtype=np.float32)
frequency = 440
length = 5

beep_t = np.linspace(0, 1, beep_length)  # Produces a 5 second Audio-File
beep_value_array = np.sin(frequency * 2 * np.pi * beep_t)  # Has frequency of 440Hz

previous_beep_position = 0
array[:half_beep_length] = beep_value_array[half_beep_length:]

while True:
    current_beep_position = previous_beep_position + gap_length
    if current_beep_position + half_beep_length > nsamples:
        if current_beep_position - half_beep_length < nsamples:
            last_beep_length = nsamples - (current_beep_position -
                                           half_beep_length)
            array[current_beep_position - half_beep_length:] = \
                beep_value_array[:last_beep_length]
        break
    array[current_beep_position - half_beep_length:
          current_beep_position + half_beep_length] = beep_value_array
    previous_beep_position = current_beep_position

# Clip to [-1, 1] before scaling to the integer range.
array[array > 1.0] = 1.0
array[array < -1.0] = -1.0

data = np.asarray(array * (2 ** 31 - 1), dtype=np.int32).astype(dtype)
uint8_data = np.frombuffer(data.tobytes(),
                           dtype=np.uint8).reshape((nsamples, 1, bytes))
write(filename, fs, uint8_data, bitrate=bits)
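# The fragment above leans on names defined earlier in its script. A
# plausible setup (assumed values, for illustration only):
#     fs = 44100; length = 5; nsamples = length * fs
#     bits = 32; bytes = bits // 8; dtype = np.int32
#     beep_length = int(0.2 * fs); half_beep_length = beep_length // 2
#     gap_length = fs  # one beep per second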