def __call__(self, inputs, samplerate, output_filename, input_filenames,
             to_save=('image', 'noise'), save_as_one_file=False):
    """Mix zero, one, or two sources with a tail/head overlap, reverb and noise.

    Args:
        inputs: list of 1-D source waveforms (may be empty).
        samplerate: sampling rate in Hz.
        output_filename: path of the mixture file; side-file names derive from it.
        input_filenames: input file names (used for console logging only).
        to_save: subset of {'image', 'noise', 'rir', 'source'} to write to disk.
        save_as_one_file: forwarded to libaueffect.write_wav.

    Returns:
        Tuple of (mixture signal, OrderedDict describing the mixing process).
    """
    print(output_filename)
    for i, f in enumerate(input_filenames):
        if i == 0:
            print('\t= {}'.format(f))
        else:
            print('\t+ {}'.format(f))

    # 0 sources: produce silence of a random length.
    if len(inputs) == 0:
        overlap_len = 0
        sir = float('inf')
        nsamples = int(
            np.random.uniform(self._min_sillen, self._max_mixlen) * samplerate)
        x = np.zeros((2, nsamples))
    # 1 source: pair it with silence.
    elif len(inputs) == 1:
        overlap_len = 0
        sir = float('inf')
        x = np.stack([inputs[0], np.zeros(inputs[0].shape)])
    # 2+ sources: overlap the tail of the first with the head of the second.
    else:
        # Randomly determine the overlap length.
        overlap_len = np.random.uniform(
            self._min_overlap,
            min(len(inputs[0]), len(inputs[1])) / samplerate)

        # Truncate the sources proportionally so the mixture length is bounded.
        maxspeechsamples = int(samplerate * (self._max_mixlen + overlap_len))
        len0 = len(inputs[0])
        len1 = len(inputs[1])
        if len0 + len1 > maxspeechsamples:
            x = [inputs[0][:int(maxspeechsamples * (len0 / (len0 + len1)))],
                 inputs[1][:int(maxspeechsamples * (len1 / (len0 + len1)))]]
        else:
            x = copy.deepcopy(inputs[:2])

        # Ensure the overlap length is shorter than the source signals.
        overlap_len = min(overlap_len,
                          len(x[0]) / samplerate,
                          len(x[1]) / samplerate)

        # Zero-pad so the two signals overlap by overlap_len seconds.
        target_len = len(x[0]) + len(x[1]) - int(overlap_len * samplerate)
        x = np.stack(
            [np.pad(x[0], mode='constant',
                    pad_width=(0, target_len - len(x[0]))),
             np.pad(x[1], mode='constant',
                    pad_width=(target_len - len(x[1]), 0))])

        # Randomly determine the SIR.
        # Note that the SIR is measured for the whole utterances instead of
        # the overlapped segment. This is because the overlapped segment may
        # contain only non-speech (silence/noise).
        x[1], sir = libaueffect.signals.scale_noise_to_random_snr(
            x[1], x[0], self._min_sir, self._max_sir)

    # Truncate long signals.
    x = x[:, :min(x.shape[1], int(samplerate * self._max_mixlen))]
    target_len = x.shape[1]

    # Filter and mix the signals.
    target_amp = np.random.uniform(self._min_amplitude, self._max_amplitude)
    h, h_info = self._room_simulator(nspeakers=2, info_as_display_style=True)
    z, y, h = libaueffect.reverb_mix(x, h, sample_rate=samplerate,
                                     cancel_delay=self._no_delay,
                                     second_arg_is_filename=False)

    # Generate noise.
    if self._noise_generator is not None:
        n = self._noise_generator(nsamples=target_len)
        if len(inputs) > 0:
            n, snr = libaueffect.signals.scale_noise_to_random_snr(
                n, z, self._min_snr, self._max_snr)
        else:
            snr = float('-inf')
        # Add the noise.
        u = z + n
    else:
        # BUGFIX: bind n so the scaling step below cannot raise NameError.
        n = None
        u = np.copy(z)

    # Normalize the generated signal.
    max_amplitude = np.amax(np.absolute(u))
    # BUGFIX: guard against division by zero for an all-zero mixture
    # (possible with zero inputs and no noise generator).
    scale = ((32767 / 32768) / max_amplitude * target_amp
             if max_amplitude > 0 else 1.0)
    u *= scale
    y *= scale
    if n is not None:
        n *= scale
    for i in range(len(h)):
        h[i] *= scale

    # Description of the mixing process.
    params = [('mixer', self.__class__.__name__),
              ('implementation', __name__),
              ('sir', sir),
              ('amplitude', target_amp),
              ('overlap length in seconds', overlap_len)]
    params += h_info
    if self._noise_generator is not None:
        params.append(('snr', snr))

    path, ext = os.path.splitext(output_filename)

    # Save the reverberant source signals.
    if 'image' in to_save:
        for i in range(len(y)):
            outfile = '{}_s{}{}'.format(path, i, ext)
            libaueffect.write_wav(y[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('source{}'.format(i), outfile))

    # Save the noise.
    if 'noise' in to_save and self._noise_generator is not None:
        outfile = '{}_s{}{}'.format(path, len(y), ext)
        libaueffect.write_wav(n, outfile, sample_rate=samplerate,
                              avoid_clipping=False,
                              save_as_one_file=save_as_one_file)
        params.append(('noise', outfile))

    # Save the RIRs.
    if 'rir' in to_save:
        for i in range(len(h)):
            outfile = '{}_r{}{}'.format(path, i, ext)
            libaueffect.write_wav(h[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('rir{}'.format(i), outfile))

    # Save the anechoic source signals.
    if 'source' in to_save:
        for i in range(len(x)):
            outfile = '{}_a{}{}'.format(path, i, ext)
            libaueffect.write_wav(x[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('anechoic{}'.format(i), outfile))

    return u, OrderedDict(params)
def __call__(self, inputs, samplerate, output_filename, input_filenames,
             to_save=('image', 'noise'), save_as_one_file=False):
    """Mix sources padded to a common length, with reverberation and noise.

    All inputs are zero-padded to the length of the longest one, so the
    overlap region is the duration of the shortest input.

    Args:
        inputs: list of 1-D source waveforms (may be empty).
        samplerate: sampling rate in Hz.
        output_filename: path of the mixture file; side-file names derive from it.
        input_filenames: input file names (used for console logging only).
        to_save: subset of {'image', 'noise', 'rir', 'source'} to write to disk.
        save_as_one_file: forwarded to libaueffect.write_wav.

    Returns:
        Tuple of (mixture signal, OrderedDict describing the mixing process).
    """
    print(output_filename)
    for i, f in enumerate(input_filenames):
        if i == 0:
            print('\t= {}'.format(f))
        else:
            print('\t+ {}'.format(f))

    if len(inputs) == 0:
        # No input: produce silence of a random length.
        sir = float('inf')
        nsamples = int(
            np.random.uniform(self._min_sillen, self._max_mixlen) * samplerate)
        x = np.zeros((2, nsamples))
    else:
        # Pad zeros to the end of each signal so that they have the same
        # length. The shortest input defines the fully-overlapped segment.
        overlap_len = min(len(sig) for sig in inputs)
        target_len = max(len(sig) for sig in inputs)
        x = [np.pad(sig, mode='constant',
                    pad_width=(0, target_len - len(sig))) for sig in inputs]

        # Generate anechoic source signals.
        if len(x) == 1:
            sir = float('inf')
            x = np.stack([x[0], np.zeros(x[0].shape)])
        else:
            x = x[:2]
            x[1], sir = libaueffect.signals.scale_noise_to_random_snr(
                x[1], x[0], self._min_sir, self._max_sir,
                valid_segment=(0, overlap_len))
            x = np.stack(x)

    # Truncate long signals.
    x = x[:, :min(x.shape[1], int(samplerate * self._max_mixlen))]
    target_len = x.shape[1]

    # Filter and mix the signals.
    target_amp = np.random.uniform(self._min_amplitude, self._max_amplitude)
    h, h_info = self._room_simulator(nspeakers=2, info_as_display_style=True)
    z, y, h = libaueffect.reverb_mix(x, h, sample_rate=samplerate,
                                     cancel_delay=self._no_delay,
                                     second_arg_is_filename=False)

    # Generate noise.
    if self._noise_generator is not None:
        n = self._noise_generator(nsamples=target_len)
        if len(inputs) > 0:
            n, snr = libaueffect.signals.scale_noise_to_random_snr(
                n, z, self._min_snr, self._max_snr)
        else:
            snr = float('-inf')
        # Add the noise.
        u = z + n
    else:
        # BUGFIX: bind n so the scaling step below cannot raise NameError.
        n = None
        u = np.copy(z)

    # Normalize the generated signal.
    max_amplitude = np.amax(np.absolute(u))
    # BUGFIX: guard against division by zero for an all-zero mixture.
    scale = ((32767 / 32768) / max_amplitude * target_amp
             if max_amplitude > 0 else 1.0)
    u *= scale
    y *= scale
    if n is not None:
        n *= scale
    for i in range(len(h)):
        h[i] *= scale

    # Description of the mixing process.
    params = [('mixer', self.__class__.__name__),
              ('implementation', __name__),
              ('sir', sir),
              ('amplitude', target_amp)]
    params += h_info
    if self._noise_generator is not None:
        params.append(('snr', snr))

    path, ext = os.path.splitext(output_filename)

    # Save the reverberant source signals.
    if 'image' in to_save:
        for i in range(len(y)):
            outfile = '{}_s{}{}'.format(path, i, ext)
            libaueffect.write_wav(y[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('source{}'.format(i), outfile))

    # Save the noise.
    if 'noise' in to_save and self._noise_generator is not None:
        outfile = '{}_s{}{}'.format(path, len(y), ext)
        libaueffect.write_wav(n, outfile, sample_rate=samplerate,
                              avoid_clipping=False,
                              save_as_one_file=save_as_one_file)
        params.append(('noise', outfile))

    # Save the RIRs.
    if 'rir' in to_save:
        for i in range(len(h)):
            outfile = '{}_r{}{}'.format(path, i, ext)
            libaueffect.write_wav(h[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('rir{}'.format(i), outfile))

    # Save the anechoic source signals.
    if 'source' in to_save:
        for i in range(len(x)):
            outfile = '{}_a{}{}'.format(path, i, ext)
            libaueffect.write_wav(x[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('anechoic{}'.format(i), outfile))

    return u, OrderedDict(params)
def main(args):
    """Generate audio mixtures for every entry of the IO list.

    Reads a JSON IO list, mixes the referenced input wav files with a
    randomly chosen mixer, and writes the mixture, intermediate signals,
    an output-file list, and a JSON log.

    Args:
        args: parsed command-line arguments (iolist, mixers_configfile,
            outlist, log, sample_rate, random_seed, save_image,
            cancel_dcoffset, save_each_channel_in_onefile).

    Returns:
        0 on success.
    """
    # Make the results predictable.
    if args.random_seed is not None:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    # Read in the IO list.
    with open(args.iolist) as f:
        iolist = json.load(f)

    # Instantiate the mixers.
    mixers, priors = libaueffect.create_AudioMixerArray(args.mixers_configfile)
    nmixers = len(mixers)

    # Create the output directories.
    os.makedirs(os.path.dirname(os.path.abspath(args.outlist)), exist_ok=True)
    os.makedirs(os.path.dirname(os.path.abspath(args.log)), exist_ok=True)

    if args.save_image:
        to_return = ('image', 'noise')
    else:
        to_return = ('source', 'rir', 'noise')

    with open(args.log, 'w') as log_stream:
        print('[', file=log_stream)
        with open(args.outlist, 'w') as outfile_stream:
            # Process each audio file.
            for i, iofiles in enumerate(iolist):
                # BUGFIX: the value printed with '%' was a 0-1 fraction;
                # scale by 100 so it really is a percentage.
                print('[{}/{} ({:.3f}%)]'.format(
                    i + 1, len(iolist), 100 * i / len(iolist)))

                infiles = [os.path.abspath(f['path'])
                           for f in iofiles['inputs']]
                offsets = [int(args.sample_rate * f['offset'])
                           for f in iofiles['inputs']]
                spkr_labs = [f['speaker_id'] for f in iofiles['inputs']]
                outfile = os.path.abspath(iofiles['output'])

                # Load each input signal.
                # BUGFIX: when a broken wav is skipped, also drop its offset
                # and speaker label so the three lists stay aligned.
                x, valid_offsets, valid_spkrs = [], [], []
                for f, off, spkr in zip(infiles, offsets, spkr_labs):
                    try:
                        _x, sr = libaueffect.read_wav(
                            f, sample_rate=args.sample_rate, channel=0)
                    except RuntimeError:
                        print('Wav file is broken, skipped: {}'.format(f))
                        continue
                    if args.cancel_dcoffset:
                        _x -= np.mean(_x)
                    x.append(_x)
                    valid_offsets.append(off)
                    valid_spkrs.append(spkr)
                sr = args.sample_rate

                # Mix the signals.
                mixer = mixers[np.random.choice(nmixers, p=priors)]
                y, p, interm = mixer(x, valid_offsets, valid_spkrs,
                                     to_return=to_return)

                # Save the output signal.
                print(os.path.abspath(outfile), file=outfile_stream)
                libaueffect.write_wav(
                    y, outfile, sample_rate=sr, avoid_clipping=False,
                    save_as_one_file=(not args.save_each_channel_in_onefile))

                # Save the intermediate signals.
                for dt in interm.values():
                    for key in dt:
                        filename = f"{os.path.splitext(outfile)[0]}_{key}.wav"
                        libaueffect.write_wav(
                            dt[key], filename, avoid_clipping=False,
                            save_as_one_file=(
                                not args.save_each_channel_in_onefile))

                # Log the mixing parameters for this entry.
                input_info = [{'path': os.path.abspath(f['path']),
                               'speaker_id': f['speaker_id'],
                               'offset': f['offset'],
                               'length_in_seconds': f['length_in_seconds']}
                              for f in iofiles['inputs']]
                params = OrderedDict(
                    [('output', outfile), ('inputs', input_info)]
                    + list(p.items()))
                json.dump(params, log_stream, indent=4)

                # Print the list element separator.
                if iofiles == iolist[-1]:
                    print('', file=log_stream)
                else:
                    print(',', file=log_stream)

        # End of the list.
        print(']', file=log_stream)

    return 0
def __call__(self, inputs, samplerate, output_filename, input_filenames,
             to_save=('image', 'noise'), save_as_one_file=False):
    """Mix two sources so the second partially overlaps the first at a
    random position, then apply reverberation and optional noise.

    Args:
        inputs: list of 1-D source waveforms (may be empty).
        samplerate: sampling rate in Hz.
        output_filename: path of the mixture file; side-file names derive from it.
        input_filenames: input file names (used for console logging only).
        to_save: subset of {'image', 'noise', 'rir', 'source'} to write to disk.
        save_as_one_file: forwarded to libaueffect.write_wav.

    Returns:
        Tuple of (mixture signal, OrderedDict describing the mixing process).
    """
    print(output_filename)
    for i, f in enumerate(input_filenames):
        if i == 0:
            print('\t= {}'.format(f))
        else:
            print('\t+ {}'.format(f))

    # 0 sources: produce silence of a random length.
    if len(inputs) == 0:
        overlap_len = 0
        sir = float('inf')
        overlap_start = 0
        nsamples = int(
            np.random.uniform(self._min_sillen, self._max_mixlen) * samplerate)
        x = np.zeros((2, nsamples))
    # 1 source: pair it with silence.
    elif len(inputs) == 1:
        overlap_len = 0
        sir = float('inf')
        overlap_start = 0
        x = np.stack([inputs[0], np.zeros(inputs[0].shape)])
    # 2 sources: embed a segment of the second inside the first.
    else:
        maxsamples = int(self._max_mixlen * samplerate)
        if len(inputs[0]) > maxsamples:
            y = inputs[0][:maxsamples]
        else:
            y = inputs[0]

        # Randomly determine the interfering signal length.
        # BUGFIX: the original computed int(self._min_overlap) * samplerate,
        # which truncates fractional seconds to 0 before scaling.
        min_overlap = int(self._min_overlap * samplerate)
        max_overlap = min(int(len(y) * self._max_overlap), len(inputs[1]))
        if max_overlap > min_overlap:
            overlap_samples = np.random.randint(min_overlap, max_overlap)
        else:
            overlap_samples = max_overlap
        overlap_len = overlap_samples / samplerate

        # Randomly place the overlapped segment inside y.
        # BUGFIX: np.random.randint(0, 0) raises ValueError when the overlap
        # spans the whole signal; place it at 0 in that case.
        margin = len(y) - overlap_samples
        overlap_begin = np.random.randint(0, margin) if margin > 0 else 0
        overlap_end = overlap_begin + overlap_samples
        overlap_start = overlap_begin / samplerate

        z = np.zeros(y.shape)
        z[overlap_begin:overlap_end] = inputs[1][:overlap_samples]

        # Stack the signals.
        x = np.stack([y, z])

        # Randomly determine the SIR.
        x[1], sir = libaueffect.signals.scale_noise_to_random_snr(
            x[1], x[0], self._min_sir, self._max_sir,
            valid_segment=(overlap_begin, overlap_end))

    # Truncate long signals.
    x = x[:, :min(x.shape[1], int(samplerate * self._max_mixlen))]
    target_len = x.shape[1]

    # Filter and mix the signals.
    target_amp = np.random.uniform(self._min_amplitude, self._max_amplitude)
    h, h_info = self._room_simulator(nspeakers=2, info_as_display_style=True)
    z, y, h = libaueffect.reverb_mix(x, h, sample_rate=samplerate,
                                     cancel_delay=self._no_delay,
                                     second_arg_is_filename=False)

    # Generate noise.
    if self._noise_generator is not None:
        n = self._noise_generator(nsamples=target_len)
        if len(inputs) > 0:
            n, snr = libaueffect.signals.scale_noise_to_random_snr(
                n, z, self._min_snr, self._max_snr)
        else:
            snr = float('-inf')
        # Add the noise.
        u = z + n
    else:
        # BUGFIX: bind n so the scaling step below cannot raise NameError.
        n = None
        u = np.copy(z)

    # Normalize the generated signal.
    max_amplitude = np.amax(np.absolute(u))
    # BUGFIX: guard against division by zero for an all-zero mixture.
    scale = ((32767 / 32768) / max_amplitude * target_amp
             if max_amplitude > 0 else 1.0)
    u *= scale
    y *= scale
    if n is not None:
        n *= scale
    for i in range(len(h)):
        h[i] *= scale

    # Description of the mixing process.
    params = [('mixer', self.__class__.__name__),
              ('implementation', __name__),
              ('sir', sir),
              ('amplitude', target_amp),
              ('overlap start', overlap_start),
              ('overlap length in seconds', overlap_len)]
    params += h_info
    if self._noise_generator is not None:
        params.append(('snr', snr))

    path, ext = os.path.splitext(output_filename)

    # Save the reverberant source signals.
    if 'image' in to_save:
        for i in range(len(y)):
            outfile = '{}_s{}{}'.format(path, i, ext)
            libaueffect.write_wav(y[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('source{}'.format(i), outfile))

    # Save the noise.
    if 'noise' in to_save and self._noise_generator is not None:
        outfile = '{}_s{}{}'.format(path, len(y), ext)
        libaueffect.write_wav(n, outfile, sample_rate=samplerate,
                              avoid_clipping=False,
                              save_as_one_file=save_as_one_file)
        params.append(('noise', outfile))

    # Save the RIRs.
    if 'rir' in to_save:
        for i in range(len(h)):
            outfile = '{}_r{}{}'.format(path, i, ext)
            libaueffect.write_wav(h[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('rir{}'.format(i), outfile))

    # Save the anechoic source signals.
    if 'source' in to_save:
        for i in range(len(x)):
            outfile = '{}_a{}{}'.format(path, i, ext)
            libaueffect.write_wav(x[i], outfile, sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('anechoic{}'.format(i), outfile))

    return u, OrderedDict(params)
def main(args):
    """Generate audio mixtures for every entry of the IO list.

    Reads a JSON IO list (each entry: a list of input wav paths and an
    output path), mixes each set of inputs with a randomly chosen mixer,
    and writes the mixture, an output-file list, and a JSON log.

    Args:
        args: parsed command-line arguments (iolist, mixers_configfile,
            outlist, log, sample_rate, random_seed, save_image,
            cancel_dcoffset, save_each_channel_in_onefile).

    Returns:
        0 on success.
    """
    # Make the results predictable.
    if args.random_seed is not None:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    # Read in the IO list.
    with open(args.iolist) as f:
        iolist = json.load(f)

    # Instantiate the mixers.
    mixers, priors = libaueffect.create_AudioMixerArray(args.mixers_configfile)
    nmixers = len(mixers)

    # Create the output directories.
    os.makedirs(os.path.dirname(os.path.abspath(args.outlist)), exist_ok=True)
    os.makedirs(os.path.dirname(os.path.abspath(args.log)), exist_ok=True)

    if args.save_image:
        to_save = ('image', 'noise')
    else:
        to_save = ('source', 'rir', 'noise')

    with open(args.log, 'w') as log_stream:
        print('[', file=log_stream)
        with open(args.outlist, 'w') as outfile_stream:
            # Process each audio file.
            for i, iofiles in enumerate(iolist):
                # BUGFIX: the value printed with '%' was a 0-1 fraction;
                # scale by 100 so it really is a percentage.
                print('[{}/{} ({:.3f}%)]'.format(
                    i + 1, len(iolist), 100 * i / len(iolist)))

                infiles = [os.path.abspath(f) for f in iofiles['inputs']]
                outfile = os.path.abspath(iofiles['output'])

                # Load each input signal, skipping unreadable files.
                x = []
                for f in infiles:
                    try:
                        _x, sr = libaueffect.read_wav(
                            f, sample_rate=args.sample_rate, channel=0)
                    except RuntimeError:
                        print('Wav file is broken, skipped: {}'.format(f))
                        continue
                    if args.cancel_dcoffset:
                        _x -= np.mean(_x)
                    x.append(_x)
                sr = args.sample_rate

                # Choose the mixer to use.
                mixer = mixers[np.random.choice(nmixers, p=priors)]
                y, p = mixer(
                    x, sr, output_filename=outfile, input_filenames=infiles,
                    to_save=to_save,
                    save_as_one_file=(not args.save_each_channel_in_onefile))

                # Save the output signal.
                print(os.path.abspath(outfile), file=outfile_stream)
                libaueffect.write_wav(
                    y, outfile, sample_rate=sr, avoid_clipping=False,
                    save_as_one_file=(not args.save_each_channel_in_onefile))

                # Log the mixing parameters for this entry.
                params = OrderedDict(
                    [('output', outfile), ('inputs', infiles)]
                    + list(p.items()))
                json.dump(params, log_stream, indent=4)

                # Print the list element separator.
                if iofiles == iolist[-1]:
                    print('', file=log_stream)
                else:
                    print(',', file=log_stream)

        # End of the list.
        print(']', file=log_stream)

    # CONSISTENCY: the sibling main() returns 0; do the same here.
    return 0