Example no. 1
0
    def __call__(self, inputs, samplerate, output_filename, input_filenames, to_save=('image', 'noise'), save_as_one_file=False):
        """Mix up to two source signals into a reverberant (optionally noisy) utterance.

        Args:
            inputs: list of 1-D numpy arrays holding the source signals;
                0, 1, or the first 2 entries are used.
            samplerate: sampling rate in Hz.
            output_filename: path of the mixture file; side-file names are derived from it.
            input_filenames: source file paths (used for logging only).
            to_save: subset of {'image', 'noise', 'rir', 'source'} selecting which
                intermediate signals are written to disk.
            save_as_one_file: forwarded to libaueffect.write_wav.

        Returns:
            (u, params): the scaled mixture signal and an OrderedDict describing
            the mixing process (SIR, SNR, amplitude, overlap, output paths, ...).
        """
        print(output_filename)
        for i, f in enumerate(input_filenames):
            if i == 0:
                print('\t= {}'.format(f))
            else:
                print('\t+ {}'.format(f))

        # 0 sources: emit a silent two-channel signal of random length.
        if len(inputs) == 0:
            overlap_len = 0
            sir = float('inf')
            nsamples = int(np.random.uniform(self._min_sillen, self._max_mixlen) * samplerate)
            x = np.zeros((2, nsamples))

        # 1 source: pair it with silence.
        elif len(inputs) == 1:
            overlap_len = 0
            sir = float('inf')
            x = np.stack([inputs[0], np.zeros(inputs[0].shape)])

        # 2+ sources: overlap the first two.
        else:
            # Randomly determine the overlap length (in seconds).
            overlap_len = np.random.uniform(self._min_overlap, min(len(inputs[0]), len(inputs[1])) / samplerate)
            maxspeechsamples = int(samplerate * (self._max_mixlen + overlap_len))

            len0 = len(inputs[0])
            len1 = len(inputs[1])

            # If the total speech exceeds the budget, truncate each source
            # proportionally to its length.
            if len0 + len1 > maxspeechsamples:
                x = []
                x.append(inputs[0][:int(maxspeechsamples * (len0 / (len0 + len1)))])
                x.append(inputs[1][:int(maxspeechsamples * (len1 / (len0 + len1)))])
            else:
                x = copy.deepcopy(inputs[:2])

            # Ensure the overlap length is shorter than the source signals.
            overlap_len = min(overlap_len, len(x[0])/samplerate, len(x[1])/samplerate)

            # Pad so that the second source starts overlap_len seconds before
            # the first one ends.
            target_len = len(x[0]) + len(x[1]) - int(overlap_len * samplerate)
            x = np.stack([np.pad(x[0], mode='constant', pad_width=(0, target_len - len(x[0]))),
                          np.pad(x[1], mode='constant', pad_width=(target_len - len(x[1]), 0))])

            # Randomly determine the SIR.
            # Note that the SIR is measured for the whole utterances instead of the overlapped segment.
            # This is because the overlapped segment may contain only non-speech (silence/noise).
            x[1], sir = libaueffect.signals.scale_noise_to_random_snr(x[1], x[0], self._min_sir, self._max_sir)

        # Truncate long signals.
        x = x[:, :min(x.shape[1], int(samplerate * self._max_mixlen))]
        target_len = x.shape[1]

        # Filter and mix the signals.
        target_amp = np.random.uniform(self._min_amplitude, self._max_amplitude)
        h, h_info = self._room_simulator(nspeakers=2, info_as_display_style=True)
        z, y, h = libaueffect.reverb_mix(x, h, sample_rate=samplerate, cancel_delay=self._no_delay, second_arg_is_filename=False)

        # Generate noise.
        if self._noise_generator is not None:
            n = self._noise_generator(nsamples=target_len)
            if len(inputs) > 0:
                n, snr = libaueffect.signals.scale_noise_to_random_snr(n, z, self._min_snr, self._max_snr)
            else:
                snr = float('-inf')

            # Add the noise to the reverberant mixture.
            u = z + n

        else:
            u = np.copy(z)

        # Normalize the generated signal to the random target amplitude.
        max_amplitude = np.amax(np.absolute(u))
        scale = (32767/32768) / max_amplitude * target_amp

        u *= scale
        y *= scale
        # BUGFIX: 'n' is only defined when a noise generator is configured;
        # scaling it unconditionally raised NameError otherwise.
        if self._noise_generator is not None:
            n *= scale
        for i in range(len(h)):
            h[i] *= scale

        # Description of the mixing process.
        params = [('mixer', self.__class__.__name__),
                  ('implementation', __name__),
                  ('sir', sir),
                  ('amplitude', target_amp),
                  ('overlap length in seconds', overlap_len)]
        params += h_info
        if self._noise_generator is not None:
            params.append( ('snr', snr) )

        path, ext = os.path.splitext(output_filename)

        # Save the reverberant source signals.
        if 'image' in to_save:
            for i in range(len(y)):
                outfile = '{}_s{}{}'.format(path, i, ext)
                libaueffect.write_wav(y[i], outfile, sample_rate=samplerate, avoid_clipping=False, save_as_one_file=save_as_one_file)
                params.append(('source{}'.format(i), outfile))

        # Save the noise.
        if 'noise' in to_save and self._noise_generator is not None:
            outfile = '{}_s{}{}'.format(path, len(y), ext)
            libaueffect.write_wav(n, outfile, sample_rate=samplerate, avoid_clipping=False, save_as_one_file=save_as_one_file)
            params.append(('noise', outfile))

        # Save the RIRs.
        if 'rir' in to_save:
            for i in range(len(h)):
                outfile = '{}_r{}{}'.format(path, i, ext)
                libaueffect.write_wav(h[i], outfile, sample_rate=samplerate, avoid_clipping=False, save_as_one_file=save_as_one_file)
                params.append(('rir{}'.format(i), outfile))

        # Save the anechoic source signals.
        if 'source' in to_save:
            path, ext = os.path.splitext(output_filename)
            for i in range(len(x)):
                outfile = '{}_a{}{}'.format(path, i, ext)
                libaueffect.write_wav(x[i], outfile, sample_rate=samplerate, avoid_clipping=False, save_as_one_file=save_as_one_file)
                params.append(('anechoic{}'.format(i), outfile))

        return u, OrderedDict(params)
    def __call__(self,
                 inputs,
                 samplerate,
                 output_filename,
                 input_filenames,
                 to_save=('image', 'noise'),
                 save_as_one_file=False):
        """Mix fully overlapped source signals into a reverberant utterance.

        The sources are zero-padded to a common length (so they all start at
        time zero), the second one is rescaled to a random SIR, and the pair is
        convolved with simulated room impulse responses and optionally mixed
        with generated noise.

        Args:
            inputs: list of 1-D numpy arrays holding the source signals;
                0, 1, or the first 2 entries are used.
            samplerate: sampling rate in Hz.
            output_filename: path of the mixture file; side-file names are derived from it.
            input_filenames: source file paths (used for logging only).
            to_save: subset of {'image', 'noise', 'rir', 'source'} selecting which
                intermediate signals are written to disk.
            save_as_one_file: forwarded to libaueffect.write_wav.

        Returns:
            (u, params): the scaled mixture signal and an OrderedDict describing
            the mixing process.
        """
        print(output_filename)
        for i, f in enumerate(input_filenames):
            if i == 0:
                print('\t= {}'.format(f))
            else:
                print('\t+ {}'.format(f))

        if len(inputs) == 0:
            sir = float('inf')
            nsamples = int(
                np.random.uniform(self._min_sillen, self._max_mixlen) *
                samplerate)
            x = np.zeros((2, nsamples))

        else:
            # Pad zeros to the end of each signal so that they have the same length.
            overlap_len = min([len(x) for x in inputs])
            target_len = max([len(x) for x in inputs])
            x = [
                np.pad(y, mode='constant', pad_width=(0, target_len - len(y)))
                for y in inputs
            ]

            # Generate anechoic source signals.
            if len(x) == 1:
                sir = float('inf')
                x = np.stack([x[0], np.zeros(x[0].shape)])
            else:
                x = x[:2]
                # The SIR is measured only over the overlapped segment.
                x[1], sir = libaueffect.signals.scale_noise_to_random_snr(
                    x[1],
                    x[0],
                    self._min_sir,
                    self._max_sir,
                    valid_segment=(0, overlap_len))
                x = np.stack(x)

        # Truncate long signals.
        x = x[:, :min(x.shape[1], int(samplerate * self._max_mixlen))]
        target_len = x.shape[1]

        # Filter and mix the signals.
        target_amp = np.random.uniform(self._min_amplitude,
                                       self._max_amplitude)
        h, h_info = self._room_simulator(nspeakers=2,
                                         info_as_display_style=True)
        z, y, h = libaueffect.reverb_mix(x,
                                         h,
                                         sample_rate=samplerate,
                                         cancel_delay=self._no_delay,
                                         second_arg_is_filename=False)

        # Generate noise.
        if self._noise_generator is not None:
            n = self._noise_generator(nsamples=target_len)
            if len(inputs) > 0:
                n, snr = libaueffect.signals.scale_noise_to_random_snr(
                    n, z, self._min_snr, self._max_snr)
            else:
                snr = float('-inf')

            # Add the noise to the reverberant mixture.
            u = z + n

        else:
            u = np.copy(z)

        # Normalize the generated signal to the random target amplitude.
        max_amplitude = np.amax(np.absolute(u))
        scale = (32767 / 32768) / max_amplitude * target_amp

        u *= scale
        y *= scale
        # BUGFIX: 'n' is only defined when a noise generator is configured;
        # scaling it unconditionally raised NameError otherwise.
        if self._noise_generator is not None:
            n *= scale
        for i in range(len(h)):
            h[i] *= scale

        # Description of the mixing process.
        params = [('mixer', self.__class__.__name__),
                  ('implementation', __name__), ('sir', sir),
                  ('amplitude', target_amp)]
        params += h_info
        if self._noise_generator is not None:
            params.append(('snr', snr))

        path, ext = os.path.splitext(output_filename)

        # Save the reverberant source signals.
        if 'image' in to_save:
            for i in range(len(y)):
                outfile = '{}_s{}{}'.format(path, i, ext)
                libaueffect.write_wav(y[i],
                                      outfile,
                                      sample_rate=samplerate,
                                      avoid_clipping=False,
                                      save_as_one_file=save_as_one_file)
                params.append(('source{}'.format(i), outfile))

        # Save the noise.
        if 'noise' in to_save and self._noise_generator is not None:
            outfile = '{}_s{}{}'.format(path, len(y), ext)
            libaueffect.write_wav(n,
                                  outfile,
                                  sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('noise', outfile))

        # Save the RIRs.
        if 'rir' in to_save:
            for i in range(len(h)):
                outfile = '{}_r{}{}'.format(path, i, ext)
                libaueffect.write_wav(h[i],
                                      outfile,
                                      sample_rate=samplerate,
                                      avoid_clipping=False,
                                      save_as_one_file=save_as_one_file)
                params.append(('rir{}'.format(i), outfile))

        # Save the anechoic source signals.
        if 'source' in to_save:
            path, ext = os.path.splitext(output_filename)
            for i in range(len(x)):
                outfile = '{}_a{}{}'.format(path, i, ext)
                libaueffect.write_wav(x[i],
                                      outfile,
                                      sample_rate=samplerate,
                                      avoid_clipping=False,
                                      save_as_one_file=save_as_one_file)
                params.append(('anechoic{}'.format(i), outfile))

        return u, OrderedDict(params)
def main(args):
    """Generate mixed audio files for every entry of an IO list.

    Reads the JSON IO list, mixes each set of input wavs with a randomly
    chosen mixer, writes the mixture plus intermediate signals, and logs the
    mixing parameters as a JSON array.

    Args:
        args: parsed command-line namespace (iolist, mixers_configfile,
            outlist, log, sample_rate, random_seed, save_image,
            cancel_dcoffset, save_each_channel_in_onefile).

    Returns:
        0 on completion.
    """
    # Make the results predictable.
    if args.random_seed is not None:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    # Read in the IO list.
    with open(args.iolist) as f:
        iolist = json.load(f)

    # Instantiate the mixers.
    mixers, priors = libaueffect.create_AudioMixerArray(args.mixers_configfile)
    nmixers = len(mixers)

    # Create the output directories.
    os.makedirs(os.path.dirname(os.path.abspath(args.outlist)), exist_ok=True)
    os.makedirs(os.path.dirname(os.path.abspath(args.log)), exist_ok=True)

    if args.save_image:
        to_return = ('image', 'noise')
    else:
        to_return = ('source', 'rir', 'noise')

    with open(args.log, 'w') as log_stream:
        print('[', file=log_stream)

        with open(args.outlist, 'w') as outfile_stream:
            # Process each audio file.
            for i, iofiles in enumerate(iolist):
                # BUGFIX: multiply by 100 so the displayed value is a percentage.
                print('[{}/{} ({:.3f}%)]'.format(i + 1, len(iolist),
                                                 100 * i / len(iolist)))

                infiles = [
                    os.path.abspath(f['path']) for f in iofiles['inputs']
                ]
                offsets = [
                    int(args.sample_rate * f['offset'])
                    for f in iofiles['inputs']
                ]
                spkr_labs = [f['speaker_id'] for f in iofiles['inputs']]
                outfile = os.path.abspath(iofiles['output'])

                # Load each input signal.
                # BUGFIX: a broken wav used to be dropped from x only, which
                # misaligned the signals with offsets and spkr_labs. Keep the
                # three lists in sync while skipping.
                x, kept_offsets, kept_labs = [], [], []
                for f, off, lab in zip(infiles, offsets, spkr_labs):
                    try:
                        _x, sr = libaueffect.read_wav(
                            f, sample_rate=args.sample_rate, channel=0)
                    except RuntimeError:
                        print('Wav file is broken, skipped: {}'.format(f))
                        continue

                    if args.cancel_dcoffset:
                        _x -= np.mean(_x)
                    x.append(_x)
                    kept_offsets.append(off)
                    kept_labs.append(lab)
                sr = args.sample_rate

                # Mix the signals.
                mixer = mixers[np.random.choice(nmixers, p=priors)]
                y, p, interm = mixer(x,
                                     kept_offsets,
                                     kept_labs,
                                     to_return=to_return)

                # Save the output signal.
                print(os.path.abspath(outfile), file=outfile_stream)
                libaueffect.write_wav(
                    y,
                    outfile,
                    sample_rate=sr,
                    avoid_clipping=False,
                    save_as_one_file=(not args.save_each_channel_in_onefile))

                # Save the intermediate signals.
                for dt in interm.values():
                    for key in dt:
                        filename = f"{os.path.splitext(outfile)[0]}_{key}.wav"
                        libaueffect.write_wav(
                            dt[key],
                            filename,
                            avoid_clipping=False,
                            save_as_one_file=(
                                not args.save_each_channel_in_onefile))

                # Log the per-file mixing description.
                input_info = [{
                    'path': os.path.abspath(f['path']),
                    'speaker_id': f['speaker_id'],
                    'offset': f['offset'],
                    'length_in_seconds': f['length_in_seconds']
                } for f in iofiles['inputs']]
                params = OrderedDict([('output',
                                       outfile), ('inputs', input_info)] +
                                     list(p.items()))
                json.dump(params, log_stream, indent=4)

                # Print the list element separator.
                if iofiles == iolist[-1]:
                    print('', file=log_stream)
                else:
                    print(',', file=log_stream)

            # End of the list.
            print(']', file=log_stream)

    return 0
Example no. 4
0
    def __call__(self,
                 inputs,
                 samplerate,
                 output_filename,
                 input_filenames,
                 to_save=('image', 'noise'),
                 save_as_one_file=False):
        """Mix two sources with a randomly placed partial overlap.

        The second source is inserted at a random position inside the first
        one, rescaled to a random SIR measured over the overlapped segment,
        then the pair is convolved with simulated room impulse responses and
        optionally mixed with generated noise.

        Args:
            inputs: list of 1-D numpy arrays holding the source signals;
                0, 1, or the first 2 entries are used.
            samplerate: sampling rate in Hz.
            output_filename: path of the mixture file; side-file names are derived from it.
            input_filenames: source file paths (used for logging only).
            to_save: subset of {'image', 'noise', 'rir', 'source'} selecting which
                intermediate signals are written to disk.
            save_as_one_file: forwarded to libaueffect.write_wav.

        Returns:
            (u, params): the scaled mixture signal and an OrderedDict describing
            the mixing process.
        """
        print(output_filename)
        for i, f in enumerate(input_filenames):
            if i == 0:
                print('\t= {}'.format(f))
            else:
                print('\t+ {}'.format(f))

        # 0 sources: emit a silent two-channel signal of random length.
        if len(inputs) == 0:
            overlap_len = 0
            sir = float('inf')
            overlap_start = 0
            nsamples = int(
                np.random.uniform(self._min_sillen, self._max_mixlen) *
                samplerate)
            x = np.zeros((2, nsamples))

        # 1 source: pair it with silence.
        elif len(inputs) == 1:
            overlap_len = 0
            sir = float('inf')
            overlap_start = 0
            x = np.stack([inputs[0], np.zeros(inputs[0].shape)])

        # 2 sources
        else:
            if len(inputs[0]) > int(self._max_mixlen * samplerate):
                y = inputs[0][:int(self._max_mixlen * samplerate)]
            else:
                y = inputs[0]

            # Randomly determine the interfering signal length.
            # BUGFIX: convert to samples before truncating to int; the old
            # 'int(self._min_overlap) * samplerate' truncated the seconds
            # value first (e.g. a 0.5 s minimum collapsed to 0).
            min_overlap = int(self._min_overlap * samplerate)
            max_overlap = min(int(len(y) * self._max_overlap), len(inputs[1]))
            if max_overlap > min_overlap:
                overlap_samples = np.random.randint(min_overlap, max_overlap)
            else:
                overlap_samples = max_overlap
            overlap_len = overlap_samples / samplerate

            # BUGFIX: include the right edge (+1) so that randint does not
            # raise ValueError when the overlap spans the whole signal.
            overlap_begin = np.random.randint(
                0, len(y) - overlap_samples + 1)
            overlap_end = overlap_begin + overlap_samples
            overlap_start = overlap_begin / samplerate

            z = np.zeros(y.shape)
            z[overlap_begin:overlap_end] = inputs[1][:overlap_samples]

            # Stack the signals.
            x = np.stack([y, z])

            # Randomly determine the SIR over the overlapped segment.
            x[1], sir = libaueffect.signals.scale_noise_to_random_snr(
                x[1],
                x[0],
                self._min_sir,
                self._max_sir,
                valid_segment=(overlap_begin, overlap_end))

        # Truncate long signals.
        x = x[:, :min(x.shape[1], int(samplerate * self._max_mixlen))]
        target_len = x.shape[1]

        # Filter and mix the signals.
        target_amp = np.random.uniform(self._min_amplitude,
                                       self._max_amplitude)
        h, h_info = self._room_simulator(nspeakers=2,
                                         info_as_display_style=True)
        z, y, h = libaueffect.reverb_mix(x,
                                         h,
                                         sample_rate=samplerate,
                                         cancel_delay=self._no_delay,
                                         second_arg_is_filename=False)

        # Generate noise.
        if self._noise_generator is not None:
            n = self._noise_generator(nsamples=target_len)
            if len(inputs) > 0:
                n, snr = libaueffect.signals.scale_noise_to_random_snr(
                    n, z, self._min_snr, self._max_snr)
            else:
                snr = float('-inf')

            # Add the noise to the reverberant mixture.
            u = z + n

        else:
            u = np.copy(z)

        # Normalize the generated signal to the random target amplitude.
        max_amplitude = np.amax(np.absolute(u))
        scale = (32767 / 32768) / max_amplitude * target_amp

        u *= scale
        y *= scale
        # BUGFIX: 'n' is only defined when a noise generator is configured;
        # scaling it unconditionally raised NameError otherwise.
        if self._noise_generator is not None:
            n *= scale
        for i in range(len(h)):
            h[i] *= scale

        # Description of the mixing process.
        params = [('mixer', self.__class__.__name__),
                  ('implementation', __name__), ('sir', sir),
                  ('amplitude', target_amp), ('overlap start', overlap_start),
                  ('overlap length in seconds', overlap_len)]
        params += h_info
        if self._noise_generator is not None:
            params.append(('snr', snr))

        path, ext = os.path.splitext(output_filename)

        # Save the reverberant source signals.
        if 'image' in to_save:
            for i in range(len(y)):
                outfile = '{}_s{}{}'.format(path, i, ext)
                libaueffect.write_wav(y[i],
                                      outfile,
                                      sample_rate=samplerate,
                                      avoid_clipping=False,
                                      save_as_one_file=save_as_one_file)
                params.append(('source{}'.format(i), outfile))

        # Save the noise.
        if 'noise' in to_save and self._noise_generator is not None:
            outfile = '{}_s{}{}'.format(path, len(y), ext)
            libaueffect.write_wav(n,
                                  outfile,
                                  sample_rate=samplerate,
                                  avoid_clipping=False,
                                  save_as_one_file=save_as_one_file)
            params.append(('noise', outfile))

        # Save the RIRs.
        if 'rir' in to_save:
            for i in range(len(h)):
                outfile = '{}_r{}{}'.format(path, i, ext)
                libaueffect.write_wav(h[i],
                                      outfile,
                                      sample_rate=samplerate,
                                      avoid_clipping=False,
                                      save_as_one_file=save_as_one_file)
                params.append(('rir{}'.format(i), outfile))

        # Save the anechoic source signals.
        if 'source' in to_save:
            path, ext = os.path.splitext(output_filename)
            for i in range(len(x)):
                outfile = '{}_a{}{}'.format(path, i, ext)
                libaueffect.write_wav(x[i],
                                      outfile,
                                      sample_rate=samplerate,
                                      avoid_clipping=False,
                                      save_as_one_file=save_as_one_file)
                params.append(('anechoic{}'.format(i), outfile))

        return u, OrderedDict(params)
def main(args):
    """Generate mixed audio files for every entry of an IO list.

    Reads the JSON IO list, mixes each set of input wavs with a randomly
    chosen mixer (the mixer itself writes the selected side files), writes
    the mixture, and logs the mixing parameters as a JSON array.

    Args:
        args: parsed command-line namespace (iolist, mixers_configfile,
            outlist, log, sample_rate, random_seed, save_image,
            cancel_dcoffset, save_each_channel_in_onefile).

    Returns:
        0 on completion.
    """
    # Make the results predictable.
    if args.random_seed is not None:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    # Read in the IO list.
    with open(args.iolist) as f:
        iolist = json.load(f)

    # Instantiate the mixers.
    mixers, priors = libaueffect.create_AudioMixerArray(args.mixers_configfile)
    nmixers = len(mixers)

    # Create the output directories.
    os.makedirs(os.path.dirname(os.path.abspath(args.outlist)), exist_ok=True)
    os.makedirs(os.path.dirname(os.path.abspath(args.log)), exist_ok=True)

    if args.save_image:
        to_save = ('image', 'noise')
    else:
        to_save = ('source', 'rir', 'noise')

    with open(args.log, 'w') as log_stream:
        print('[', file=log_stream)

        with open(args.outlist, 'w') as outfile_stream:
            # Process each audio file.
            for i, iofiles in enumerate(iolist):
                # BUGFIX: multiply by 100 so the displayed value is a percentage.
                print('[{}/{} ({:.3f}%)]'.format(i + 1, len(iolist),
                                                 100 * i / len(iolist)))

                infiles = [os.path.abspath(f) for f in iofiles['inputs']]
                outfile = os.path.abspath(iofiles['output'])

                # Load each input signal.
                # BUGFIX: a broken wav used to be dropped from x only, which
                # misaligned the signals with the file names passed to the
                # mixer. Keep the two lists in sync while skipping.
                x, kept_files = [], []
                for f in infiles:
                    try:
                        _x, sr = libaueffect.read_wav(
                            f, sample_rate=args.sample_rate, channel=0)
                    except RuntimeError:
                        print('Wav file is broken, skipped: {}'.format(f))
                        continue

                    if args.cancel_dcoffset:
                        _x -= np.mean(_x)
                    x.append(_x)
                    kept_files.append(f)
                sr = args.sample_rate

                # Choose the mixer to use.
                mixer = mixers[np.random.choice(nmixers, p=priors)]

                y, p = mixer(
                    x,
                    sr,
                    output_filename=outfile,
                    input_filenames=kept_files,
                    to_save=to_save,
                    save_as_one_file=(not args.save_each_channel_in_onefile))

                print(os.path.abspath(outfile), file=outfile_stream)
                libaueffect.write_wav(
                    y,
                    outfile,
                    sample_rate=sr,
                    avoid_clipping=False,
                    save_as_one_file=(not args.save_each_channel_in_onefile))

                # Log the per-file mixing description.
                params = OrderedDict([('output', outfile), ('inputs',
                                                            infiles)] +
                                     list(p.items()))
                json.dump(params, log_stream, indent=4)

                # Print the list element separator.
                if iofiles == iolist[-1]:
                    print('', file=log_stream)
                else:
                    print(',', file=log_stream)

            # End of the list.
            print(']', file=log_stream)

    return 0