Example 1
def separate(ckpt, mtgph, input_path, output_path):
    model = inference.SeparationModel(ckpt, mtgph)
    if not os.path.exists(input_path):
        raise Exception("Input file not found: {}".format(input_path))
    file_list = [input_path]
    from scipy.io.wavfile import read as read_wav
    from scipy.io.wavfile import write as write_wav
    sr, f = read_wav(file_list[0])

    with model.graph.as_default():
        dataset = data_io.wavs_to_dataset(file_list,
                                          batch_size=1,
                                          num_samples=len(f),
                                          repeat=False)
        # Strip batch and mic dimensions.
        dataset['receiver_audio'] = dataset['receiver_audio'][0, 0]
        dataset['source_images'] = dataset['source_images'][0, :, 0]

    waveforms = model.sess.run(dataset)
    separated_waveforms = model.separate(waveforms['receiver_audio'])
    # print(separated_waveforms)
    # print(separated_waveforms.shape)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    for i in range(separated_waveforms.shape[0]):
        write_wav(output_path + '/sub_target{}.wav'.format(i), sr,
                  separated_waveforms[i, :])
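A hypothetical invocation, with placeholder checkpoint, meta-graph, and file paths:

# Hypothetical paths: a trained separation checkpoint, its inference
# meta-graph, an input mixture, and an output directory.
separate('model.ckpt', 'inference.meta', 'mixture.wav', 'separated')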
Example 2
 def __call__(self, feats, hparams, name=None):
     linear_feats = self._logmel_to_linear(feats)
     samples = self._griffin_lim(linear_feats, hparams.gl_iters)
     #samples = samples / 32768
     if not os.path.exists(hparams.output_directory):
         os.makedirs(hparams.output_directory)
     output_path = os.path.join(hparams.output_directory,
                                '%s.wav' % str(name))
     write_wav(output_path, self.sample_rate,
               (samples * np.iinfo(np.int16).max).astype(np.int16))
Example 4
    def save_to_wav(self, file_name):
        """ Save this time series to a wav format audio file.

        Parameters
        ----------
        file_name : string
             The output file name
        """
        scaled = _numpy.int16(self.numpy() / max(abs(self)) * 32767)
        write_wav(file_name, int(self.sample_rate), scaled)
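The method peak-normalizes the float series and scales it into the int16 range before writing. The same pattern as a minimal standalone sketch (the file name and test tone are illustrative):

import numpy as np
from scipy.io.wavfile import write as write_wav

# Illustrative data: one second of a 440 Hz sine at 44.1 kHz.
sr = 44100
t = np.linspace(0, 1, sr, endpoint=False)
x = np.sin(2 * np.pi * 440 * t)

# Peak-normalize to [-1, 1], then scale to the int16 range.
scaled = np.int16(x / np.max(np.abs(x)) * 32767)
write_wav('tone.wav', sr, scaled)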
Example 5
def wav2guitar_distortion(inFile, outFile=None, deepValue=0.5):
    """Transform the sound in the file as if it had passed
    through a guitar distortion pedal."""
    if outFile is None:
        outFile = changeFileExt(inFile, '_distortion.wav')

    fps, data_in = read_wav(inFile)
    Amax = int(deepValue * np.max(data_in))
    Amin = int(deepValue * np.min(data_in))
    data_out = np.clip(data_in, Amin, Amax)  # clip sound amplitude
    write_wav(outFile, fps, data_out)
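Hard-clipping the waveform at a fraction of its peak is what produces the distortion. A hypothetical call (the file name is a placeholder):

# Hypothetical usage: clip 'guitar.wav' at 30% of its peak amplitude.
wav2guitar_distortion('guitar.wav', deepValue=0.3)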
Example 6
    def save(self, path):
        """save Audio to .wav file using scipy.io.wavfile
        
        Args:
            self
            path: destination for wav file """

        if path.split(".")[-1] != "wav":
            raise ValueError("file extension must be .wav")

        from scipy.io.wavfile import write as write_wav

        write_wav(path, self.sample_rate, self.samples)
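The extension check via split('.') works for simple names; os.path.splitext is the more robust idiom. A hypothetical standalone guard (not part of the original class):

import os

def check_wav_extension(path):
    # Hypothetical helper: same guard via os.path.splitext, which also
    # handles file names that contain extra dots.
    if os.path.splitext(path)[1].lower() != '.wav':
        raise ValueError("file extension must be .wav")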
Example 7
    def handleConvolve(self):
        """Runs the convolution when the convolve button is clicked.
        """
        def _getFile(widget, name):
            filename = widget.text()
            if filename == '':
                raise Exception(('Cannot run convolution: must specify the {}'
                                 ).format(name))
            if not os.path.exists(filename):
                raise Exception(
                    ('Cannot run convolution: {} path {} does not exist'
                     ).format(name, filename))

            rate, data = read_wav(filename)
            data = pcm2float(data, 'float32')

            if len(data.shape) == 1:
                data = data.reshape((len(data), 1))

            return data, rate

        try:
            signal, rate = _getFile(self.signal_loc, 'signal')
            filter, _ = _getFile(self.filter_loc, 'filter')

            print('Signal has {} channel(s).'.format(signal.shape[1]))
            print('Filter has {} channel(s).'.format(filter.shape[1]))

            rs = None
            count = 0
            for i in range(signal.shape[1]):
                for j in range(filter.shape[1]):
                    convolution = float2pcm(
                        convolve(signal[:, i], filter[:, j]), 'int16')
                    convolution = convolution.reshape((len(convolution), 1))
                    if rs is None:
                        rs = convolution
                    else:
                        rs = np.append(rs, convolution, axis=1)
                    count += 1
            filename = self.saveFileDialog()
            if not filename:
                raise Exception('Error: Save file not specified')

            write_wav(filename, rate, rs)
            self.save_loc = filename

        except Exception as e:
            self.output.setText(str(e))
Example 8
def save_sample(model_name, model, waveglow_path, tacotron2_path, phrase_path,
                filepath, sampling_rate, fp16):
    if phrase_path is None:
        return
    phrase = torch.load(phrase_path, map_location='cpu')
    if model_name == 'Tacotron2':
        if waveglow_path is None:
            raise Exception(
                "WaveGlow checkpoint path is missing, could not generate sample"
            )
        with torch.no_grad():
            checkpoint = torch.load(waveglow_path, map_location='cpu')
            waveglow = models.get_model('WaveGlow',
                                        checkpoint['config'],
                                        to_fp16=False,
                                        to_cuda=False)
            waveglow.eval()
            model.eval()
            mel = model.infer(phrase.cuda())[0].cpu()
            model.train()
            if fp16:
                mel = mel.float()
            audio = waveglow.infer(mel, sigma=0.6)
    elif model_name == 'WaveGlow':
        if tacotron2_path is None:
            raise Exception(
                "Tacotron2 checkpoint path is missing, could not generate sample"
            )
        with torch.no_grad():
            checkpoint = torch.load(tacotron2_path, map_location='cpu')
            tacotron2 = models.get_model('Tacotron2',
                                         checkpoint['config'],
                                         to_fp16=False,
                                         to_cuda=False)
            tacotron2.eval()
            mel = tacotron2.infer(phrase)[0].cuda()
            model.eval()
            if fp16:
                mel = mel.half()
            audio = model.infer(mel, sigma=0.6).cpu()
            model.train()
            if fp16:
                audio = audio.float()
    else:
        raise NotImplementedError(
            "unknown model requested: {}".format(model_name))
    audio = audio[0].numpy()
    audio = audio.astype('int16')
    write_wav(filepath, sampling_rate, audio)
Example 9
def write_wav_test(fileName):
    """Write a test wav file."""
    fps = 44100  # CD quality
    time = np.linspace(0, 1, fps)  # one second of time
    frame_raw = np.empty(fps, dtype=np.int16)
    Ampl = 1000
    notes_track = ['c', 'd', 'e', 'f', 'g', 'a', 'b']
    audio_frames = []
    for i in range(-2, 2):
        for n in notes_track:
            f = note2freq(n, i)
            frame_raw[:] = Ampl * np.sin(2 * np.pi * f * time)
            audio_frames.append(np.copy(frame_raw))

    write_wav(fileName, fps, np.block(audio_frames))
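note2freq is not shown in this snippet; a minimal equal-temperament sketch, assuming octave 0 is the octave containing A4 = 440 Hz, could look like this:

# Hypothetical helper: semitone offsets of the natural notes relative to A.
NOTE_OFFSETS = {'c': -9, 'd': -7, 'e': -5, 'f': -4, 'g': -2, 'a': 0, 'b': 2}

def note2freq(note, octave):
    # Equal temperament around A4 = 440 Hz; octave 0 contains A4.
    semitones = NOTE_OFFSETS[note] + 12 * octave
    return 440.0 * 2 ** (semitones / 12)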
Example 10
def write_to_file(arr, ext, use_gzip=False):
        # 'use_gzip' is renamed from 'gzip' so it does not shadow the gzip
        # module used for compression below.
        global file_index, record_name
        sys.stdout.write('writing ' + str(len(arr)) + ' values to file...')
        sys.stdout.flush()

        data_dir = 'data-temp/'
        # fileprefix = 'fio-disease-'
        fileprefix = record_name + '-'

        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

        filename = data_dir + fileprefix + str(file_index) + '.' + ext

        if ext == 'dat':
            with open(filename, 'w') as f:
                arr.tofile(f)
        elif ext == 'txt':
            np.savetxt(filename, arr)
        elif ext == 'wav':
            rate = int(arr[1])
            arr = arr[2:] # del record_time and rate
            # scaled = np.int16(arr / np.max(np.abs(arr)) * 32767)
            # write_wav(filename, rate, scaled)
            write_wav(filename, rate, arr)
        else:
            print('wrong file extension')

        file_index += 1

        filesize = os.stat(filename).st_size
        print(" done (", filesize, ' bytes)', sep='')
        print(filename)
        if use_gzip:
            sys.stdout.write('gzip data compression: ' + str(filesize / 1000000) + 'MB...')
            sys.stdout.flush()

            with open(filename, 'rb') as f_in, gzip.open(filename + '.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            gzfilesize = os.stat(filename + '.gz').st_size
            print(' done. File reduced to ', gzfilesize / 1000000, 'MB (%0.0f' % (gzfilesize/filesize*100), '% of uncompressed)', sep='')
Example 11
def compare(
    model: keras.Model,
    generator: SoundGenerator,
    parameters: ParameterSet,
    orig_file: str,
    output_dir: str,
    orig_params,
    length: float,
    sample_rate: int,
    extra: dict = {},
):
    # (copy original file if given)
    base_filename = orig_file.replace(".wav", "")
    base_filename = re.sub(r".*/", "", base_filename)
    copy_file: str = f"{output_dir}/{base_filename}_copy.wav"
    regen_file: str = f"{output_dir}/{base_filename}_duplicate.wav"
    reconstruct_file: str = f"{output_dir}/{base_filename}_reconstruct.wav"
    print(f"Creating copy as {copy_file}")

    # Load the wave file
    fs, data = wavfile.read(orig_file)
    # Copy original file to make sure the data round-trips
    write_wav(copy_file, sample_rate, data)

    # Decode original params, and regenerate output (make sure it's correct)
    orig = parameters.encoding_to_settings(orig_params)
    generator.generate(orig, regen_file, length, sample_rate, extra)

    # Run the wavefile into the model for prediction
    X = [data]
    Xd = np.expand_dims(np.vstack(X), axis=2)
    # Get encoded parameters out of model
    result = model.predict(Xd)[0]

    # Decode prediction, and reconstruct output
    predicted = parameters.encoding_to_settings(result)
    generator.generate(predicted, reconstruct_file, length, sample_rate, extra)
Example 12
                     batch_size=batch_size,
                     epochs=epochs,
                     callbacks=[tensorboard_callback])
        else:
            wgan.fit(train_ds, batch_size=batch_size, epochs=epochs)

        # Saving the models
        if args.save_model:
            os.makedirs('trained', exist_ok=True)
            generator.save_weights('trained/g.h5')
            discriminator.save_weights('trained/d.h5')

    # Generating a Sample
    if args.generate:
        if not os.path.exists('trained'):
            print('Unable to generate samples: no trained model exists')
        else:
            print("Generating a random Sample of audio as 'output.wav'")
            if not args.train:
                generator = Generator()
                discriminator = Discriminator()
                try:
                    generator.load_weights('trained/g.h5')
                    discriminator.load_weights('trained/d.h5')
                except Exception:
                    print("An error occurred while loading the model")
            noise = tf.random.normal([1, 100])
            generated = generator(noise, training=False)
            data = generated[0].numpy().reshape(128 * 128 * 1)
            write_wav('output.wav', 16000, data)
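scipy.io.wavfile.write infers the sample format from the array dtype, so float data is written as a float WAV and is conventionally kept in [-1, 1]. A hypothetical conversion if an int16 file is preferred:

# Hypothetical: clip to [-1, 1] and convert the float output to int16.
data16 = np.int16(np.clip(data, -1.0, 1.0) * 32767)
write_wav('output16.wav', 16000, data16)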
Example 13
 def write_file(self, data: np.ndarray, filename: str, sample_rate: int):
     # REVIEW: is this needed?
     # int_data = (data * np.iinfo(np.int16).max).astype(int)
     write_wav(filename, sample_rate, data)
def split_ltts(do_loud: bool) -> None:
    """MAKEDOC: what is split_ltts doing?"""
    logg = logging.getLogger(f"c.{__name__}.split_ltts")
    # logg.setLevel("INFO")
    logg.debug("Start split_ltts")

    # the location of this file
    this_file_folder = Path(__file__).parent.absolute()
    logg.debug(f"this_file_folder: {this_file_folder}")

    ####################################################################################
    #   Build the dict of sentences without training words inside                      #
    ####################################################################################

    # the location of the original dataset
    ltts_base_folder = Path.home() / "audiodatasets" / "LibriTTS" / "dev-clean"
    logg.debug(f"ltts_base_folder: {ltts_base_folder}")

    # build good_IDs dict
    # from wav_ID to orig_wav_path
    good_IDs: ty.Dict[str, Path] = {}

    # get the words
    train_words = words_types["all"]
    # train_words = words_types["num"]

    # count good sentences
    good_sent = 0
    skip_sent = 0

    for reader_ID_path in ltts_base_folder.iterdir():
        for chapter_ID_path in reader_ID_path.iterdir():
            for file_path in chapter_ID_path.iterdir():

                # extract the name of the file
                file_name = file_path.name

                # only process normalized files
                if "normalized" not in file_name:
                    continue
                # logg.debug(f"file_path: {file_path}")

                # read the normalized transcription
                norm_tra = file_path.read_text()

                # skip this row if one of the training words is in the sentence
                found_train_word = False
                for word in train_words:
                    if word in norm_tra:
                        # logg.debug(f"Found '{word}' in {norm_tra}")
                        found_train_word = True
                        skip_sent += 1
                        break
                if found_train_word:
                    continue
                good_sent += 1

                # build the wav path
                #    file_path /path/to/file/wav_ID.normalized.txt
                #    with stem extract wav_ID.normalized
                #    then remove .normalized
                wav_ID = file_path.stem[:-11]
                # logg.debug(f"wav_ID: {wav_ID}")
                orig_wav_path = chapter_ID_path / f"{wav_ID}.wav"

                # save the file path
                good_IDs[wav_ID] = orig_wav_path

    logg.debug(f"good_sent: {good_sent}")
    logg.debug(f"skip_sent: {skip_sent}")

    ####################################################################################
    #   Create the samples                                                             #
    ####################################################################################

    # where to save the results
    ltts_proc_folder = this_file_folder / "data_raw"

    if do_loud:
        loud_tag = "_loud"
    else:
        loud_tag = ""

    ltts_label = f"_other_ltts{loud_tag}"
    ltts_label_folder = ltts_proc_folder / ltts_label

    logg.debug(f"ltts_label_folder: {ltts_label_folder}")
    if not ltts_label_folder.exists():
        ltts_label_folder.mkdir(parents=True, exist_ok=True)

    # the file to list the file names for the testing fold
    val_list_path = ltts_proc_folder / f"validation_list_lttspeech{loud_tag}.txt"
    word_val_list: ty.List[str] = []
    test_list_path = ltts_proc_folder / f"testing_list_lttspeech{loud_tag}.txt"
    word_test_list: ty.List[str] = []

    # the sizes of the test and validation folds
    val_fold_size = 0.1
    test_fold_size = 0.1

    # the target sr
    new_sr = 16000

    # how many samples to generate
    max_num_samples = 5000
    saved_samples = 0

    # a random number generator to use
    rng = np.random.default_rng(12345)

    for wav_ID in good_IDs:
        orig_wav_path = good_IDs[wav_ID]
        logg.debug(f"good_IDs[{wav_ID}]: {good_IDs[wav_ID]}")

        # load the original signal
        orig_sig, orig_sr = librosa.load(orig_wav_path, sr=None)

        # resample it to 16000 Hz
        new_sig = librosa.resample(orig_sig, orig_sr, new_sr)

        # how long is the sample to extract
        if do_loud:
            sample_len = new_sr // 2
        else:
            sample_len = new_sr

        # split it in 1 second samples
        len_new_sig = new_sig.shape[0]
        num_samples = len_new_sig // sample_len

        sample_wav_template = f"ltts_{wav_ID}_{{:03d}}.wav"

        for i_sample in range(num_samples):

            # cut the sample
            sample_sig = new_sig[i_sample * sample_len : (i_sample + 1) * sample_len]

            # get the name of the wav file
            sample_name = sample_wav_template.format(i_sample)

            # save the sample
            sample_path = ltts_label_folder / sample_name
            write_wav(sample_path, new_sr, sample_sig)
            saved_samples += 1

            # the ID of the sample
            sample_id = f"{ltts_label}/{sample_name}"

            # split in training/validation/testing
            x = rng.random()
            if x < test_fold_size:
                word_test_list.append(sample_id)
            elif x < test_fold_size + val_fold_size:
                word_val_list.append(sample_id)

        if saved_samples > max_num_samples:
            break

    # save the IDs
    word_val_str = "\n".join(word_val_list)
    val_list_path.write_text(word_val_str)
    word_test_str = "\n".join(word_test_list)
    test_list_path.write_text(word_test_str)
Example 15
 def save_wav(self, wav, path):
     wav = wav * 32767 / max(0.01, np.max(np.abs(wav)))
     write_wav(path, self.hparams.sample_rate, wav.astype(np.int16))
Example 16
def split_ljspeech() -> None:
    """MAKEDOC: what is split_ljspeech doing?"""
    logg = logging.getLogger(f"c.{__name__}.split_ljspeech")
    # logg.setLevel("INFO")
    logg.debug("Start split_ljspeech")

    # the location of this file
    this_file_folder = Path(__file__).parent.absolute()
    logg.debug(f"this_file_folder: {this_file_folder}")

    ####################################################################################
    #   Find the list of sentences without training words inside                       #
    ####################################################################################

    # the location of the original dataset
    ljs_base_folder = Path("~").expanduser() / "audiodatasets" / "LJSpeech-1.1"
    logg.debug(f"ljs_base_folder: {ljs_base_folder}")
    ljs_wav_folder = ljs_base_folder / "wavs"
    logg.debug(f"ljs_wav_folder: {ljs_wav_folder}")

    # column names in the metadata file
    column_names = ["wav_ID", "tra", "norm_tra"]

    # load the metadata file
    metadata_path = ljs_base_folder / "metadata.csv"
    meta_df = pandas.read_csv(metadata_path,
                              sep="|",
                              header=0,
                              names=column_names,
                              index_col=False)

    # shuffle it
    meta_df = meta_df.sample(frac=1)

    # remove NaN
    meta_df = meta_df.dropna()

    # show some rows to be sure
    logg.debug(f"meta_df.head():\n{meta_df.head()}")

    # get the words
    train_words = words_types["all"]
    # train_words = words_types["num"]

    # count good sentences
    good_sent = 0
    skip_sent = 0

    # find good sentences
    good_IDs: ty.List[str] = []
    for index_row, row in meta_df.iterrows():
        norm_tra = row["norm_tra"]

        # skip this row if one of the training words is in the sentence
        found_train_word = False
        for word in train_words:
            if word in norm_tra:
                # logg.debug(f"Found '{word}' in {norm_tra}")
                found_train_word = True
                skip_sent += 1
                break
        if found_train_word:
            continue
        good_sent += 1

        # save the file ID
        good_IDs.append(row["wav_ID"])

    logg.debug(f"good_sent: {good_sent}")
    logg.debug(f"skip_sent: {skip_sent}")

    ####################################################################################
    #   Create the samples                                                             #
    ####################################################################################

    # where to save the results
    # ljs_proc_folder = this_file_folder / "data_ljs_raw"
    ljs_proc_folder = this_file_folder / "data_raw"

    ljs_label = "_other_ljs"
    ljs_label_folder = ljs_proc_folder / ljs_label

    logg.debug(f"ljs_label_folder: {ljs_label_folder}")
    if not ljs_label_folder.exists():
        ljs_label_folder.mkdir(parents=True, exist_ok=True)

    # the file to list the file names for the testing fold
    val_list_path = ljs_proc_folder / "validation_list_ljspeech.txt"
    word_val_list: ty.List[str] = []
    test_list_path = ljs_proc_folder / "testing_list_ljspeech.txt"
    word_test_list: ty.List[str] = []

    # the sizes of the test and validation folds
    val_fold_size = 0.1
    test_fold_size = 0.1

    # the target sr
    new_sr = 16000

    # how many samples to generate
    max_num_samples = 5000
    saved_samples = 0

    # a random number generator to use
    rng = np.random.default_rng(12345)

    for wav_ID in good_IDs[:]:
        wav_name = f"{wav_ID}.wav"

        # load the original signal
        orig_wav_path = ljs_wav_folder / wav_name
        orig_sig, orig_sr = librosa.load(orig_wav_path, sr=None)
        # logg.debug(f"orig_sig.shape: {orig_sig.shape} orig_sr: {orig_sr}")

        # resample it to 16000 Hz
        new_sig = librosa.resample(orig_sig, orig_sr, new_sr)
        # logg.debug(f"new_sig.shape: {new_sig.shape}")

        # split it in 1 second samples
        len_new_sig = new_sig.shape[0]
        num_samples = len_new_sig // new_sr

        sample_wav_template = f"ljs_{wav_ID}_{{:03d}}.wav"
        for i_sample in range(num_samples):

            # cut the sample
            sample_sig = new_sig[i_sample * new_sr:(i_sample + 1) * new_sr]

            # get the name of the wav file
            sample_name = sample_wav_template.format(i_sample)

            # save the sample
            sample_path = ljs_label_folder / sample_name
            write_wav(sample_path, new_sr, sample_sig)
            saved_samples += 1

            # the ID of the sample
            sample_id = f"{ljs_label}/{sample_name}"

            # split in training/validation/testing
            x = rng.random()
            if x < test_fold_size:
                word_test_list.append(sample_id)
            elif x < test_fold_size + val_fold_size:
                word_val_list.append(sample_id)

        if saved_samples > max_num_samples:
            break

    # save the IDs
    word_val_str = "\n".join(word_val_list)
    val_list_path.write_text(word_val_str)
    word_test_str = "\n".join(word_test_list)
    test_list_path.write_text(word_test_str)
Example 17
                                    velocities=NSYNTH_VELOCITIES,
                                    transpose=float(args.transpose)
                                )
        synth.preload_notes(instrument=instrument['name'], source_type=instrument['source_type'])
        
        instrument_folder = instrument['name']+'_'+instrument['source_type']
        init_directory(os.path.join(args.audios_path, instrument_folder))
    
        for mid in midifiles:
            _, seq_name = os.path.split(mid)
            output_name = os.path.join(args.audios_path, instrument_folder, os.path.splitext(seq_name)[0]+'.wav')

            print("Instrumento: \t", instrument_folder)
            print("Secuencia: \t", mid)
            print("Salida: \t", output_name, '\n')

            if(not os.path.isfile(output_name)):
                audio, _ = synth.render_sequence(
                                                    sequence=str(mid),
                                                    instrument=instrument['name'],
                                                    source_type=instrument['source_type'],
                                                    preset=instrument['preset'],
                                                    playback_speed=float(args.playback_speed),
                                                    duration_scale=float(args.duration_rate),
                                                )

                if(DEFAULT_SAMPLING_RATE != NSYNTH_SAMPLE_RATE):
                    audio = librosa.core.resample(audio, NSYNTH_SAMPLE_RATE, DEFAULT_SAMPLING_RATE)
                # write_audio(output_name, audio, DEFAULT_SAMPLING_RATE)
                write_wav(output_name, DEFAULT_SAMPLING_RATE, np.array(32000.*audio, np.short))
Example 18
def apiquery():
    """api interface"""
    msg = str(json.loads(request.data).get('text', ''))
    wav = tts.generate(msg)
    write_wav('tmp.wav', hp.sr, wav)
    return send_file('tmp.wav', attachment_filename='tmp.wav')
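The endpoint expects a JSON body with a text field and streams the rendered file back. A hypothetical client (the URL and route are assumptions):

import requests

# Hypothetical client; host, port, and route are assumptions.
resp = requests.post('http://localhost:5000/apiquery',
                     data='{"text": "hello world"}')
with open('reply.wav', 'wb') as f:
    f.write(resp.content)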
Example 19
 def save_wav(self, out_path, data):
     write_wav(out_path, self.rate, data)
Example 20
def preprocess_fsdd() -> None:
    """MAKEDOC: what is preprocess_fsdd doing?

    Get the dataset with:
    git clone https://github.com/Jakobovski/free-spoken-digit-dataset.git ~/free_spoken_digit_dataset
    """
    logg = logging.getLogger(f"c.{__name__}.preprocess_fsdd")
    # logg.setLevel("INFO")
    logg.debug("Start preprocess_fsdd")

    # the location of the original dataset
    fsdd_base_folder = Path("~").expanduser().absolute()
    fsdd_rec_folder = fsdd_base_folder / "free_spoken_digit_dataset" / "recordings"
    logg.debug(f"fsdd_rec_folder: {fsdd_rec_folder}")

    # the location of this file
    this_file_folder = Path(__file__).parent.absolute()
    logg.debug(f"this_file_folder: {this_file_folder}")

    # where to save the results
    # fsdd_proc_folder = this_file_folder / "data_fsdd_raw"
    fsdd_proc_folder = this_file_folder / "data_raw"
    logg.debug(f"fsdd_proc_folder: {fsdd_proc_folder}")
    if not fsdd_proc_folder.exists():
        fsdd_proc_folder.mkdir(parents=True, exist_ok=True)

    # the file to list the file names for the testing fold
    test_list_path = fsdd_proc_folder / "testing_list_fsdd.txt"
    word_test_list = []

    num2name = {
        0: "fsdd_zero",
        1: "fsdd_one",
        2: "fsdd_two",
        3: "fsdd_three",
        4: "fsdd_four",
        5: "fsdd_five",
        6: "fsdd_six",
        7: "fsdd_seven",
        8: "fsdd_eight",
        9: "fsdd_nine",
    }

    for num, name in num2name.items():
        name_folder = fsdd_proc_folder / name
        if not name_folder.exists():
            name_folder.mkdir(parents=True, exist_ok=True)

    # a random number generator to use
    rng = np.random.default_rng(12345)

    # the target sr
    new_sr = 16000

    for wav_path in tqdm(fsdd_rec_folder.iterdir()):
        # logg.debug(f"wav_path: {wav_path}")

        wav_name = wav_path.name
        # logg.debug(f"wav_name: {wav_name}")

        orig_sig, orig_sr = librosa.load(wav_path, sr=None)
        # logg.debug(f"orig_sig.shape: {orig_sig.shape} orig_sr: {orig_sr}")

        new_sig = librosa.resample(orig_sig, orig_sr, new_sr)
        # logg.debug(f"new_sig.shape: {new_sig.shape}")

        padded_sig = pad_signal(new_sig, new_sr)
        # logg.debug(f"padded_sig.shape: {padded_sig.shape}")

        # if there is space to move the signal around, roll it a bit
        max_roll = (new_sr - new_sig.shape[0] - 50) // 2
        if max_roll > 50:
            sample_shift = rng.integers(-max_roll, max_roll)
            rolled_sig = np.roll(padded_sig, sample_shift)
        else:
            rolled_sig = padded_sig

        # the label of this word
        word_label = num2name[int(wav_name[0])]

        # save the processed file
        name_folder = fsdd_proc_folder / word_label
        new_path = name_folder / wav_name
        write_wav(new_path, new_sr, rolled_sig)

        # create the id for this word
        word_id = f"{word_label}/{wav_name}"
        word_test_list.append(word_id)

    # save the IDs
    word_test_str = "\n".join(word_test_list)
    test_list_path.write_text(word_test_str)
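pad_signal is not shown here; a minimal sketch, under the assumption that it centers the clip in a zero buffer of one second:

def pad_signal(sig, target_len):
    # Hypothetical helper: center sig inside a zero buffer of
    # target_len samples (returned unchanged if already longer).
    pad = max(target_len - sig.shape[0], 0)
    left = pad // 2
    return np.pad(sig, (left, pad - left))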
Example 21
        ap.add_argument('--preload', required=False, default=True, help="Load all notes in memory before rendering for better performance (at least 1 GB of RAM is required)")
    args = vars(ap.parse_args())

    assert os.path.isdir(args['db']), 'Dataset not found in ' + args['db']
    assert os.path.isfile(args['seq']), 'File ' + args['seq'] + ' not found.'

    synth = NoteSynthesizer(
                                args['db'], 
                                sr=NSYNTH_SAMPLE_RATE, 
                                velocities=NSYNTH_VELOCITIES, 
                                preload=args['preload']
                            )    
    if(args['preload']):
        synth.preload_notes(args['instrument'], args['source_type'], int(args['preset']))

    y, _ = synth.render_sequence(
                                    sequence=args['seq'], 
                                    instrument=args['instrument'], 
                                    source_type=args['source_type'], 
                                    preset=int(args['preset']),
                                    transpose=int(args['transpose']),
                                    playback_speed=float(args['playback_speed']),
                                    duration_scale=float(args['duration_scale'])
                                )

    if(int(args['sr']) != NSYNTH_SAMPLE_RATE):
        y = librosa.core.resample(y, NSYNTH_SAMPLE_RATE, int(args['sr']))
    
    print("Saving audio output to", args['output'])
    write_wav(args['output'], int(args['sr']), np.array(32000.*y, np.short))
Example 22
x, y = generate_sine_wave(2)
plt.plot(x, y)
plt.show()

# Mixing the audio signals by superimposing them
_, nice_tone = generate_sine_wave(400, SAMPLE_RATE, DURATION)
_, noise_tone = generate_sine_wave(4000, SAMPLE_RATE, DURATION)
noise_tone = noise_tone * 0.3  # scale the noise down to 30%
mixed_tone = nice_tone + noise_tone
normalized_tone = np.int16((mixed_tone / mixed_tone.max()) * 32767)
plt.plot(normalized_tone[:1000])
plt.show()

# Saving the generated samples to an audio file
wav_path = os.path.join(BASE_DIR, 'signals_mixed.wav')
write_wav(wav_path, SAMPLE_RATE, normalized_tone)

# Using the FFT
# Number of samples in normalized_tone
N = SAMPLE_RATE * DURATION
yf = fft(normalized_tone)
xf = fftfreq(N, 1 / SAMPLE_RATE)
plt.plot(xf, np.abs(yf))
plt.xlabel('Frequency')
plt.title('FFT result, mirrored about the y-axis')
print('FFT mirrored about the y-axis')
plt.show()

# Making the FFT faster by computing only the positive half
# Note the extra 'r' at the front
yf = rfft(normalized_tone)
Example 23
x_limited = x[:muestras_grafica]
y_limited = y[:muestras_grafica]
plt.plot(x_limited, y_limited)
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.title('Pure signal at frequency f')
plt.show()
# Desired tone
tone = y

# Generating noise
_, noise1 = generate_sine_wave(NOISE, FS, DURATION)
mixed_tone = tone + noise1
normalized = np.int16((mixed_tone / mixed_tone.max()) * 32767)
print('Generating audio with noise')
write_wav(audio_filename, FS, normalized)
plt.plot(normalized[:muestras_grafica])
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.title('Tone + noise signal')
plt.show()

# FFT
# Note the extra 'r' at the front
N = FS * DURATION
yf = rfft(normalized)
xf = rfftfreq(N, 1 / FS)
plt.plot(xf, np.abs(yf))
plt.xlabel('Frequency')
plt.title('Positive half of the spectrum')
plt.show()
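A natural follow-up, not in the original snippet, is to zero the bins around the noise frequency and invert with irfft; a sketch assuming the names above (yf, xf, FS, NOISE):

from scipy.fft import irfft

# Hypothetical follow-up: zero the bins around NOISE Hz, then invert.
points_per_freq = len(xf) / (FS / 2)
target_idx = int(points_per_freq * NOISE)
yf[target_idx - 1 : target_idx + 2] = 0

cleaned = irfft(yf)
cleaned = np.int16(cleaned / np.max(np.abs(cleaned)) * 32767)
write_wav('clean_tone.wav', FS, cleaned)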