Code example #1
def mir_1k_data_generator(train):
    for wav in glob.glob('Dataset/MIR-1K/Wavfile/*.wav'):
        filename = os.path.split(wav)[1]
        if (filename.startswith('abjones')
                or filename.startswith('amy')) == train:
            origin_source, origin_sr = librosa.load(
                wav, sr=None,
                mono=False)  # TODO: the paper used 8 kHz here, but the source audio is 16 kHz; could we skip resampling?
            resample_source = librosa.resample(origin_source, origin_sr, 8000)
            mixed_source_origin = librosa.to_mono(resample_source)
            left_resample_origin = resample_source[0]
            right_resample_origin = resample_source[1]
            # print(np.min(left_source), np.min(right_source), np.min(mixed_source)) TODO: why are some sample values negative?
            mixed_source_magnitude_spectrum = np.abs(
                to_spectrum(mixed_source_origin))
            left_source_magnitude_spectrum = np.abs(
                to_spectrum(np.asfortranarray(left_resample_origin))
            )  # this asfortranarray step (forcing flags['F_CONTIGUOUS'] = True) was not needed before
            right_source_magnitude_spectrum = np.abs(
                to_spectrum(np.asfortranarray(right_resample_origin)))

            # Normalization. TODO: try skipping it and see what happens
            max_value = np.max(mixed_source_magnitude_spectrum)
            mixed_source_magnitude_spectrum = mixed_source_magnitude_spectrum / max_value
            left_source_magnitude_spectrum = left_source_magnitude_spectrum / max_value
            right_source_magnitude_spectrum = right_source_magnitude_spectrum / max_value
            # TODO: the paper uses magnitude only; could phase be added as well?
            mixed_spec_phase = np.angle(to_spectrum(mixed_source_origin))

            yield (origin_source[0, :], origin_source[1, :],
                   librosa.to_mono(origin_source),
                   left_source_magnitude_spectrum,
                   right_source_magnitude_spectrum,
                   mixed_source_magnitude_spectrum,
                   max_value, mixed_spec_phase)
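Note: to_spectrum is not defined in this snippet. A minimal sketch consistent with how it is used here (np.abs for magnitude, np.angle for phase) would be a thin STFT wrapper; the n_fft and hop_length values below are assumptions, not from the original project:

import librosa

def to_spectrum(wav, n_fft=1024, hop_length=256):
    # Hypothetical helper: complex STFT of a 1-D signal.
    return librosa.stft(wav, n_fft=n_fft, hop_length=hop_length)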
Code example #2
File: test_effects.py Project: lostanlen/librosa
    def __test(y, top_db, ref, trim_duration):
        yt, idx = librosa.effects.trim(y, top_db=top_db,
                                       ref=ref)

        # Test for index position
        fidx = [slice(None)] * y.ndim
        fidx[-1] = slice(*idx.tolist())
        assert np.allclose(yt, y[tuple(fidx)])

        # Verify logamp
        rms = librosa.feature.rmse(y=librosa.to_mono(yt), center=False)  # rmse was renamed to rms in later librosa versions (see example #3)
        logamp = librosa.power_to_db(rms**2, ref=ref, top_db=None)
        assert np.all(logamp > - top_db)

        # Verify logamp over the full, untrimmed signal
        rms_all = librosa.feature.rmse(y=librosa.to_mono(y)).squeeze()
        logamp_all = librosa.power_to_db(rms_all**2, ref=ref,
                                         top_db=None)

        start = int(librosa.samples_to_frames(idx[0]))
        stop = int(librosa.samples_to_frames(idx[1]))
        assert np.all(logamp_all[:start] <= - top_db)
        assert np.all(logamp_all[stop:] <= - top_db)

        # Verify duration
        duration = librosa.get_duration(yt)
        assert np.allclose(duration, trim_duration, atol=1e-1), duration
Code example #3
    def __test(y, top_db, ref, trim_duration):
        yt, idx = librosa.effects.trim(y, top_db=top_db,
                                       ref=ref)

        # Test for index position
        fidx = [slice(None)] * y.ndim
        fidx[-1] = slice(*idx.tolist())
        assert np.allclose(yt, y[tuple(fidx)])

        # Verify logamp
        rms = librosa.feature.rms(y=librosa.to_mono(yt), center=False)
        logamp = librosa.power_to_db(rms**2, ref=ref, top_db=None)
        assert np.all(logamp > - top_db)

        # Verify logamp over the full, untrimmed signal
        rms_all = librosa.feature.rms(y=librosa.to_mono(y)).squeeze()
        logamp_all = librosa.power_to_db(rms_all**2, ref=ref,
                                         top_db=None)

        start = int(librosa.samples_to_frames(idx[0]))
        stop = int(librosa.samples_to_frames(idx[1]))
        assert np.all(logamp_all[:start] <= - top_db)
        assert np.all(logamp_all[stop:] <= - top_db)

        # Verify duration
        duration = librosa.get_duration(yt)
        assert np.allclose(duration, trim_duration, atol=1e-1), duration
Code example #4
 def __init__(self, track, part_size=25):
     self.stft = librosa.stft(librosa.to_mono(track))
     padding = np.zeros((self.stft.shape[0], part_size // 2))
     # Reuse the STFT computed above instead of recomputing it.
     self.stftPadded = np.abs(
         np.concatenate((padding, self.stft, padding, padding), axis=1))
     self.size = part_size
Code example #5
 def visualize_wavs(self, wavs, sr):
     for med in wavs:
         mixed, src1, src2 = med
         plt.subplot(311)
         librosa.display.waveplot(librosa.to_mono(src1[:10 * sr]), sr)
         plt.subplot(312)
         librosa.display.waveplot(librosa.to_mono(src2[:10 * sr]), sr)
         plt.subplot(313)
         librosa.display.waveplot(librosa.to_mono(mixed[:10 * sr]), sr)
         plt.show()
         input("Visualization done.")
Code example #6
def make_spectrograms(data, test):
	"""
	Generates all of the spectrograms from the musdb18 dataset.
	:return: None
	"""
	if data=="train":
		mus_data = musdb.DB(root="data/musdb18", subsets="train")
	else:
		mus_data = musdb.DB(root="data/musdb18", subsets="test")
	

	dictionary = {}
	dictionary["mix"] = []
	dictionary["vocals"] = []
	dictionary["instrumental"] = []

	# Creating the spectrogram arrays from the training data
	num_tracks = len(mus_data)
	percent = 0.1

	for i, track in enumerate(mus_data):

		if i / num_tracks > percent:
			print(int(100 * percent), "%", "of " + data + " data generated")
			percent += 0.1

		# Converting samples to target rate of 22050
		original_sr = track.rate
		target_sr = 22050
		mix_data = librosa.resample(librosa.to_mono(track.audio.T), orig_sr=original_sr, target_sr=target_sr, res_type='kaiser_best', fix=True, scale=False)
		vocal_data = librosa.resample(librosa.to_mono(track.targets['vocals'].audio.T), orig_sr=original_sr, target_sr=target_sr, res_type='kaiser_best', fix=True, scale=False)
		instrumental_data = librosa.resample(librosa.to_mono(track.targets['accompaniment'].audio.T), orig_sr=original_sr, target_sr=target_sr, res_type='kaiser_best', fix=True, scale=False)

		# Length of frame; 66150 should be 3 seconds (appears as 6 seconds on graph)
		len_frame = target_sr*3
		num_frames = int(len(mix_data)/len_frame)

		# Saving each frame as a spectrogram array (and putting track in mix folders and vocals in vocals folder)
		for frame in range(num_frames):
			
			dictionary["mix"].append(generate_spectrogram_array(mix_data[frame * len_frame : frame * len_frame + len_frame]))
			dictionary["vocals"].append(generate_spectrogram_array(vocal_data[frame * len_frame : frame * len_frame + len_frame]))
			dictionary["instrumental"].append(generate_spectrogram_array(instrumental_data[frame * len_frame : frame * len_frame + len_frame]))

			if test:
				pickle.dump(dictionary, open( "data/spectrograms/" + data + "-1", "wb" ))
				make_spectrogram_image(mix_data[frame * len_frame : frame * len_frame + len_frame],"test-image")
				return

	# pickle dictionary here
	pickle.dump(dictionary, open( "data/spectrograms/" + data, "wb" ))
	return
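Note: generate_spectrogram_array and make_spectrogram_image are project helpers not shown in this excerpt. A minimal stand-in for the former, assuming it returns an STFT magnitude array (parameter values are guesses, not from the original project):

import librosa
import numpy as np

def generate_spectrogram_array(audio, n_fft=2048, hop_length=512):
    # Hypothetical sketch: magnitude spectrogram of one 3-second frame.
    return np.abs(librosa.stft(audio, n_fft=n_fft, hop_length=hop_length))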
Code example #7
def calculate_SDR(music, model, n_fft=2048, hop_length=512, slice_duration=2):
    model.eval()
    scores = []
    sr = music.rate
    ind = 0
    mixture = librosa.to_mono(music.audio.transpose())
    vocal = librosa.to_mono(music.targets['vocals'].audio.transpose())
    for i in range(0, len(music.audio), slice_duration * sr):
        ind += 1
        # Slice from the full signals; reassigning mixture/vocal here would
        # shrink the arrays cumulatively on every iteration.
        mixture_slice = mixture[i:i + slice_duration * sr]
        vocal_slice = vocal[i:i + slice_duration * sr]

        if np.all(vocal_slice == 0):
            # print('[!] -  all 0s, skipping')
            continue

        if i + 2 * sr >= len(music.audio):
            break
        mixture_stft = librosa.stft(mixture_slice,
                                    n_fft=n_fft,
                                    hop_length=512,
                                    window='hann',
                                    center=True)
        magnitude_mixture_stft, mixture_phase = librosa.magphase(mixture_stft)
        normalized_magnitude_mixture_stft = torch.Tensor(Normalize().forward(
            [magnitude_mixture_stft])[0])

        sr_v = music.rate
        with torch.no_grad():
            mask = model.forward(
                normalized_magnitude_mixture_stft.unsqueeze(0)).squeeze(0)
            out = mask * torch.Tensor(normalized_magnitude_mixture_stft)
        predicted_vocal_stft = out.numpy() * mixture_phase
        predicted_vocal_audio = librosa.istft(predicted_vocal_stft.squeeze(0),
                                              win_length=n_fft,
                                              hop_length=hop_length,
                                              window='hann',
                                              center=True)
        try:
            scores.append(
                mir_eval.separation.bss_eval_sources(
                    vocal_slice[:predicted_vocal_audio.shape[0]],
                    predicted_vocal_audio)[0])
        except ValueError:
            print(np.all(vocal_slice == 0))
            print(np.all(predicted_vocal_stft == 0))
            print('Error but skipping')
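Note: Normalize() comes from the project's preprocessing code and is not shown. A plausible minimal version, assuming it peak-normalizes each magnitude spectrogram in a list (the epsilon is an added safety guard):

import numpy as np

class Normalize:
    # Hypothetical stand-in for the project's Normalize transform.
    def forward(self, spectrograms):
        return [s / (np.max(s) + 1e-8) for s in spectrograms]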
Code example #8
def process(directory, sources, target_sr, save_only_mono=False):
    for track_i, track in enumerate(sources):
        original_sr = track.rate

        mix = librosa.core.resample(track.audio.T, original_sr, target_sr)
        drums = librosa.core.resample(track.targets['drums'].audio.T,
                                      original_sr, target_sr)
        bass = librosa.core.resample(track.targets['bass'].audio.T,
                                     original_sr, target_sr)
        other = librosa.core.resample(track.targets['other'].audio.T,
                                      original_sr, target_sr)
        vocal = librosa.core.resample(track.targets['vocals'].audio.T,
                                      original_sr, target_sr)
        acc = librosa.core.resample(track.targets['accompaniment'].audio.T,
                                    original_sr, target_sr)

        stereo = [mix, drums, bass, other, vocal, acc]
        length = min([t.shape[1] for t in stereo])
        if length <= 1: continue

        left = np.array([t[0, :length] for t in stereo])
        right = np.array([t[1, :length] for t in stereo])
        mono = np.array([librosa.to_mono(t[:, :length]) for t in stereo])

        if save_only_mono:
            together = mono
        else:
            together = np.array([left, right, mono])

        if not os.path.exists(directory):
            os.makedirs(directory)
        np.savez_compressed(f'{directory}/{track_i:04d}',
                            together.astype('float32'))

        print(f"Track: {track_i}, sampling rate: {target_sr}")
Code example #9
def get_audio(path,
              shape=None,
              mean=[0, 0, 0],
              std=[1, 1, 1],
              sample_len=16000,
              streams=1,
              sample_rate=1,
              extension='.wav',
              start=0,
              stop=None,
              channels=1,
              **kwargs):
    audio, fs = libr.load(path, sr=sample_len)  # sample_len doubles as the target sr, so each split below is one second long
    if len(audio.shape) > channels:
        audio = libr.to_mono(audio)
    if len(audio) < sample_len:
        pad = np.zeros(sample_len - len(audio))
        audio = np.append(audio, pad)
    batches = torch.from_numpy(audio).split(sample_len)
    if not len(audio) % sample_len == 0:
        audio = batches[:-1]
    else:
        audio = batches
    audio = torch.stack(audio)
    return audio
Code example #10
    def _decode_non_mp3_file_like(self, file, format=None):
        try:
            import librosa
            import soundfile as sf
        except ImportError as err:
            raise ImportError(
                "To support decoding audio files, please install 'librosa' and 'soundfile'."
            ) from err

        if format == "opus":
            if version.parse(
                    sf.__libsndfile_version__) < version.parse("1.0.30"):
                raise RuntimeError(
                    "Decoding .opus files requires 'libsndfile'>=1.0.30, " +
                    "it can be installed via conda: `conda install -c conda-forge libsndfile>=1.0.30`"
                )
        array, sampling_rate = sf.read(file)
        array = array.T
        if self.mono:
            array = librosa.to_mono(array)
        if self.sampling_rate and self.sampling_rate != sampling_rate:
            array = librosa.resample(array,
                                     sampling_rate,
                                     self.sampling_rate,
                                     res_type="kaiser_best")
            sampling_rate = self.sampling_rate
        return array, sampling_rate
Code example #11
File: deformation.py Project: ruohoruotsi/amen
def pitch_shift(audio, steps, step_size=12):
    """
    Wraps librosa's `pitch_shift` function, and returns a new Audio object.
    Note that this folds to mono.

    Parameters
    ---------
    audio : Audio
        The Audio object to act on.

    steps : float
        The pitch shift amount.
        The default unit is semitones, as set by `step_size`.

    step_size : float > 0
        The number of equal-tempered steps per octave.
        The default is semitones, as set by `step_size=12`.
        Quarter-tones, for example, would be `step_size=24`.
    """
    shifted = librosa.effects.pitch_shift(
        librosa.to_mono(audio.raw_samples),
        audio.sample_rate,
        steps,
        bins_per_octave=step_size,
    )
    shifted_audio = Audio(raw_samples=shifted, sample_rate=audio.sample_rate)

    return shifted_audio
Code example #12
def resampling(file):
    SAMPLE_RATE = 16000
    y, sr = librosa.load(file)  # librosa.load defaults to mono at 22050 Hz
    os.remove(file)
    data = librosa.resample(y, sr, SAMPLE_RATE)
    data = librosa.to_mono(data)  # no-op here, since load already returned mono audio
    librosa.output.write_wav(file, data, SAMPLE_RATE)
Code example #13
def file_to_input(filename, srate=44100):

    try:
        y, sr = librosa.load(filename, sr=None)
    except Exception:
        raise IOError('Give me an audio file which I can read!!')

    if len(y.shape) > 1:
        print('Mono Conversion')
        y = librosa.to_mono(y)

    if sr != srate:
        print('Resampling to {}'.format(srate))
        y = librosa.resample(y, sr, srate)

    mel_feat = librosa.feature.melspectrogram(y=y,
                                              sr=srate,
                                              n_fft=n_fft,
                                              hop_length=hop_length,
                                              n_mels=128)
    inpt = librosa.power_to_db(mel_feat).T

    # input needs to be 4D, batch_size X 1 X inpt_size[0] X inpt_size[1]
    inpt = np.reshape(inpt, (1, 1, inpt.shape[0], inpt.shape[1]))
    return inpt
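Note: n_fft and hop_length are module-level globals in the original file. Typical values (assumed here, not taken from the source) would be:

n_fft = 2048
hop_length = 512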
Code example #14
def main():
    args = parse_args()
    filepath = Path(args.file).resolve()
    if not filepath.exists():
        raise FileNotFoundError(filepath)

    loop = AudioLoop(path=filepath,
                     hop_length=args.hop,
                     estimated_bpm=args.tempo,
                     align_beats_to_start=not args.no_align)

    if args.num_beats is not None:
        print(f"Using given num_beats to compute the tempo and beat times")
        samples_per_beat = loop.samples / args.num_beats
        loop.beat_samples = np.rint(
            np.linspace(0, samples_per_beat * (args.num_beats - 1),
                        args.num_beats))
        loop.beat_frames = np.rint(loop.beat_samples / args.hop)

    print(f"tempo: {loop.tempo}")

    click_track = librosa.core.clicks(frames=loop.beat_frames,
                                      sr=loop.sample_rate,
                                      length=loop.samples)
    sd.play(librosa.to_mono(loop.audio.T) + click_track)

    val = input("save audio loop? (y/n)")

    sd.stop()
    if val == 'y':
        loop.save(f"{filepath.stem}_LOOP.pkl")
        print(f"Saved to {filepath.stem}_LOOP.pkl")

    print("Goodbye!")
Code example #15
def read_audios(file):
    print(splitext(basename(file))[0])

    # read spl values
    for root, dirs, files in os.walk(levels_path):
        pass  # run only to populate `files` with the directory's file names

    spl = np.zeros(64)

    # read spl-data and write to file name
    for i in range(len(files)):
        if splitext(basename(file))[0] == splitext(files[i])[0]:
            spl = np.loadtxt(levels_path + files[i], delimiter=";")

    y, sr = librosa.load(file, sr=None, mono=False)

    y_mono = librosa.to_mono(y)

    # Save as .wav
    for i in range(int(900 * sr //
                       (sr * frame_length))):  # for first 900 sec = 15 min
        yx = y_mono[int(i * sr * frame_length):int(sr * frame_length *
                                                   (i + 1))]
        soundfile.write(
            splitted_wav + file.split('\\')[-1].split('.webm')[0] + ' _ ' +
            str(i + 1) + ' _ ' + str(spl[i]) + ' _ ' + 'dBA' + '.wav', yx, sr,
            'PCM_16')
Code example #16
File: resample.py Project: jam1garner/discord-forge
def resample(in_file, out_file):
    target_rate = 48000
    audio_data, audio_sample_rate = soundfile.read(in_file, dtype='float32')
    resampled_data = librosa.resample(librosa.to_mono(audio_data.T),
                                      audio_sample_rate, target_rate)
    audio_data = resampled_data.T
    soundfile.write(out_file, audio_data, target_rate, subtype='PCM_16')
Code example #17
def read_audio_file(file_path,
                    target_sample_rate,
                    duration=None,
                    samples=None):
    """Read audio samples from a file. If duration/samples argument is specified audio
    is padded or clipped to match the value.
    """
    try:
        y, sample_rate = soundfile.read(file_path)
        y = librosa.to_mono(y.T)
        y = librosa.resample(y, sample_rate, target_sample_rate)
    except Exception:
        logging.error('Failed to read audio from "{}"'.format(file_path))
        raise

    if duration or samples:
        if duration:
            total_samples = int(target_sample_rate * duration)
        elif samples:
            total_samples = samples
        if len(y) < total_samples:
            # Pad audio files smaller than duration with silence
            pad = total_samples - len(y)
            y = np.concatenate((y, np.zeros(pad)))
        elif len(y) > total_samples:
            # Clip audio files longer than duration
            y = y[:total_samples]
    return y
Code example #18
File: effects.py Project: Ravel-Labs/ravel
    def __init__(self, signal, n_fft, window_size, hop_length, peak,
                 audio_type, sr):
        # self.path = path
        # self.sr = librosa.get_samplerate(self.path)
        self.sr = sr
        self.n_fft = n_fft
        self.window_size = window_size
        self.hop_length = hop_length
        self.signal = signal
        self.mono_signal = librosa.to_mono(self.signal)
        self.signal_db = librosa.amplitude_to_db(self.mono_signal)
        self.peak = peak
        self.audio_type = audio_type
        self.x_norm = preprocessing.normalize(self.mono_signal, self.peak)

        self.fft = np.abs(
            librosa.core.stft(self.mono_signal,
                              n_fft=self.n_fft,
                              win_length=self.window_size,
                              hop_length=self.hop_length))
        self.num_bins = self.fft.shape[0]
        self.fft_db = librosa.amplitude_to_db(self.fft)
        self.norm_fft_db = preprocessing.compute_norm_fft_db(
            self.x_norm, self.n_fft, self.window_size, self.hop_length)
        # Bin center frequencies are i * sr / n_fft; dividing by the bin
        # count (n_fft // 2 + 1) would slightly overestimate each frequency.
        self.freqs = np.array(
            [i * self.sr / self.n_fft for i in range(self.num_bins)])
Code example #19
    def load_file(self):
        '''
        Load samples from an audio file

        Uses:
            self.filename: path to audio file from which to make spectrogram (optional)
            self.sample_rate: rate at which to resample audio

        Returns:
            samples: the samples from the wav file
        '''

        samples, sample_rate = load(
            self.filename,
            mono=False,  # Don't automatically load as mono, so we can warn if we force to mono
            sr=self.sample_rate,  # Resample
            res_type='kaiser_fast',
        )

        # Force to mono if wav has multiple channels
        if samples.ndim > 1:
            samples = to_mono(samples)
            if self.verbosity > 1:
                print(
                    f"WARNING: Multiple-channel file detected ({filename}). Automatically mixed to mono."
                )

        return samples
Code example #20
 def plot_spectrogram(self):
     plt.figure()
     mono = librosa.to_mono(self.waveform.T)
     D = librosa.stft(mono)
     S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
     img = librosa.display.specshow(S_db, y_axis='log', x_axis='time')
     plt.colorbar(img, format="%+2.f dB")
Code example #21
def extract(args):
    audio_directory, output_directory, af, overwrite = args
    subdir, output_file = os.path.split(af.split(audio_directory)[1])
    output_file = os.path.splitext(output_file)[0]
    output_file = os.path.join(output_directory, output_file)

    if os.path.exists(output_file) and not overwrite:
        print('Skipping {}. Already exists.'.format(output_file))
        return

    output = dict()

    try:
        y, _sr = soundfile.read(af)
        y = to_mono(y.T)  # soundfile returns (frames, channels); to_mono expects (channels, frames)
        sr = 22050
        y = resample(y, _sr, sr)
    except Exception:
        y, sr = load(af)

    output['linspec_mag'], output['linspec_phase'] = linspec(y)
    output['melspec'] = melspec(y, sr=sr)
    output['logspec'] = logspec(y, sr=sr)
    output['hcqt_mag'], output['hcqt_phase'] = hcqt(y, sr=sr)
    output['vggish_melspec'] = vggish_melspec(y, sr=sr)

    # high-level
    output['percussive_ratio'], output['percussive_rms'], output[
        'total_rms'] = percussive_ratio(y, margin=3.0)
    output['onset_strength'] = onset_strength(y, detrend=True)
    output['tempogram'] = tempogram(y)
    output['onset_patterns'] = onset_patterns(y, sr=sr)

    np.savez_compressed(output_file, **output)
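Note: the feature helpers (linspec, melspec, logspec, hcqt, vggish_melspec, percussive_ratio, onset_strength, tempogram, onset_patterns) are defined elsewhere in this project. As one example, a minimal linspec consistent with returning a (magnitude, phase) pair might look like this; the STFT parameters are assumptions:

import librosa
import numpy as np

def linspec(y, n_fft=2048, hop_length=512):
    # Hypothetical sketch: linear-frequency magnitude and phase angle.
    mag, phase = librosa.magphase(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    return mag, np.angle(phase)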
Code example #22
File: swingify.py Project: wolftune/swingify
def swingify(file_path,
             outfile,
             factor,
             sr=None,
             format=None,
             max_length=None):
    y, sr = librosa.load(file_path, mono=False, sr=sr)
    print(y.shape)
    if max_length:
        print('trimming audio to max_len: {} seconds'.format(max_length))
        if len(y.shape) > 1:
            y = y[:, :max_length * sr]
        else:
            y = y[:max_length * sr]
        print(y.shape)
    anal_samples = librosa.to_mono(y)
    raw_samples = np.atleast_2d(y)
    # force stereo
    if raw_samples.shape[0] < 2:
        print('doubling mono signal to be stereo')
        raw_samples = np.vstack([raw_samples, raw_samples])

    beats = get_beats(anal_samples, sr, 512)

    output = synthesize(raw_samples, beats, factor)

    output = output * 0.7
    print(sr)
    sf.write(outfile, output.T, int(sr), format=format)
    # librosa.output.write_wav(outfile, output, sr, norm=True)
    return beats
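Note: get_beats and synthesize are defined elsewhere in swingify.py. A minimal get_beats, assuming it returns beat positions in samples from librosa's beat tracker:

import librosa

def get_beats(y, sr, hop_length):
    # Hypothetical sketch: track beats on the mono analysis signal and
    # convert the resulting frame indices to sample positions.
    _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
    return librosa.frames_to_samples(beat_frames, hop_length=hop_length)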
Code example #23
def convert():
    data_original = []
    for file_name in file_names:
        for line in pickle.load(open(file_name, 'rb')):
            data_original.append(line)
    log('Original data length is {}'.format(len(data_original)))

    percentage = 0
    begin = timer()
    data_convert = []

    for data in data_original:
        name = data[0]
        array = librosa.resample(data[1], data[2], target_rate)
        if to_mono:  # module-level config flag, not librosa.to_mono
            array = librosa.to_mono(array)
        data_convert.append((name, array, target_rate))
        if len(data_convert) / len(data_original) - percentage > 0.05:
            percentage = len(data_convert) / len(data_original)
            log('Now converted {} ({}%). Cost time {}'.format(
                len(data_convert), percentage * 100,
                timer() - begin))

    with open(output_file, 'wb') as file:
        pickle.dump(data_convert, file)

    log('Converted data length is {}. All finished.'.format(len(data_convert)))
Code example #24
File: audioProcessing.py Project: intelliChirp/SNAW
    def resample(data, fs, new_fs):

        # The higher the sampling rate, the more samples per second.
        # Fewer samples means lower quality; more samples means better
        # quality but larger storage requirements. 44100 samples per
        # second is a common choice for audio.

        # print("Changing from sampling rate {} to {}".format(fs,new_fs))

        # converting to a single channel (mono)
        if data.ndim > 1:
            data = librosa.to_mono(data)

        fs = float(fs)
        new_fs = float(new_fs)
        size = data.size

        # old time axis
        old_time_axis = np.arange(size) / fs
        total_time = old_time_axis[-1]
        total_samples = round(total_time * new_fs)

        # getting new time axis wrt old time axis and new sampling rate
        new_time_axis = np.arange(total_samples) / new_fs

        # fills in the values between the samples
        f = interpolate.interp1d(old_time_axis, data)

        new_data = f(new_time_axis)
        return new_data
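Note: this helper resamples by linear interpolation. For comparison, a sketch of the same conversion using librosa's resampler, which is usually higher quality for audio:

import librosa
import numpy as np

def resample_librosa(data, fs, new_fs):
    # Same effect via librosa.resample instead of interp1d.
    if data.ndim > 1:
        data = librosa.to_mono(data)
    return librosa.resample(data.astype(np.float32), orig_sr=float(fs), target_sr=float(new_fs))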
Code example #25
File: utils.py Project: mcartwright/dafx2018_adt
def read_audio(filepath, sr=None, mono=True, peak_norm=False):
    """
    Read audio

    Parameters
    ----------
    filepath
    sr
    mono
    peak_norm

    Returns
    -------
    y, sr
    """
    try:
        y, _sr = psf.read(filepath)
        y = y.T
    except RuntimeError:
        y, _sr = librosa.load(filepath, mono=False, sr=None)

    if sr is not None and sr != _sr:
        y = resampy.resample(y, _sr, sr, filter='kaiser_fast')
    else:
        sr = _sr

    if mono:
        y = librosa.to_mono(y)

    if peak_norm:
        y /= np.max(np.abs(y))

    return y, sr
Code example #26
    def sox(x, fs, *args):
        assert fs > 0

        fdesc, infile = tempfile.mkstemp(suffix=".wav")
        os.close(fdesc)
        fdesc, outfile = tempfile.mkstemp(suffix=".wav")
        os.close(fdesc)

        psf.write(infile, x, fs)

        try:
            arguments = ["sox", infile, outfile, "-q"]
            arguments.extend(args)

            subprocess.check_call(arguments)

            x_out, fs = psf.read(outfile)
            x_out = x_out.T
            if x.ndim == 1:
                x_out = librosa.to_mono(x_out)

        finally:
            os.unlink(infile)
            os.unlink(outfile)

        return x_out
Code example #27
def specShow(sig):
    # multiframe spectrogram
    # make mono
    try:
        sig = sig.frames
    except AttributeError:
        pass
    sig = np.nan_to_num(list(sig))
    try:
        sig = lib.to_mono(np.transpose(sig))
    except Exception:
        return
    X = lib.stft(sig)
    Xdb = lib.amplitude_to_db(abs(X))
    plt.figure(figsize=(14, 5))
    plt.subplot(1, 2, 1)
    lib.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')

    # single frame spectrogram
    X = scipy.fft.fft(sig)  # scipy.fft is a module in modern SciPy; the bare scipy.fft(sig) call is legacy
    X_mag = lib.amplitude_to_db(np.absolute(X))
    f = np.linspace(0, sr, len(X_mag))  # frequency variable
    plt.subplot(1, 2, 2)
    res = int(len(sig) / 2)
    plt.plot(f[:res], X_mag[:res])
    plt.xlabel('Frequency (Hz)')
Code example #28
File: transforms.py Project: eonu/sigment
    def _transform(self, X, sr):
        X = self._val.signal(X)
        duration = self.random_state.uniform(*self.duration)

        # Convert stereo signals to mono and take the absolute value
        mono_amp = np.abs(librosa.to_mono(X))
        # Calculate the length of the section in terms of frames
        total_frames = len(mono_amp)
        frames = ceil(total_frames * duration)
        # Initialize variables for keeping track of loudest section
        previous_amp, section_amp = None, 0
        start, end = 0, frames
        loudest_amp, loudest_idx = -1, (start, end)

        # Slide the moving section window
        while end < total_frames:
            # Calculate volume for the current section: add the newest frame
            # (index end - 1) and drop the frame that just left the window.
            if previous_amp is None:
                section_amp += mono_amp[start:end].sum()
            else:
                section_amp += mono_amp[end - 1] - previous_amp
            # Update loudest section indices if current section is loudest
            if section_amp > loudest_amp:
                loudest_amp, loudest_idx = section_amp, (start, end)
            # Store volume of the frame leaving the moving window
            previous_amp = mono_amp[start]
            # Update section indices
            start, end = start + 1, end + 1

        # Return section of the original signal which was the loudest
        return X[:, loudest_idx[0]:loudest_idx[1]]
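Note: the sliding-window search above can be sanity-checked standalone; a small illustrative sketch with synthetic data (all names below are made up):

import numpy as np
import librosa

# Two-channel signal whose loud burst sits at samples 400-500.
X = np.zeros((2, 1000))
X[:, 400:500] = 1.0
mono_amp = np.abs(librosa.to_mono(X))
frames = 100
window_sums = np.convolve(mono_amp, np.ones(frames), mode='valid')
start = int(np.argmax(window_sums))
print(start, start + frames)  # expect a window covering the 400-500 burst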
Code example #29
def waveShow(sig):
    try:
        sig = sig.frames
    except AttributeError:
        pass
    sig = lib.to_mono(np.transpose(sig))
    lib.display.waveplot(sig)
Code example #30
File: utils.py Project: rhine3/squiggle-detector
def load_file(filename, sample_rate=22050):
    '''
    Load samples from an audio file
    
    Inputs:
        filename: path to audio file from which to make spectrogram (optional)
        sample_rate: rate at which to resample audio
    
    Returns:
        samples: the samples from the wav file
        sample_rate: the sample rate from the wav file
    '''

    samples, sample_rate = load(
        filename,
        mono=False,  # Don't automatically load as mono, so we can warn if we force to mono
        sr=sample_rate,  # Resample
        res_type='kaiser_best',
    )

    # Force to mono if wav has multiple channels
    if samples.ndim > 1:
        samples = to_mono(samples)
        #print(
        #    f"WARNING: Multiple-channel file detected ({filename}). Automatically mixed to mono."
        #)

    return samples, int(sample_rate)
Code example #31
 def detect_onsets(self):
     mono = librosa.to_mono(self.waveform.T)
     onset_frames = librosa.onset.onset_detect(y=mono, sr=self.fs, 
         units='frames', pre_max=500, post_max=500, pre_avg=100,
         post_avg=100, delta=0.01)
     times = librosa.frames_to_time(onset_frames, sr=self.fs)
     return times
Code example #32
def to_mono(stereo_array):
    """Calls librosa.to_mona on the given audio file.

    :param stereo_array: input stereo array
    :returns: mono audio array (numpy ndarray)
    """

    return librosa.to_mono(stereo_array)
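A quick usage sketch showing the shape change (illustrative values):

import numpy as np
import librosa

stereo = np.random.randn(2, 22050)     # (channels, samples)
mono = librosa.to_mono(stereo)         # averages across the channel axis
print(stereo.shape, '->', mono.shape)  # (2, 22050) -> (22050,)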
Code example #33
File: utils.py Project: instinct2k18/pyannote-audio
def read_audio(current_file, sample_rate=None, mono=True):
    """Read audio file

    Parameters
    ----------
    current_file : dict
        Dictionary given by pyannote.database.
    sample_rate: int, optional
        Target sampling rate. Defaults to using native sampling rate.
    mono : bool, optional
        Convert multi-channel to mono. Defaults to True.

    Returns
    -------
    y : (n_samples, n_channels) np.array
        Audio samples.
    sample_rate : int
        Sampling rate.

    Notes
    -----
    In case `current_file` contains a `channel` key, data of this (1-indexed)
    channel will be returned.

    """

    # sphere files
    if current_file['audio'][-4:] == '.sph':

        # dump sphere file to a temporary wav file
        # and load it from here...
        from sphfile import SPHFile
        sph = SPHFile(current_file['audio'])
        with tempfile.NamedTemporaryFile() as f:
            sph.write_wav(f.name)
            y, sample_rate = librosa.load(f.name, sr=sample_rate, mono=False)

    # all other files
    else:
        y, sample_rate = librosa.load(current_file['audio'],
                                      sr=sample_rate,
                                      mono=False)

    # reshape mono files to (1, n) [was (n, )]
    if y.ndim == 1:
        y = y.reshape(1, -1)

    # extract specific channel if requested
    channel = current_file.get('channel', None)
    if channel is not None:
        y = y[channel - 1, :]

    # convert to mono
    if mono:
        y = librosa.to_mono(y)

    return y.T, sample_rate
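A usage sketch with a hypothetical pyannote-style file dict (the path is a placeholder):

current_file = {'audio': '/path/to/recording.wav', 'channel': 1}
y, sample_rate = read_audio(current_file, sample_rate=16000, mono=True)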
Code example #34
File: test_core.py Project: luqueburgosjm/librosa
    def __test(filename, mono):
        y, sr = librosa.load(filename, mono=mono)

        y_mono = librosa.to_mono(y)

        eq_(y_mono.ndim, 1)
        eq_(len(y_mono), y.shape[-1])

        if mono:
            assert np.allclose(y, y_mono)
Code example #35
File: background.py Project: bmcfee/muda
def slice_clip(filename, start, stop, n_samples, sr, mono=True):
    '''Slice a fragment of audio from a file.

    This uses pysoundfile to efficiently seek without
    loading the entire stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    start : int
        The sample index of `filename` at which the audio fragment should start

    stop : int
        The sample index of `filename` at which the audio fragment should stop (e.g. y = audio[start:stop])

    n_samples : int > 0
        The number of samples to load

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled from `filename`

    Raises
    ------
    ValueError
        If the source file is shorter than the requested length

    '''

    with psf.SoundFile(str(filename), mode='r') as soundf:
        n_target = stop - start

        soundf.seek(start)

        y = soundf.read(n_target).T

        if mono:
            y = librosa.to_mono(y)

        # Resample to the target sr
        y = librosa.resample(y, soundf.samplerate, sr)

        # Clip to the target length exactly
        y = librosa.util.fix_length(y, n_samples)

        return y
Code example #36
File: background.py Project: EQ4/muda
def sample_clip(filename, n_samples, sr, mono=True):
    '''Sample a fragment of audio from a file.

    This uses pysoundfile to efficiently seek without
    loading the entire stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    n_samples : int > 0
        The number of samples to load

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled randomly from `filename`

    Raises
    ------
    ValueError
        If the source file is shorter than the requested length

    '''

    with psf.SoundFile(str(filename), mode='r') as soundf:

        n_target = int(np.ceil(n_samples * soundf.samplerate / sr))

        # Draw a random clip
        start = np.random.randint(0, len(soundf) - n_target)

        soundf.seek(start)

        y = soundf.read(n_target).T

        if mono:
            y = librosa.to_mono(y)

        # Resample to the target sr
        y = librosa.resample(y, soundf.samplerate, sr)

        # Clip to the target length exactly
        y = librosa.util.fix_length(y, n_samples)

        return y
Code example #37
File: audio.py Project: Asudano/amen
    def __init__(self, file_path=None, raw_samples=None, convert_to_mono=False,
                 sample_rate=44100, analysis_sample_rate=22050):
        """
        Audio constructor.
        Opens a file path, loads the audio with librosa, and prepares the features

        Parameters
        ----------

        file_path: string
            path to the audio file to load

        raw_samples: np.array
            samples to use for audio output

        convert_to_mono: boolean
            (optional) converts the file to mono on loading

        sample_rate: number > 0 [scalar]
            (optional) sample rate to pass to librosa.

        analysis_sample_rate: number > 0 [scalar]
            (optional) sample rate at which analysis features are computed.

        Returns
        ------
        An Audio object
        """

        if file_path:
            y, sr = librosa.load(file_path, mono=convert_to_mono, sr=sample_rate)
        elif raw_samples is not None:
            # This assumes that we're passing in raw_samples
            # directly from another Audio's raw_samples.
            y = raw_samples
            sr = sample_rate

        self.file_path = file_path
        self.sample_rate = float(sr)
        self.analysis_sample_rate = float(analysis_sample_rate)
        self.num_channels = y.ndim
        self.duration = librosa.get_duration(y=y, sr=sr)

        self.analysis_samples = librosa.resample(librosa.to_mono(y),
                                                 sr, self.analysis_sample_rate,
                                                 res_type='kaiser_best')
        self.raw_samples = np.atleast_2d(y)

        self.zero_indexes = self._create_zero_indexes()
        self.features = self._create_features()
        self.timings = self._create_timings()
Code example #38
def get_random_wav(filename, sr, duration):

    # Get a random range from wav

    wav, _ = librosa.load(filename, sr = sr, mono = False)
    print(wav)
    assert (wav.ndim == 2) and (wav.shape[0] == 2), 'Require wav to have two channels'

    wav_pad = pad_wav(wav = wav, sr = sr, duration = duration)
    wav_sample = sample_range(wav = wav_pad, sr = sr, duration = duration)  # sample from the padded wav

    wav_sample_mono = librosa.to_mono(wav_sample)
    wav_sample_src1 = wav_sample[0, :]
    wav_sample_src2 = wav_sample[1, :]

    return wav_sample_mono, wav_sample_src1, wav_sample_src2
Code example #39
def load_wavs(filenames, sr):

    wavs_mono = list()
    wavs_src1 = list()
    wavs_src2 = list()

    for filename in filenames:
        wav, _ = librosa.load(filename, sr = sr, mono = False)
        assert (wav.ndim == 2) and (wav.shape[0] == 2), 'Require wav to have two channels'
        wav_mono = librosa.to_mono(wav) * 2 # to_mono averages the two channels; doubling recovers src1 + src2
        wav_src1 = wav[0, :]
        wav_src2 = wav[1, :]
        wavs_mono.append(wav_mono)
        wavs_src1.append(wav_src1)
        wavs_src2.append(wav_src2)

    return wavs_mono, wavs_src1, wavs_src2
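The "* 2" works because librosa.to_mono averages the channels, so doubling gives back the exact channel sum; a quick check:

import numpy as np
import librosa

src1 = np.random.randn(1000).astype(np.float32)
src2 = np.random.randn(1000).astype(np.float32)
wav = np.stack([src1, src2])
# to_mono returns (src1 + src2) / 2, so multiplying by 2 recovers the sum.
assert np.allclose(librosa.to_mono(wav) * 2, src1 + src2)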
Code example #40
File: sox.py Project: bmcfee/muda
def __sox(y, sr, *args):
    '''Execute sox

    Parameters
    ----------
    y : np.ndarray
        Audio time series

    sr : int > 0
        Sampling rate of `y`

    *args
        Additional arguments to sox

    Returns
    -------
    y_out : np.ndarray
        `y` after sox transformation
    '''

    assert sr > 0

    fdesc, infile = tempfile.mkstemp(suffix='.wav')
    os.close(fdesc)
    fdesc, outfile = tempfile.mkstemp(suffix='.wav')
    os.close(fdesc)

    # Dump the audio
    librosa.output.write_wav(infile, y, sr)

    try:
        arguments = ['sox', infile, outfile, '-q']
        arguments.extend(args)

        subprocess.check_call(arguments)

        y_out, sr = psf.read(outfile)
        y_out = y_out.T
        if y.ndim == 1:
            y_out = librosa.to_mono(y_out)

    finally:
        os.unlink(infile)
        os.unlink(outfile)

    return y_out
Code example #41
File: audio_io.py Project: cghawthorne/magenta
def wav_data_to_samples(wav_data, sample_rate):
  """Read PCM-formatted WAV data and return a NumPy array of samples.

  Uses scipy to read and librosa to process WAV data. Audio will be converted to
  mono if necessary.

  Args:
    wav_data: WAV audio data to read.
    sample_rate: The number of samples per second at which the audio will be
        returned. Resampling will be performed if necessary.

  Returns:
    A numpy array of audio samples, single-channel (mono) and sampled at the
    specified rate, in float32 format.

  Raises:
    AudioIOReadError: If scipy is unable to read the WAV data.
    AudioIOError: If audio processing fails.
  """
  try:
    # Read the wav file, converting sample rate & number of channels.
    native_sr, y = scipy.io.wavfile.read(six.BytesIO(wav_data))
  except Exception as e:  # pylint: disable=broad-except
    raise AudioIOReadError(e)

  if y.dtype == np.int16:
    # Convert to float32.
    y = int16_samples_to_float32(y)
  elif y.dtype == np.float32:
    # Already float32.
    pass
  else:
    raise AudioIOError(
        'WAV file not 16-bit or 32-bit float PCM, unsupported')
  try:
    # Convert to mono and the desired sample rate.
    if y.ndim == 2 and y.shape[1] == 2:
      y = y.T
      y = librosa.to_mono(y)
    if native_sr != sample_rate:
      y = librosa.resample(y, native_sr, sample_rate)
  except Exception as e:  # pylint: disable=broad-except
    raise AudioIOError(e)
  return y
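Note: int16_samples_to_float32 is a Magenta helper not shown in this excerpt. A minimal version consistent with the docstring (scale 16-bit PCM into float32 around [-1, 1]):

import numpy as np

def int16_samples_to_float32(y):
    # Hypothetical sketch: scale int16 samples to float32.
    return y.astype(np.float32) / np.iinfo(np.int16).max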
Code example #42
def swingify(file_path, outfile, factor, sr=None, format=None):
    y, sr = librosa.load(file_path, mono=False, sr=sr)
    print(y.shape)
    anal_samples = librosa.to_mono(y)
    raw_samples = np.atleast_2d(y)
    # force stereo
    if raw_samples.shape[0] < 2:
        print('doubling mono signal to be stereo')
        raw_samples = np.vstack([raw_samples, raw_samples])

    beats = get_beats(anal_samples, sr, 512)

    output = synthesize(raw_samples, beats, factor)

    output = output * 0.7
    print(sr)
    sf.write(outfile, output.T, int(sr), format=format)
    # librosa.output.write_wav(outfile, output, sr, norm=True)
    return beats
Code example #43
File: resample_all.py Project: gsmafra/py-aasp-casa
def resample_all():

	audio_folder = 'scenes_stereo/'
	subsamp_folder = 'scenes_mono_8k/'

	chdir(audio_folder)
	mkdir(subsamp_folder)

	for sub_folder in glob('*'):

		mkdir(subsamp_folder + sub_folder)

		for filename in glob(sub_folder + '/*.wav'):

			print(filename)

			[fs, sig] = read(filename)

			sig = to_mono(sig.T)
			sig = resample(sig, fs, 8000)
		
			write(subsamp_folder + filename, 8000, sig)
Code example #44
import numpy as np
import os
import pprint
import librosa
import librosa.display  # librosa.display must be imported explicitly for specshow below
import matplotlib.pyplot as plt
from audio_models import BeatInterval
print('#'*180)

print(os.path.abspath(librosa.__file__))

# load audio file
audio_path = librosa.util.example_audio_file()
audio_path = '../src/audio/medium.m4a'
# audio_path = '../audio/.mp3'
y, sr = librosa.load(audio_path)
y_mono = librosa.to_mono(y)
print('y', y.shape)
print('sr', sr)
print('y_mono', y_mono.shape)
# assert(False)

# mel spectrogram
def mel_spectrogram(y, sr):
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
    log_S = librosa.logamplitude(S, ref_power=np.max)  # renamed power_to_db in newer librosa
    print('spectrogram', S.shape)

    plt.figure(figsize=(12,4))
    librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')
    plt.title('mel power spectrogram')
    plt.colorbar(format='%+02.0f dB')
Code example #45
File: WnLFeatures.py Project: JosephDevaney/FYP
def lib_to_mono(data):
    return lib.to_mono(data)