Example #1
File: sentence.py Project: kpister/oratio
    def compress_translated_sentence(self, translation, max_allowed_duration,
                                     locale, client):

        audio_bytes = translation.raw_audio
        samplerate = translation.samplerate

        # duration = frames / samplerate
        translation.compression_ratio = float(
            (audio_bytes.shape[0] / samplerate)) / float(max_allowed_duration)

        if client and client.gcloud_speedup and translation.compression_ratio > 1.0:
            audio_bytes, samplerate = sf.read(
                io.BytesIO(
                    client.get_audio_chunk_for_sentence(
                        translation.text,
                        locale,
                        speedup=translation.compression_ratio)),
                always_2d=True,
            )

            # we might be slightly off on timing here. Gcloud isn't perfect
            # recalc the compression
            translation.compression_ratio = float(
                (audio_bytes.shape[0] /
                 samplerate)) / float(max_allowed_duration)

        if translation.compression_ratio > 1.0:
            audio_bytes = pyrubberband.time_stretch(
                audio_bytes, samplerate, translation.compression_ratio)
        translation.audio = audio_bytes
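The pattern in Example #1 is the core use of pyrubberband on this page: compute how much too long the audio is relative to a time budget and pass that ratio as the stretch rate, since a rate above 1.0 shortens the output. A minimal, standalone sketch of the same idea (the file name and the 3-second budget are made up, and it assumes the rubberband command-line tool that pyrubberband shells out to is installed):

import soundfile as sf
import pyrubberband

max_allowed_duration = 3.0                       # seconds, hypothetical budget
audio, samplerate = sf.read("translation.wav")   # hypothetical input file
ratio = (audio.shape[0] / samplerate) / max_allowed_duration

if ratio > 1.0:
    # rate > 1.0 speeds the audio up, so the result fits within the budget
    audio = pyrubberband.time_stretch(audio, samplerate, ratio)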
Example #2
    def stretch_seg(self, frameCount):
        # approach_para should be tuned: decrease or increase the multiplier
        # if the adjusted frame count does not equal frameCount
        approach_para = 0.00001
        # difference_upbound = 10
        # if the difference between the two frame counts were bigger than
        # difference_upbound, the multiplier would be adjusted
        # approach_para_mul = 1  # increase the speed of approach

        ori = self.signal
        multiplier = float(self.chroma.shape[1] / frameCount)
        count = 1
        # keep approaching while the frame count is not the same
        # (in practice this loop should rarely be needed)
        while self.chroma.shape[1] != frameCount:
            self.signal = pyrb.time_stretch(ori, self.sr, multiplier)
            self.chroma = librosa.feature.chroma_stft(self.signal)
            difference = self.chroma.shape[1]-frameCount
            if difference < 0:
                # adjusted signal is too short
                multiplier -= approach_para
            elif difference > 0:
                # adjusted signal is too long
                multiplier += approach_para
            # print('count:', count, 'difference:', difference,
            #       self.chroma.shape[1])
            count += 1
        self.chroma = librosa.feature.chroma_stft(self.signal)
        self.tempo = librosa.beat.tempo(self.signal)
        self.spec = librosa.feature.melspectrogram(self.signal, sr=self.sr)
        print('adjusted : ', self.name, ' with ', count - 1,
              ' times approaching', ' with multiplier = ', multiplier)
Example #3
def syncBlocks(path,
               CSM,
               beats1,
               beats2,
               Fs,
               hopSize,
               XAudio1,
               XAudio2,
               BeatsPerBlock,
               fileprefix=""):
    """
    :param path: Px2 array representing a partial warping path to align two songs
    :param CSM: The cross similarity matrix between two songs
    :param beats1: An array of beat onsets for song 1 in increments of hopSize
    :param beats2: An array of beat onsets for song 2 in increments of hopSize
    :param XAudio1: The raw audio samples for song 1
    :param XAudio2: The raw audio samples for song 2
    :param BeatsPerBlock: The number of beats per block for each pixel in the CSM
    :param fileprefix: Prefix of each stretched block to save.  By default, blank,\
        so no debugging info saved
    :returns (XFinal: An NSamples x 2 array with the first song along the first column\
                and the second synchronized song along the second column,\
              beatsFinal: An array of the locations in samples of the beat onsets in XFinal \
              scoresFinal: An array of matching scores for each beat)
    """
    XFinal = np.array([[0, 0]])
    beatsFinal = []  #The final beat locations based on hop size
    scoresFinal = []
    for i in range(path.shape[0]):
        [j, k] = [path[i, 0], path[i, 1]]
        if j >= CSM.shape[0] or k >= CSM.shape[1]:
            break
        scoresFinal.append(CSM[j, k])
        t1 = beats1[j] * hopSize
        t2 = beats1[j + BeatsPerBlock] * hopSize
        s1 = beats2[k] * hopSize
        s2 = beats2[k + BeatsPerBlock] * hopSize
        x1 = XAudio1[t1:t2]
        x2 = XAudio2[s1:s2]
        #Figure out the time factor by which to stretch x2 so it aligns
        #with x1
        fac = float(len(x1)) / len(x2)
        print("fac = ", fac)
        x2 = pyrb.time_stretch(x2, Fs, 1.0 / fac)
        print("len(x1) = %i, len(x2) = %i" % (len(x1), len(x2)))
        N = min(len(x1), len(x2))
        x1 = x1[0:N]
        x2 = x2[0:N]
        X = np.zeros((N, 2))
        X[:, 0] = x1
        X[:, 1] = x2
        if len(fileprefix) > 0:
            filename = "%s_%i.mp3" % (fileprefix, i)
            sio.wavfile.write("temp.wav", Fs, X)
            subprocess.call(["avconv", "-i", "temp.wav", filename])
        beat1 = beats1[j + 1] * hopSize - t1
        beatsFinal.append(XFinal.shape[0])
        XFinal = np.concatenate((XFinal, X[0:beat1, :]))
    return (XFinal, beatsFinal, scoresFinal)
Example #4
File: Hydra.py Project: erictzimas/hydra
def rubberband(incr, path1, path2):

    y, sr = librosa.load(path1, sr=None)

    y_stretched = pyrubberband.time_stretch(y, sr, incr)
    sf.write(path2, y_stretched, sr, format='wav')
    label1 = Label(w, text="Done !")
    label1.pack()
Example #5
 def do_agumentation(self):
     no_class = os.listdir(self.input_path)
     for name in no_class:
         files = os.listdir(self.input_path + name + "/")
         for i, audio in enumerate(files):
             y, sr = sf.read(self.input_path + name + "/" + audio)
             y_strech = pyrb.time_stretch(y, sr, 2.0)
             wav.write(self.output_path + name + "/" + audio, sr, y_strech)
             print(name, "has augmented and saved")
Example #6
 def stretch_audio(filepath):
     y, sr = librosa.load(filepath, sr=None)
     y_stretched = pyrubberband.time_stretch(y, sr,
                                             args.stretch_constant)
     sf.write(filepath + str(args.stretch_constant) + '.wav',
              y_stretched,
              sr,
              format='wav')
     sf.write(filepath, y_stretched, sr, format='wav')
Example #7
def timeStretch(input_tempo):
    y_shift, sr = librosa.load(can_ps_output, sr=44100)
    y_tempo = mas.get_tempo(can_ps_output)
    print("can_tempo:{}".format(y_tempo))
    rate = float(input_tempo) / y_tempo
    print("stretch_rate:{}".format(rate))
    # librosa.effects.time_stretch(y_shift, rate)  # by librosa
    y_stretch_shift = pyrb.time_stretch(y_shift, sr, rate)
    # seg11
    sf.write(can_output, y_stretch_shift, samplerate=44100)
Example #8
    def __call__(self, x, stretch=1):
        """Stretch the time of given signal
        
        Args:
            x (numpy.ndarray): input signal (n_samples,)
            stretch (float, int): degree of stretching (unit:ratio)

        Returns:
            numpy.ndarray: output (n_samples,)
        """
        y = pyrb.time_stretch(x, self.sample_rate, stretch)
        return y
Example #9
 def __call__(self, wav=None,
              sr=None):
     assert len(wav.shape)==1
     if random.random() < self.prob:
         alpha = 1.0 + self.limit * random.uniform(-1, 1)
         if self.use_pyrb:
             _wav = pyrb.time_stretch(wav, sr, alpha)
         else:
             _wav = librosa.effects.time_stretch(wav, alpha)
         if _wav.shape[0] < self.max_duration:
             wav = _wav
     return {'wav':wav,'sr':sr}
Example #10
def _stretched_audio_by_incre_bpm(song, beats, incre_bpm, sr):
  samples = np.array([]).reshape(2,0)
  for i in range(len(beats)-1):
    # stretch all samples between beat i and the beginning of the next beat
    sample = librosa.frames_to_samples(beats[i:i+2])
    y_raw = song.raw_audio_duo[:, sample[0]:sample[1]]

    stretch_ratio = incre_bpm[i] / song.bpm
    # transpose here twice because pyrb takes (n,2) while librosa takes (2,n)
    t_y_raw = y_raw.transpose()
    t_y_stretch = pyrb.time_stretch(t_y_raw, sr, stretch_ratio)
    samples = np.concatenate([samples, t_y_stretch.transpose()], axis=1)

  return samples
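The double transpose in Example #10 reflects a layout difference worth noting: librosa keeps stereo audio channels-first as (2, n), while pyrubberband expects frames-first (n, 2) arrays, matching what soundfile reads and writes. A small illustration with made-up noise:

import numpy as np
import pyrubberband as pyrb

sr = 22050
stereo_librosa = np.random.randn(2, sr)       # librosa layout: (channels, samples)
stereo_pyrb = stereo_librosa.T                # pyrubberband layout: (samples, channels)

slowed = pyrb.time_stretch(stereo_pyrb, sr, 0.8)   # rate < 1.0 slows the audio down
slowed_librosa = slowed.T                     # back to (channels, samples)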
Example #11
def change_tempo(audio, bpm, new_bpm):
    y = np.array(audio.get_array_of_samples())
    if audio.channels == 2:
        y = y.reshape((-1, 2))

    sample_rate = audio.frame_rate

    tempo_ratio = new_bpm / bpm
    y_fast = pyrb.time_stretch(y, sample_rate, tempo_ratio)

    channels = 2 if (y_fast.ndim == 2 and y_fast.shape[1] == 2) else 1
    y = np.int16(y_fast * 2 ** 15)

    new_seg = AudioSegment(y.tobytes(), frame_rate=sample_rate, sample_width=2, channels=channels)
    return new_seg
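This pydub round trip (pull the raw int16 samples out of an AudioSegment, stretch them with pyrubberband, then rebuild a 16-bit segment) reappears in the AudioSegment examples further down. A hypothetical call, with a made-up input file and BPM values:

from pydub import AudioSegment

song = AudioSegment.from_wav("loop.wav")           # hypothetical input
faster = change_tempo(song, bpm=120, new_bpm=140)  # faster, shorter segment
faster.export("loop_140bpm.wav", format="wav")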
Example #12
 def do_agumentation(self):
     no_class = os.listdir(self.input_path)
     for name in no_class:
         files = os.listdir(self.input_path + name + "/")
         files = [f for f in files if f.endswith(".wav")]
         for i, audio in enumerate(files):
             print(audio)
             y, sr = sf.read(self.input_path + name + "/" + audio)
             time = random.uniform(0.6, 1.3)
             y_strech = pyrb.time_stretch(y, sr, time)
             y_agument = pyrb.pitch_shift(y_strech, 22050, 1)
             # print(y_agument)
             wav.write(self.output_path + name + "/" + "agumented_" + audio,
                       sr, y_agument)
             print(name + "/" + "agumented_" + audio,
                   "has augmented and saved")
Example #13
def change_audioseg_tempo(segment, scale):
    y = np.array(segment.get_array_of_samples())
    if segment.channels == 2:
        y = y.reshape((-1, 2))

    sr = segment.frame_rate
    y_fast = pyrb.time_stretch(y, sr, scale)

    channels = 2 if (y_fast.ndim == 2 and y_fast.shape[1] == 2) else 1
    y = np.int16(y_fast * 2**15)

    new_seg = AudioSegment(y.tobytes(),
                           frame_rate=sr,
                           sample_width=2,
                           channels=channels)
    return new_seg
Example #14
def change_speed_only(sound, tempo_ratio):
    y = np.array(sound.get_array_of_samples())
    if sound.channels == 2:
        y = y.reshape((-1, 2))

    sample_rate = sound.frame_rate
    y_fast = pyrubberband.time_stretch(y, sample_rate, tempo_ratio)

    channels = 2 if (y_fast.ndim == 2 and y_fast.shape[1] == 2) else 1
    y = np.int16(y_fast * 2**15)

    new_seg = AudioSegment(y.tobytes(),
                           frame_rate=sample_rate,
                           sample_width=2,
                           channels=channels)

    return new_seg
Example #15
def audio_stretch(audio_path, start_point, end_point, stretch, out_path):
    full_audio = AudioSegment.from_wav(audio_path)
    to_stetch = full_audio[start_point * 1000:end_point * 1000]

    y, sr = sf.read(audio_path)
    to_stretch_array = np.asarray(to_stetch.get_array_of_samples())

    stretched_audio = pyrb.time_stretch(to_stretch_array, sr, 1 / stretch)

    librosa.output.write_wav('temp.wav', stretched_audio, sr)

    part1, _ = librosa.core.load(audio_path, duration=start_point, sr=sr)
    part2, _ = librosa.core.load('temp.wav', sr=sr)
    part3, _ = librosa.core.load(audio_path, offset=end_point, sr=sr)

    final_audio = np.concatenate((part1, part2, part3))

    librosa.output.write_wav(out_path, final_audio, sr)
Example #16
def test_stretch(sr, random_signal, num_samples, rate):
    '''Test the shape of random signals stretched
    by various rates.
    '''

    # input signal of shape (sr * duration, channels)
    y = random_signal

    y_s = pyrubberband.time_stretch(y, sr, rate=rate)

    # test if output dimension matches input dimension
    assert y_s.ndim == y.ndim

    # check shape
    if y.ndim > 1:
        # check number of channels
        assert y.shape[1] == y_s.shape[1]
    else:
        # check num_samples (stretching factor)
        assert np.allclose(y_s.shape[0] * rate, y.shape[0])
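These assertions pin down the length contract the other examples rely on: time_stretch returns roughly len(y) / rate samples, so a rate above 1.0 shortens the signal and a rate below 1.0 lengthens it. A quick hypothetical check with made-up noise:

import numpy as np
import pyrubberband

sr = 22050
y = np.random.randn(2 * sr)                      # two seconds of noise

y_fast = pyrubberband.time_stretch(y, sr, 2.0)   # roughly one second of output
y_slow = pyrubberband.time_stretch(y, sr, 0.5)   # roughly four seconds of output
print(len(y), len(y_fast), len(y_slow))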
Example #18
    def concatOnsets(self,
                     sequence,
                     corpusUnits,
                     targetUnits,
                     stretchUnits=False,
                     windowUnits=False):
        """Concatenate audio units back to back with optional time stretching to match the target
        Can also optionally window the audio
                
        :param sequence: list of indices into the corpusUnits
         
        :param corpusUnits: list of corpus unit audio signals
         
        :param targetUnits: list of target unit audio signals
         
        :param stretchUnits: stretch the corpus unit to match the target unit
        
        :param windowUnits: apply a window to the signal
         
        :return: an audio signal
        """
        import pyrubberband as pyrb

        audio = []

        for i, item in enumerate(sequence):
            corpusUnit = corpusUnits[item]

            #Use Rubber Band to stretch the audio to match the target
            if stretchUnits:
                factor = len(corpusUnit) / float(len(targetUnits[i]))
                corpusUnit = pyrb.time_stretch(corpusUnit, 44100, factor)

            # Envelope each corpus unit with a Hamming window before appending
            if windowUnits:
                corpusUnit = corpusUnit * np.hamming(len(corpusUnit))

            audio = np.append(audio, corpusUnit)

        return audio
Example #19
def makeAnalogy(X,
                Fs,
                beatsA,
                filename_b,
                hopSize,
                winSize,
                ws,
                TempoBias,
                MFCCWeight=1.0,
                HPCPWeight=1.0):
    """
    Make a cover song analogy; given audio for (A, A'), and B, \
        make B'
    :param X: Audio waveform for A and A'; A is in first column, A' in second
    :param Fs: Sample rate of all audio files
    :param beatsA: Beat onsets (in samples)
    :param hopSize: Feature hop size
    :param winSize: Window size for MFCCs and HPCPs
    :param ws: Window weights of all features
    :param TempoBias: Tempo bias for beat tracking of song B
    """
    #Step 1: Load in new example from artist 1 (B song)
    print("Loading new example...")
    XA = X[:, 0]
    XAp = X[:, 1]
    XB, Fs2 = librosa.load(filename_b)
    XB = librosa.core.to_mono(XB)

    #Step 2: Use rubberband library to change tempo of B so that
    #it's in line with tempo of song A
    tempoB, beatsB = librosa.beat.beat_track(XB,
                                             Fs2,
                                             start_bpm=TempoBias,
                                             hop_length=hopSize)
    tempoA = 60.0 / (np.mean(beatsA[1::] - beatsA[0:-1]) / float(Fs))
    print("tempoA = %g, tempoB = %g" % (tempoA, tempoB))
    ratio = float(tempoA) / tempoB
    print("Shifting by ratio: %g" % ratio)
    XB = pyrb.time_stretch(XB, Fs2, ratio)
Example #20
    def augment(self, array, count):
        # Original signal
        # Second argument determines the type of augmentation applied to the signal
        self.sigToImage(array, 1, count)

        # Noise addition using normal distribution with mean = 0 and std =1
        # Permissible noise factor value = x > 0.004
        noiseAdding = array + 0.009 * np.random.normal(0, 1, len(array))
        self.sigToImage(noiseAdding, 2, count)

        # Permissible factor values = samplingRate / 100
        timeShifting = np.roll(array, int(500 / 100))
        self.sigToImage(timeShifting, 3, count)

        # Permissible factor values = -5 <= x <= 5
        pitchShifting = pyrb.pitch_shift(array, 500, -3)
        self.sigToImage(pitchShifting, 4, count)

        # Permissible factor values = 0 < x < 1.0
        factor = 0.95  # Yields the best results without losing the ECG wave shape
        timeStretching = pyrb.time_stretch(array, 500, factor)
        self.sigToImage(timeStretching, 5, count)
Example #21
def testNMF2DMusic(K, T, F, NIters = 300, bins_per_octave = 24, shiftrange = 6, \
                    ZoomFac = 8, Trial = 0, Joint3Way = False, \
                    W1Fixed = False, HFixed = False, doKL = False):
    """
    :param Joint3Way: If true, do a joint embedding with A, Ap, and B\
        If false, then do a joint embedding with (A, Ap) and represent\
        B in the A dictionary
    """
    import librosa
    from scipy.io import wavfile
    import pyrubberband as pyrb

    #Synthesizing AAF's "Bad"
    """
    Fs, X = wavfile.read("music/SmoothCriminalAligned.wav")
    X = np.array(X, dtype=np.float32)
    A = X[:, 0]/(2.0**15)
    Ap = X[:, 1]/(2.0**15)
    #Take 20 seconds clips from each
    A = A[0:Fs*20]
    Ap = Ap[0:Fs*20]
    B, Fs = librosa.load("music/MJBad.mp3")
    B = B[Fs*3:Fs*23]
    #A and A' tempos are from the synchronization code
    tempoA = 0.508 
    tempoAp = 0.472
    tempoB = 0.53

    songname = "mj"
    #A good separation I got before
    res = sio.loadmat("FinalExamples/MJAAF_Bad/Joint2DNMFFiltered_K3_Z4_T20_Bins24_F14_Trial2/NMF2DJoint.mat")
    W1 = res['W1']
    W2 = res['W2']
    H1 = res['H1']

    do2DFilteredAnalogy(A, Ap, B, Fs, K, T, F, NIters, bins_per_octave, shiftrange, \
        ZoomFac, Trial, Joint3Way, W1Fixed, HFixed, doKL, songname=songname, W1=W1, W2=W2, H1=H1)
    """

    #Synthesizing AAF's "Wanna Be Starting Something"
    """
    Fs, X = wavfile.read("music/SmoothCriminalAligned.wav")
    X = np.array(X, dtype=np.float32)
    A = X[:, 0]/(2.0**15)
    Ap = X[:, 1]/(2.0**15)
    #Take 20 seconds clips from each
    A = A[0:Fs*20]
    Ap = Ap[0:Fs*20]
    B, Fs = librosa.load("music/MJStartinSomething.mp3")
    
    #tempos = getTempos(A, Ap, B, Fs)
    tempoA = 0.508 
    tempoAp = 0.472
    tempoB = 0.49
    B = pyrb.time_stretch(B, Fs, tempoB/tempoA)
    B = B[0:Fs*20]
    
    songname = "wanna"
    res = sio.loadmat("FinalExamples/MJAAF_Bad/Joint2DNMFFiltered_K3_Z4_T20_Bins24_F14_Trial2/NMF2DJoint.mat")
    W1 = res['W1']
    W2 = res['W2']
    H1 = res['H1']
    res = do2DFilteredAnalogy(A, Ap, B, Fs, K, T, F, NIters, bins_per_octave, shiftrange, \
        ZoomFac, Trial, Joint3Way, W1Fixed, HFixed, doKL, songname=songname, W1=W1, W2=W2, H1=H1)
    Y = res['Y']
    foldername = res['foldername']
    Y = pyrb.time_stretch(Y, Fs, tempoA/tempoB)
    wavfile.write("%s/BpFinalStretched.wav"%foldername, Fs, Y)
    """

    #Synthesizing Marilyn Manson "Who's That Girl"
    Fs, X = wavfile.read("music/SweetDreams/SweetDreamsAlignedClip.wav")
    X = np.array(X, dtype=np.float32)
    A = X[:, 0]/(2.0**15)
    Ap = X[:, 1]/(2.0**15)
    #Take 20 seconds clips from each
    A = A[0:Fs*20]
    Ap = Ap[0:Fs*20]
    B, Fs = librosa.load("music/SweetDreams/WhosThatGirlClip.wav")
    B = B[0:Fs*20]

    tempoA = 0.477
    tempoB = 0.65
    songname = "eurythmics"
    res = do2DFilteredAnalogy(A, Ap, B, Fs, K, T, F, NIters, bins_per_octave, shiftrange, \
        ZoomFac, Trial, Joint3Way, W1Fixed, HFixed, doKL, songname=songname)
    Y = res['Y']
    foldername = res['foldername']
    Y = pyrb.time_stretch(Y, Fs, tempoA/tempoB)
    wavfile.write("%s/BpFinalStretched.wav"%foldername, Fs, Y)
Example #22
File: time.py Project: jfsantos/muda
 def audio(mudabox, state):
     # Deform the audio and metadata
     mudabox._audio['y'] = pyrb.time_stretch(mudabox._audio['y'],
                                             mudabox._audio['sr'],
                                             state['rate'])
Example #23
def testMIDIExample(T, F, NIters = 300, bins_per_octave = 24, shiftrange = 6, \
                    ZoomFac = 8, Trial = 0, HFixed = False, doKL = True):
    import librosa
    from scipy.io import wavfile
    import pyrubberband as pyrb
    from CQT import getNSGT
    initParallelAlgorithms()
    path = "music/MIDIExample/BeeGeesTracks/"
    NTracks = 6
    W1 = np.array([])
    H1 = np.array([])
    startidx = 27648 #Where the synchronized path starts
    for track in range(NTracks):
        matfilename = "%s/WH%i_F%i_T%i_Z%i_Trial%i.mat"%(path, track+1, F, T, ZoomFac, Trial)
        if not os.path.exists(matfilename):
            X, Fs = librosa.load("%s/%i.mp3"%(path, track+1))
            X = X[startidx:startidx+Fs*10]
            wavfile.write("Track%i.wav"%track, Fs, X)
            print("Doing CQT of track %i..."%track)
            C0 = getNSGT(X, Fs, bins_per_octave)
            #Zeropad to nearest even factor of the zoom factor
            NRound = ZoomFac*int(np.ceil(C0.shape[1]/float(ZoomFac)))
            C = np.zeros((C0.shape[0], NRound), dtype = np.complex)
            C[:, 0:C0.shape[1]] = C0
            C = np.abs(C)
            C = scipy.ndimage.interpolation.zoom(C, (1, 1.0/ZoomFac))
            plotfn = lambda V, W, H, iter, errs: plotNMF2DConvSpectra(V, W, H, iter, errs, hopLength = 128)
            (Wi, Hi) = doNMF2DConvGPU(C, 1, T, F, L=100, doKL = doKL, plotfn = plotfn, plotInterval=400)
            sio.savemat(matfilename, {"W":Wi, "H":Hi})
        else:
            res = sio.loadmat(matfilename)
            Wi = res["W"]
            Hi = res["H"]
        if W1.size == 0:
            W1 = np.zeros((T, Wi.shape[1], NTracks))
            H1 = np.zeros((F, NTracks, Hi.shape[2]))
        Wi = np.reshape(Wi, [Wi.shape[0], Wi.shape[1]])
        Hi = np.reshape(Hi, [Hi.shape[0], Hi.shape[2]])
        W1[:, :, track] = Wi
        H1[:, track, :] = Hi

    K = NTracks
    Fs, X = wavfile.read("music/MIDIExample/stayinalivesyncedclip.wav")
    X = np.array(X, dtype=np.float32)
    A = X[:, 0]/(2.0**15)
    Ap = X[:, 1]/(2.0**15)
    #Take 10 seconds clips from each
    A = A[0:Fs*10]
    Ap = Ap[0:Fs*10]
    B, Fs = librosa.load("music/MIDIExample/TupacMIDIClip.mp3")
    tempoA = 0.578
    tempoB = 0.71
    B = pyrb.time_stretch(B, Fs, tempoB/tempoA)
    wavfile.write("BStretched.wav", Fs, B)
    B = B[0:Fs*10]

    songname = "madatchya"
    if not HFixed:
        H1 = np.array([])
    res = do2DFilteredAnalogy(A, Ap, B, Fs, K, T, F, NIters, bins_per_octave, shiftrange, \
        ZoomFac, Trial, False, W1Fixed=True, HFixed=HFixed, doKL = doKL, W1 = W1, H1=H1, songname=songname)
    Y = res['Y']
    foldername = res['foldername']
    Y = pyrb.time_stretch(Y, Fs, tempoA/tempoB)
    wavfile.write("%s/BpFinalStretched.wav"%foldername, Fs, Y)
Example #24
            original_word_len = (len(word)) / sr  # in secs
            original_samp = len(word)
            rise_samp = int(sr * rise_sec)  # convert rise len to samples
            end_dur = int(beat_length *
                          sr)  # convert beat len to samples...should be 7350

            uncorrected_stretch = original_samp / end_dur
            corrected_onset = int(onset / uncorrected_stretch)

            # compute stretch factor
            stretch_factor = original_samp / (end_dur +
                                              (corrected_onset - rise_samp))

            # Stretch
            #word = librosa.effects.time_stretch(word, stretch_factor)
            word = pyrb.time_stretch(word, sr, stretch_factor)

            # compute stretched onset
            #onset = int(onset / stretch_factor)
            onset = librosa.onset.onset_detect(word, units='samples')
            onset = onset[0]  # just return the first onset

            # start sound file from the rise-len before the onset
            word = word[int(onset - rise_samp):]

            # correction procedure because the librosa stretch doesn't work properly
            # compute diff between desired duration and actual
            difference = end_dur - len(word)

            # zero pad the difference
            if difference > 0:
Example #25
ysync_bad[:, 1] = x2
sio.wavfile.write("sync_bad.wav", sr, ysync_bad)

res = DTW(X1, X2)
D, path = res['D'], res['path']
indices = []
for i in range(D.shape[0]):
    indices.append([])
for p in path:
    indices[p[0]].append(p[1])
x2sync = []
for i, js in enumerate(indices):
    j1 = min(js)
    j2 = max(js)
    k = j2 - j1 + 1
    print(i, k)
    if k > 1:
        x = x2[hop * j1:hop * (j2 + 1)]
        x = pyrb.time_stretch(x, sr, k)
        x2sync += x.tolist()
    elif k == 1:
        x = x2[hop * j1:hop * (j2 + 1)]
        x2sync += x.tolist()
x2sync = np.array(x2sync)

y = np.zeros((x1.size, 2))
y[:, 0] = x1
y[0:x2sync.size, 1] = x2sync

sio.wavfile.write("sync.wav", sr, y)
Example #26
i = 0
desired_tempo = 100
outfpath = "output/%s-%sbpm.flac" % (list(document.sents)[0].string[:10], desired_tempo)

for fpath in list(outfiles):

      # Load the file from disk, trim excess silence, slow it down a bit and
      # concatenate it to the full file

      sentence, sr = librosa.core.load(fpath)
      trimmed, index = librosa.effects.trim(sentence)

      onset_env = librosa.onset.onset_strength(y=trimmed, sr=sr)
      tempo = librosa.beat.tempo(y=trimmed, onset_envelope=onset_env)
      rate = desired_tempo / int(tempo)

      print(fpath, tempo, rate)
      slowed = pyrb.time_stretch(trimmed, sr, rate)

      if i != 0:
          outf, sr = librosa.core.load(outfpath)
          z = np.append(outf, slowed)
          sf.write(outfpath, z, sr, format='flac', subtype='PCM_24')
      else:
          sf.write(outfpath, slowed, sr, format='flac', subtype='PCM_24')

      i += 1

print("\nSaved whole text as %s\n\
Example #27
import wave
import sys
from pydub import AudioSegment
import soundfile as sf
import pyrubberband as pyrb

# sound = AudioSegment.from_mp3(sys.argv[1])
# sound.export("file.wav", format="wav")

y, sr = sf.read("0.wav")
y_stretch = pyrb.time_stretch(y, sr, 0.90)
y_shift = pyrb.pitch_shift(y, sr, 0.90)
sf.write("analyzed_filepathX5.wav", y_stretch, sr, format='wav')
Example #28
File: time.py Project: bmcfee/muda
 def audio(mudabox, state):
     # Deform the audio and metadata
     mudabox._audio['y'] = pyrb.time_stretch(mudabox._audio['y'],
                                             mudabox._audio['sr'],
                                             state['rate'])
Example #29
import scipy


def load_wav(fname):
    srate, audio = wav.read(fname)
    audio = audio.astype(np.float32) / 32767.0
    audio = (0.9 / np.max(audio)) * audio
    # convert to mono
    if (len(audio.shape) == 2):
        audio = (audio[:, 0] + audio[:, 1]) / 2
    return (audio, srate)


dreamer, srate = load_wav('dreamer.wav')
dreamer_live, srate = load_wav('dreamer_live.wav')
dreamer_slow = pyrb.time_stretch(dreamer, srate, 0.75)
goodbye_stranger, srate = load_wav('goodbye_stranger.wav')
naima, srate = load_wav('naima.wav')

# Constant Q transform represents energy among different pitch classes across time
# Beat synchronous chroma vectors reduce the size of chroma vectors and actually make the representation
# tempo invariant


def plot_chromagram(y):
    y = y[0:8000000]
    C = librosa.feature.chroma_cqt(y, sr=srate, bins_per_octave=12, norm=2)

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    # Display the chromagram: the energy in each chromatic pitch class as a function of time
Example #30
    def generate_labels_features_voca(self, all_list):
        pid = os.getpid()
        mp3_config, feature_config, mp3_str, feature_str = self.config_to_folder(
        )

        i = 0  # number of songs
        j = 0  # number of impossible songs
        k = 0  # number of tried songs
        total = 0  # number of generated instances
        stretch_factors = [1.0]
        shift_factors = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6]

        loop_broken = False
        for song_name, lab_path, mp3_path, save_path in all_list:
            save_path = save_path + '_voca'

            # different song initialization
            if loop_broken:
                loop_broken = False

            i += 1
            print(pid, "generating features from ...", os.path.join(mp3_path))
            if i % 10 == 0:
                print(i, ' th song')

            original_wav, sr = librosa.load(os.path.join(mp3_path),
                                            sr=mp3_config['song_hz'])

            # save_path, mp3_string, feature_string, song_name, aug.pt
            result_path = os.path.join(save_path, mp3_str, feature_str,
                                       song_name.strip())
            if not os.path.exists(result_path):
                os.makedirs(result_path)

            # calculate result
            for stretch_factor in stretch_factors:
                if loop_broken:
                    loop_broken = False
                    break

                for shift_factor in shift_factors:
                    # for filename
                    idx = 0

                    try:
                        chord_info = self.Chord_class.get_converted_chord_voca(
                            os.path.join(lab_path))
                    except Exception as e:
                        print(e)
                        print(pid, " chord lab file error : %s" % song_name)
                        loop_broken = True
                        j += 1
                        break

                    k += 1
                    # stretch original sound and chord info
                    x = pyrb.time_stretch(original_wav, sr, stretch_factor)
                    x = pyrb.pitch_shift(x, sr, shift_factor)
                    audio_length = x.shape[0]
                    chord_info[
                        'start'] = chord_info['start'] * 1 / stretch_factor
                    chord_info['end'] = chord_info['end'] * 1 / stretch_factor

                    last_sec = chord_info.iloc[-1]['end']
                    last_sec_hz = int(last_sec * mp3_config['song_hz'])

                    if audio_length + mp3_config['skip_interval'] < last_sec_hz:
                        print('loaded song is too short :', song_name)
                        loop_broken = True
                        j += 1
                        break
                    elif audio_length > last_sec_hz:
                        x = x[:last_sec_hz]

                    origin_length = last_sec_hz
                    origin_length_in_sec = origin_length / mp3_config['song_hz']

                    current_start_second = 0

                    # get chord list between current_start_second and current+song_length
                    while current_start_second + mp3_config[
                            'inst_len'] < origin_length_in_sec:
                        inst_start_sec = current_start_second
                        curSec = current_start_second

                        chord_list = []
                        # extract chord per 1/self.time_interval
                        while curSec < inst_start_sec + mp3_config['inst_len']:
                            try:
                                available_chords = chord_info.loc[
                                    (chord_info['start'] <= curSec)
                                    & (chord_info['end'] > curSec +
                                       self.time_interval)].copy()
                                if len(available_chords) == 0:
                                    available_chords = chord_info.loc[(
                                        (chord_info['start'] >= curSec) &
                                        (chord_info['start'] <= curSec +
                                         self.time_interval)) | (
                                             (chord_info['end'] >= curSec) &
                                             (chord_info['end'] <= curSec +
                                              self.time_interval))].copy()

                                if len(available_chords) == 1:
                                    chord = available_chords['chord_id'].iloc[
                                        0]
                                elif len(available_chords) > 1:
                                    max_starts = available_chords.apply(
                                        lambda row: max(row['start'], curSec),
                                        axis=1)
                                    available_chords['max_start'] = max_starts
                                    min_ends = available_chords.apply(
                                        lambda row: min(
                                            row.end, curSec + self.
                                            time_interval),
                                        axis=1)
                                    available_chords['min_end'] = min_ends
                                    chords_lengths = available_chords[
                                        'min_end'] - available_chords[
                                            'max_start']
                                    available_chords[
                                        'chord_length'] = chords_lengths
                                    chord = available_chords.loc[
                                        available_chords['chord_length'].
                                        idxmax()]['chord_id']
                                else:
                                    chord = 169
                            except Exception as e:
                                chord = 169
                                print(e)
                                print(pid, "no chord")
                                raise RuntimeError()
                            finally:
                                # convert chord by shift factor
                                if chord != 169 and chord != 168:
                                    chord += shift_factor * 14
                                    chord = chord % 168

                                chord_list.append(chord)
                                curSec += self.time_interval

                        if len(chord_list
                               ) == self.no_of_chord_datapoints_per_sequence:
                            try:
                                sequence_start_time = current_start_second
                                sequence_end_time = current_start_second + mp3_config[
                                    'inst_len']

                                start_index = int(sequence_start_time *
                                                  mp3_config['song_hz'])
                                end_index = int(sequence_end_time *
                                                mp3_config['song_hz'])

                                song_seq = x[start_index:end_index]

                                etc = '%.1f_%.1f' % (current_start_second,
                                                     current_start_second +
                                                     mp3_config['inst_len'])
                                aug = '%.2f_%i' % (stretch_factor,
                                                   shift_factor)

                                if self.feature_name == FeatureTypes.cqt:
                                    feature = librosa.cqt(
                                        song_seq,
                                        sr=sr,
                                        n_bins=feature_config['n_bins'],
                                        bins_per_octave=feature_config[
                                            'bins_per_octave'],
                                        hop_length=feature_config['hop_length']
                                    )
                                else:
                                    raise NotImplementedError

                                if feature.shape[
                                        1] > self.no_of_chord_datapoints_per_sequence:
                                    feature = feature[:, :self.
                                                      no_of_chord_datapoints_per_sequence]

                                if feature.shape[
                                        1] != self.no_of_chord_datapoints_per_sequence:
                                    print(
                                        'loaded features length is too short :',
                                        song_name)
                                    loop_broken = True
                                    j += 1
                                    break

                                result = {
                                    'feature': feature,
                                    'chord': chord_list,
                                    'etc': etc
                                }

                                # save_path, mp3_string, feature_string, song_name, aug.pt
                                filename = aug + "_" + str(idx) + ".pt"
                                torch.save(result,
                                           os.path.join(result_path, filename))
                                idx += 1
                                total += 1
                            except Exception as e:
                                print(e)
                                print(pid, "feature error")
                                raise RuntimeError()
                        else:
                            print(
                                "invalid number of chord datapoints in sequence :",
                                len(chord_list))
                        current_start_second += mp3_config['skip_interval']
        print(pid, "total instances: %d" % total)
Example #31
def time_stretching(sig, sr, degree):
    return pyrb.time_stretch(sig, sr, degree)
Example #32
def write_audio_file(path, name, voice, audio, sampling_rate):
    file_name = path + \
        time.strftime("%Y%m%d-%H%M%S_") + name + str(randint(0, 100)) + ".wav"
    if voice == "satan:":
        temp_file_name = path + "temp.wav"
        write(temp_file_name, sampling_rate, audio)

        fixed_framerate = 11000
        sound = AudioSegment.from_file(temp_file_name)
        sound = sound.set_frame_rate(fixed_framerate)
        write(file_name, fixed_framerate, audio)

        y, sr = sf.read(file_name)
        y_stretch = pyrb.time_stretch(y, sr, 1.6)
        y_shift = pyrb.pitch_shift(y, sr, 1.6)
        sf.write(file_name, y_stretch, sr, format='wav')

        sound = AudioSegment.from_wav(file_name)
        sound.export(file_name, format="wav")
    elif voice == "vader:":
        temp_file_name = path + "temp.wav"
        write(temp_file_name, sampling_rate, audio)
        AudioEffect.robotic(temp_file_name, file_name)

        y, sr = sf.read(file_name)
        y_stretch = pyrb.time_stretch(y, sr, 0.9)
        y_shift = pyrb.pitch_shift(y, sr, 0.9)
        sf.write(file_name, y_stretch, sr, format='wav')

        sound = AudioSegment.from_wav(file_name)
        sound.export(file_name, format="wav")
    else:
        write(file_name, sampling_rate, audio)
    return file_name