Example #1
    def reconstruct_audio(self,
                          description,
                          irm=None,
                          mask=None,
                          idx=None,
                          test=False):
        n_wavfiles = len(self.x_wavfiles)
        if idx is None:
            for j in range(n_wavfiles):
                if irm is None or mask is None:
                    yest = self.reconstruct_x(j)
                else:
                    yest = self.reconstruct_x(
                        j, mask=irm[j, :np.sum(mask[j, :]), :].T)
                y = self.reconstruct_y(j)
                wavfile_enhanced = self.y_wavfiles[j].replace(
                    'scaled', 'enhanced_%s' % description)
                if not os.path.exists(os.path.dirname(wavfile_enhanced)):
                    os.makedirs(os.path.dirname(wavfile_enhanced))
                util.wavwrite(wavfile_enhanced, 16e3, yest)
        elif isinstance(idx, list):
            for j in idx:
                if irm is None or mask is None:
                    yest = self.reconstruct_x(j)
                else:
                    yest = self.reconstruct_x(
                        j, mask=irm[j, :np.sum(mask[j, :]), :].T)
                y = self.reconstruct_y(j)
                if test:
                    y_orig = util.wavread(self.y_wavfiles[j])[0:1, :]
                    x = util.wavread(self.x_wavfiles[j])[0:1, :]
                    if yest.shape[1] > x.shape[1]:
                        yest = yest[:, :x.shape[1]]
                    if y.shape[1] > y_orig.shape[1]:
                        y = y[:, :y_orig.shape[1]]
                    print "For file %d, NMSE between original x and yest is %e" % (
                        j, np.mean((x - yest)**2) / np.mean(x**2))
                    print "For file %d, NMSE between original y_orig and y is %e" % (
                        j, np.mean((y_orig - y)**2) / np.mean(y_orig**2))
                else:
                    wavfile_enhanced = self.y_wavfiles[j].replace(
                        'scaled', 'enhanced_%s' % description)
                    if not os.path.exists(os.path.dirname(wavfile_enhanced)):
                        os.makedirs(os.path.dirname(wavfile_enhanced))
                    util.wavwrite(wavfile_enhanced, 16e3, yest)
        else:
            if irm is None:
                yest = self.reconstruct_x(idx)
            else:
                yest = self.reconstruct_x(idx, mask=irm)

            wavfile_enhanced = self.y_wavfiles[idx].replace(
                'scaled', 'enhanced_%s' % description)
            if not os.path.exists(os.path.dirname(wavfile_enhanced)):
                os.makedirs(os.path.dirname(wavfile_enhanced))
            util.wavwrite(wavfile_enhanced, 16e3, yest)

        return
Example #2
def load_wav(fname, fs):
    """ load_wav function test and read a wav files, and convert
    stereo channels into mono 

    Parameters
    ----------
    
    fname : [string]
        wav input file name

    fs : [int]
        check if the sampling rate of the signal given by 
        this variable, frequency in Hz


    Returns
    -------

    sig : [np.array]
        mono-channel signal 


    """

    sig, found_fs = wavread(fname)
    if fs != found_fs:
        raise ValueError('sampling rate should be {0}, not {1}. '
                         'please resample.'.format(fs, found_fs))

    if len(sig.shape) > 1:
        warnings.warn('stereo audio: merging channels')
        sig = (sig[:, 0] + sig[:, 1]) / 2

    return sig
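
A minimal usage sketch for load_wav (the file name and the 16 kHz rate are illustrative assumptions; the function itself raises if the file's rate differs):

sig = load_wav('speech.wav', fs=16000)  # hypothetical input file
print(sig.shape)  # 1-D mono signal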
Example #3
    def __getitem__(self, idx):
        if idx >= len(self.mixturePaths):
            raise IndexError
        mixture, fs = util.wavread(self.mixturePaths[idx])
        vocal, fs = util.wavread(self.vocalPaths[idx])
        if self.mono:
            # downmix to mono by averaging channels
            mixture = np.mean(mixture, axis=-1)
            vocal = np.mean(vocal, axis=-1)

        sample = {
            'mixture': mixture.astype(np.float32),
            'vocal': vocal.astype(np.float32)
        }
        if self.transform is not None:
            sample = self.transform(sample)
        return sample
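
Because __getitem__ returns a dict of float32 arrays, the enclosing class (not shown above; VocalDataset and its constructor arguments are hypothetical stand-ins) can be fed straight to a PyTorch DataLoader. batch_size=1 avoids collation errors when recordings have different lengths:

from torch.utils.data import DataLoader

dataset = VocalDataset(mixturePaths, vocalPaths, mono=True)  # hypothetical ctor
loader = DataLoader(dataset, batch_size=1, shuffle=True)
for sample in loader:
    mixture, vocal = sample['mixture'], sample['vocal']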
Example #4
def extract_features(df, label2ix, spec_kwargs, vad_kwargs,
                     stacksize=1, frate=100, return_y=False):
    # only compute labels if the dataframe actually has a 'label' column
    if return_y:
        return_y = 'label' in df
    X = {}
    if return_y:
        y = {}
    spectrum_encoder = Spectral(**spec_kwargs)
    vad_encoder = VAD(**vad_kwargs)
    for ix, fname in enumerate(df.filename.unique()):
        sig, fs = wavread(fname)
        if fs != spec_kwargs['fs']:
            raise ValueError('expected samplerate {}, got {}'.format(
                spec_kwargs['fs'], fs)
            )
        spec = spectrum_encoder.transform(sig)
        spec = (spec - spec.mean(0)) / spec.std(0)
        if stacksize > 1:
            spec = roll_array(spec, stacksize)
        vad = vad_encoder.activations(sig)
        vad = vad.reshape(vad.shape[0], -1)
        if stacksize > 1:
            vad = roll_array(vad, stacksize)

        X_curr = []
        if return_y:
            y_curr = []

        rows_iter = df[df.filename == fname].iterrows()
        for _, row in rows_iter:
            start = row.start
            end = row.end

            start_fr = int(start * frate)
            end_fr = int(end * frate)

            feat = np.hstack(
                (spec[start_fr: end_fr],
                 vad[start_fr: end_fr])
            )
            X_curr.append(
                feat.astype(np.float32)
            )
            if return_y:
                y_curr.append(
                    np.ones(feat.shape[0], dtype=np.uint8) *
                    label2ix[row.label]
                )
        X[fname] = np.vstack(X_curr)
        if return_y:
            y[fname] = np.hstack(y_curr)
    if return_y:
        return X, y
    else:
        return X
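
A hedged sketch of a call to extract_features (the dataframe uses the columns the code reads: filename, start, end, label; the Spectral and VAD keyword arguments are assumptions, only 'fs' being checked above):

import pandas as pd

df = pd.DataFrame({'filename': ['a.wav'], 'start': [0.0],
                   'end': [1.5], 'label': ['speech']})
X, y = extract_features(df, {'speech': 0},
                        spec_kwargs={'fs': 16000},
                        vad_kwargs={'fs': 16000},
                        return_y=True)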
Example #5
def extract_features(df,
                     label2ix,
                     spec_kwargs,
                     vad_kwargs,
                     stacksize=1,
                     frate=100,
                     return_y=False):
    if return_y:
        return_y = 'label' in df
    X = {}
    if return_y:
        y = {}
    spectrum_encoder = Spectral(**spec_kwargs)
    vad_encoder = VAD(**vad_kwargs)
    for ix, fname in enumerate(df.filename.unique()):
        sig, fs = wavread(fname)
        if fs != spec_kwargs['fs']:
            raise ValueError('expected samplerate {}, got {}'.format(
                spec_kwargs['fs'], fs))
        spec = spectrum_encoder.transform(sig)
        spec = (spec - spec.mean(0)) / spec.std(0)
        if stacksize > 1:
            spec = roll_array(spec, stacksize)
        vad = vad_encoder.activations(sig)
        vad = vad.reshape(vad.shape[0], -1)
        if stacksize > 1:
            vad = roll_array(vad, stacksize)

        X_curr = []
        if return_y:
            y_curr = []

        rows_iter = df[df.filename == fname].iterrows()
        for _, row in rows_iter:
            start = row.start
            end = row.end

            start_fr = int(start * frate)
            end_fr = int(end * frate)

            feat = np.hstack((spec[start_fr:end_fr], vad[start_fr:end_fr]))
            X_curr.append(feat.astype(np.float32))
            if return_y:
                y_curr.append(
                    np.ones(feat.shape[0], dtype=np.uint8) *
                    label2ix[row.label]
                )
        X[fname] = np.vstack(X_curr)
        if return_y:
            y[fname] = np.hstack(y_curr)
    if return_y:
        return X, y
    else:
        return X
Example #6
    def _load_wav(self, fname):
        """
        Memoized audio loader.
        """
        key = fname
        if key not in self.wav_cache:
            sig, fs_ = wavread(fname)
            if self.fs != fs_:
                raise ValueError('sampling rate should be {0}, not {1}. '
                                 'please resample.'.format(self.fs, fs_))
            if len(sig.shape) > 1:
                warnings.warn('stereo audio: merging channels')
                sig = (sig[:, 0] + sig[:, 1]) / 2
            self.wav_cache[key] = sig
        return self.wav_cache[key]
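
The cache means each file is decoded at most once; repeated calls return the very same array object. A quick sketch (the loader object and file name are hypothetical):

a = loader._load_wav('take1.wav')  # decodes and fills self.wav_cache
b = loader._load_wav('take1.wav')  # served from the cache
assert a is b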
Example #7
    def _load_wav(self, fname):
        """
        Memoized audio loader.
        """
        key = fname
        if key not in self.wav_cache:
            sig, fs_ = wavread(fname)
            if self.fs != fs_:
                raise ValueError('sampling rate should be {0}, not {1}. '
                                 'please resample.'.format(self.fs, fs_))
            if len(sig.shape) > 1:
                warnings.warn('stereo audio: merging channels')
                sig = (sig[:, 0] + sig[:, 1]) / 2
            self.wav_cache[key] = sig
        return self.wav_cache[key]
Example #8
    def run(self):
        import algorithm
        padata = algorithm.creat_data(algorithm.Names)
        while self.event.is_set():
            if hasattr(self, 'wavfiles'):
                for wavfile in self.wavfiles:
                    x, fs, bits, N = util.wavread(str(wavfile))
                    self.x = (x + 32768) / 16
                    for i in range(1, len(x) // 1024):
                        # shift the rolling window and append the new block's stats
                        for key in padata:
                            padata[key][:-1] = padata[key][1:]
                        raw_data = self.x[1024 * (i - 1):1024 * i]
                        padata['max'][-1] = max(raw_data)
                        padata['min'][-1] = min(raw_data)
                        padict.update({self.figurename: padata})
                        if self.plotmode == "by data":
                            self.plotsignal.emit()
                        time.sleep(self.importwavspreed / self.importspreed)
Example #9
import numpy as np
import Model_fcn  #Model
import torch
import util
import sys

if __name__ == "__main__":
    blockSize = 4096
    hopSize = 2048

    if len(sys.argv) != 3:
        print("Usage:\n", sys.argv[0], "input_path output_path")
        sys.exit(1)

    #read the wav file
    x, fs = util.wavread(sys.argv[1])
    #downmix to single channel
    x = np.mean(x, axis=-1)
    #perform stft
    S = util.stft_real(x, blockSize=blockSize, hopSize=hopSize)
    magnitude = np.abs(S).astype(np.float32)
    angle = np.angle(S).astype(np.float32)

    #initialize the model
    model = Model_fcn.ModelSingleStep(blockSize)
    #load the pretrained model
    model.load_state_dict(
        torch.load("Modelfcn.pt", map_location=lambda storage, loc: storage))
    #switch to eval mode
    model.eval()
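
The snippet stops right after model.eval(); a hedged sketch of the remaining inference steps follows (the model's forward signature, the soft-mask separation, and util.istft_real as the inverse transform are all assumptions, not shown in the source):

with torch.no_grad():
    # assume the network maps a magnitude spectrogram to a soft mask
    mask = model(torch.from_numpy(magnitude)).numpy()
estimate = mask * magnitude * np.exp(1j * angle)  # re-attach the mixture phase
y = util.istft_real(estimate, blockSize=blockSize, hopSize=hopSize)  # hypothetical inverse STFT
util.wavwrite(sys.argv[2], fs, y)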
Example #10
maxlen = 500

print "Loading data..."

# development data
D_valid = AudioDataset(config['taskfile_x_valid'],
                       config['taskfile_y_valid'],
                       datafile=config['datafile_valid'],
                       params_stft=config['params_stft'])

#print "  Loading validation data..."
#x_valid, y_valid, mask_valid = D_valid.get_padded_data_matrix(transform_x=transform_x, transform_y=transform_y, pad_value=mask_value, maxlen=maxlen)

for i in range(10):
    x = util.wavread(D_valid.x_wavfiles[i])[0:1, :]
    xr = D_valid.reconstruct_x(i)[0:1, :]
    if xr.shape[1] > x.shape[1]:
        xr = xr[:, :x.shape[1]]
    print "For file %d, NMSE between original x and reconstructed x is %e" % (
        i, np.mean((x - xr)**2) / np.mean(x**2))

    y = util.wavread(D_valid.y_wavfiles[i])[0:1, :]
    yr = D_valid.reconstruct_y(i)
    if yr.shape[1] > y.shape[1]:
        yr = yr[:, :y.shape[1]]
    print "For file %d, NMSE between original y and reconstructed y is %e" % (
        i, np.mean((y - yr)**2) / np.mean(y**2))

D_valid.reconstruct_audio(description="test_reconstruction_audio",
                          idx=list(range(10)))
Example #11
    inpath = []
    outpath = {}
    for i, j, k in os.walk(mix):
        for song in k:
            inpath += [mix + song]
            outpath[mix + song] = est + song

    for f in inpath:
        input_path = f
        output_path = outpath[f]
        print(f)

        blockSize = 4096
        hopSize = 2048

        #read the wav file
        x, fs = util.wavread(input_path)
        #downmix to single channel
        x = np.mean(x, axis=-1)
        #perform stft
        S = util.stft_real(x, blockSize=blockSize, hopSize=hopSize)
        magnitude = np.abs(S).astype(np.float32)
        angle = np.angle(S).astype(np.float32)

        #initialize the model
        model = Model.ModelSingleStep(blockSize)

        #load the pretrained model
        checkpoint = torch.load("savedModel_RNN_best.pt",
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
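
The loop is cut off after loading the checkpoint; under the same assumptions as the sketch in Example #9 (mask estimation, phase reattachment, a hypothetical util.istft_real), the per-file remainder might look like:

        model.eval()
        with torch.no_grad():
            mask = model(torch.from_numpy(magnitude)).numpy()
        estimate = mask * magnitude * np.exp(1j * angle)
        y = util.istft_real(estimate, blockSize=blockSize, hopSize=hopSize)
        util.wavwrite(output_path, fs, y)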