Example #1
    def read_all_audio(self):
        psongs = [['01', '09', 13, 18], ['05', '08', 11,
                                         15], ['07', 15, 16, 20],
                  ['04', 10, 12, 17], ['04', 10, 12, 17], ['05', 11, 19, 20],
                  ['02', '03', '06', 14], ['07', 15, 16, 20],
                  ['05', '08', 11, 15], ['01', '09', 13, 18],
                  ['05', 11, 19, 20], ['02', '03', '06', 14]]

        for i in range(len(self.fld)):
            usr = self.fld[i]
            for snum in psongs[i]:
                file_path1 = self.root_dir + str(usr) + '/sing/' + str(
                    snum) + '.wav'
                file_path2 = self.root_dir + str(usr) + '/read/' + str(
                    snum) + '.wav'

                audio1 = core.load(file_path1, self.sr)[0]
                audio2 = core.load(file_path2, self.sr)[0]

                self.all_audio[file_path1] = audio1
                self.all_audio[file_path2] = audio2

        print "All audio read & stored"
        f = open('NUS_data_dict.pkl', 'wb')
        pickle.dump(self.all_audio, f)
        f.close()
Example #2
def create_random_data(size=100, max_size=100):
    rand_voc = np.random.randint(max_size, size=size)
    rand_bass = np.random.randint(max_size, size=size)
    rand_drums = np.random.randint(max_size, size=size)
    rand_other = np.random.randint(max_size, size=size)

    count = 1
    print("Generating random mix...")
    for i_voc, i_bass, i_drums, i_other in zip(
            rand_voc, rand_bass, rand_drums, rand_other):
        inst_files = [FILE_DRUMS, FILE_BASS, FILE_OTHER, FILE_VOCAL]
        inst_files.remove(TRAIN_TARGET)
        y_train, _ = load(
            str(list_source_dir[i_voc] / TRAIN_TARGET), sr=None)
        y_inst1, _ = load(
            str(list_source_dir[i_bass] / inst_files[0]), sr=None)
        y_inst2, _ = load(
            str(list_source_dir[i_drums] / inst_files[1]), sr=None)
        y_inst3, _ = load(
            str(list_source_dir[i_other] / inst_files[2]), sr=None)

        minsize = min([y_train.size, y_inst1.size,
                       y_inst2.size, y_inst3.size])

        y_train = y_train[:minsize]
        y_inst = y_inst1[:minsize] + \
            y_inst2[:minsize] + y_inst3[:minsize]
        y_mix = y_train + y_inst
        fname = "dsd_random%02d" % count

        util.SaveSpectrogramA(y_mix, y_train, fname)

        print("Saved:" + fname)

        count += 1
Example #3
def create_data(generate_high_data=False, aug=False):
    stretch = [1.1]
    for mix_dir, source_dir in zip(list_mix_dir, list_source_dir):
        assert(mix_dir.name == source_dir.name)
        fname = mix_dir.name

        print("Processing: " + fname)
        y_mix, sr = load(str(mix_dir / FILE_MIX), sr=None)
        y_train, _ = load(str(source_dir / TRAIN_TARGET), sr=None)

        assert(y_mix.shape == y_train.shape)

        util.SaveSpectrogramA(
            y_mix,
            y_train,
            fname,
            original_sr=sr,
            generate_high_data=generate_high_data)

        if aug:
            for st in stretch:
                y_mix_stretch = stretch_sound(y_mix, st)
                y_train_stretch = stretch_sound(y_train, st)
                util.SaveSpectrogramA(
                    y_mix_stretch,
                    y_train_stretch,
                    "%s_stretch%d" % (fname, int(st * 10)),
                    original_sr=sr)
Example #4
    def read_all_audio(self):
        print('Reading audio, to collect all of it in a dictionary')
        psongs = [['01', '09', 13, 18], ['05', '08', 11,
                                         15], ['07', 15, 16, 20],
                  ['04', 10, 12, 17], ['04', 10, 12, 17], ['05', 11, 19, 20],
                  ['02', '03', '06', 14], ['07', 15, 16, 20],
                  ['05', '08', 11, 15], ['01', '09', 13,
                                         18], ['05', 11, 19, 20],
                  ['02', '03', '06',
                   14]]  # List of all singers and their songs

        for i in range(len(self.fld)):
            usr = self.fld[i]
            for snum in psongs[i]:
                file_path1 = self.root_dir + str(usr) + '/sing/' + str(
                    snum) + '.wav'
                file_path2 = self.root_dir + str(usr) + '/read/' + str(
                    snum) + '.wav'

                audio1 = core.load(file_path1, self.sr)[0]
                audio2 = core.load(file_path2, self.sr)[0]

                self.all_audio[file_path1] = audio1
                self.all_audio[file_path2] = audio2

        f = open('NUS_data_dict.pkl',
                 'wb')  # Saved and read from the project directory
        pickle.dump(self.all_audio, f)
        f.close()
        print "All audio read & stored"
Example #5
def main(argv):
    os.makedirs(FLAGS.output_dir, exist_ok=True)
    ''' Initialize model '''
    unet = Unet()
    restore(net=unet, ckpt_path=FLAGS.ckpt_path)

    ''' Load data '''
    mix_wav, _ = load(FLAGS.original_wav, sr=SAMPLE_RATE)
    mix_wav_mag, mix_wav_phase = magphase(stft(mix_wav, n_fft=WINDOW_SIZE, hop_length=HOP_LENGTH))
    mix_wav_mag = mix_wav_mag[:, START:END]
    mix_wav_phase = mix_wav_phase[:, START:END]

    '''Load gt '''
    if FLAGS.gt:
        gt_wav, _ = load(FLAGS.original_gt, sr=SAMPLE_RATE)
        gt_wav_mag, gt_wav_phase = magphase(stft(gt_wav, n_fft=WINDOW_SIZE, hop_length=HOP_LENGTH))
        gt_wav_mag = gt_wav_mag[:, START:END]
        gt_wav_phase = gt_wav_phase[:, START:END]

    '''Save input spectrogram image and gt'''
    write_wav(FLAGS.output_dir+'original_mix.wav', 
                istft(mix_wav_mag * mix_wav_phase,win_length=WINDOW_SIZE,hop_length=HOP_LENGTH),
                SAMPLE_RATE, norm=True)
    spectogram_librosa(FLAGS.output_dir+'original_mix.wav',0)
    if FLAGS.gt:
        write_wav(FLAGS.output_dir+'gt.wav', 
                    istft(gt_wav_mag * gt_wav_phase,win_length=WINDOW_SIZE,hop_length=HOP_LENGTH),
                    SAMPLE_RATE, norm=True)
        spectogram_librosa(FLAGS.output_dir+'gt.wav',0)

    ''' run data '''
    inputs = mix_wav_mag[1:].reshape(1, 512, 128, 1)
    mask = unet(inputs).numpy().reshape(512, 128)
    predict = inputs.reshape(512, 128)*mask

    ''' evaluation metrics '''
    if FLAGS.gt:
        expand_pre = np.expand_dims(predict.flatten(), axis=0)
        expand_gt = np.expand_dims(gt_wav_mag[1:].flatten(), axis=0)
        expand_input = np.expand_dims(inputs.flatten(), axis=0)
        (SDR, SIR, SAR, _) = mir_eval.separation.bss_eval_sources(expand_gt,expand_pre)
        (SDR2, _, _, _) = mir_eval.separation.bss_eval_sources(expand_gt,expand_input)
        NSDR = SDR - SDR2 #SDR(Se, Sr) − SDR(Sm, Sr)

        fout = open(FLAGS.output_dir+'metrics.txt','a')
        print('*****SDR = '+ str(SDR) + ', SIR = '+ str(SIR) + ', SAR = '+ str(SAR) + ', NSDR = '+ str(NSDR) + '*****')
        fout.write('*****SDR = '+ str(SDR) + ', SIR = '+ str(SIR) + ', SAR = '+ str(SAR) + ', NSDR = '+ str(NSDR) + '*****')
        fout.close()

    ''' Convert model output to target magnitude '''
    target_pred_mag = np.vstack((np.zeros((128)), predict))

    ''' Write vocal prediction audio files '''
    write_wav(FLAGS.output_dir+'pred_vocal.wav', 
                istft(target_pred_mag * mix_wav_phase,win_length=WINDOW_SIZE,hop_length=HOP_LENGTH),
                SAMPLE_RATE, norm=True)

    spectogram_librosa(FLAGS.output_dir+'pred_vocal.wav',1)
Example #6
    def logmel(self):
        from librosa.feature import melspectrogram
        from librosa.core import load

        logmel_params = self.config['logmel_params']
        sr = logmel_params['sr']
        n_fft = logmel_params['n_fft']
        hop_length = logmel_params['hop_length']
        n_mels = logmel_params['n_mels']

        feature_path = os.path.join(
            self.dataset['feature_path'],
            'logmel_{}_{}_{}_{}'.format(sr, n_fft, hop_length, n_mels))
        if not os.path.exists(feature_path):
            os.mkdir(feature_path)

        x_train = []
        y_train = []
        f_train = []
        for i, row in self.dataset.train_data.iterrows():
            print('[Train] {}) Getting logmels from {}...'.format(
                i, row['cur_name']),
                  end='')
            wav_name = os.path.join(self.dataset['data_path'], row['cur_name'])
            wav_data, sr = load(wav_name, sr=sr)
            x_train.append(
                melspectrogram(wav_data,
                               sr=sr,
                               n_fft=n_fft,
                               hop_length=hop_length,
                               n_mels=n_mels))
            y_train.append(self._build_multilabel(row))
            f_train.append(row['cur_name'])
            print('done.')

        x_test = []
        y_test = []
        f_test = []
        for i, row in self.dataset.test_data.iterrows():
            print('[Test] {}) Getting mels from {}...'.format(
                i, row['cur_name']),
                  end='')
            wav_name = os.path.join(self.dataset['data_path'], row['cur_name'])
            wav_data, sr = load(wav_name, sr=sr)
            x_test.append(
                melspectrogram(wav_data,
                               sr=sr,
                               n_fft=n_fft,
                               hop_length=hop_length,
                               n_mels=n_mels))
            y_test.append(self._build_multilabel(row))
            f_test.append(row['cur_name'])
            print('done')

        self._save_pickles(feature_path, x_train, y_train, f_train, x_test,
                           y_test, f_test)
Example #7
def comp_lsd(ref_file, pred_file):
    ref = core.load(ref_file, sr=sr)[0]
    pred = core.load(pred_file, sr=sr)[0]
    stft_ref = np.abs(
        core.stft(ref, n_fft=nfft, hop_length=hop, win_length=wlen))
    stft_pred = np.abs(
        core.stft(pred, n_fft=nfft, hop_length=hop, win_length=wlen))
    logstft_ref = np.log(0.1 + stft_ref)
    logstft_pred = np.log(0.1 + stft_pred[:, :stft_ref.shape[1]])
    lsd = np.mean(
        np.sqrt(np.sum((logstft_ref[7:220] - logstft_pred[7:220])**2, axis=0)))
    return lsd
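
comp_lsd compares the log-magnitude STFTs of a reference file and a predicted file over bins 7-220 and averages the per-frame Euclidean distance. A minimal usage sketch, assuming the module-level sr, nfft, hop and wlen that the function reads are set; the values and file names below are placeholders:

# Placeholder analysis settings; comp_lsd picks these up from module scope.
sr, nfft, hop, wlen = 16000, 1024, 256, 1024

lsd = comp_lsd('reference.wav', 'predicted.wav')  # placeholder paths
print('LSD: {:.3f}'.format(lsd))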
Example #8
def main():
    # man 900 middle 750 default 500 frame_length = 900
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--speed', type=float, default=1.)
    parser.add_argument('-t', '--time', type=float, default=-10.)
    parser.add_argument('-o', '--output', type=str, default='output.wav')
    parser.add_argument('-i', '--input', type=str, default='input.wav')

    args = parser.parse_args()

    parameters = {}

    input_filename = args.input
    output_filename = args.output

    if not os.path.isfile(input_filename):
        raise RuntimeError('no input file')

    x, fs = core.load(input_filename)
    #f0, sp, ap = pw.wav2world(x, fs)
    frame_length = 1500  #100000 // int(calculateF0(f0)) // 2 * 2
    y, sr = core.load(input_filename, sr=fs)

    onset_frames = onset.onset_detect(x,
                                      sr=sr,
                                      wait=1,
                                      pre_avg=1,
                                      post_avg=1,
                                      pre_max=1,
                                      post_max=1)
    onset_times = librosa.frames_to_time(onset_frames)

    plt.plot(y)
    for i in onset_times:
        plt.plot([i * 22050, i * 22050], [-1, 1], color="red")

    S = librosa.stft(x)
    logS = librosa.amplitude_to_db(abs(S))

    plt.savefig('woman.png')

    if args.time < 0:
        parameters['origin_time'] = core.get_duration(y, sr)
        parameters['convert_time'] = parameters['origin_time'] / args.speed
    else:
        parameters['origin_time'] = core.get_duration(y, sr)
        parameters['convert_time'] = args.time
    parameters['sample_rate'] = sr
    parameters['frame_length'] = int(fs / 22050 * frame_length)

    #if parameters['convert_time'] / parameters['origin_time'] > 0.8:
    convert_upper_threshold(input_filename, output_filename, parameters)
Example #9
def test_pqmf():
    w, sr = load(WAV_FILE)

    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
    w, sr = load(WAV_FILE)
    w2 = torch.from_numpy(w[None, None, :])
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)

    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write('pqmf_output.wav', w2_.flatten().detach(), sr)
Example #10
def test_pqmf():
    w, sr = load(WAV_FILE)

    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
    w, sr = load(WAV_FILE)
    w2 = tf.convert_to_tensor(w[None, None, :])
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)
    w2_ = w2_.numpy()

    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write('tf_pqmf_output.wav', w2_.flatten(), sr)
Example #11
    def _prepare(self, apath):
        audio, _ = load(str(apath), self.sr)
        audio = self._normalize(audio)
        audio = audio.astype(np.float32)
        audio = self._random_crop(audio)

        return audio
Example #12
    def __getitem__(self, index):
        (seq, _) = load(self.file_names_wav[index], sr=None, mono=True)
        gap = max_len - seq.shape[0]
        #if not gap:
        wav_tensor = torch.cat([
                torch.LongTensor(self.hindsight) \
                    .fill_(0.),   #TODO numpy torch bridge
                utils.mu_law_encoding(
                    torch.from_numpy(seq), self.q_levels
                )
            ])
        '''
        else:
            wav_tensor = torch.cat([
                torch.LongTensor(self.hindsight) \
                        .fill_(0.),   #TODO numpy torch bridge
                    utils.mu_law_encoding(
                    torch.from_numpy(seq), self.q_levels
                    ),
                torch.LongTensor(gap).fill_(0.)
                ])

        '''

        spec_tensor = torch.from_numpy(np.load(self.file_names_spec[index], allow_pickle=False))
        #TODO add hindsight zeros to the spec_tensor

        return wav_tensor, spec_tensor
Example #13
def main():

    files = glob(config.data_path+'/*.wav') # + glob('data/*.mp3') # try ffmpeg -i input.mp3 output.wav

    data, meta = [], []

    for file_id, file in enumerate(files):

        print(f'reading: {file}')

        d, sample_rate = load(file, config.sample_rate)

        data.append(d)

        # synthesis
        # signal_recons = data_to_audio(data)
        # write(f'{file.split("/")[-1]}_{file_id}.wav', config.sample_rate, signal_recons)
        # signal_recons, sample_rate = load(f'{file.split("/")[-1]}_{file_id}.wav', config.sample_rate)

    data_min = min([min(d) for d in data])
    data_max = max([max(d) for d in data])
    data = [(d-data_min)/(data_max-data_min) for d in data]

    meta.extend([data_min, data_max])

    pickle_save([data,meta], config.data_path+('.pk' if config.data_path[-3:]!='.pk' else ''))
    print('saved data.')
Example #14
    def __data_generation(self, list_IDs_temp):
        #'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        Y = np.empty((self.batch_size, self.n_classes), dtype=bool)

        # Generate data
        for i, row in list_IDs_temp.iterrows():
            if row.path not in self.audio.keys():
                #print('{} - loading {}'.format(i, row.path))
                #sys.stdout.flush()
                aud, fs = load(row.path)
                coefs = melspectrogram(aud,
                                       sr=fs,
                                       n_fft=2**12,
                                       hop_length=2**11,
                                       n_mels=64,
                                       fmax=10000)
                self.audio[row.path] = coefs
                #print('{} - loaded!'.format(i))
                #sys.stdout.flush()
                # we've loaded one more track, add it to the counter
                self.pbar.update(1)

            start_ind = np.random.randint(low=0,
                                          high=self.audio[row.path].shape[1] -
                                          self.window)
            clip = self.audio[row.path][:, start_ind:start_ind + self.window]
            #
            # start_ind = np.random.randint(low=0,high=coefs.shape[1]-self.window)
            # clip = coefs[:,start_ind:start_ind+self.window]
            X[i, :, :, 0] = clip
            Y[i, :] = row.iloc[2:-1].values.astype(np.int64)

        return X, Y
Example #15
def get_features(filename, *, winlen, winstep, n_mcep, mcep_alpha, minf0,
                 maxf0, type):
    wav, sr = load(filename, sr=None)

    # get f0
    x = wav.astype(float)
    _f0, t = world.harvest(x,
                           sr,
                           f0_floor=minf0,
                           f0_ceil=maxf0,
                           frame_period=winstep * 1000)
    f0 = world.stonemask(x, _f0, t, sr)

    window_size = int(sr * winlen)
    hop_size = int(sr * winstep)

    # get mel
    if type == 'mcc':
        spec = world.cheaptrick(x, f0, t, sr, f0_floor=minf0)
        h = sptk.sp2mc(spec, n_mcep - 1, mcep_alpha).T
    else:
        h = mfcc(x, sr, n_mfcc=n_mcep, n_fft=window_size, hop_length=hop_size)
    h = np.vstack((h, f0))
    maxlen = len(x) // hop_size + 2
    h = repeat_last_padding(h, maxlen)
    id = os.path.basename(filename).replace(".wav", "")
    return (id, x, h)
Example #16
def load_wavs(file_path, sr):
    files = librosa.util.find_files(file_path, ext="wav")
    # Load the wav files (changed to return a generator)
    wavs = (load(path=wav, sr=sr)[0] for wav in files)
    print('Wave Loading Complete')

    return wavs
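
Because this version yields a generator, files are only decoded when the result is iterated. A minimal usage sketch (the directory and sample rate are placeholders):

for wav in load_wavs('./data/wavs', sr=16000):  # placeholder directory
    print(wav.shape)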
Example #17
def compute_features(data_loc='../data/genres/'):
    file_names = glob.glob(data_loc + '*/*.au')
    file_names.sort()

    assert len(
        file_names
    ) == 1000, "ERROR: Couldn't read files properly. Is your data_loc correct?"

    # Setup some vars
    sampleRate = 22050
    n_fft = 1024

    X = []
    genres_list = list(song_labels_dic.keys())
    genres_list.sort()
    genre_flag = 0

    if not os.path.exists('../ckpt'):
        os.makedirs('../ckpt')

        for file in file_names:
            song, _ = lc.load(file)
            song_dft = np.abs(lc.stft(song, n_fft=n_fft))
            X.append(song_dft)
            if len(X) == 100:
                print('Writing: ' + genres_list[genre_flag] + '.pkl file...')
                with open('../ckpt/' + genres_list[genre_flag] + '.pkl',
                          'wb') as f:
                    pickle.dump(X, f)
                X = []
                genre_flag = genre_flag + 1

    return True
Example #18
def plot_audio(filepath):
	x, fs = load(filepath, sr = None, mono = True)
	plt.figure(figsize=(16,4))
	waveplot(x,sr=fs)
	plt.title("Waveform for {}".format(filepath))
	plt.tight_layout()
	plt.show()
Example #19
def load_wavs(file_path, sr):
    wavs = []
    file = librosa.util.find_files(file_path, ext="wav")
    for wav in file:
        audio, _ = load(path=wav, sr=sr)
        wavs.append(audio)
    return wavs
Example #20
    def process_sounds(self):
        '''
        processes downloaded files below self.root after running download_files().
        DEPRECATED Don't use this for the pretrained VGGish!
        TODO: this should go to preprocessing if kept at all
        '''
        self.info_df = self.df[['gen', 'id']].copy()
        for path, dirs, files in os.walk(self.root):
            for file in files:
                if file.endswith('.mp3'):
                    y, sr = load(os.path.join(path, file))
                    if self.convert_to_wav:
                        write_wav(
                            os.path.join(path, file.replace('.mp3', '.wav')),
                            y, self.input_sr)
                    if self.make_mel_spec:
                        S = librosa.feature.melspectrogram(
                            y,
                            sr=self.sr,
                            n_mels=self.n_mels,
                            hop_length=self.hop_length)
                        log_S = librosa.amplitude_to_db(S, ref=np.max)
                        np.save(os.path.join(path, 'mel_spec.npy'), log_S)
                        if self.save_img:
                            scipy.misc.imsave(
                                os.path.join(path, 'mel_spec.jpg'), log_S)
                        if self.extract_chunks:
                            if log_S.shape[1] < self.len_chunks:
                                print(
                                    'recording {} has length {} which is '
                                    'shorter than required chunk length.'
                                    .format(file, log_S.shape[1]))
                                continue
                            self.spec_chunks(log_S, path=path)
        self.info_df.to_csv(os.path.join(self.root, 'info.csv'), sep='\t')
Example #21
File: preprocess.py Project: atliSig/LAS
def load_wav(file_path: str) -> Tuple[int, np.ndarray]:
    '''
    reads in a .wav file, returns 
    the sample rate and signal
    '''
    y, sample_rate = load(file_path)
    return sample_rate, y
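
A minimal usage sketch for load_wav; note that it returns the sample rate first and the signal second (the path is a placeholder):

sample_rate, signal = load_wav('example.wav')  # placeholder path
print(sample_rate, signal.shape)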
Example #22
def calculate_spectrograms(audio_dir, out_dir, file_type='.mp3'):
    files = glob.glob(os.path.join(audio_dir, '*' + file_type))
    num_files = len(files)

    print(f'{num_files} audio files found')

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    for i, file_name in enumerate(sorted(files)):
        start_time = time.time()
        track_name = os.path.basename(file_name)
        track_id = os.path.splitext(track_name)[0]
        try:
            song_name = track_to_song[track_id]
        except KeyError:
            continue
        if song_name in wmf_item2i.keys():
            audio_file = os.path.join(audio_dir,
                                      track_name)
            out_file = os.path.join(out_dir, track_id) + '.npy'
            if not os.path.exists(out_file):
                y, sr = load(audio_file)
                mel_spectrogram = melspectrogram(y=y, sr=sr, n_fft=1024, hop_length=512, n_mels=128)

wmf_item2i = pickle.load(open('../../index_dicts.pkl', 'rb'))['item2i']
track_to_song = pickle.load(open('../../track_to_song.pkl', 'rb'))
calculate_spectrograms(audio_dir='../../data/MillionSongSubset/audio',
                       out_dir='../../data/MillionSongSubset/spectrograms')
Example #23
def mp3_spec_file(filename):
    x, sr = load(filename)
    S = librosa.stft(x, N_FFT)
    p = np.angle(S)

    S = np.log1p(np.abs(S))
    return S, sr
Example #24
    def one_wave_load_func(path):
        wave = load(f'{data_dir}/{path[0]}', sr=sr)[0]
        wave = cut_pad_wave(wave, const_length)

        assert wave.shape[
            0] == const_length, f'{wave.shape[0]}, {const_length}'
        return wave.reshape((1, -1))
Example #25
def get_beat_sync_spectrums(audio):
    """
    Returns a beat-sync 3-energy-band spectrogram
    :param audio: Path to the song
    :return: Array containing energy in band1, band2, band3
    """
    y, sr = core.load(audio, sr=44100)
    eql_y = EqualLoudness()(y)
    tempo, framed_dbn = self_tempo_estimation(y, sr)
    framed_dbn = np.append(framed_dbn, len(y) / sr)  # keep the final segment that runs to the end of the song
    band1 = (0, 220)
    band2 = (220, 1760)
    band3 = (1760, sr / 2)
    band1list = []
    band2list = []
    band3list = []
    for i in range(1, len(framed_dbn)):
        fft_eq = abs(np.fft.fft(eql_y[int(framed_dbn[i - 1] * sr):int(framed_dbn[i] * sr)]))
        freqs = np.fft.fftfreq(len(fft_eq), 1 / sr)
        band1list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band1[0], freqs < band1[1]))]**2))))
        band2list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band2[0], freqs < band2[1]))]**2))))
        band3list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band3[0], freqs < band3[1]))]**2))))

    band1list = np.array(band1list).transpose()
    band2list = np.array(band2list).transpose()
    band3list = np.array(band3list).transpose()
    return np.vstack([band1list, band2list, band3list])
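
The returned array has one row per energy band and one column per inter-beat segment. A minimal usage sketch, assuming EqualLoudness and self_tempo_estimation are importable as in the snippet above (the file name is a placeholder):

bands = get_beat_sync_spectrums('song.wav')  # placeholder path
print(bands.shape)  # (3, number_of_beat_segments)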
Example #26
def generate_cqt(i, file_path, offset=0, duration=None):
    print('[{}] Opening'.format(i), file_path)
    data, sample_rate = load(file_path,
                             sr=None,
                             offset=offset,
                             duration=duration)
    print('[{}] Sample Rate:'.format(i), sample_rate, 'shape:', data.shape)

    if len(data.shape) == 2:
        with Timer('[{}] Converted to mono'.format(i)):
            print('[{}] Converting to mono channel...'.format(i))
            data = to_mono(data)

    with Timer('[{}] Resampling'.format(i)):
        print('[{}] Resampling to'.format(i), TARGET_SAMPLE_RATE, 'Hz...')
        downsampled_data = resample(data,
                                    orig_sr=sample_rate,
                                    target_sr=TARGET_SAMPLE_RATE)
        # downsampled_data = data
        print('[{}] Downsampled to'.format(i), TARGET_SAMPLE_RATE,
              'Hz shape is now', downsampled_data.shape)

    with Timer('[{}] CQT'.format(i)):
        print('[{}] Generating CQT...'.format(i))
        cqt_result = np.abs(
            cqt(downsampled_data,
                sr=TARGET_SAMPLE_RATE,
                hop_length=HOP_LENGTH,
                n_bins=TOTAL_BINS,
                bins_per_octave=BINS_PER_OCTAVE))

    return cqt_result
Example #27
def LoadAudio(fname):
    y, sr = load(fname, sr=C.SR)
    spec = stft(y, n_fft=C.FFT_SIZE, hop_length=C.H, win_length=C.FFT_SIZE)
    mag = np.abs(spec)
    mag /= np.max(mag)
    phase = np.exp(1.j * np.angle(spec))
    return mag, phase
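
LoadAudio returns a normalized magnitude and a unit-modulus phase so the spectrogram can be recombined and inverted later, as Example #5 does with istft. A minimal reconstruction sketch, assuming the same C.FFT_SIZE and C.H constants; ReconstructAudio is a hypothetical helper name:

from librosa.core import istft

def ReconstructAudio(mag, phase):
    # Recombine magnitude and phase, then invert the STFT back to a waveform.
    return istft(mag * phase, hop_length=C.H, win_length=C.FFT_SIZE)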
Example #28
def train():

    print('Loading dataset: {} ...'.format(WAV_FILE))

    samples, _ = libcore.load(WAV_FILE, sr=SAMPLING_RATE)
    power = np.mean(samples ** 2) * 0.5

    print('Sampling training set nb_samples={}, size=({},{}) ...'.format(TS_SIZE, SEQ_LEN, INPUT_DIM))
    training_set = np.array([sample_chunk(samples, power).T for _ in range(TS_SIZE)])

    print('Constructing autoencoder ...')

    inputs = Input(shape=(SEQ_LEN, INPUT_DIM))
    enc_1 = GRU(128)(inputs)
    features = RepeatVector(SEQ_LEN)(enc_1)
    dec_0 = GRU(128, return_sequences=True)(features)
    dec_1 = GRU(INPUT_DIM, return_sequences=True)(dec_0)
    autoencoder = Model(inputs, dec_1)

    autoencoder.summary()
    autoencoder.compile(optimizer='rmsprop', loss='mse')

    model_cb = ModelCheckpoint(WEIGHT_FILE_PATTERN, monitor='val_loss', verbose=0,
                               save_best_only=False, save_weights_only=False, mode='auto', period=SAVE_AFTER)

    print('Training autoencoder for {} epochs. Save each {}th epoch ...'.format(T_EPOCHS, SAVE_AFTER))

    history = autoencoder.fit(training_set, training_set, nb_epoch=T_EPOCHS, validation_split=0.1, callbacks=[model_cb])
Example #29
def LoadAudio(fname):
    y, sr = load(fname, sr=C.SR)
    spec = stft(y, n_fft=C.FFT_SIZE, hop_length=C.H, win_length=C.FFT_SIZE)
    mag = np.abs(spec)
    mag /= np.max(mag)
    phase = np.exp(1.j*np.angle(spec))
    return mag, phase
Example #30
    def make_blocking_data(self):
        xData, yData = list(), list()
        path = self.featurePath + self.name + '/'
        for j, filename in enumerate(os.listdir(path)):
            print(f"{self.name} {filename} ({j + 1})")
            WavPath = path + filename
            y, sr = load(WavPath, mono=True)
            S = melspectrogram(y, sr).T
            S = S[:-1 * (S.shape[0] % 128)]
            num_chunk = S.shape[0] // 128
            data_chunks = np.split(S, num_chunk)
            xChunks, yChunks = list(), list()
            for unit in data_chunks:
                xChunks.append(unit)
                yChunks.append(self.labelDict[self.name])
            xData.append(xChunks)
            yData.append(yChunks)
        xData = [unit for record in xData for unit in record]
        yData = [unit for record in yData for unit in record]

        self.features = torch.tensor(data=xData, device=device)
        self.labels = torch.tensor(data=yData, device=device)
        print(self.features.shape)
        print(self.labels.shape)
        self.x_cat_data.append(self.features)
        self.y_cat_data.append(self.labels)
        return
Example #31
def load_audio(fname):
    y = load(fname, sr=16000)[0]
    spec = stft(y, n_fft=1024, hop_length=512, win_length=1024)
    spec = np.pad(spec, [(0, 0), (0, 1024 - spec.shape[1] % 1024)], 'constant')
    mag = np.abs(spec)
    mag /= np.max(mag)
    phase = np.exp(1.j * np.angle(spec))
    return mag, phase, y.shape[0]
Example #32
File: beatmatch.py Project: djdapz/autoDJ
def mix_maker(playlist, rootDir):
    """
    Creates a seamless mix of all the songs in a playlist. Songs crossfade into one another. The function assumes
    similar BPM and a sample rate of 44100 Hz.
    
    Input Parameters
    ------------------------
    
    playlist: list of paths to files containing songs for analysis and mixing. 
    
    
    Returns
    ------------------------
    
    a continuous mix of all songs as one audiofile. 
    """

 
    #sample_list = np.zeros(len(playlist), dtype = object)
    #mix = []
    if '.mp3' in playlist[0]:
        print(1)
    else:
        playlist = playlist[1:]
    playlist_length = len(playlist)
    print('iteration 0:')

    samples, sr = load(rootDir + "/" + playlist[0], 44100)
    mix = samples
    playlist = playlist[1:]
    playlist_length = len(playlist)

    x = 0
    while playlist_length > 0:
        print("iteration: ")
        x = x + 1
        print(x)
        print('playlist_length')
        print(len(playlist))
        samples, sr = load(rootDir + "/" + playlist[0], 44100)
        mix = beat_match(mix, samples, sr)
        playlist = playlist[1:]
        playlist_length = len(playlist)

    return mix
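
A minimal usage sketch for mix_maker, assuming beat_match is available as in the project (file names and the root directory are placeholders); the finished mix can be written out with soundfile:

import soundfile as sf

playlist = ['track_a.mp3', 'track_b.mp3']      # placeholder file names
mix = mix_maker(playlist, '/path/to/music')    # placeholder root directory
sf.write('mix.wav', mix, 44100)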
Example #33
    def __computedata(self, path, samplemetadata):
        meta, pitch = samplemetadata

        audiodat = lrco.load(join(path, meta[0]), sr=self.sr,
                             offset=meta[1], duration=meta[2])
        audiodat = ExtractMonoAudioFiles.featurefunc(*audiodat).T

#        pitchvect = np.array([pitch] * audiodat.shape[0])

        #return (audiodat, pitch)
        return {'features': audiodat, 'label': pitch}
Example #34
list_source_dir.extend([os.path.join(PATH_DSD_SOURCE[1], f)
                        for f in os.listdir(PATH_DSD_SOURCE[1])])
list_source_dir = sorted(list_source_dir)

list_mix_dir = [os.path.join(PATH_DSD_MIXTURE[0], f)
                for f in os.listdir(PATH_DSD_MIXTURE[0])]
list_mix_dir.extend([os.path.join(PATH_DSD_MIXTURE[1], f)
                     for f in os.listdir(PATH_DSD_MIXTURE[1])])
list_mix_dir = sorted(list_mix_dir)


for mix_dir, source_dir in zip(list_mix_dir,  list_source_dir):
    assert(mix_dir.split("/")[-1] == source_dir.split("/")[-1])
    fname = mix_dir.split("/")[-1]
    print("Processing: " + fname)
    y_mix, sr = load(os.path.join(mix_dir, FILE_MIX), sr=None)
    y_vocal, _ = load(os.path.join(source_dir, FILE_VOCAL), sr=None)
    y_inst = sum([load(os.path.join(source_dir, f), sr=None)[0]
                  for f in [FILE_DRUMS, FILE_BASS, FILE_OTHER]])

    assert(y_mix.shape == y_vocal.shape)
    assert(y_mix.shape == y_inst.shape)

    util.SaveSpectrogram(y_mix, y_vocal, y_inst, fname)


rand_voc = np.random.randint(100, size=50)
rand_bass = np.random.randint(100, size=50)
rand_drums = np.random.randint(100, size=50)
rand_other = np.random.randint(100, size=50)
Example #35
File: test.py Project: djdapz/autoDJ
def main():
	samples, sr = load(rootDir+"/" +playlist[0], 44100)
	song2 = fade(sample, type = "in", end = beat2[32])
Example #36
from os import path
import numpy as np
import soundfile as sf
from audio_helpers import play_audio
from librosa.effects import pitch_shift
from librosa.core import load
from librosa.output import write_wav


DIR = 'data/wave'
fn = '2.wav'
base_name, ext = path.splitext(fn)
FN = path.join(DIR, fn)
FN_NEW = path.join(DIR, '{}_shifted{}'.format(base_name, ext))

x, fs = load(FN)
print "Script loaded file with fs {}".format(fs)


def to_pcm(x):
    max_val = np.iinfo(np.int16).max
    return (x * max_val).astype(np.int16)


shifted = pitch_shift(x, fs, 2)
sf.write(FN_NEW, shifted, fs, subtype="PCM_24")
play_audio(FN_NEW)
Example #37
# -*- coding: utf-8 -*-
"""
Created on Sat May  7 16:45:29 2016

@author: parallels
"""

import functions
from librosa.util import find_files
from librosa.core import load

audiofilelist = find_files("database/audios/", ext="wav")
print("saving peaks....")
for audiofile in audiofilelist:
    y,sr = load(audiofile)
    filename = audiofile.split("/")[-1]+".npy" # -1 means the last name of the directory
    functions.save_maximum_array(y,filename)
    print "saved:" + filename
Example #38
# -*- coding: utf-8 -*-
"""
Created on Sat May  7 13:51:42 2016

@author: parallels
"""
import numpy as np
from librosa.core import load,stft
import matplotlib.pyplot as plt
from librosa.display import specshow
import functions
#from scipy.spatial.distance import euclidean

y,sr = load("wiwym.wav")
rec,sr = load("recording.wav")
y = y[:sr*30]

spec = np.abs(stft(y, n_fft=4960, hop_length=512))
query = np.abs(stft(rec, n_fft=4960, hop_length=512))
maximum_spec = find_peak(spec, 30)
maximum_query = find_peak(query, 30)

plt.plot(overlap)
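
# NOTE: the lines below are the body of a per-song loop from the source file; the
# loop header and the definitions of PATH_MENDLEY, songname, data, stem_voc,
# all_voctracks and all_insttracks are not part of this excerpt.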
    stem_inst = []
    stems_path = os.path.join(PATH_MENDLEY, songname, data["stem_dir"])
    mixfilename = data["mix_filename"]
    for s in data["stems"]:
        stem = data["stems"][s]
        fname = stem["filename"]

        print(
            "stem: %s %s %s" % (fname, stem["component"], stem["instrument"]))
        if (stem["instrument"].find("male") >= 0) or \
                (stem["instrument"].find("singer") > 0):
            stem_voc.append(fname)
            all_voctracks.append(fname)
            print("Is vocal!")
        else:
            stem_inst.append(fname)
            all_insttracks.append(fname)

    print("detected vocals:")
    print(stem_voc)
    if (len(stem_voc) == 0) or (len(stem_inst) == 0):
        print("empty vocal or inst...skip")
        continue
    audio_vocal = sum([load(os.path.join(stems_path, f), sr=None, mono=True)[0]
                       for f in stem_voc])
    audio_inst = sum([load(os.path.join(stems_path, f), sr=None, mono=True)[0]
                      for f in stem_inst])
    audio_mix, _ = load(os.path.join(PATH_MENDLEY, songname, mixfilename),
                        sr=None, mono=True)
    util.SaveSpectrogram(audio_mix, audio_vocal, audio_inst, songname)