Example #1
def test_file_loading():
    # initiate dsdtools

    dsd = dsdtools.DB(root_dir="data/DSD100subset")
    tracks = dsd.load_dsd_tracks()

    assert len(tracks) == 4

    for track in tracks:
        assert track.audio.shape[1] > 0

    # load only the dev set
    tracks = dsd.load_dsd_tracks(subsets='Dev')

    assert len(tracks) == 2

    # load both the Dev and Test subsets
    tracks = dsd.load_dsd_tracks(subsets=['Dev', 'Test'])

    assert len(tracks) == 4

    # load only a single id
    tracks = dsd.load_dsd_tracks(ids=55)

    assert len(tracks) == 1
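A small follow-up sketch (not part of the original test) of the per-track attributes these examples rely on; the attribute names appear in the other examples on this page, while the exact audio shape is an assumption:

track = tracks[0]
print(track.name)         # track title
print(track.rate)         # sample rate of the mixture
print(track.audio.shape)  # stereo mixture, assumed shape (n_samples, 2)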
Example #2
def test_evaluate(method):

    dsd = dsdtools.DB(root_dir="data/DSD100subset", evaluation=method)

    # process dsd but do not save the results
    assert dsd.evaluate(
        user_function=user_function1,
        ids=55
    )
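user_function1 is not defined in this excerpt; a minimal stand-in, assuming the dsdtools convention (visible in the later examples) that a user function receives a track and returns a dict of estimate arrays shaped like the mixture, might look like:

import numpy as np

def user_function1(track):
    # trivial "separation": pass the mixture through as the vocals estimate
    # and return a silent accompaniment alongside it
    return {
        'vocals': track.audio,
        'accompaniment': np.zeros_like(track.audio),
    }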
Example #3
    def __init__(self, root_dir=CONSTS.DB_PATH, targets=CONSTS.TARGETS):
        self.DB = dsdtools.DB(root_dir=root_dir)
        self.dev_set = self.DB.load_dsd_tracks(subsets='Dev')
        self.test_set = self.DB.load_dsd_tracks(subsets='Test')
        self.targets = targets
        # one compiled model per separation target
        self.models = [
            model_constructor.make_compile_model(target)
            for target in self.targets
        ]
        self.models_input = processing.make_input(self.dev_set)
Example #4
def test_file_loading_valid():
    # initiate dsdtools

    dsd = dsdtools.DB(root_dir="data/DSD100subset", valid_ids=55)
    tracks = dsd.load_dsd_tracks(subsets='Dev')
    # of the two Dev tracks only one remains (id=81); id 55 was moved to the Valid split
    assert len(tracks) == 1
    assert tracks[0].id == 81

    tracks = dsd.load_dsd_tracks(subsets='Valid')
    assert len(tracks) == 1
    assert tracks[0].id == 55

    with pytest.raises(ValueError):
        tracks = dsd.load_dsd_tracks(subsets=['Dev', 'Valid'])
Example #5
def musdb2stft(save_dir='./data/', target='wo_vocals'):
    """
    create stft dataset from MUSDB18
    :param save_dir: the path for the wanted data directory
    :param target: music target. for the original setup in the paper run twice, \
                   once for 'wo_vocals' and once for 'mixture'
    """
    if opts.dataset == 'MUSDB18':
        mus = musdb.DB(root_dir='./dataset/musdb18')
        tracks = mus.load_mus_tracks(subsets='train')
    elif opts.dataset == 'DSD100':
        dsd = dsdtools.DB(root_dir='./dataset/DSD100')
        tracks = dsd.load_dsd_tracks(subsets='Dev')
    else:
        sys.exit("Only support DSD100|MUSDB18")

    save_dir = os.path.join(save_dir, target)
    if not os.path.exists(save_dir):
        print("Creating directory: {}".format(save_dir))
        os.makedirs(save_dir)
    for track in tqdm(tracks):

        music = switch_music(target, track)
        # downmix both channels to mono, then resample the audio to 20480 Hz
        music = music.mean(axis=1)
        samplerate = 20480
        music = librosa.resample(music, track.rate, samplerate)

        # parameter settings; the STFT is 257 x 256 (bins x frames) and is cropped to 256 x 256 below
        fft_size = 512
        hopsamp = fft_size // 8
        part_duration = 255 / (samplerate / hopsamp)
        length_song = music.shape[0]
        batch_size = round(samplerate * part_duration)
        counter = 1

        # data augmentation: take ~0.8 s chunks of audio at different start offsets to create shifted STFTs
        for shift_duration in tqdm([0, 0.3, 0.6]):
            shift_len = round(shift_duration * samplerate)
            number_of_parts = int(
                np.floor((length_song - shift_len) / batch_size))
            data = music[shift_len:number_of_parts * batch_size + shift_len]
            data2 = data.reshape(number_of_parts,
                                 int(data.size / number_of_parts))
            for row in data2:
                stft_full = librosa.core.stft(row,
                                              n_fft=fft_size,
                                              hop_length=hopsamp,
                                              win_length=fft_size)
                stft_full = stft_full[0:-1, :]  # drop the top frequency bin (257 -> 256 rows)
                stft_mag = abs(stft_full)
                stft_mag = np.interp(stft_mag,
                                     (stft_mag.min(), stft_mag.max()),
                                     (-0, +1))
                stft_mag = (stft_mag**0.3) * 255
                stft_mag = stft_mag.astype('uint8')
                if opts.dataset == 'MUSDB18':
                    songname = track.name + '_' + str(counter)
                else:
                    songname = track.filename + '_' + str(counter)
                songname = os.path.join(save_dir, songname)
                if stft_mag.sum() != 0:
                    imageio.imwrite(songname + '.png', stft_mag)
                counter += 1
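switch_music is referenced above but not shown; a plausible sketch, assuming 'wo_vocals' maps to the accompaniment stem and 'mixture' to the full mix (both exposed on dsdtools/musdb track objects), could be:

def switch_music(target, track):
    # hypothetical helper: pick the audio array for the requested target
    if target == 'mixture':
        return track.audio
    elif target == 'wo_vocals':
        return track.targets['accompaniment'].audio
    raise ValueError("unknown target: {}".format(target))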
Example #6
        # multiply mask
        Yj = np.multiply(X, Mask)

        # invert to the time domain and trim to the same length as the original mixture
        target_estimate = istft(Yj)[1].T[:N,:]

        # set this as the source estimate
        estimates[target_name] = target_estimate

        # accumulate to the accompaniment if this is not vocals
        if target_name != 'vocals':
            accompaniment_source += target_estimate
    # set accompaniment source
    estimates['accompaniment'] = accompaniment_source

    return estimates

# initiate dsdtools
dsd = dsdtools.DB()

# default parameters
alpha = 1  # exponent for the ratio Mask
theta = 0.5  # threshold

dsd.run(
    functools.partial(IBM, alpha=alpha, theta=theta),
    estimates_dir='IBM',
    parallel=True,
    cpus=4
)
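The Example #6 fragment starts after Mask has already been computed; a hedged sketch of how such an ideal binary mask is typically built from the target and mixture STFT magnitudes (the stft parameters and the ideal_binary_mask name are assumptions; only alpha and theta come from the snippet above) is:

import numpy as np
from scipy.signal import stft

def ideal_binary_mask(target_audio, mixture_audio, alpha=1, theta=0.5):
    # compare target and mixture magnitude spectrograms per channel
    _, _, P = stft(target_audio.T, nperseg=4096)   # target STFT
    _, _, X = stft(mixture_audio.T, nperseg=4096)  # mixture STFT
    eps = np.finfo(float).eps
    return np.divide(np.abs(P) ** alpha, eps + np.abs(X) ** alpha) >= theta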
Example #7
    Voice.y = rs.resample(Voice.y, fs / 2, fs)
    Voice.y = Voice.y[0:numSamples]

    mS = np.multiply(SongMask, Mix.mX)
    Song.y = SM.istft(mS, Mix.pX, Parm)
    Song.y = rs.resample(Song.y, fs / 2, fs)
    Song.y = Song.y[0:numSamples]

    voc_array[:, 1] = Voice.y
    acc_array[:, 1] = Song.y

    # return any number of targets
    estimates = {
        'vocals': voc_array,
        'accompaniment': acc_array,
    }
    return estimates


# initiate dsdtools
dsd = dsdtools.DB(root_dir="./Wavfile")

# verify that my_function works correctly
if dsd.test(my_function):
    print("my_function is valid")

dsd.run(
    my_function,
    estimates_dir='./Audio/CNN_015',
)
Example #8
import dsdtools
import gc
import keras

from preprocessing import *
from consts import *
from models import *

dsd = dsdtools.DB(root_dir=DB_PATH)
dev_set = dsd.load_dsd_tracks(subsets='Dev')
test_set = dsd.load_dsd_tracks(subsets='Test')
targets = TARGETS
# build one model per separation target
models = list()
for t in targets:
    models.append(APModel(t))
models_input = processInput(dev_set)


def trainModel(target,
               epochs=EPOCHS,
               batch_size=BATCH_SIZE,
               save_epochs=False):
    model = models[targets.index(target)]
    model_target = processTarget(dev_set, target)
    checkpointer = keras.callbacks.ModelCheckpoint(model.name + '_DNN_model',
                                                   monitor='val_loss',
                                                   verbose=0,
                                                   save_best_only=False,
                                                   save_weights_only=False,
                                                   mode='auto',
                                                   period=1)
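trainModel is cut off right after the checkpoint callback; a hypothetical continuation, assuming a standard Keras fit call over the precomputed inputs and targets (the callback wiring and verbosity are guesses, not from the original), might be:

    # hypothetical continuation of trainModel, not from the original source
    model.fit(models_input,
              model_target,
              epochs=epochs,
              batch_size=batch_size,
              callbacks=[checkpointer] if save_epochs else [],
              verbose=1)
    return model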
Example #9
def test_stft_dsd(config,
                  checkpoint_dir,
                  output_folder='./outputs/',
                  method_name='abl_1',
                  target='vocals',
                  is_test=False):
    """
    Testing/evaluating the net. For given generator, pruduces sdr, sir and sar for DSD100 dataset.
    :param config: path to config file
    :param checkpoint_dir: checkpoint_dir: path to generator's saved parameters. In case of evaluating during training,
    path to checkpoints directory.
    :param output_folder: desired output path
    :param method_name: name of method
    :param target: desired target (vocals/drums/bass)
    :param is_test: flag, when running during training should be False. True for total DSD100 evaluation for a given checkpoint.
    :return: stat parameters for the net, added to tensorboard.
    """
    dsd = dsdtools.DB(root_dir='../../data/datasets/music/DSD100')
    tracks = dsd.load_dsd_tracks(subsets='Test')

    config = get_config(config)
    trainer = MUSIC_Trainer(config)
    enhance = 9

    if not is_test:
        tracks = [tracks[i] for i in [17, 41, 6, 23, 31]]
    last_gen_name = get_model_list(checkpoint_dir, "gen")
    state_dict = torch.load(last_gen_name)
    trainer.gen.load_state_dict(state_dict['gen'])
    trainer.cuda()
    trainer.eval()
    encode, decode = trainer.gen.encode, trainer.gen.decode

    recon_list = []
    sdr_list, sir_list, sar_list = [], [], []
    sdr_list_inter, sir_list_inter, sar_list_inter = [], [], []

    print(method_name)
    for track in tqdm(tracks):
        sample_rate = 20480
        music_array, music_array_ref, music_array_inter = music_track(
            track, target)
        music_array_samp = librosa.resample(music_array.transpose(),
                                            track.rate, sample_rate)

        masker_l = GenerateMusic(music_array_samp[0, :],
                                 encode,
                                 decode,
                                 enhance=enhance)  # default was 8
        recon_vocals_l, recon_inter_l = masker_l.forward()
        masker_r = GenerateMusic(music_array_samp[1, :],
                                 encode,
                                 decode,
                                 enhance=enhance)
        recon_vocals_r, recon_inter_r = masker_r.forward()

        recon_vocals = np.vstack((recon_vocals_l, recon_vocals_r))
        recon_inter = np.vstack((recon_inter_l, recon_inter_r))
        recon_vocals = librosa.resample(recon_vocals, sample_rate, track.rate)
        recon_inter = librosa.resample(recon_inter, sample_rate, track.rate)

        recon_vocals = recon_vocals.transpose()
        recon_inter = recon_inter.transpose()
        recon_list.append(recon_vocals)

        if len(music_array_ref) > len(recon_vocals):
            len_diff = len(music_array_ref) - len(recon_vocals)
            recon_vocals = np.concatenate(
                (recon_vocals, recon_vocals[-len_diff:, :]))
            recon_inter = np.concatenate(
                (recon_inter, recon_inter[-len_diff:, :]))
        elif len(music_array_ref) < len(recon_vocals):
            recon_vocals = recon_vocals[0:len(music_array_ref), :]
            recon_inter = recon_inter[0:len(music_array_ref), :]

        reference_music = np.array([music_array_ref, music_array_inter])
        estimates_music = np.array([recon_vocals, recon_inter])

        sdr_b, _, sir_b, sar_b, _ = museval.metrics.bss_eval_images_framewise(
            reference_music, estimates_music, window=1323000, hop=661500)
        sdr, sir, sar = sdr_b[0], sir_b[0], sar_b[0]
        sdr_inter, sir_inter, sar_inter = sdr_b[1], sir_b[1], sar_b[1]
        sdr, sir, sar = np.mean(sdr[~np.isnan(sdr)]), np.mean(
            sir[~np.isnan(sir)]), np.mean(sar[~np.isnan(sar)])
        sdr_inter, sir_inter, sar_inter = np.mean(sdr_inter[~np.isnan(sdr_inter)]), \
                                          np.mean(sir_inter[~np.isnan(sir_inter)]), np.mean(sar_inter[~np.isnan(sar_inter)])

        sdr_list.append(sdr), sir_list.append(sir), sar_list.append(sar)
        sdr_list_inter.append(sdr_inter), sir_list_inter.append(
            sir_inter), sar_list_inter.append(sar_inter)

    sdr_max = max(sdr_list)
    sdr_max_loc = sdr_list.index(sdr_max)
    sir_max = max(sir_list)
    sir_max_loc = sir_list.index(sir_max)

    sdr_median, sir_median, sar_median = np.median(sdr_list), np.median(
        sir_list), np.median(sar_list)
    sdr_median_inter, sir_median_inter, sar_median_inter = np.median(sdr_list_inter), \
                                                           np.median(sir_list_inter), np.median(sar_list_inter)

    output_folder = output_folder + method_name
    if not os.path.exists(output_folder):
        print("Creating directory: {}".format(output_folder))
        os.makedirs(output_folder)

    stats = [
        sdr_median, sir_median, sar_median, sdr_median_inter, sir_median_inter,
        sar_median_inter
    ]
    stats_name = [
        'sdr_median', 'sir_median', 'sar_median', 'sdr_median_inter',
        'sir_median_inter', 'sar_median_inter'
    ]
    with open(os.path.join(output_folder, 'stats_final_test.txt'), 'w') as f:
        for stat_name, stat in zip(stats_name, stats):
            f.write("%s\n" % stat_name), f.write("%s\n" % stat)
            print(stat_name + ': ' + str(stat))
    stats_dic = {
        'sdr': sdr_list,
        'sir': sir_list,
        'sar': sar_list,
        'sdr_inter': sdr_list_inter,
        'sir_inter': sir_list_inter,
        'sar_inter': sar_list_inter
    }
    outfile = os.path.join(output_folder, 'final_results')
    save_obj(stats_dic, outfile)

    music_2_write = recon_list[sdr_max_loc]
    music_2_write_sec = recon_list[sir_max_loc]
    sf.write(
        os.path.join(output_folder,
                     'best_sdr_iter_' + tracks[sdr_max_loc].filename + '.wav'),
        music_2_write, track.rate)
    sf.write(
        os.path.join(output_folder,
                     'best_sir_iter_' + tracks[sir_max_loc].filename + '.wav'),
        music_2_write_sec, track.rate)
    return sdr_median, sir_median, sar_median, sdr_max, sir_max
Example #10
def dsd(request):
    return dsdtools.DB(root_dir=request.param)
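The dsd fixture above depends on parametrization that is not shown; a minimal sketch of how it is usually wired up in pytest (the parameter value here reuses the subset path from the other tests, not necessarily the original params) is:

import pytest
import dsdtools

@pytest.fixture(params=["data/DSD100subset"])
def dsd(request):
    return dsdtools.DB(root_dir=request.param)

def test_db_loads(dsd):
    assert len(dsd.load_dsd_tracks()) > 0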
Example #11
            elif instrument == 'drums':
                test_X.append(banks)
                test_y.append(1)
            elif instrument == 'bass':
                test_X.append(banks)
                test_y.append(2)

            i += SAMPLE_LENGTH


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('-f', dest='PATH')
    args = parser.parse_args()

    dsd = dsdtools.DB(root_dir=args.PATH)
    # process_train_track (defined elsewhere in the original file) is assumed
    # to fill the global train_X / train_y lists as each Dev track is processed
    dsd.run(process_train_track, subsets='Dev')

    print("Processing training data")
    train_X, val_X, train_y, val_y = train_test_split(train_X,
                                                      train_y,
                                                      test_size=.2,
                                                      random_state=6)
    train_X = np.array(train_X)
    val_X = np.array(val_X)

    np.save('data/train_X.npy', train_X)
    np.save('data/train_y.npy', train_y)
    np.save('data/val_X.npy', val_X)
    np.save('data/val_y.npy', val_y)
    print("Saved training data")
Example #12
def test_env(path):

    if path is not None:
        os.environ["DSD_PATH"] = path

    assert dsdtools.DB()