Example #1
def main():
    import sys
    sys.path.append("../lib")
    import time
    import config as cfg
    import multiprocessing as mp
    import simpleopt
    create_lock()
    dataset_type = simpleopt.get("dataset")
    with mp.Pool(processes=6) as pool:
        cfg.pool = pool
        pre_fn()
        l = []
        for gene in sys.argv[4:]:
            print("* Acquire GPU...")
            cfg.lock.acquire()  # acquire before the try so a failed acquire is not released
            print("* Got GPU!")
            try:
                t = time.time()
                r = eval_single(gene)
                t = time.time() - t
                l.append((gene, *r, t))
            finally:
                cfg.lock.release()
        clean_fn()
        for i, (gene, sdr, neg_gflops, neg_mega_pc, valid_sdr,
                t) in enumerate(l):
            print(
                "[%d/%d] gene=%s, sdr=%f, neg_gflops=%f, neg_mega_pc=%f, valid_sdr=%f, time=%f"
                %
                (i, len(l), gene, sdr, neg_gflops, neg_mega_pc, valid_sdr, t))
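Note: the acquire/try/finally pattern above can be exercised in isolation. Below is a minimal, self-contained sketch using threading.Lock and a stub eval_single in place of the project's FileLock-based GPU lock and real evaluator (all names here are placeholders):

import threading
import time

lock = threading.Lock()

def eval_single(gene):
    # Stub standing in for the real evaluation; returns a dummy result tuple.
    return (0.0, 0.0, 0.0, 0.0)

results = []
for gene in ("gene_a", "gene_b"):
    lock.acquire()  # acquire outside the try, as in the loop above
    try:
        t = time.time()
        r = eval_single(gene)
        t = time.time() - t
        results.append((gene, *r, t))
    finally:
        lock.release()
print(results)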
Example #2
def main():
  import sys
  sys.path.append("../lib")
  import simpleopt
  n_ch = int(simpleopt.get("ch"))
  total_parameters, n_forward_flop = count(None, None, n_ch)
  print("Trainable parameters: {:,}".format(total_parameters,))
  print("Forward FLOPs: {:,}".format(n_forward_flop,))
Example #3
def load_config(gene_ver=None, gene_value=None):
    import os
    import simpleopt
    import config as cfg
    os.makedirs(ckpt_root_path, exist_ok=True)
    if gene_ver is None:
        gene_ver = simpleopt.get("ver")
    if gene_value is None:
        gene_value = int(simpleopt.get("gene"))

    class DSD2Config:
        ckpt_name = "dsd2_%s_%d" % (
            gene_ver,
            gene_value,
        )
        batch_size = 1 if gene_ver in ("sa", ) else 3
        ckpt_path = os.path.join(ckpt_root_path, ckpt_name)
        max_lr = 1e-4 if gene_ver in (
            "sa",
            "sa2",
        ) and gene_value in (4, ) else 3e-4
        min_lr = 1e-5
        first_decay_period = 10000 * (3 // batch_size)
        final_step = 630001 * (3 // batch_size)
        ckpt_step = 10000 * (3 // batch_size)

    class MIR2Config:
        ckpt_name = "mir2_%s_%d" % (
            gene_ver,
            gene_value,
        )
        batch_size = 1 if gene_ver in ("sa", ) else 3
        ckpt_path = os.path.join(ckpt_root_path, ckpt_name)
        max_lr = 3e-4
        min_lr = 1e-5
        first_decay_period = 1000 * (3 // batch_size)
        final_step = 63001 * (3 // batch_size)
        ckpt_step = 1000 * (3 // batch_size)

    class MUS2FConfig:
        ckpt_name = "mus2f_%s_%d" % (
            gene_ver,
            gene_value,
        )
        batch_size = 1 if gene_ver in ("sa", ) else 3
        ckpt_path = os.path.join(ckpt_root_path, ckpt_name)
        max_lr = 1e-4 if gene_ver in (
            "sa",
            "sa2",
        ) and gene_value in (4, ) else 2e-4
        min_lr = 1e-5
        first_decay_period = 10000 * (3 // batch_size)
        final_step = 630001 * (3 // batch_size)
        ckpt_step = 10000 * (3 // batch_size)

    cfg.gene_ver = gene_ver
    cfg.gene_value = gene_value
    cfg.DSD2Config = DSD2Config
    cfg.MIR2Config = MIR2Config
    cfg.MUS2FConfig = MUS2FConfig
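Note: a hypothetical usage sketch of load_config (assumes ckpt_root_path and simpleopt are defined at module level, as in the original script; the gene values are examples only):

load_config(gene_ver="v1", gene_value=4)
import config as cfg
print(cfg.DSD2Config.ckpt_name)    # "dsd2_v1_4"
print(cfg.MUS2FConfig.ckpt_path)   # joined under ckpt_root_path
print(cfg.MIR2Config.batch_size)   # 3, since gene_ver is not "sa"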
Example #4
import sys, os
sys.path.append("../lib")
sys.path.append("../srv")
import numpy as np
import pylab as pl
import scipy.interpolate as ipl
import geneop
import simpleopt

breakpoint_path = simpleopt.get("input")
dataset_type = simpleopt.get("dataset")
score_type = simpleopt.get("score")
assert score_type in ("test", "valid")
init_status_path = os.path.join(breakpoint_path, "20_gen_init.pickle")
gen_status_path = os.path.join(breakpoint_path, "30_gen_%04d.pickle")

n_gen = 9999

max_sdr_list = []
mean_sdr_list = []
sdr_25_list = []
sdr_50_list = []
sdr_75_list = []

min_param_list = []
mean_param_list = []
param_25_list = []
param_50_list = []
param_75_list = []
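Note: gen_status_path is a %-format template; for example, generation 3 resolves to the path below (sketch):

print(gen_status_path % (3,))   # <breakpoint_path>/30_gen_0003.pickle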

Example #5
def pre_fn():
    import geneop
    import config as cfg
    import simpleopt
    dataset_type = simpleopt.get("dataset")
    gene_type = simpleopt.get("ver")
    geneop.load_type(gene_type)
    cfg.n = {
        "v1": 142,
        "v1pe": 142,
        "v1f16": 142,
    }[gene_type]
    if dataset_type == "mus2":
        '''cfg.worker_config = {
      "result_format": ("sdr", "neg_gflops", "neg_mega_pc", "valid_sdr"),
      "batch_size": 3,
      "max_lr": 3e-4,
      "min_lr": 1e-4,
      "warmup_fac": 0.3,
      "first_lr_period": 10000,
      "warmup_period": 1000,
      "n_hop_per_sample": 64,
      "work_sr": 16000,
      "hop_size": 512,
      "fft_size": 2048,
      "n_feature": 1024,
    }'''
        cfg.worker_config = {
            "result_format": ("sdr", "neg_gflops", "neg_mega_pc", "valid_sdr"),
            "batch_size": 3,
            "max_lr": 3e-4,
            "min_lr": 1e-4,
            "warmup_fac": 0.3,
            "first_lr_period": 100,
            "warmup_period": 100,
            "n_hop_per_sample": 64,
            "work_sr": 16000,
            "hop_size": 512,
            "fft_size": 2048,
            "n_feature": 1024,
        }
    else:
        cfg.worker_config = {
            "result_format": ("sdr", "neg_gflops", "neg_mega_pc", "valid_sdr"),
            "batch_size": 2,
            "max_lr": 3e-4,
            "min_lr": 1e-4,
            "warmup_fac": 0.3,
            "first_lr_period": 100,
            "warmup_period": 100,
            "n_hop_per_sample": 64,
            "work_sr": 8000,
            "hop_size": 256,
            "fft_size": 1024,
            "n_feature": 512,
        }
    if dataset_type == "mir2":
        cfg.worker_config.update(
            n_train=100,
            n_step=1500,
            n_eval=55,
            n_valid=20,
        )
    elif dataset_type == "dsd2":
        cfg.worker_config.update(
            n_train=30,
            n_step=3100,
            n_eval=15,
            n_valid=5,
        )
    elif dataset_type == "mus2":
        '''cfg.worker_config.update(
      n_train=60,
      n_step=30000,
      n_eval=30,
      n_valid=10,
    )'''
        cfg.worker_config.update(
            n_train=60,
            n_step=3100,
            n_eval=30,
            n_valid=10,
        )
    cfg.worker = __import__("worker_%s" % (dataset_type, ))
    cfg.worker.clean_env()
    cfg.worker.pre_fn()
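Note: __import__("worker_%s" % (dataset_type, )) is equivalent to importlib.import_module with the same module name. A runnable sketch, using a stdlib module in place of the project's worker_* modules:

import importlib

dataset_type = "mus2"
module_name = "worker_%s" % (dataset_type, )   # what the original code builds
# importlib.import_module(module_name) would load worker_mus2; demonstrated
# here with a module that is guaranteed to exist:
mod = importlib.import_module("json")
print(mod.__name__)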
Example #6
def create_lock():
    import config as cfg
    import simpleopt
    lockpath = simpleopt.get("lockpath")
    if not cfg.lock:
        # FileLock is imported at module level elsewhere (not shown here).
        cfg.lock = FileLock(lockpath)
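Note: a sketch of the same locking idea with the third-party filelock package, assuming the project's FileLock exposes a compatible acquire/release interface (the actual ../lib implementation may differ; the lock path is a placeholder):

from filelock import FileLock

lock = FileLock("/tmp/gpu.lock")
lock.acquire()
try:
    pass  # GPU-exclusive work goes here
finally:
    lock.release()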
Example #7
def main():
    import multiprocessing as mp
    import numpy as np
    import tensorflow as tf
    import os, sys
    from mir_util import infer, to_spec, to_wav_file
    import scipy.signal as sp
    import config as cfg
    sys.path.append("../lib")
    from eval_util import bss_eval
    from common import loadWav
    import redirect, simpleopt

    step_idx = int(simpleopt.get("step"))
    n_eval = simpleopt.get("first", None)
    with cfg.ConfigBoundary():
        batch_size = 1
        n_feature = cfg.frame_size // 2

        # Model
        print("* Initialize network")
        p_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(batch_size, 64, n_feature,
                                                  1),
                                           name="p_input")
        v_pred = infer(p_input, 2, False)
        if isinstance(v_pred, list):
            v_pred = v_pred[-1]
        v_pred = tf.clip_by_value(v_pred, 0.0, 1.0) * p_input

        x_input = np.zeros((batch_size, 64, n_feature, 1), dtype=np.float32)
        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialized, Load state
            sess.run(tf.compat.v1.global_variables_initializer())

            print("* Load checkpoint")
            ckpt_path = os.path.join(cfg.MIR2Config.ckpt_path,
                                     "checkpoint-%d" % (step_idx, ))
            tf.compat.v1.train.Saver().restore(sess, ckpt_path)
            print(" :Loaded: `%s`" % (ckpt_path, ))

            os.makedirs("./eval_output", exist_ok=True)
            name_list = []
            ret_list = []
            with mp.Pool(processes=1, initializer=worker_main) as pool:
                for (root, _, file_list) in os.walk(cfg.mir_wav_path):
                    file_list = sorted(f for f in file_list if not (
                        f.startswith("abjones") or f.startswith("amy")))
                    if n_eval is not None:
                        file_list = file_list[:int(n_eval)]
                    for i_file, filename in enumerate(file_list):
                        print("[%03d/%03d] SEND: `%s`" % (
                            i_file + 1,
                            len(file_list),
                            filename,
                        ))
                        name_list.append(filename)
                        path = os.path.join(root, filename)

                        mixed_wav, sr_orig = loadWav(path)
                        gt_wav_vocal = mixed_wav[:, 1]
                        gt_wav_inst = mixed_wav[:, 0]
                        mixed_wav = np.sum(mixed_wav, axis=1)

                        mixed_wav_orig = mixed_wav
                        gt_wav_vocal_orig = gt_wav_vocal
                        gt_wav_inst_orig = gt_wav_inst

                        gt_wav_vocal = sp.resample_poly(
                            gt_wav_vocal, cfg.sr, sr_orig).astype(np.float32)
                        gt_wav_inst = sp.resample_poly(
                            gt_wav_inst, cfg.sr, sr_orig).astype(np.float32)
                        mixed_wav = sp.resample_poly(
                            mixed_wav, cfg.sr, sr_orig).astype(np.float32)

                        mixed_spec = to_spec(mixed_wav)
                        mixed_spec_mag = np.abs(mixed_spec)
                        mixed_spec_phase = np.angle(mixed_spec)
                        max_tmp = np.max(mixed_spec_mag)
                        mixed_spec_mag = mixed_spec_mag / max_tmp

                        src_len = mixed_spec_mag.shape[0]
                        start_idx = 0
                        y_est_inst = np.zeros((src_len, n_feature),
                                              dtype=np.float32)
                        y_est_vocal = np.zeros((src_len, n_feature),
                                               dtype=np.float32)
                        while start_idx + 64 < src_len:
                            x_input[0, :, :,
                                    0] = mixed_spec_mag[start_idx:start_idx +
                                                        64, :n_feature]
                            y_output = sess.run(v_pred,
                                                feed_dict={p_input: x_input})
                            if start_idx == 0:
                                y_est_inst[start_idx:start_idx +
                                           64, :] = y_output[0, :, :, 0]
                                y_est_vocal[start_idx:start_idx +
                                            64, :] = y_output[0, :, :, 1]
                            else:
                                y_est_inst[start_idx + 16:start_idx +
                                           48, :] = y_output[0, 16:48, :, 0]
                                y_est_vocal[start_idx + 16:start_idx +
                                            48, :] = y_output[0, 16:48, :, 1]
                            start_idx += 32

                        x_input[0, :, :,
                                0] = mixed_spec_mag[src_len -
                                                    64:src_len, :n_feature]
                        y_output = sess.run(v_pred,
                                            feed_dict={p_input: x_input})
                        src_start = src_len - start_idx - 16
                        y_est_inst[start_idx +
                                   16:src_len, :] = y_output[0, 64 -
                                                             src_start:64, :,
                                                             0]
                        y_est_vocal[start_idx +
                                    16:src_len, :] = y_output[0, 64 -
                                                              src_start:64, :,
                                                              1]

                        y_est_inst *= max_tmp
                        y_est_vocal *= max_tmp
                        y_wav_inst = to_wav_file(
                            y_est_inst, mixed_spec_phase[:, :n_feature])
                        y_wav_vocal = to_wav_file(
                            y_est_vocal, mixed_spec_phase[:, :n_feature])
                        #saveWav("inst.wav", y_wav_inst, cfg.sr)
                        #saveWav("vocal.wav", y_wav_vocal, cfg.sr)

                        # upsample to original samprate
                        y_wav_inst_orig = sp.resample_poly(
                            y_wav_inst, sr_orig, cfg.sr).astype(np.float32)
                        y_wav_vocal_orig = sp.resample_poly(
                            y_wav_vocal, sr_orig, cfg.sr).astype(np.float32)
                        ret_list.append(
                            pool.apply_async(bss_eval, (
                                mixed_wav_orig,
                                gt_wav_inst_orig,
                                gt_wav_vocal_orig,
                                y_wav_inst_orig,
                                y_wav_vocal_orig,
                            )))
                with redirect.ConsoleAndFile(
                        "./eval_output/mir2_%s_%d_step%d.txt" %
                    (cfg.gene_ver, cfg.gene_value, step_idx)) as r:
                    gnsdr = 0.0
                    gsir = 0.0
                    gsar = 0.0
                    total_len = 0
                    for name, ret in zip(name_list, ret_list):
                        nsdr, sir, sar, lens = ret.get()
                        printstr = name + " " + str(nsdr) + " " + str(
                            sir) + " " + str(sar)
                        r.print(printstr)
                        total_len += lens
                        gnsdr += nsdr * lens
                        gsir += sir * lens
                        gsar += sar * lens
                    r.print("Final results")
                    r.print("GNSDR [Accompaniments, voice]")
                    r.print(gnsdr / total_len)
                    r.print("GSIR [Accompaniments, voice]")
                    r.print(gsir / total_len)
                    r.print("GSAR [Accompaniments, voice]")
                    r.print(gsar / total_len)
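Note: the 64-frame sliding-window stitching above (also used in the following examples) can be isolated as a small helper. This is a self-contained sketch with a dummy model, assuming the spectrogram has more than 64 frames; it is not the original code:

import numpy as np

def stitch_windows(spec_mag, apply_model, win=64, hop=32, margin=16):
    # Run the model on win-frame windows with a hop-frame stride and keep
    # only the central [margin:win-margin] frames of each window, except for
    # the first window and the final, right-aligned window.
    src_len, n_feature = spec_mag.shape
    out = np.zeros((src_len, n_feature), dtype=np.float32)
    start = 0
    while start + win < src_len:
        y = apply_model(spec_mag[start:start + win])
        if start == 0:
            out[start:start + win] = y
        else:
            out[start + margin:start + win - margin] = y[margin:win - margin]
        start += hop
    # Final window is aligned to the end of the signal.
    y = apply_model(spec_mag[src_len - win:src_len])
    src_start = src_len - start - margin
    out[start + margin:src_len] = y[win - src_start:win]
    return out

# Quick check with an identity "model": the stitched output equals the input.
x = np.random.rand(300, 8).astype(np.float32)
assert np.allclose(stitch_windows(x, lambda w: w), x)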
Example #8
def main():
    import multiprocessing as mp
    import numpy as np
    import tensorflow as tf
    import os, sys
    import config as cfg
    #import librosa
    from mir_util import infer, to_spec, to_wav_file
    import scipy.signal as sp
    sys.path.append("../lib")
    from eval_util import bss_eval_sdr_framewise
    from common import loadWav
    import redirect, simpleopt
    import pandas as pd

    step_idx = int(simpleopt.get("step"))
    n_eval = simpleopt.get("first", None)
    with cfg.ConfigBoundary():
        batch_size = 1
        n_feature = cfg.frame_size // 2

        # Model
        print("* Initialize network")
        p_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(batch_size, 64, n_feature,
                                                  1),
                                           name="p_input")
        v_pred = infer(p_input, 2, False)
        if isinstance(v_pred, list):
            v_pred = v_pred[-1]
        v_pred = tf.clip_by_value(v_pred, 0.0, 1.0) * p_input

        x_input = np.zeros((batch_size, 64, n_feature, 1), dtype=np.float32)
        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialized, Load state
            sess.run(tf.compat.v1.global_variables_initializer())

            print("* Load checkpoint")
            ckpt_path = os.path.join(cfg.DSD2Config.ckpt_path,
                                     "checkpoint-%d" % (step_idx, ))
            tf.compat.v1.train.Saver().restore(sess, ckpt_path)
            print(" :Loaded: `%s`" % (ckpt_path, ))

            os.makedirs("./eval_output", exist_ok=True)
            name_list = []
            ret_list = []
            with mp.Pool(processes=8, initializer=worker_main) as pool:
                for (root, dir_list, _) in os.walk(
                        os.path.join(cfg.dsd_path, "Mixtures", "Test")):
                    dir_list = sorted(dir_list)
                    if n_eval is not None:
                        dir_list = dir_list[:int(n_eval)]
                    for i_dir, d in enumerate(dir_list):
                        print("[%02d/%02d] STG1: `%s`" % (
                            i_dir + 1,
                            len(dir_list),
                            d,
                        ))
                        name_list.append(d)

                        filename_vocal = os.path.join(cfg.dsd_path, "Sources",
                                                      "Test", d, "vocals.wav")
                        filename_mix = os.path.join(cfg.dsd_path, "Mixtures",
                                                    "Test", d, "mixture.wav")

                        mixed_wav_orig, sr_orig = loadWav(
                            filename_mix
                        )  #librosa.load(filename_mix, sr=None, mono=True)
                        mixed_wav_orig = np.sum(mixed_wav_orig, axis=1)
                        gt_wav_vocal_orig, _ = loadWav(
                            filename_vocal
                        )  #librosa.load(filename_vocal, sr=None, mono=True)[0]
                        gt_wav_vocal_orig = np.sum(gt_wav_vocal_orig, axis=1)
                        gt_wav_inst_orig = mixed_wav_orig - gt_wav_vocal_orig

                        mixed_wav = sp.resample_poly(
                            mixed_wav_orig, cfg.sr, sr_orig
                        ).astype(
                            np.float32
                        )  #librosa.load(filename_mix, sr=cfg.sr, mono=True)[0]
                        gt_wav_vocal = sp.resample_poly(
                            gt_wav_vocal_orig, cfg.sr, sr_orig
                        ).astype(
                            np.float32
                        )  #librosa.load(filename_vocal, sr=cfg.sr, mono=True)[0]
                        gt_wav_inst = mixed_wav - gt_wav_vocal
                        mixed_spec = to_spec(mixed_wav)
                        mixed_spec_mag = np.abs(mixed_spec)
                        mixed_spec_phase = np.angle(mixed_spec)
                        max_tmp = np.max(mixed_spec_mag)
                        mixed_spec_mag = mixed_spec_mag / max_tmp

                        src_len = mixed_spec_mag.shape[0]
                        start_idx = 0
                        y_est_inst = np.zeros((src_len, n_feature),
                                              dtype=np.float32)
                        y_est_vocal = np.zeros((src_len, n_feature),
                                               dtype=np.float32)
                        while start_idx + 64 < src_len:
                            x_input[0, :, :,
                                    0] = mixed_spec_mag[start_idx:start_idx +
                                                        64, :n_feature]
                            y_output = sess.run(v_pred,
                                                feed_dict={p_input: x_input})
                            if start_idx == 0:
                                y_est_inst[start_idx:start_idx +
                                           64, :] = y_output[0, :, :, 0]
                                y_est_vocal[start_idx:start_idx +
                                            64, :] = y_output[0, :, :, 1]
                            else:
                                y_est_inst[start_idx + 16:start_idx +
                                           48, :] = y_output[0, 16:48, :, 0]
                                y_est_vocal[start_idx + 16:start_idx +
                                            48, :] = y_output[0, 16:48, :, 1]
                            start_idx += 32

                        x_input[0, :, :,
                                0] = mixed_spec_mag[src_len -
                                                    64:src_len, :n_feature]
                        y_output = sess.run(v_pred,
                                            feed_dict={p_input: x_input})
                        src_start = src_len - start_idx - 16
                        y_est_inst[start_idx +
                                   16:src_len, :] = y_output[0, 64 -
                                                             src_start:64, :,
                                                             0]
                        y_est_vocal[start_idx +
                                    16:src_len, :] = y_output[0, 64 -
                                                              src_start:64, :,
                                                              1]

                        y_est_inst *= max_tmp
                        y_est_vocal *= max_tmp
                        y_wav_inst = to_wav_file(
                            y_est_inst, mixed_spec_phase[:, :n_feature])
                        y_wav_vocal = to_wav_file(
                            y_est_vocal, mixed_spec_phase[:, :n_feature])
                        #saveWav("inst.wav", y_wav_inst, cfg.sr)
                        #saveWav("vocal.wav", y_wav_vocal, cfg.sr)

                        #upsample to original SR
                        y_wav_inst_orig = sp.resample_poly(
                            y_wav_inst, sr_orig, cfg.sr).astype(
                                np.float32
                            )  #librosa.resample(y_wav_inst, cfg.sr, sr_orig)
                        y_wav_vocal_orig = sp.resample_poly(
                            y_wav_vocal, sr_orig, cfg.sr).astype(
                                np.float32
                            )  #librosa.resample(y_wav_vocal, cfg.sr, sr_orig)

                        ret_list.append(
                            pool.apply_async(bss_eval_sdr_framewise, (
                                np.array([gt_wav_inst_orig, gt_wav_vocal_orig],
                                         dtype=np.float32),
                                np.array([y_wav_inst_orig, y_wav_vocal_orig],
                                         dtype=np.float32),
                            )))

                head_list = [
                    "method", "track", "target", "metric", "score", "time"
                ]
                row_list = []
                os.makedirs("./old_fw", exist_ok=True)  # ensure the output directory exists
                out_path = "./old_fw/dsd2_%s_%d_step%d.json" % (
                    cfg.gene_ver, cfg.gene_value, step_idx)
                method_name = "dsd2_%s_%d_step%d" % (cfg.gene_ver,
                                                     cfg.gene_value, step_idx)
                for name, ret in zip(name_list, ret_list):
                    print(name)
                    sdr, sir, sar = ret.get()
                    # Row order matches the original unrolled code:
                    # accompaniment then vocals, SDR/SIR/SAR within each.
                    for i_target, target in enumerate(
                        ("accompaniment", "vocals")):
                        for metric, values in (("SDR", sdr), ("SIR", sir),
                                               ("SAR", sar)):
                            for i, v in enumerate(values[i_target]):
                                row_list.append((
                                    method_name,
                                    name,
                                    target,
                                    metric,
                                    v,
                                    i,
                                ))
                out = pd.DataFrame(row_list, columns=head_list).reset_index()
                print(out)
                out.to_json(out_path)
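Note: a sketch of reading the exported scores back for a quick summary; the file name below is an example, use whatever out_path the run produced:

import pandas as pd

df = pd.read_json("./old_fw/dsd2_v1_4_step630000.json")
print(df.groupby(["target", "metric"])["score"].median())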
Example #9
import sys, os
import numpy as np
import config as cfg
import librosa
import scipy.signal as sp
sys.path.append("../lib")
import simpleopt
from common import *
import random

n_sample = 0
data_path = cfg.dsd_path
mixed_list = []
inst_list = []
vocal_list = []
n_feature = cfg.frame_size // 2
n_aug = simpleopt.get("aug", default=4, ok=int)

print("* Generate spectrograms")
np.random.seed(0x41526941)
random.seed(0x41526941)
for (root, dirs, files) in os.walk(data_path + "/Mixtures/Dev/"):
    for d in sorted(dirs):
        print(d)
        for i in range(n_aug + 1):
            filename_vocal = os.path.join(data_path, "Sources", "Dev", d,
                                          "vocals.wav")
            filename_mix = os.path.join(data_path, "Mixtures", "Dev", d,
                                        "mixture.wav")
            mixed_wav, sr_orig = loadWav(filename_mix)
            mixed_wav = np.sum(mixed_wav, axis=1)
            mixed_wav = sp.resample_poly(mixed_wav, cfg.sr,
                                         sr_orig).astype(np.float32)
Example #10
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import os, sys
sys.path.append("../lib")
from mir_util import infer, to_spec, to_wav_file
import config as cfg
from common import *
sys.is_train = False

import simpleopt

n_feature = 5644 // 2

dataset_type = simpleopt.get("dataset")
mix_path_list = simpleopt.get_multi("input")
ckpt_step = int(simpleopt.get("step"))
downmix = simpleopt.get_switch("downmix")

ver = simpleopt.get("ver")
gene = simpleopt.get("gene")

with cfg.ConfigBoundary():
    net_config, ch_list = {
        "mus2": (cfg.MUS2FConfig, ("inst", "vocal")),
    }[dataset_type]
    n_ch = len(ch_list)

    # Model
    print("* Initialize network model")
Example #11
def main_estimate(pool):
    import numpy as np
    import tensorflow as tf
    import os, sys, pathlib
    sys.path.append("../lib")
    import config as cfg
    #import librosa
    from mir_util import infer, to_spec, to_wav_file
    import scipy.signal as sp
    import musdb, museval
    import simpleopt
    sys.is_train = False

    step_idx = int(simpleopt.get("step"))
    n_eval = simpleopt.get("first", None)
    if n_eval is not None:
        n_eval = int(n_eval)
        assert n_eval > 0

    sound_sample_root = simpleopt.get("sound-out", None)
    source = simpleopt.get("source")
    if source == "vocals":
        source = None

    with cfg.ConfigBoundary():
        if source is None:
            model_name = "mus2f_%s_%d_step%d" % (
                cfg.gene_ver,
                cfg.gene_value,
                step_idx,
            )
        else:
            model_name = "mus2f_%s_%d_step%d_%s" % (
                cfg.gene_ver,
                cfg.gene_value,
                step_idx,
                source,
            )
        model_name_nosrc = "mus2f_%s_%d_step%d" % (
            cfg.gene_ver,
            cfg.gene_value,
            step_idx,
        )
        if sound_sample_root is None:
            sound_sample_root = "./sound_output_mus2f/{}".format(
                model_name_nosrc)
        pathlib.Path(sound_sample_root).mkdir(parents=True, exist_ok=True)
        ckpt_path = cfg.MUS2FConfig.ckpt_path
        if source != "vocals":
            ckpt_path = "{}_{}".format(ckpt_path, source)

        batch_size = 1
        n_feature = 5644 // 2

        # Model
        print("* Initialize network")
        p_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(batch_size, 64, n_feature,
                                                  1),
                                           name="p_input")
        v_pred = infer(p_input, 2, False)
        if isinstance(v_pred, list):
            v_pred = v_pred[-1]

        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialized, Load state
            sess.run(tf.compat.v1.global_variables_initializer())

            print("* Load checkpoint")
            ckpt_path = os.path.join(ckpt_path, "checkpoint-%d" % (step_idx, ))
            tf.compat.v1.train.Saver().restore(sess, ckpt_path)
            print(" :Loaded: `%s`" % (ckpt_path, ))

            os.makedirs("./eval_output", exist_ok=True)
            name_list = []
            ret_list = []

            mus = musdb.DB(root=cfg.mus_root_path,
                           download=False,
                           subsets="test",
                           is_wav=True)
            mus_trk_list = list(mus.tracks)
            mus_trk_list.sort(key=lambda x: x.name)
            assert len(mus_trk_list) > 0
            if n_eval is not None:
                mus_trk_list = mus_trk_list[:n_eval]

            results = museval.EvalStore()

            for i_song, track in enumerate(mus_trk_list):
                print("[%02d/%02d] Estimate: `%s`" % (
                    i_song + 1,
                    len(mus_trk_list),
                    track.name,
                ))
                voc_ch_list = []
                inst_ch_list = []
                for i_channel in range(2):
                    print(" :Channel #%d" % (i_channel, ))
                    name_list.append(track.name + " Channel %d" %
                                     (i_channel, ))

                    mixed_wav = track.audio[:, i_channel]
                    mixed_spec = to_spec(mixed_wav,
                                         len_frame=5644,
                                         len_hop=5644 // 4)
                    mixed_spec_mag = np.abs(mixed_spec)
                    mixed_spec_phase = np.angle(mixed_spec)
                    max_tmp = np.max(mixed_spec_mag)
                    mixed_spec_mag = mixed_spec_mag / max_tmp

                    src_len = mixed_spec_mag.shape[0]
                    start_idx = 0
                    y_est_inst = np.zeros((src_len, n_feature),
                                          dtype=np.float32)
                    y_est_vocal = np.zeros((src_len, n_feature),
                                           dtype=np.float32)
                    x_input = np.zeros((batch_size, 64, n_feature, 1),
                                       dtype=np.float32)
                    while start_idx + 64 < src_len:
                        x_input[0, :, :,
                                0] = mixed_spec_mag[start_idx:start_idx +
                                                    64, :n_feature]
                        y_output = sess.run(v_pred,
                                            feed_dict={p_input: x_input})
                        if start_idx == 0:
                            y_est_inst[start_idx:start_idx +
                                       64, :] = y_output[0, :, :, 0]
                            y_est_vocal[start_idx:start_idx +
                                        64, :] = y_output[0, :, :, 1]
                        else:
                            y_est_inst[start_idx + 16:start_idx +
                                       48, :] = y_output[0, 16:48, :, 0]
                            y_est_vocal[start_idx + 16:start_idx +
                                        48, :] = y_output[0, 16:48, :, 1]
                        start_idx += 32

                    x_input[0, :, :,
                            0] = mixed_spec_mag[src_len -
                                                64:src_len, :n_feature]
                    y_output = sess.run(v_pred, feed_dict={p_input: x_input})
                    src_start = src_len - start_idx - 16
                    y_est_inst[start_idx +
                               16:src_len, :] = y_output[0,
                                                         64 - src_start:64, :,
                                                         0]
                    y_est_vocal[start_idx +
                                16:src_len, :] = y_output[0,
                                                          64 - src_start:64, :,
                                                          1]

                    y_est_inst *= max_tmp
                    y_est_vocal *= max_tmp
                    y_wav_inst = to_wav_file(y_est_inst,
                                             mixed_spec_phase[:, :n_feature],
                                             len_hop=5644 // 4)
                    y_wav_vocal = to_wav_file(y_est_vocal,
                                              mixed_spec_phase[:, :n_feature],
                                              len_hop=5644 // 4)

                    voc_ch_list.append(y_wav_vocal.reshape(
                        y_wav_vocal.size, 1))
                    inst_ch_list.append(y_wav_inst.reshape(y_wav_inst.size, 1))
                    del y_wav_inst, y_wav_vocal, y_est_inst, y_est_vocal, src_start, x_input, y_output, mixed_spec_mag, max_tmp, mixed_spec_phase, mixed_spec, mixed_wav
                # `source` is None when the target is vocals (see the mapping
                # above), so fall back to the "vocals" key for save_estimates.
                estimates = {
                    (source or "vocals"): np.concatenate(voc_ch_list, axis=1),
                }
                del voc_ch_list, inst_ch_list
                if sound_sample_root:
                    mus.save_estimates(estimates, track, sound_sample_root)
                del estimates, i_song, track