def main():
    """Evaluate each gene named on the command line while holding the GPU lock.

    Genes are read from ``sys.argv[4:]``.  For every gene the shared lock in
    ``cfg.lock`` is acquired, ``eval_single`` is timed, and the lock is
    released again.  After all genes are processed a one-line summary per
    gene is printed.

    ``eval_single`` is expected to return four values, which are reported as
    ``sdr``, ``neg_gflops``, ``neg_mega_pc`` and ``valid_sdr``.
    """
    import sys
    sys.path.append("../lib")
    import time
    import config as cfg
    import multiprocessing as mp
    import simpleopt

    create_lock()
    # The value is not used below; the call is kept in case simpleopt
    # validates that the option is present -- TODO confirm and drop if not.
    dataset_type = simpleopt.get("dataset")

    results = []
    with mp.Pool(processes=6) as pool:
        cfg.pool = pool
        pre_fn()
        for gene in sys.argv[4:]:
            print("* Acquire GPU...")
            # Acquire *before* entering the try block so that the finally
            # clause only ever releases a lock this process actually holds.
            cfg.lock.acquire()
            try:
                print("* Got GPU!")
                started = time.time()
                scores = eval_single(gene)
                elapsed = time.time() - started
                results.append((gene, *scores, elapsed))
            finally:
                cfg.lock.release()
        clean_fn()

    for i, (gene, sdr, neg_gflops, neg_mega_pc, valid_sdr, t) in enumerate(results):
        print(
            "[%d/%d] gene=%s, sdr=%f, neg_gflops=%f, neg_mega_pc=%f, valid_sdr=%f, time=%f"
            % (i, len(results), gene, sdr, neg_gflops, neg_mega_pc, valid_sdr, t))
def main():
    """Print the two totals returned by ``count`` for the requested channel count.

    The channel count comes from the ``ch`` command-line option; the results
    are printed with thousands separators.
    """
    import sys
    sys.path.append("../lib")
    import simpleopt

    channel_count = int(simpleopt.get("ch"))
    param_total, forward_flops = count(None, None, channel_count)

    report_lines = (
        ("Trainable parameters: {:,}", param_total),
        ("Forward FLOPs: {:,}", forward_flops),
    )
    for template, value in report_lines:
        print(template.format(value))
def load_config(gene_ver=None, gene_value=None):
    """Build the per-dataset training configs and publish them on ``config``.

    Args:
        gene_ver: Gene version tag.  When ``None`` it is read from the
            ``ver`` command-line option.
        gene_value: Integer gene value.  When ``None`` it is read from the
            ``gene`` command-line option and converted with ``int``.

    Side effects:
        Creates ``ckpt_root_path`` if needed and sets ``gene_ver``,
        ``gene_value``, ``DSD2Config``, ``MIR2Config`` and ``MUS2FConfig``
        as attributes of the ``config`` module.
    """
    import config as cfg

    # exist_ok already tolerates an existing directory, so no separate
    # (race-prone) existence check is needed.
    os.makedirs(ckpt_root_path, exist_ok=True)

    if gene_ver is None:
        gene_ver = simpleopt.get("ver")
    if gene_value is None:
        gene_value = int(simpleopt.get("gene"))

    # Values shared by all three configs.  They use names that differ from
    # the class attributes on purpose: a class body cannot read an enclosing
    # function's variable that has the same name as one it assigns.
    per_step_batch = 1 if gene_ver in ("sa", ) else 3
    # Step counts below are written for batch size 3 and scaled up when a
    # smaller batch is used.
    step_scale = 3 // per_step_batch
    use_reduced_lr = gene_ver in ("sa", "sa2", ) and gene_value in (4, )

    class DSD2Config:
        ckpt_name = "dsd2_%s_%d" % (gene_ver, gene_value, )
        batch_size = per_step_batch
        ckpt_path = os.path.join(ckpt_root_path, ckpt_name)
        max_lr = 1e-4 if use_reduced_lr else 3e-4
        min_lr = 1e-5
        first_decay_period = 10000 * step_scale
        final_step = 630001 * step_scale
        ckpt_step = 10000 * step_scale

    class MIR2Config:
        ckpt_name = "mir2_%s_%d" % (gene_ver, gene_value, )
        batch_size = per_step_batch
        ckpt_path = os.path.join(ckpt_root_path, ckpt_name)
        max_lr = 3e-4
        min_lr = 1e-5
        first_decay_period = 1000 * step_scale
        final_step = 63001 * step_scale
        ckpt_step = 1000 * step_scale

    class MUS2FConfig:
        ckpt_name = "mus2f_%s_%d" % (gene_ver, gene_value, )
        batch_size = per_step_batch
        ckpt_path = os.path.join(ckpt_root_path, ckpt_name)
        max_lr = 1e-4 if use_reduced_lr else 2e-4
        min_lr = 1e-5
        first_decay_period = 10000 * step_scale
        final_step = 630001 * step_scale
        ckpt_step = 10000 * step_scale

    cfg.gene_ver = gene_ver
    cfg.gene_value = gene_value
    cfg.DSD2Config = DSD2Config
    cfg.MIR2Config = MIR2Config
    cfg.MUS2FConfig = MUS2FConfig
import sys
import os

# Project modules live outside the script directory.
sys.path.append("../lib")
sys.path.append("../srv")

import numpy as np
import pylab as pl
import scipy.interpolate as ipl
import geneop
import simpleopt

# Command-line options.
breakpoint_path = simpleopt.get("input")
dataset_type = simpleopt.get("dataset")
score_type = simpleopt.get("score")
assert score_type in ("test", "valid")

# Pickle files inside the breakpoint directory; the second path is a
# template with a zero-padded integer placeholder.
init_status_path = os.path.join(breakpoint_path, "20_gen_init.pickle")
gen_status_path = os.path.join(breakpoint_path, "30_gen_%04d.pickle")
n_gen = 9999

# Accumulators for SDR statistics.
max_sdr_list, mean_sdr_list = [], []
sdr_25_list, sdr_50_list, sdr_75_list = [], [], []

# Accumulators for parameter-count statistics.
min_param_list, mean_param_list = [], []
param_25_list, param_50_list, param_75_list = [], [], []
def pre_fn():
    """Populate ``cfg`` with the worker settings for the selected dataset.

    Reads the ``dataset`` and ``ver`` options, loads the gene type, fills
    ``cfg.n`` and ``cfg.worker_config``, then imports the matching
    ``worker_<dataset>`` module into ``cfg.worker`` and runs its
    ``clean_env()`` and ``pre_fn()``.
    """
    import simpleopt

    dataset_type = simpleopt.get("dataset")
    gene_type = simpleopt.get("ver")
    geneop.load_type(gene_type)

    # Every supported gene type maps to the same value; unknown types raise
    # KeyError.
    cfg.n = dict.fromkeys(("v1", "v1pe", "v1f16"), 142)[gene_type]

    # "mus2" uses a larger batch and higher-resolution audio settings than
    # the other datasets.
    if dataset_type == "mus2":
        batch, sample_rate, hop, fft, feature = 3, 16000, 512, 2048, 1024
    else:
        batch, sample_rate, hop, fft, feature = 2, 8000, 256, 1024, 512

    # NOTE: a disabled variant for "mus2" previously kept here as a string
    # literal used first_lr_period=10000, warmup_period=1000 and n_step=30000.
    cfg.worker_config = {
        "result_format": ("sdr", "neg_gflops", "neg_mega_pc", "valid_sdr"),
        "batch_size": batch,
        "max_lr": 3e-4,
        "min_lr": 1e-4,
        "warmup_fac": 0.3,
        "first_lr_period": 100,
        "warmup_period": 100,
        "n_hop_per_sample": 64,
        "work_sr": sample_rate,
        "hop_size": hop,
        "fft_size": fft,
        "n_feature": feature,
    }

    # Per-dataset split sizes and step count; other dataset names add nothing.
    dataset_sizes = {
        "mir2": dict(n_train=100, n_step=1500, n_eval=55, n_valid=20),
        "dsd2": dict(n_train=30, n_step=3100, n_eval=15, n_valid=5),
        "mus2": dict(n_train=60, n_step=3100, n_eval=30, n_valid=10),
    }
    if dataset_type in dataset_sizes:
        cfg.worker_config.update(dataset_sizes[dataset_type])

    worker_module = __import__("worker_%s" % (dataset_type, ))
    cfg.worker = worker_module
    cfg.worker.clean_env()
    cfg.worker.pre_fn()
def create_lock():
    """Install a ``FileLock`` on ``cfg.lock`` unless one is already set.

    The lock file path is taken from the ``lockpath`` command-line option.
    """
    import simpleopt

    lock_file = simpleopt.get("lockpath")
    if cfg.lock:
        # A lock object already exists; keep it.
        return
    cfg.lock = FileLock(lock_file)
def main():
    """Evaluate a MIR2 checkpoint on the wav files under ``cfg.mir_wav_path``.

    The checkpoint step comes from the ``step`` option; the optional
    ``first`` option limits how many files per directory are evaluated.
    Each file is separated into an instrumental and a vocal estimate with a
    sliding 64-frame window, the estimates are resampled back to the file's
    original sample rate, and ``bss_eval`` is run in a worker process.
    Per-file scores and length-weighted averages are printed through
    ``redirect.ConsoleAndFile`` into ``./eval_output``.
    """
    import multiprocessing as mp
    import numpy as np
    import tensorflow as tf
    import os, sys
    from mir_util import infer, to_spec, to_wav_file
    import scipy.signal as sp
    import config as cfg
    sys.path.append("../lib")
    from eval_util import bss_eval
    from common import loadWav
    import redirect, simpleopt
    step_idx = int(simpleopt.get("step"))
    n_eval = simpleopt.get("first", None)
    with cfg.ConfigBoundary():
        batch_size = 1
        n_feature = cfg.frame_size // 2
        # Model
        print("* Initialize network")
        p_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(batch_size, 64, n_feature, 1),
                                           name="p_input")
        v_pred = infer(p_input, 2, False)
        # When infer returns a list, only its last element is used.
        if isinstance(v_pred, list):
            v_pred = v_pred[-1]
        # The prediction is clipped to [0, 1] and multiplied with the input,
        # i.e. it acts as a mask on the input magnitudes.
        v_pred = tf.clip_by_value(v_pred, 0.0, 1.0) * p_input
        # Reused input buffer for every window fed to the network.
        x_input = np.zeros((batch_size, 64, n_feature, 1), dtype=np.float32)
        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialized, Load state
            sess.run(tf.compat.v1.global_variables_initializer())
            print("* Load checkpoint")
            ckpt_path = os.path.join(cfg.MIR2Config.ckpt_path,
                                     "checkpoint-%d" % (step_idx, ))
            tf.compat.v1.train.Saver().restore(sess, ckpt_path)
            print(" :Loaded: `%s`" % (ckpt_path, ))
            os.makedirs("./eval_output", exist_ok=True)
            name_list = []
            ret_list = []
            with mp.Pool(processes=1, initializer=worker_main) as pool:
                for (root, _, file_list) in os.walk(cfg.mir_wav_path):
                    # Files whose names start with "abjones" or "amy" are
                    # excluded from evaluation.
                    file_list = sorted(f for f in file_list if not (
                        f.startswith("abjones") or f.startswith("amy")))
                    if n_eval is not None:
                        file_list = file_list[:int(n_eval)]
                    for i_file, filename in enumerate(file_list):
                        print("[%03d/%03d] SEND: `%s`" % (
                            i_file + 1,
                            len(file_list),
                            filename,
                        ))
                        name_list.append(filename)
                        path = os.path.join(root, filename)
                        mixed_wav, sr_orig = loadWav(path)
                        # Column 1 is used as the vocal reference and column 0
                        # as the instrumental reference; the mixture is their sum.
                        gt_wav_vocal = mixed_wav[:, 1]
                        gt_wav_inst = mixed_wav[:, 0]
                        mixed_wav = np.sum(mixed_wav, axis=1)
                        mixed_wav_orig = mixed_wav
                        gt_wav_vocal_orig = gt_wav_vocal
                        gt_wav_inst_orig = gt_wav_inst
                        # NOTE(review): the resampled gt_wav_vocal / gt_wav_inst
                        # are not read again in this function; only the
                        # *_orig versions are passed to bss_eval.
                        gt_wav_vocal = sp.resample_poly(
                            gt_wav_vocal, cfg.sr, sr_orig).astype(np.float32)
                        gt_wav_inst = sp.resample_poly(
                            gt_wav_inst, cfg.sr, sr_orig).astype(np.float32)
                        mixed_wav = sp.resample_poly(
                            mixed_wav, cfg.sr, sr_orig).astype(np.float32)
                        mixed_spec = to_spec(mixed_wav)
                        mixed_spec_mag = np.abs(mixed_spec)
                        mixed_spec_phase = np.angle(mixed_spec)
                        # Normalise magnitudes by the maximum; undone after inference.
                        # NOTE(review): an all-zero spectrogram would divide by zero here.
                        max_tmp = np.max(mixed_spec_mag)
                        mixed_spec_mag = mixed_spec_mag / max_tmp
                        src_len = mixed_spec_mag.shape[0]
                        start_idx = 0
                        y_est_inst = np.zeros((src_len, n_feature),
                                              dtype=np.float32)
                        y_est_vocal = np.zeros((src_len, n_feature),
                                               dtype=np.float32)
                        # Slide a 64-frame window in steps of 32 frames.
                        while start_idx + 64 < src_len:
                            x_input[0, :, :, 0] = mixed_spec_mag[start_idx:start_idx + 64, :n_feature]
                            y_output = sess.run(v_pred,
                                                feed_dict={p_input: x_input})
                            if start_idx == 0:
                                # First window: keep all 64 output frames.
                                y_est_inst[start_idx:start_idx + 64, :] = y_output[0, :, :, 0]
                                y_est_vocal[start_idx:start_idx + 64, :] = y_output[0, :, :, 1]
                            else:
                                # Later windows: keep only the central 32
                                # frames (16..48) of each window.
                                y_est_inst[start_idx + 16:start_idx + 48, :] = y_output[0, 16:48, :, 0]
                                y_est_vocal[start_idx + 16:start_idx + 48, :] = y_output[0, 16:48, :, 1]
                            start_idx += 32
                        # Final window is aligned with the end of the
                        # spectrogram and fills frames start_idx+16 .. src_len.
                        # NOTE(review): presumably requires src_len >= 64;
                        # a shorter input makes the slice below shorter than
                        # the 64-frame buffer -- TODO confirm.
                        x_input[0, :, :, 0] = mixed_spec_mag[src_len - 64:src_len, :n_feature]
                        y_output = sess.run(v_pred,
                                            feed_dict={p_input: x_input})
                        src_start = src_len - start_idx - 16
                        y_est_inst[start_idx + 16:src_len, :] = y_output[0, 64 - src_start:64, :, 0]
                        y_est_vocal[start_idx + 16:src_len, :] = y_output[0, 64 - src_start:64, :, 1]
                        # Undo the magnitude normalisation.
                        y_est_inst *= max_tmp
                        y_est_vocal *= max_tmp
                        # Estimated magnitudes are combined with the mixture phase.
                        y_wav_inst = to_wav_file(
                            y_est_inst, mixed_spec_phase[:, :n_feature])
                        y_wav_vocal = to_wav_file(
                            y_est_vocal, mixed_spec_phase[:, :n_feature])
                        #saveWav("inst.wav", y_wav_inst, cfg.sr)
                        #saveWav("vocal.wav", y_wav_vocal, cfg.sr)
                        # upsample to original sample rate
                        y_wav_inst_orig = sp.resample_poly(
                            y_wav_inst, sr_orig, cfg.sr).astype(np.float32)
                        y_wav_vocal_orig = sp.resample_poly(
                            y_wav_vocal, sr_orig, cfg.sr).astype(np.float32)
                        # Scoring runs asynchronously in the worker pool;
                        # results are collected below in submission order.
                        ret_list.append(
                            pool.apply_async(bss_eval, (
                                mixed_wav_orig,
                                gt_wav_inst_orig,
                                gt_wav_vocal_orig,
                                y_wav_inst_orig,
                                y_wav_vocal_orig,
                            )))
                with redirect.ConsoleAndFile(
                        "./eval_output/mir2_%s_%d_step%d.txt" %
                        (cfg.gene_ver, cfg.gene_value, step_idx)) as r:
                    # Running sums weighted by each result's `lens` value.
                    gnsdr = 0.0
                    gsir = 0.0
                    gsar = 0.0
                    total_len = 0
                    for name, ret in zip(name_list, ret_list):
                        nsdr, sir, sar, lens = ret.get()
                        printstr = name + " " + str(nsdr) + " " + str(
                            sir) + " " + str(sar)
                        r.print(printstr)
                        total_len += lens
                        gnsdr += nsdr * lens
                        gsir += sir * lens
                        gsar += sar * lens
                    # NOTE(review): if no file was evaluated, total_len is 0
                    # and the divisions below fail.
                    r.print("Final results")
                    r.print("GNSDR [Accompaniments, voice]")
                    r.print(gnsdr / total_len)
                    r.print("GSIR [Accompaniments, voice]")
                    r.print(gsir / total_len)
                    r.print("GSAR [Accompaniments, voice]")
                    r.print(gsar / total_len)
def main():
    """Run framewise BSS evaluation of a DSD2 checkpoint on the DSD test set.

    The checkpoint step comes from the ``step`` option; the optional
    ``first`` option limits how many track directories are evaluated.
    For every track directory under ``<cfg.dsd_path>/Mixtures/Test`` the
    mixture is separated into an accompaniment and a vocal estimate with a
    sliding 64-frame window, resampled back to the original sample rate and
    scored asynchronously with ``bss_eval_sdr_framewise``.  All framewise
    scores are collected in one table and written as JSON to
    ``./old_fw/dsd2_<ver>_<gene>_step<step>.json``.
    """
    import multiprocessing as mp
    import numpy as np
    import tensorflow as tf
    import os, sys
    import config as cfg
    from mir_util import infer, to_spec, to_wav_file
    import scipy.signal as sp
    sys.path.append("../lib")
    from eval_util import bss_eval_sdr_framewise
    from common import loadWav
    import redirect, simpleopt
    import pandas as pd

    step_idx = int(simpleopt.get("step"))
    n_eval = simpleopt.get("first", None)
    with cfg.ConfigBoundary():
        batch_size = 1
        n_feature = cfg.frame_size // 2

        # Model
        print("* Initialize network")
        p_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(batch_size, 64, n_feature, 1),
                                           name="p_input")
        v_pred = infer(p_input, 2, False)
        # When infer returns a list, only its last element is used.
        if isinstance(v_pred, list):
            v_pred = v_pred[-1]
        # Clipped to [0, 1] and multiplied with the input: acts as a mask.
        v_pred = tf.clip_by_value(v_pred, 0.0, 1.0) * p_input
        # Reused input buffer for every window fed to the network.
        x_input = np.zeros((batch_size, 64, n_feature, 1), dtype=np.float32)

        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialize, then load state from the checkpoint.
            sess.run(tf.compat.v1.global_variables_initializer())
            print("* Load checkpoint")
            ckpt_path = os.path.join(cfg.DSD2Config.ckpt_path,
                                     "checkpoint-%d" % (step_idx, ))
            tf.compat.v1.train.Saver().restore(sess, ckpt_path)
            print(" :Loaded: `%s`" % (ckpt_path, ))
            os.makedirs("./eval_output", exist_ok=True)

            name_list = []
            ret_list = []
            with mp.Pool(processes=8, initializer=worker_main) as pool:
                for (root, dir_list, _) in os.walk(
                        os.path.join(cfg.dsd_path, "Mixtures", "Test")):
                    dir_list = sorted(dir_list)
                    if n_eval is not None:
                        dir_list = dir_list[:int(n_eval)]
                    for i_dir, d in enumerate(dir_list):
                        print("[%02d/%02d] STG1: `%s`" % (
                            i_dir + 1,
                            len(dir_list),
                            d,
                        ))
                        name_list.append(d)
                        filename_vocal = os.path.join(cfg.dsd_path, "Sources",
                                                      "Test", d, "vocals.wav")
                        filename_mix = os.path.join(cfg.dsd_path, "Mixtures",
                                                    "Test", d, "mixture.wav")

                        # Load at the original sample rate and downmix by
                        # summing the channels.
                        mixed_wav_orig, sr_orig = loadWav(filename_mix)
                        mixed_wav_orig = np.sum(mixed_wav_orig, axis=1)
                        gt_wav_vocal_orig, _ = loadWav(filename_vocal)
                        gt_wav_vocal_orig = np.sum(gt_wav_vocal_orig, axis=1)
                        # The accompaniment reference is mixture minus vocals.
                        gt_wav_inst_orig = mixed_wav_orig - gt_wav_vocal_orig

                        # Only the mixture is needed at the working sample
                        # rate; references are scored at the original rate.
                        mixed_wav = sp.resample_poly(
                            mixed_wav_orig, cfg.sr, sr_orig).astype(np.float32)

                        mixed_spec = to_spec(mixed_wav)
                        mixed_spec_mag = np.abs(mixed_spec)
                        mixed_spec_phase = np.angle(mixed_spec)
                        # Normalise magnitudes by the maximum; undone after
                        # inference.
                        max_tmp = np.max(mixed_spec_mag)
                        mixed_spec_mag = mixed_spec_mag / max_tmp
                        src_len = mixed_spec_mag.shape[0]
                        start_idx = 0
                        y_est_inst = np.zeros((src_len, n_feature),
                                              dtype=np.float32)
                        y_est_vocal = np.zeros((src_len, n_feature),
                                               dtype=np.float32)

                        # Slide a 64-frame window in steps of 32 frames.
                        while start_idx + 64 < src_len:
                            x_input[0, :, :, 0] = mixed_spec_mag[start_idx:start_idx + 64, :n_feature]
                            y_output = sess.run(v_pred,
                                                feed_dict={p_input: x_input})
                            if start_idx == 0:
                                # First window: keep all 64 output frames.
                                y_est_inst[start_idx:start_idx + 64, :] = y_output[0, :, :, 0]
                                y_est_vocal[start_idx:start_idx + 64, :] = y_output[0, :, :, 1]
                            else:
                                # Later windows: keep only the central 32
                                # frames (16..48) of each window.
                                y_est_inst[start_idx + 16:start_idx + 48, :] = y_output[0, 16:48, :, 0]
                                y_est_vocal[start_idx + 16:start_idx + 48, :] = y_output[0, 16:48, :, 1]
                            start_idx += 32

                        # Final window is aligned with the end of the
                        # spectrogram and fills frames start_idx+16 .. src_len.
                        x_input[0, :, :, 0] = mixed_spec_mag[src_len - 64:src_len, :n_feature]
                        y_output = sess.run(v_pred,
                                            feed_dict={p_input: x_input})
                        src_start = src_len - start_idx - 16
                        y_est_inst[start_idx + 16:src_len, :] = y_output[0, 64 - src_start:64, :, 0]
                        y_est_vocal[start_idx + 16:src_len, :] = y_output[0, 64 - src_start:64, :, 1]

                        # Undo the magnitude normalisation.
                        y_est_inst *= max_tmp
                        y_est_vocal *= max_tmp
                        # Estimated magnitudes are combined with the mixture
                        # phase.
                        y_wav_inst = to_wav_file(
                            y_est_inst, mixed_spec_phase[:, :n_feature])
                        y_wav_vocal = to_wav_file(
                            y_est_vocal, mixed_spec_phase[:, :n_feature])

                        # Upsample to the original sample rate.
                        y_wav_inst_orig = sp.resample_poly(
                            y_wav_inst, sr_orig, cfg.sr).astype(np.float32)
                        y_wav_vocal_orig = sp.resample_poly(
                            y_wav_vocal, sr_orig, cfg.sr).astype(np.float32)

                        # Scoring runs asynchronously in the worker pool;
                        # index 0 is accompaniment, index 1 is vocals.
                        ret_list.append(
                            pool.apply_async(bss_eval_sdr_framewise, (
                                np.array([gt_wav_inst_orig, gt_wav_vocal_orig],
                                         dtype=np.float32),
                                np.array([y_wav_inst_orig, y_wav_vocal_orig],
                                         dtype=np.float32),
                            )))

                head_list = [
                    "method", "track", "target", "metric", "score", "time"
                ]
                row_list = []
                out_path = "./old_fw/dsd2_%s_%d_step%d.json" % (
                    cfg.gene_ver, cfg.gene_value, step_idx)
                method_name = "dsd2_%s_%d_step%d" % (cfg.gene_ver,
                                                     cfg.gene_value, step_idx)
                target_names = ("accompaniment", "vocals")
                metric_names = ("SDR", "SIR", "SAR")
                # Results are collected while the pool is still alive.
                for name, ret in zip(name_list, ret_list):
                    print(name)
                    sdr, sir, sar = ret.get()
                    # Row order: for each target, all SDR frames, then SIR,
                    # then SAR.
                    for target_idx, target in enumerate(target_names):
                        for metric, scores in zip(metric_names,
                                                  (sdr, sir, sar)):
                            for i, v in enumerate(scores[target_idx]):
                                row_list.append((
                                    method_name,
                                    name,
                                    target,
                                    metric,
                                    v,
                                    i,
                                ))
                out = pd.DataFrame(row_list, columns=head_list).reset_index()
                print(out)
                # Make sure the output directory exists so the results of a
                # long evaluation are not lost on the final write.
                os.makedirs(os.path.dirname(out_path), exist_ok=True)
                out.to_json(out_path)
import config as cfg import librosa import scipy.signal as sp sys.path.append("../lib") import simpleopt from common import * import random n_sample = 0 data_path = cfg.dsd_path mixed_list = [] inst_list = [] vocal_list = [] n_sample = 0 n_feature = cfg.frame_size // 2 n_aug = simpleopt.get("aug", default=4, ok=int) print("* Generate spectrograms") np.random.seed(0x41526941) random.seed(0x41526941) for (root, dirs, files) in os.walk(data_path + "/Mixtures/Dev/"): for d in sorted(dirs): print(d) for i in range(n_aug + 1): filename_vocal = os.path.join(data_path, "Sources", "Dev", d, "vocals.wav") filename_mix = os.path.join(data_path, "Mixtures", "Dev", d, "mixture.wav") mixed_wav, sr_orig = loadWav(filename_mix) mixed_wav = np.sum(mixed_wav, axis=1) mixed_wav = sp.resample_poly(mixed_wav, cfg.sr,
import numpy as np
import tensorflow as tf
# Eager execution is disabled right after importing TensorFlow, before any
# project module is imported.
tf.compat.v1.disable_eager_execution()
import os, sys
# Make the shared project library importable.
sys.path.append("../lib")
from mir_util import infer, to_spec, to_wav_file
import config as cfg
from common import *
import sys
# Sets an ad-hoc attribute on the sys module; presumably read by other
# project modules to detect inference mode -- TODO confirm.
sys.is_train = False
import simpleopt
# Number of frequency bins used per frame (half of 5644).
n_feature = 5644 // 2
# Command-line options.
dataset_type = simpleopt.get("dataset")
mix_path_list = simpleopt.get_multi("input")
ckpt_step = int(simpleopt.get("step"))
downmix = simpleopt.get_switch("downmix")
ver = simpleopt.get("ver")
gene = simpleopt.get("gene")
with cfg.ConfigBoundary():
    # Only the "mus2" dataset is supported here; any other value raises
    # KeyError.
    net_config, ch_list = {
        "mus2": (cfg.MUS2FConfig, ("inst", "vocal")),
    }[dataset_type]
    n_ch = len(ch_list)
    # Model
    print("* Initialize network model")
def main_estimate(pool):
    """Separate the MUSDB test tracks with a MUS2F checkpoint and save the estimates.

    Options read through ``simpleopt``:
        step: checkpoint step to restore.
        first: optional positive limit on the number of tracks processed.
        sound-out: optional output root; defaults to
            ``./sound_output_mus2f/mus2f_<ver>_<gene>_step<step>``.
        source: target source name.  ``"vocals"`` is the default model and is
            represented internally as ``None``: it uses the un-suffixed
            checkpoint directory, while any other source uses
            ``<ckpt_path>_<source>``.

    Each stereo channel of a track is processed independently with a sliding
    64-frame window; the second network output channel is saved as the
    estimate for the requested source via ``musdb.DB.save_estimates``.

    Args:
        pool: Not used in the visible part of this function.
    """
    import numpy as np
    import tensorflow as tf
    import os, sys, pathlib
    sys.path.append("../lib")
    import config as cfg
    from mir_util import infer, to_spec, to_wav_file
    import scipy.signal as sp
    import musdb, museval
    import simpleopt
    sys.is_train = False

    step_idx = int(simpleopt.get("step"))
    n_eval = simpleopt.get("first", None)
    if n_eval is not None:
        n_eval = int(n_eval)
        assert n_eval > 0
    sound_sample_root = simpleopt.get("sound-out", None)
    source = simpleopt.get("source")
    # "vocals" is the default target and is represented as None from here on.
    if source == "vocals":
        source = None
    # Name under which the estimate is saved; must be a real string even for
    # the default target.
    target_name = "vocals" if source is None else source

    with cfg.ConfigBoundary():
        if source is None:
            model_name = "mus2f_%s_%d_step%d" % (
                cfg.gene_ver,
                cfg.gene_value,
                step_idx,
            )
        else:
            model_name = "mus2f_%s_%d_step%d_%s" % (
                cfg.gene_ver,
                cfg.gene_value,
                step_idx,
                source,
            )
        model_name_nosrc = "mus2f_%s_%d_step%d" % (
            cfg.gene_ver,
            cfg.gene_value,
            step_idx,
        )
        if sound_sample_root is None:
            sound_sample_root = "./sound_output_mus2f/{}".format(
                model_name_nosrc)
        pathlib.Path(sound_sample_root).mkdir(parents=True, exist_ok=True)

        ckpt_path = cfg.MUS2FConfig.ckpt_path
        # Fixed: the old test `source != "vocals"` was always true once
        # "vocals" had been replaced by None, yielding "<ckpt_path>_None".
        if source is not None:
            ckpt_path = "{}_{}".format(ckpt_path, source)

        batch_size = 1
        n_feature = 5644 // 2

        # Model
        print("* Initialize network")
        p_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(batch_size, 64, n_feature, 1),
                                           name="p_input")
        v_pred = infer(p_input, 2, False)
        # When infer returns a list, only its last element is used.
        if isinstance(v_pred, list):
            v_pred = v_pred[-1]

        with tf.compat.v1.Session(config=cfg.sess_cfg) as sess:
            # Initialize, then load state from the checkpoint.
            sess.run(tf.compat.v1.global_variables_initializer())
            print("* Load checkpoint")
            ckpt_path = os.path.join(ckpt_path, "checkpoint-%d" % (step_idx, ))
            tf.compat.v1.train.Saver().restore(sess, ckpt_path)
            print(" :Loaded: `%s`" % (ckpt_path, ))
            os.makedirs("./eval_output", exist_ok=True)

            # NOTE(review): name_list, ret_list, results and model_name are
            # not read in the visible part of this function; kept in case
            # later code relies on them.
            name_list = []
            ret_list = []
            mus = musdb.DB(root=cfg.mus_root_path,
                           download=False,
                           subsets="test",
                           is_wav=True)
            mus_trk_list = list(mus.tracks)
            mus_trk_list.sort(key=lambda x: x.name)
            assert len(mus_trk_list) > 0
            if n_eval is not None:
                mus_trk_list = mus_trk_list[:n_eval]
            results = museval.EvalStore()

            for i_song, track in enumerate(mus_trk_list):
                print("[%02d/%02d] Estimate: `%s`" % (
                    i_song + 1,
                    len(mus_trk_list),
                    track.name,
                ))
                voc_ch_list = []
                inst_ch_list = []
                # Each of the two audio channels is separated on its own.
                for i_channel in range(2):
                    print(" :Channel #%d" % (i_channel, ))
                    name_list.append(track.name + " Channel %d" %
                                     (i_channel, ))
                    mixed_wav = track.audio[:, i_channel]
                    mixed_spec = to_spec(mixed_wav,
                                         len_frame=5644,
                                         len_hop=5644 // 4)
                    mixed_spec_mag = np.abs(mixed_spec)
                    mixed_spec_phase = np.angle(mixed_spec)
                    # Normalise magnitudes by the maximum; undone after
                    # inference.
                    max_tmp = np.max(mixed_spec_mag)
                    mixed_spec_mag = mixed_spec_mag / max_tmp
                    src_len = mixed_spec_mag.shape[0]
                    start_idx = 0
                    y_est_inst = np.zeros((src_len, n_feature),
                                          dtype=np.float32)
                    y_est_vocal = np.zeros((src_len, n_feature),
                                           dtype=np.float32)
                    x_input = np.zeros((batch_size, 64, n_feature, 1),
                                       dtype=np.float32)

                    # Slide a 64-frame window in steps of 32 frames.
                    while start_idx + 64 < src_len:
                        x_input[0, :, :, 0] = mixed_spec_mag[start_idx:start_idx + 64, :n_feature]
                        y_output = sess.run(v_pred,
                                            feed_dict={p_input: x_input})
                        if start_idx == 0:
                            # First window: keep all 64 output frames.
                            y_est_inst[start_idx:start_idx + 64, :] = y_output[0, :, :, 0]
                            y_est_vocal[start_idx:start_idx + 64, :] = y_output[0, :, :, 1]
                        else:
                            # Later windows: keep only the central 32 frames
                            # (16..48) of each window.
                            y_est_inst[start_idx + 16:start_idx + 48, :] = y_output[0, 16:48, :, 0]
                            y_est_vocal[start_idx + 16:start_idx + 48, :] = y_output[0, 16:48, :, 1]
                        start_idx += 32

                    # Final window is aligned with the end of the spectrogram
                    # and fills frames start_idx+16 .. src_len.
                    x_input[0, :, :, 0] = mixed_spec_mag[src_len - 64:src_len, :n_feature]
                    y_output = sess.run(v_pred, feed_dict={p_input: x_input})
                    src_start = src_len - start_idx - 16
                    y_est_inst[start_idx + 16:src_len, :] = y_output[0, 64 - src_start:64, :, 0]
                    y_est_vocal[start_idx + 16:src_len, :] = y_output[0, 64 - src_start:64, :, 1]

                    # Undo the magnitude normalisation.
                    y_est_inst *= max_tmp
                    y_est_vocal *= max_tmp
                    # Estimated magnitudes are combined with the mixture
                    # phase.
                    y_wav_inst = to_wav_file(y_est_inst,
                                             mixed_spec_phase[:, :n_feature],
                                             len_hop=5644 // 4)
                    y_wav_vocal = to_wav_file(y_est_vocal,
                                              mixed_spec_phase[:, :n_feature],
                                              len_hop=5644 // 4)
                    # Store each channel as a column so the channels can be
                    # concatenated side by side below.
                    voc_ch_list.append(y_wav_vocal.reshape(
                        y_wav_vocal.size, 1))
                    inst_ch_list.append(y_wav_inst.reshape(y_wav_inst.size,
                                                           1))
                    # Drop the large per-channel arrays before the next
                    # channel is processed.
                    del y_wav_inst, y_wav_vocal, y_est_inst, y_est_vocal, src_start, x_input, y_output, mixed_spec_mag, max_tmp, mixed_spec_phase, mixed_spec, mixed_wav

                # Fixed: the key used to be `source`, which is None for the
                # default target and cannot be turned into a file name.
                estimates = {
                    target_name: np.concatenate(voc_ch_list, axis=1),
                }
                del voc_ch_list, inst_ch_list
                if sound_sample_root:
                    mus.save_estimates(estimates, track, sound_sample_root)
                del estimates, i_song, track