def __init__(self, config, path_scp_mix, path_scp_targets):
    """Load the mixture scp and one scp per target source.

    Args:
        config: Configuration object forwarded to ``utils.wav_processor``
            and ``transform``.
        path_scp_mix: Path of the scp file listing the mixture signals.
        path_scp_targets: Iterable of scp paths, one per target source.
    """
    processor = utils.wav_processor(config)
    self.wp = processor
    self.scp_mix = processor.read_scp(path_scp_mix)

    # One parsed scp per target, in the order the paths were given.
    target_scps = []
    for target_path in path_scp_targets:
        target_scps.append(processor.read_scp(target_path))
    self.scp_targets = target_scps

    # The mixture scp's keys are stored as a list.
    self.keys = list(self.scp_mix.keys())
    self.trans = transform(config)
def __init__(self, config, path_wav_test, path_model, clustering_type, eval_idx=None):
    """Load a trained DeepClustering model and prepare the result directory.

    Creates ``./result_test/<ISO timestamp>/`` and writes the first two rows
    of ``log.csv`` in it: the run conditions and the column header.

    Args:
        config: Configuration mapping; ``config['gpu']`` and
            ``config['num_spks']`` are read here.
        path_wav_test: Only recorded in the CSV conditions row. The mixture
            list itself is read from the hard-coded ``./scp/tt_mix.scp``.
        path_model: Checkpoint file containing a ``'model_state_dict'`` entry.
        clustering_type: Stored as-is on ``self.clustering_type``.
        eval_idx: ``'SDR'``, ``'SI-SDR'`` or ``None``. Selects the metric
            columns of the CSV header; any other value yields a header with
            only the ``key`` column.
    """
    print(eval_idx)
    self.wp = utils.wav_processor(config, path_model)
    self.model = model.DeepClustering(config)
    self.device = torch.device(config['gpu'])
    print('Processing on', config['gpu'])

    # Restore the trained weights and switch to inference mode.
    self.path_model = path_model
    ckp = torch.load(self.path_model, map_location=self.device)
    self.model.load_state_dict(ckp['model_state_dict'])
    self.model.eval()

    self.trans = transform(config, path_model)
    self.num_spks = config['num_spks']
    self.kmeans = KMeans(n_clusters=self.num_spks)
    self.gmm = GMM(n_components=self.num_spks, max_iter=100)

    # NOTE(review): isoformat() contains ':' characters, which are not valid
    # in directory names on Windows -- confirm the target platforms.
    dt_now = datetime.datetime.now()
    self.path_separated = "./result_test" + '/' + str(dt_now.isoformat())
    self.clustering_type = clustering_type
    self.scp_mix = self.wp.read_scp("./scp/tt_mix.scp")
    os.makedirs(self.path_separated, exist_ok=True)
    self.path_csv = os.path.join(self.path_separated, "log.csv")
    self.eval_idx = eval_idx

    # newline='' is required by the csv module; without it the writer's own
    # line terminator is translated again and blank rows appear on Windows.
    with open(self.path_csv, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([self.path_model, path_wav_test, self.eval_idx])

        # Header: key, <metric>_1..N, then <metric>i_1..N (improvement).
        header = ["key"]
        if self.eval_idx in ('SDR', 'SI-SDR'):
            for suffix in ("", "i"):
                for i in range(self.num_spks):
                    header.append(
                        "{0}{1}_{2}".format(self.eval_idx, suffix, i + 1))
        writer.writerow(header)
def calc_normalize_params(path_scp_target_i, config, time):
    """Compute per-bin mean and standard deviation of log-power over an scp.

    For every entry of the scp, the log-power returned by
    ``wp.log_power(y, normalize=False)`` is averaged over axis 0, and these
    per-file averages (and averages of squares) are then averaged over all
    files with equal weight per file.

    Args:
        path_scp_target_i: Path of the scp file listing the wav files.
        config: Configuration mapping; ``config['transform']['n_fft']`` sets
            the number of bins (``n_fft / 2 + 1``).
        time: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(mean_f, std_f)`` of 1-D arrays with ``n_fft / 2 + 1``
        elements each.
    """
    wp = utils.wav_processor(config)
    scp_mix = wp.read_scp(path_scp_target_i)
    num_files = len(scp_mix.keys())

    f_bin = int(config['transform']['n_fft'] / 2 + 1)
    mean_f = np.zeros(f_bin)
    var_f = np.zeros(f_bin)  # accumulates E[x^2] until the final step

    for key in tqdm(scp_mix.keys()):
        y = wp.read_wav(scp_mix[key])
        logpow = wp.log_power(y, normalize=False)
        mean_f += np.mean(logpow, 0)
        var_f += np.mean(logpow**2, 0)

    mean_f = mean_f / num_files
    var_f = var_f / num_files

    # E[x^2] - E[x]^2 is non-negative mathematically, but floating-point
    # cancellation can leave a tiny negative value for near-constant bins;
    # clamp it so sqrt never yields NaN.
    std_f = np.sqrt(np.maximum(var_f - mean_f**2, 0.0))
    return mean_f, std_f
def __init__(self, config):
    """Load the DeepClustering checkpoint and the test-time settings.

    Args:
        config: Configuration mapping. Reads ``gpu``, ``dir_wav_root``,
            ``num_spks`` and, under ``test``, ``path_model``,
            ``path_scp_mix``, ``path_separated`` and ``type_mask``.
    """
    self.wp = utils.wav_processor(config)
    self.dpcl = model.DeepClustering(config)
    self.device = torch.device(config['gpu'])
    print('Processing on', config['gpu'])

    # Restore trained weights, then put the network in inference mode.
    test_cfg = config['test']
    self.path_model = test_cfg['path_model']
    checkpoint = torch.load(self.path_model, map_location=self.device)
    self.dpcl.load_state_dict(checkpoint['model_state_dict'])
    self.dpcl.eval()

    self.dir_wav_root = config['dir_wav_root']
    self.scp_mix = self.wp.read_scp(config['test']['path_scp_mix'])
    self.trans = transform(config)

    # One cluster / mixture component per speaker.
    n_speakers = config['num_spks']
    self.num_spks = n_speakers
    self.kmeans = KMeans(n_clusters=n_speakers)
    self.gmm = GMM(n_components=n_speakers, max_iter=1000)

    # Output goes to a per-run sub-directory named after the start time.
    started_at = datetime.datetime.now()
    self.path_separated = (
        config['test']['path_separated'] + '/' + started_at.isoformat()
    )
    self.type_mask = config['test']['type_mask']
def __init__(self, config):
    """Keep the speaker count, a wav processor and the training mask setting.

    Args:
        config: Configuration mapping; reads ``num_spks`` and
            ``dataloader.train.mask``.
    """
    self.num_spks = config['num_spks']
    self.wp = utils.wav_processor(config)
    train_loader_cfg = config['dataloader']['train']
    self.mask = train_loader_cfg['mask']
def __init__(self, config, path_model):
    """Keep the speaker count, a wav processor and the training mask type.

    Args:
        config: Configuration mapping; reads ``num_spks`` and
            ``train.mask_type``.
        path_model: Forwarded unchanged to ``utils.wav_processor``.
    """
    self.num_spks = config['num_spks']
    self.wp = utils.wav_processor(config, path_model)
    train_cfg = config['train']
    self.mask_type = train_cfg['mask_type']
# Script: accumulate per-bin log-power statistics over the training mixtures
# listed in config['dataloader']['train']['path_scp_mix'].
import numpy as np
import yaml
from tqdm import tqdm
import pickle
from Learning import utils

if __name__ == "__main__":
    print('calc normalizing parameters')

    # Settings are read from config.yaml in the current working directory.
    with open('config.yaml', 'r') as yml:
        config = yaml.safe_load(yml)

    wp = utils.wav_processor(config)
    path_scp_mix = config['dataloader']['train']['path_scp_mix']
    scp_mix = wp.read_scp(path_scp_mix)

    # n_fft / 2 + 1 bins -- presumably the one-sided spectrum size; confirm
    # against wp.stft.
    f_bin = int(config['transform']['n_fft'] / 2 + 1)
    mean_f = np.zeros(f_bin)
    var_f = np.zeros(f_bin)  # holds the running sum of mean squares

    for key in tqdm(scp_mix.keys()):
        y = wp.read_wav(scp_mix[key])
        Y = wp.stft(y)
        log_pow = wp.log_power(Y)
        # Reduce over axis 0, then accumulate across files.
        mean_f += np.mean(log_pow, 0)
        var_f += np.mean(log_pow**2, 0)

    # Average the per-file statistics (each file weighted equally).
    mean_f = mean_f / len(scp_mix.keys())
    var_f = var_f / len(scp_mix.keys())