import os

import numpy as np
from tqdm import tqdm

import spec_utils  # project-local helper module (import path may differ)


def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
    patch_list = []
    patch_dir = 'cs{}_sr{}_hl{}_nf{}_of{}'.format(cropsize, sr, hop_length, n_fft, offset)
    os.makedirs(patch_dir, exist_ok=True)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        basename = os.path.splitext(os.path.basename(X_path))[0]

        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        # normalize both spectrograms by their joint peak magnitude
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        len_dataset = int(np.ceil(X.shape[2] / roi_size))
        for j in range(len_dataset):
            outpath = os.path.join(patch_dir, '{}_p{}.npz'.format(basename, j))
            start = j * roi_size
            if not os.path.exists(outpath):
                np.savez(
                    outpath,
                    X=X_pad[:, :, start:start + cropsize],
                    y=y_pad[:, :, start:start + cropsize])
            patch_list.append(outpath)

    return VocalRemoverValidationSet(patch_list)
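# The functions in this file rely on make_padding and VocalRemoverValidationSet,
# which are not shown in this excerpt. The sketches below are assumptions:
# make_padding is reconstructed from the inlined padding logic in the later
# make_validation_set variant, and the Dataset class assumes PyTorch and the
# .npz patch layout written above.
import torch.utils.data


def make_padding(width, cropsize, offset):
    # pad so the spectrogram splits into roi_size windows, each croppable to
    # cropsize with `offset` frames of context on either side
    left = offset
    roi_size = cropsize - offset * 2
    if roi_size == 0:
        roi_size = cropsize  # guard against offset * 2 == cropsize
    right = roi_size - (width % roi_size) + left
    return left, right, roi_size


class VocalRemoverValidationSet(torch.utils.data.Dataset):
    # minimal sketch: serves the .npz patches written by make_validation_set,
    # returning magnitude spectrograms for the mixture (X) and target (y)

    def __init__(self, patch_list):
        self.patch_list = patch_list

    def __len__(self):
        return len(self.patch_list)

    def __getitem__(self, idx):
        data = np.load(self.patch_list[idx])
        X, y = np.abs(data['X']), np.abs(data['y'])
        return X, y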
def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset):
    len_dataset = patches * len(filelist)

    X_dataset = np.zeros(
        (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)
    y_dataset = np.zeros(
        (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        # sample `patches` random crops per track
        starts = np.random.randint(0, X_pad.shape[2] - cropsize, patches)
        ends = starts + cropsize
        for j in range(patches):
            idx = i * patches + j
            X_dataset[idx] = X_pad[:, :, starts[j]:ends[j]]
            y_dataset[idx] = y_pad[:, :, starts[j]:ends[j]]

    return X_dataset, y_dataset
def make_training_set(filelist, cropsize, patches, sr, hop_length, offset):
    len_dataset = patches * len(filelist)

    X_dataset = np.zeros(
        (len_dataset, 2, hop_length, cropsize), dtype=np.float32)
    y_dataset = np.zeros(
        (len_dataset, 2, hop_length, cropsize), dtype=np.float32)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        # occasionally swap in a pre-rendered pitch-shifted variant of the pair;
        # str.replace returns a new string, so the result must be reassigned
        p = np.random.uniform()
        if p < 0.1:
            X_path = X_path.replace(os.path.splitext(X_path)[1], '_pitch-1.wav')
            y_path = y_path.replace(os.path.splitext(y_path)[1], '_pitch-1.wav')
        elif p < 0.2:
            X_path = X_path.replace(os.path.splitext(X_path)[1], '_pitch1.wav')
            y_path = y_path.replace(os.path.splitext(y_path)[1], '_pitch1.wav')

        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length)
        coeff = np.max([X.max(), y.max()])
        X, y = X / coeff, y / coeff

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        starts = np.random.randint(0, X_pad.shape[2] - cropsize, patches)
        ends = starts + cropsize
        for j in range(patches):
            idx = i * patches + j
            X_dataset[idx] = X_pad[:, :, starts[j]:ends[j]]
            y_dataset[idx] = y_pad[:, :, starts[j]:ends[j]]

            if np.random.uniform() < 0.5:
                # swap left/right channels
                X_dataset[idx] = X_dataset[idx, ::-1]
                y_dataset[idx] = y_dataset[idx, ::-1]

    return X_dataset, y_dataset
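# The pitch-shift branch above expects '<name>_pitch-1.wav' / '<name>_pitch1.wav'
# files to already exist next to the originals. A minimal offline generation
# sketch, assuming librosa and soundfile are available; channels are shifted
# one at a time to stay compatible with older librosa versions.
import librosa
import soundfile as sf


def render_pitch_variants(wav_path, sr, steps=(-1, 1)):
    y, _ = librosa.load(wav_path, sr=sr, mono=False)
    y = np.atleast_2d(y)  # mono -> (1, n) so the loop below also works
    root, _ = os.path.splitext(wav_path)
    for n_steps in steps:
        shifted = np.stack([
            librosa.effects.pitch_shift(ch, sr=sr, n_steps=n_steps) for ch in y])
        # soundfile expects (frames, channels)
        sf.write('{}_pitch{}.wav'.format(root, n_steps), shifted.T, sr)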
def make_validation_set(filelist, cropsize, offset, sr, hop_length):
    X_dataset = []
    y_dataset = []

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length)

        left = offset
        roi_size = cropsize - offset * 2
        right = roi_size - (X.shape[2] % roi_size) + offset
        X_pad = np.pad(X, ((0, 0), (0, 0), (left, right)), mode='reflect')
        y_pad = np.pad(y, ((0, 0), (0, 0), (left, right)), mode='reflect')

        len_dataset = int(np.ceil(X.shape[2] / roi_size))
        for j in range(len_dataset):
            start = j * roi_size
            X_dataset.append(X_pad[:, :, start:start + cropsize])
            y_dataset.append(y_pad[:, :, start:start + cropsize])

    return np.float32(X_dataset), np.float32(y_dataset)
def make_training_set(filelist, cropsize, patches, sr, hop_length):
    len_dataset = patches * len(filelist)

    X_dataset = np.zeros(
        (len_dataset, 2, hop_length, cropsize), dtype=np.float32)
    y_dataset = np.zeros(
        (len_dataset, 2, hop_length, cropsize), dtype=np.float32)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length)

        for j in range(patches):
            idx = i * patches + j
            start = np.random.randint(0, X.shape[2] - cropsize)
            X_dataset[idx] = X[:, :, start:start + cropsize]
            y_dataset[idx] = y[:, :, start:start + cropsize]

            if np.random.uniform() < 0.5:
                # swap left/right channels
                X_dataset[idx] = X_dataset[idx, ::-1]
                y_dataset[idx] = y_dataset[idx, ::-1]

    return X_dataset, y_dataset
def create_dataset(filelist, cropsize, patches, sr, hop_length, validation=False):
    len_dataset = patches * len(filelist)

    X_dataset = np.zeros(
        (len_dataset, 2, hop_length, cropsize), dtype=np.float32)
    y_dataset = np.zeros(
        (len_dataset, 2, hop_length, cropsize), dtype=np.float32)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length)

        for j in range(patches):
            idx = i * patches + j
            start = np.random.randint(0, X.shape[2] - cropsize)
            X_dataset[idx] = X[:, :, start:start + cropsize]
            y_dataset[idx] = y[:, :, start:start + cropsize]

            if not validation:
                if np.random.uniform() < 0.5:
                    # swap left/right channels
                    X_dataset[idx] = X_dataset[idx, ::-1]
                    y_dataset[idx] = y_dataset[idx, ::-1]
                # if np.random.uniform() < 0.5:
                #     # spec aug: zero out a random frequency band
                #     f = np.random.randint(0, hop_length // 4)
                #     f0 = np.random.randint(0, hop_length - f)
                #     X_dataset[idx, :, f0:f0 + f, :] = 0
                #     y_dataset[idx, :, f0:f0 + f, :] = 0
                # if np.random.uniform() < 0.5:
                #     # spec aug: zero out a random time span
                #     t = np.random.randint(0, hop_length // 8)
                #     t0 = np.random.randint(0, cropsize - t)
                #     X_dataset[idx, :, :, t0:t0 + t] = 0
                #     y_dataset[idx, :, :, t0:t0 + t] = 0

    return X_dataset, y_dataset
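# Usage sketch for create_dataset. The directory layout, hyperparameter values,
# and the train/validation split below are illustrative assumptions, not the
# project's actual configuration.
if __name__ == '__main__':
    import random

    mix_dir, inst_dir = 'dataset/mixtures', 'dataset/instruments'
    filelist = [
        (os.path.join(mix_dir, fn), os.path.join(inst_dir, fn))
        for fn in sorted(os.listdir(mix_dir))]
    random.shuffle(filelist)

    # hold out ~10% of the tracks for validation
    val_size = max(1, len(filelist) // 10)
    train_filelist, val_filelist = filelist[val_size:], filelist[:val_size]

    X_train, y_train = create_dataset(
        train_filelist, cropsize=256, patches=16, sr=44100, hop_length=1024)
    X_val, y_val = create_dataset(
        val_filelist, cropsize=256, patches=16, sr=44100, hop_length=1024,
        validation=True)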