def decode_generator(feat_list, upsampling_factor=120, string_path='/feat_mceplf0cap', batch_size=1, excit_dim=0):
    """DECODE BATCH GENERATOR

    Args:
        feat_list (list): list of feature files
        upsampling_factor (int): upsampling factor
        string_path (str): HDF5 dataset path of the acoustic features
        batch_size (int): batch size in decoding
        excit_dim (int): number of excitation dims prepended to mel features

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # sort with the feature length
        shape_list = [shape_hdf5(f, string_path)[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_feat_lists = np.array_split(feat_list, n_batch)
        batch_feat_lists = [f.tolist() for f in batch_feat_lists]

        for batch_feat_list in batch_feat_lists:
            batch_feat = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_feat_list:
                # load acoustic features
                if 'mel' in string_path and excit_dim > 0:
                    # prepend excitation features to the mel features
                    feat = np.c_[read_hdf5(featfile, '/feat_mceplf0cap')[:, :excit_dim],
                                 read_hdf5(featfile, string_path)]
                else:
                    feat = read_hdf5(featfile, string_path)

                # append to list
                batch_feat += [feat]
                n_samples_list += [feat.shape[0] * upsampling_factor]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to padded ndarray
            batch_feat = pad_list(batch_feat)

            # convert to torch tensor
            batch_feat = torch.FloatTensor(batch_feat)
            if torch.cuda.is_available():
                batch_feat = batch_feat.cuda()

            yield feat_ids, (batch_feat, n_samples_list)
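# Usage sketch (assumption): iterate over batches and inspect the yielded
# tensors; when string_path points at mel features and excit_dim > 0, the first
# excit_dim columns come from '/feat_mceplf0cap'.  The batch size and path
# values below are only illustrative.
#
# def _example_inspect_batches(feat_list):
#     generator = decode_generator(feat_list, upsampling_factor=120,
#                                  string_path='/feat_mceplf0cap',
#                                  batch_size=4, excit_dim=2)
#     for feat_ids, (batch_feat, n_samples_list) in generator:
#         # batch_feat: (B, max_frames, feat_dim), zero-padded to the longest
#         # utterance; n_samples_list holds each utterance's waveform length
#         logging.info(f'{feat_ids} {batch_feat.shape} {n_samples_list}')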
def get_max_frame(l, feat_list, wav_list, cpu, spk_list, spk_dict):
    """Count the number of frames of each utterance and record it per speaker.

    Args:
        l (multiprocessing.Lock): lock guarding the shared speaker dictionary
        feat_list (list): list of feature files
        wav_list (list): list of corresponding wav files
        cpu (int): worker index (used only for logging)
        spk_list (list): list of speaker names
        spk_dict (dict): shared dict collecting per-speaker frame-count records
    """
    tmp_spk_dict = {}
    for spk in spk_list:
        tmp_spk_dict[spk] = {}
    for feat, wav in zip(feat_list, wav_list):
        # number of frames from the f0 dataset; speaker name from the parent dir
        n_frame = shape_hdf5(feat, '/f0_range')[0]
        spk = os.path.basename(os.path.dirname(feat))
        tmp_spk_dict[spk][feat + "@" + wav] = n_frame
        logging.info(f'{cpu} {spk} {feat} {wav} {n_frame}')
    # merge this worker's counts into the shared dict under the lock
    l.acquire()
    try:
        for spk in spk_list:
            if bool(tmp_spk_dict[spk]):
                spk_dict[spk].append(tmp_spk_dict[spk])
    finally:
        l.release()
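# Invocation sketch (assumption): this helper has the shape of a multiprocessing
# worker, so it would typically be launched over slices of the file lists with a
# shared Manager dict and a Lock.  `mp` refers to the multiprocessing module
# (assumed imported as mp); the chunking below is only illustrative.
#
# def _example_count_frames(feat_list, wav_list, spk_list, n_jobs=4):
#     manager = mp.Manager()
#     spk_dict = manager.dict({spk: manager.list() for spk in spk_list})
#     lock = mp.Lock()
#     feat_chunks = np.array_split(feat_list, n_jobs)
#     wav_chunks = np.array_split(wav_list, n_jobs)
#     processes = []
#     for i in range(n_jobs):
#         p = mp.Process(target=get_max_frame,
#                        args=(lock, list(feat_chunks[i]), list(wav_chunks[i]),
#                              i, spk_list, spk_dict))
#         p.start()
#         processes.append(p)
#     for p in processes:
#         p.join()
#     return {spk: list(spk_dict[spk]) for spk in spk_list}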
def decode_generator(feat_list, batch_size=32, wav_transform=None, feat_transform=None, use_speaker_code=False, upsampling_factor=0):
    """DECODE BATCH GENERATOR

    Args:
        feat_list (list): list of feature files
        batch_size (int): batch size in decoding
        wav_transform (func): preprocessing function for waveform
        feat_transform (func): preprocessing function for aux feats
        use_speaker_code (bool): whether to use speaker code
        upsampling_factor (int): upsampling factor (0 if feats are already at sample resolution)

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # for sample-by-sample generation
        if batch_size == 1:
            for featfile in feat_list:
                # make seed waveform and load aux feature
                x = np.zeros((1))
                if upsampling_factor == 0:
                    h = read_hdf5(featfile, "/feat")
                else:
                    h = read_hdf5(featfile, "/feat_org")
                if use_speaker_code:
                    sc = read_hdf5(featfile, "/speaker_code")
                    sc = np.tile(sc, [h.shape[0], 1])
                    h = np.concatenate([h, sc], axis=1)

                # perform pre-processing
                if wav_transform is not None:
                    x = wav_transform(x)
                if feat_transform is not None:
                    h = feat_transform(h)

                # convert to torch tensor
                x = torch.from_numpy(x).long()
                h = torch.from_numpy(h).float()
                if torch.cuda.is_available():
                    x = x.cuda()
                    h = h.cuda()
                x = x.unsqueeze(0)  # 1 => 1 x 1
                h = h.transpose(0, 1).unsqueeze(0)  # T x C => 1 x C x T

                # get target length and file id
                if upsampling_factor == 0:
                    n_samples = h.size(2) - 1
                else:
                    n_samples = h.size(2) * upsampling_factor - 1
                feat_id = os.path.basename(featfile).replace(".h5", "")

                yield feat_id, (x, h, n_samples)

        # for batch generation
        else:
            # sort with the feature length
            if upsampling_factor == 0:
                shape_list = [shape_hdf5(f, "/feat")[0] for f in feat_list]
            else:
                shape_list = [shape_hdf5(f, "/feat_org")[0] for f in feat_list]
            idx = np.argsort(shape_list)
            feat_list = [feat_list[i] for i in idx]

            # divide into batch list
            n_batch = math.ceil(len(feat_list) / batch_size)
            batch_lists = np.array_split(feat_list, n_batch)
            batch_lists = [f.tolist() for f in batch_lists]

            for batch_list in batch_lists:
                batch_x = []
                batch_h = []
                n_samples_list = []
                feat_ids = []
                for featfile in batch_list:
                    # make seed waveform and load aux feature
                    x = np.zeros((1))
                    if upsampling_factor == 0:
                        h = read_hdf5(featfile, "/feat")
                    else:
                        h = read_hdf5(featfile, "/feat_org")
                    if use_speaker_code:
                        sc = read_hdf5(featfile, "/speaker_code")
                        sc = np.tile(sc, [h.shape[0], 1])
                        h = np.concatenate([h, sc], axis=1)

                    # perform pre-processing
                    if wav_transform is not None:
                        x = wav_transform(x)
                    if feat_transform is not None:
                        h = feat_transform(h)

                    # append to list
                    batch_x += [x]
                    batch_h += [h]
                    if upsampling_factor == 0:
                        n_samples_list += [h.shape[0] - 1]
                    else:
                        n_samples_list += [h.shape[0] * upsampling_factor - 1]
                    feat_ids += [os.path.basename(featfile).replace(".h5", "")]

                # convert list to ndarray
                batch_x = np.stack(batch_x, axis=0)
                batch_h = pad_list(batch_h)

                # convert to torch tensor
                batch_x = torch.from_numpy(batch_x).long()
                batch_h = torch.from_numpy(batch_h).float().transpose(1, 2)
                if torch.cuda.is_available():
                    batch_x = batch_x.cuda()
                    batch_h = batch_h.cuda()

                yield feat_ids, (batch_x, batch_h, n_samples_list)
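# Usage sketch (assumption): with batch_size=1 the generator yields a single
# (feat_id, (x, h, n_samples)) tuple per utterance, otherwise lists of ids and
# batched tensors.  `model.generate` is a hypothetical autoregressive sampling
# call, not defined in this module.
#
# def _example_decode_single(model, feat_list, upsampling_factor=80):
#     generator = decode_generator(feat_list, batch_size=1,
#                                  upsampling_factor=upsampling_factor)
#     results = {}
#     for feat_id, (x, h, n_samples) in generator:
#         results[feat_id] = model.generate(x, h, n_samples)
#     return results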
def decode_generator(feat_list, fs, wav_transform=None, feat_transform=None, feature_type="world", feat_ext=".h5", dense_factor=8, batch_size=32, upsampling_factor=80, f0_factor=1.0, f0_dim_index=1, extra_memory=False):
    """DECODE BATCH GENERATOR

    Args:
        feat_list (list): list of feat files
        fs (int): sampling rate
        wav_transform (func): preprocessing function for waveform
        feat_transform (func): preprocessing function for aux feats
        feature_type (str): feature type
        feat_ext (str): feature filename extension
        dense_factor (int): the number of taps in one cycle
        batch_size (int): batch size in decoding
        upsampling_factor (int): upsampling factor
        f0_factor (float): the ratio of scaled f0
        f0_dim_index (int): the dimension index of the f0 feature
        extra_memory (bool): whether to process the dilated factor in tensor format
            * tensor mode accelerates decoding but consumes more memory

    Return:
        (object): generator instance
    """
    # sort with the feature length
    shape_list = [shape_hdf5(f, "/" + feature_type)[0] for f in feat_list]
    idx = np.argsort(shape_list)
    feat_list = [feat_list[i] for i in idx]

    # divide into batch list
    n_batch = math.ceil(len(feat_list) / batch_size)
    batch_lists = np.array_split(feat_list, n_batch)
    batch_lists = [f.tolist() for f in batch_lists]

    for batch_list in batch_lists:
        batch_x = []
        batch_h = []
        batch_d = []
        feat_ids = []
        n_samples_list = []
        for featfile in batch_list:
            # make seed waveform and load aux feature
            x = np.zeros((1))
            h = read_hdf5(featfile, "/" + feature_type)
            if f0_factor != 1.0:
                # scale the f0 dimension for pitch-modified decoding
                h[:, f0_dim_index] = h[:, f0_dim_index] * f0_factor
            d = _dilated_factor(_batch_f0(h), fs, dense_factor)
            d = extend_time(np.expand_dims(d, -1), upsampling_factor)  # T x 1

            # perform pre-processing
            if wav_transform is not None:
                x = wav_transform(x)
            if feat_transform is not None:
                h = feat_transform(h)

            # append to list
            batch_x += [x]
            batch_h += [h]
            batch_d += [d]
            feat_ids += [os.path.basename(featfile).replace(feat_ext, "")]
            n_samples_list += [h.shape[0] * upsampling_factor - 1]

        # convert list to ndarray
        batch_x = np.stack(batch_x, axis=0)
        batch_h = pad_list(batch_h)
        batch_d = pad_list(batch_d)

        # convert to torch tensor
        batch_x = torch.from_numpy(batch_x).long()
        batch_h = torch.from_numpy(batch_h).float().transpose(1, 2)
        if extra_memory:
            batch_d = torch.from_numpy(batch_d).float().squeeze(-1)
        else:
            batch_d = batch_d.squeeze(-1)

        # send to cuda
        if torch.cuda.is_available():
            batch_x = batch_x.cuda()
            batch_h = batch_h.cuda()
            if extra_memory:
                batch_d = batch_d.cuda()

        yield feat_ids, batch_x, batch_h, n_samples_list, batch_d
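# Usage sketch (assumption): this variant yields a flat 5-tuple that also
# carries the per-sample dilation factors `batch_d` for a pitch-dependent
# dilated model.  `model` and `write_wav` are hypothetical placeholders, not
# part of this module.
#
# def _example_decode_dilated(model, feat_list, fs, outdir):
#     generator = decode_generator(feat_list, fs, feature_type="world",
#                                  upsampling_factor=80, f0_factor=1.0)
#     with torch.no_grad():
#         for feat_ids, batch_x, batch_h, n_samples_list, batch_d in generator:
#             samples = model(batch_x, batch_h, batch_d)  # hypothetical forward
#             for feat_id, y, n in zip(feat_ids, samples, n_samples_list):
#                 write_wav(os.path.join(outdir, feat_id + ".wav"),
#                           y[:n].cpu().numpy(), fs)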
def decode_generator(feat_list, upsampling_factor=120, string_path='/feat_mceplf0cap', batch_size=1):
    """DECODE BATCH GENERATOR

    Args:
        feat_list (list): list of feature files
        upsampling_factor (int): upsampling factor
        string_path (str): HDF5 dataset path of the acoustic features
        batch_size (int): batch size in decoding

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # sort with the feature length
        shape_list = [shape_hdf5(f, string_path)[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_feat_lists = np.array_split(feat_list, n_batch)
        batch_feat_lists = [f.tolist() for f in batch_feat_lists]

        for batch_feat_list in batch_feat_lists:
            batch_feat = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_feat_list:
                # load acoustic features
                feat = read_hdf5(featfile, string_path)

                # append to list
                batch_feat += [feat]
                n_samples_list += [feat.shape[0] * upsampling_factor]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to padded ndarray
            batch_feat = pad_list(batch_feat)

            # convert to torch tensor
            batch_feat = torch.FloatTensor(batch_feat)
            if torch.cuda.is_available():
                batch_feat = batch_feat.cuda()

            yield feat_ids, (batch_feat, n_samples_list)
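# Usage sketch (assumption): because utterances in a batch are padded to the
# longest one, n_samples_list is needed to trim each generated waveform back to
# its true length before writing.  `model.decode` is a hypothetical synthesis
# call and `sf` refers to the soundfile package, assumed to be imported.
#
# def _example_batch_decode(model, feat_list, outdir, fs=24000):
#     generator = decode_generator(feat_list, upsampling_factor=120,
#                                  batch_size=8)
#     for feat_ids, (batch_feat, n_samples_list) in generator:
#         batch_wav = model.decode(batch_feat)  # (B, max_frames * upsampling_factor)
#         for feat_id, wav, n in zip(feat_ids, batch_wav, n_samples_list):
#             sf.write(os.path.join(outdir, feat_id + ".wav"),
#                      wav[:n].cpu().numpy(), fs)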
def decode_generator(feat_list, batch_size=7, wav_transform=None, string_path='/feat_org_lf0', spk_trg=None, min_idx=None, upsampling_factor=0):
    """DECODE BATCH GENERATOR

    Args:
        feat_list (list): list of feature files
        batch_size (int): batch size in decoding
        wav_transform (func): preprocessing function for waveform
        string_path (str): HDF5 dataset path of the acoustic features
        spk_trg (str): target speaker name; if given, decode converted features
        min_idx (int): checkpoint index used to build the converted feature path
        upsampling_factor (int): upsampling factor

    Return:
        (object): generator instance
    """
    with torch.no_grad():
        # if a target speaker is given, decode the converted mel-cepstrum features
        if spk_trg is not None:
            if min_idx is not None:
                string_path = '/feat_cvmcep_cycvae-' + str(min_idx) + '-' + spk_trg
            else:
                string_path = '/feat_cvmcep_cycvae-' + spk_trg

        # sort with the feature length
        shape_list = [shape_hdf5(f, string_path)[0] for f in feat_list]
        idx = np.argsort(shape_list)
        feat_list = [feat_list[i] for i in idx]

        # divide into batch list
        n_batch = math.ceil(len(feat_list) / batch_size)
        batch_lists = np.array_split(feat_list, n_batch)
        batch_lists = [f.tolist() for f in batch_lists]

        for batch_list in batch_lists:
            batch_x = []
            batch_h = []
            n_samples_list = []
            feat_ids = []
            for featfile in batch_list:
                # make seed waveform and load aux feature
                x = np.zeros((1))
                h = read_hdf5(featfile, string_path)

                # perform pre-processing
                if wav_transform is not None:
                    x = wav_transform(x)

                # append to list
                batch_x += [x]
                batch_h += [h]
                n_samples_list += [h.shape[0] * upsampling_factor]
                feat_ids += [os.path.basename(featfile).replace(".h5", "")]

            # convert list to ndarray
            batch_x = np.stack(batch_x, axis=0)
            batch_h = pad_list(batch_h)

            # convert to torch tensor
            batch_x = torch.LongTensor(batch_x)
            batch_h = torch.FloatTensor(batch_h).transpose(1, 2)
            if torch.cuda.is_available():
                batch_x = batch_x.cuda()
                batch_h = batch_h.cuda()

            yield feat_ids, (batch_x, batch_h, n_samples_list)
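# Usage sketch (assumption): when spk_trg (and optionally min_idx) is given,
# the generator reads converted features from a path of the form
# '/feat_cvmcep_cycvae-<min_idx>-<spk_trg>' instead of string_path.  `model`
# is a hypothetical neural vocoder; its generate call is not defined here.
#
# def _example_decode_converted(model, feat_list, spk_trg, min_idx,
#                               upsampling_factor=80):
#     generator = decode_generator(feat_list, batch_size=4, spk_trg=spk_trg,
#                                  min_idx=min_idx,
#                                  upsampling_factor=upsampling_factor)
#     outputs = {}
#     for feat_ids, (batch_x, batch_h, n_samples_list) in generator:
#         batch_wav = model.generate(batch_x, batch_h, n_samples_list)
#         for feat_id, wav in zip(feat_ids, batch_wav):
#             outputs[feat_id] = wav
#     return outputs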