def create_batch(self, flag='train', shuffle=True):
    """Yield padded ``(inputs, outputs)`` batches for one data split.

    Samples of ``self.wav2py[flag]`` are visited by position, in random
    order when ``shuffle`` is true. Features come from ``extract_feature``
    and label ids from ``self.py2id``. A sample whose feature length
    reaches ``self.audio_len`` or whose label length reaches
    ``self.label_len`` is swapped for a randomly drawn sample until one
    fits.

    Args:
        flag: Key into ``self.wav2py`` selecting the split.
        shuffle: Shuffle the visiting order with ``random.shuffle``.

    Yields:
        ``(inputs, outputs)`` where ``inputs`` is
        ``[the_inputs, the_labels, input_length, label_length]`` as
        produced by ``self.wav_padding`` / ``self.label_padding`` and
        ``outputs`` is a float32 zero array with one row per sample.
        Full batches hold ``self.batch_size`` samples; leftovers are
        yielded as one final, smaller batch.
    """
    order = list(range(len(self.wav2py[flag])))
    if shuffle:
        random.shuffle(order)

    def load(position, dtype=None):
        # Read one (path, tokens) entry and turn it into features + ids.
        path, tokens = self.wav2py[flag][position]
        feats = extract_feature(path)
        ids = np.array([self.py2id[tok] for tok in tokens], dtype=dtype)
        return feats, ids

    def pack(feature_list, label_list, rows):
        # Pad both lists and build the (inputs, outputs) pair for a batch.
        the_inputs, input_length = self.wav_padding(feature_list)
        the_labels, label_length = self.label_padding(label_list)
        packed = [the_inputs, the_labels, input_length, label_length]
        return packed, np.zeros([rows, 1], dtype=np.float32)

    wavs = []
    labels = []
    for position in order:
        # NOTE(review): the first load uses NumPy's default integer dtype
        # while replacements below are int32 - confirm this is intended.
        fbank, label = load(position)

        # The replacement index is drawn from the second quarter of the
        # index range and used directly, not through the shuffled order.
        # NOTE(review): never terminates if every candidate in that range
        # is too long.
        while fbank.shape[0] >= self.audio_len or len(label) >= self.label_len:
            substitute = random.randint(len(order) // 4, len(order) // 2)
            fbank, label = load(substitute, dtype=np.int32)

        assert len(wavs) == len(labels)
        if len(wavs) == self.batch_size:
            yield pack(wavs, labels, self.batch_size)
            wavs = []
            labels = []
        wavs.append(fbank)
        labels.append(label)

    if wavs:
        yield pack(wavs, labels, len(wavs))
def create_batch(self, flag='train', shuffle=True):
    """Yield full, padded ``(inputs, outputs)`` batches for one data split.

    Samples of ``self.wav2py[flag]`` are visited by position, in random
    order when ``shuffle`` is true. Once ``self.batch_size`` samples are
    collected they are padded, and any slot whose input length does not
    exceed the longest label length in the batch is overwritten with a
    randomly chosen slot of the same batch until it does.

    Args:
        flag: Key into ``self.wav2py`` selecting the split.
        shuffle: Shuffle the visiting order with ``random.shuffle``.

    Yields:
        ``(inputs, outputs)`` where ``inputs`` is
        ``[the_inputs, the_labels, input_length, label_length]`` and
        ``outputs`` is a float32 zero array of shape
        ``(self.batch_size, 1)``. Samples left over after the last full
        batch are not yielded.
    """
    order = list(range(len(self.wav2py[flag])))
    if shuffle:
        random.shuffle(order)

    wavs = []
    labels = []
    for position in order:
        path, tokens = self.wav2py[flag][position]
        fbank = extract_feature(path)
        label = np.array([self.py2id[tok] for tok in tokens], dtype=np.int16)

        assert len(wavs) == len(labels)
        if len(wavs) == self.batch_size:
            the_inputs, input_length = self.wav_padding(wavs)
            the_labels, label_length = self.label_padding(labels)

            for slot in range(self.batch_size):
                # max(label_length) is re-evaluated on every pass because
                # overwriting a slot can change the batch maximum.
                # NOTE(review): the donor is drawn from 1..batch_size-1, so
                # slot 0 is never a donor and batch_size == 1 makes
                # randint(1, 0) raise ValueError; the loop also never ends
                # if no reachable donor is long enough.
                while input_length[slot] <= max(label_length):
                    donor = random.randint(1, self.batch_size - 1)
                    the_inputs[slot] = the_inputs[donor]
                    the_labels[slot] = the_labels[donor]
                    input_length[slot] = input_length[donor]
                    label_length[slot] = label_length[donor]

            assert min(input_length) > max(label_length), str(
                min(input_length)) + ":" + str(max(label_length))

            batch_inputs = [the_inputs, the_labels, input_length, label_length]
            batch_outputs = np.zeros([self.batch_size, 1], dtype=np.float32)
            yield batch_inputs, batch_outputs
            wavs = []
            labels = []

        wavs.append(fbank)
        labels.append(label)
def check_input(wav_path, label):
    """Log a sample whose downsampled feature length is shorter than its label.

    The feature length (first dimension of ``extract_feature(wav_path)``)
    is integer-divided by 8 and compared with ``len(label)``. When the
    quotient is smaller, one line is appended to ``log/check_input.txt``.

    Args:
        wav_path: Path string passed to ``extract_feature`` and written
            to the log.
        label: Sized label sequence; only its length is used.
    """
    frames = extract_feature(wav_path).shape[0]
    if frames // 8 >= len(label):
        return

    with open('log/check_input.txt', '+a') as log:
        parts = [
            wav_path, '\t', 'fbank_len:', str(frames),
            '\tinput_len', str(frames // 8),
            '\tlabel_len', str(len(label)),
            '\n',
        ]
        log.write(''.join(parts))
def create_batch_dev(self):
    """Load features and label ids for every sample of the ``'dev'`` split.

    Entries of ``self.wav2py['dev']`` are read by position, the same way
    ``create_batch`` reads its split. Features come from
    ``extract_feature`` and label ids from ``self.py2id``.

    Returns:
        ``(wavs, labels)``: two parallel lists holding, per sample, the
        extracted features and the list of label ids. No padding or
        batching is applied.

    NOTE(review): the previous body iterated ``enumerate(data_num)`` over
    an int, which raises ``TypeError``, and ended in a bare ``pass``
    without using the loaded data. Returning the collected lists is the
    assumed intent - confirm against callers.
    """
    wavs = []
    labels = []
    data_num = len(self.wav2py['dev'])
    # Index by position rather than iterating the container directly so
    # the access pattern matches create_batch.
    for i in range(data_num):
        wav_path, pys = self.wav2py['dev'][i]
        wavs.append(extract_feature(wav_path))
        labels.append([self.py2id[py] for py in pys])
    return wavs, labels
def create_online(self, wav_path):
    """Yield the padded features of a single audio file.

    This is a generator: the checks below run when the first item is
    requested, not when the method is called.

    Args:
        wav_path: Path of the audio file; it must exist.

    Yields:
        One ``(wav, wav_len)`` tuple, the two values returned by
        ``self.wav_padding`` for a one-element list.

    Raises:
        AssertionError: If the path does not exist or the feature length
            is not below ``self.audio_len`` (asserts are skipped under
            ``python -O``).
    """
    assert os.path.exists(wav_path)

    features = extract_feature(wav_path)
    assert features.shape[0] < self.audio_len

    padded, padded_len = self.wav_padding([features])
    yield padded, padded_len
def report_data(wav_path, label):
    """Append one line of length statistics to ``log/data_report.txt``.

    The line holds the path, the feature length (first dimension of
    ``extract_feature(wav_path)``), that length integer-divided by 8, and
    ``len(label)``, separated by tabs.

    Args:
        wav_path: Path string passed to ``extract_feature`` and written
            to the log.
        label: Sized label sequence; only its length is used.
    """
    frames = extract_feature(wav_path).shape[0]
    with open('log/data_report.txt', '+a') as log:
        parts = [
            wav_path, '\t', 'fbank_len:', str(frames),
            '\tinput_len:', str(frames // 8),
            '\tlabel_len:', str(len(label)),
            '\n',
        ]
        log.write(''.join(parts))
def check_audio(wav_path):
    """Log an audio file whose feature length exceeds ``config.audio_len``.

    When the first dimension of ``extract_feature(wav_path)`` is greater
    than ``config.audio_len``, the path is appended as one line to
    ``log/check_audio.txt``; otherwise nothing is written.

    Args:
        wav_path: Path string passed to ``extract_feature`` and written
            to the log.
    """
    frames = extract_feature(wav_path).shape[0]
    if frames <= config.audio_len:
        return

    with open('log/check_audio.txt', '+a') as log:
        log.write(wav_path + '\n')