Example #1
0
    def create_batch(self, flag='train', shuffle=True):
        """Yield padded feature/label batches for one pass over a data split.

        Args:
            flag: Key into ``self.wav2py`` selecting the split to iterate.
            shuffle: When true, visit the samples in random order.

        Yields:
            ``(inputs, outputs)`` where ``inputs`` is the list
            ``[the_inputs, the_labels, input_length, label_length]`` built from
            ``self.wav_padding`` and ``self.label_padding``, and ``outputs`` is
            a float32 zero array with one row per sample in the batch. The
            last batch may hold fewer than ``self.batch_size`` samples.
        """
        samples = self.wav2py[flag]
        idxs = list(range(len(samples)))
        if shuffle:
            random.shuffle(idxs)

        def _load(sample_idx):
            # Labels always use int32 so that the first load and any
            # replacement load produce arrays of the same dtype.
            wav_path, pys = samples[sample_idx]
            fbank = extract_feature(wav_path)
            label = np.array([self.py2id[py] for py in pys], dtype=np.int32)
            return fbank, label

        def _pack(batch_wavs, batch_labels):
            the_inputs, input_length = self.wav_padding(batch_wavs)
            the_labels, label_length = self.label_padding(batch_labels)
            inputs = [the_inputs, the_labels, input_length, label_length]
            outputs = np.zeros([len(batch_wavs), 1], dtype=np.float32)
            return inputs, outputs

        # Bounds of the index range from which replacement samples are drawn.
        low, high = len(idxs) // 4, len(idxs) // 2

        wavs = []
        labels = []
        for idx in idxs:
            fbank, label = _load(idx)
            # A sample that reaches audio_len or label_len is swapped for a
            # randomly drawn one. The drawn value indexes the split directly
            # (not the shuffled order).
            # NOTE: this repeats until a fitting sample is drawn, so it does
            # not terminate if no sample in [low, high] fits.
            while (fbank.shape[0] >= self.audio_len
                   or len(label) >= self.label_len):
                fbank, label = _load(random.randint(low, high))

            # Flush the previous batch before adding the current sample.
            if len(wavs) == self.batch_size:
                yield _pack(wavs, labels)
                wavs, labels = [], []
            wavs.append(fbank)
            labels.append(label)

        # Emit whatever is left over as a final (possibly smaller) batch.
        if wavs:
            yield _pack(wavs, labels)
Example #2
0
    def create_batch(self, flag='train', shuffle=True):
        """Yield full batches of padded features and labels for one split.

        Args:
            flag: Key into ``self.wav2py`` selecting the split to iterate.
            shuffle: When true, visit the samples in random order.

        Yields:
            ``(inputs, outputs)`` where ``inputs`` is
            ``[the_inputs, the_labels, input_length, label_length]`` and
            ``outputs`` is a float32 zero array of shape
            ``(self.batch_size, 1)``.

        A batch is emitted only when the pending lists are already full at the
        moment the next sample is read, so samples still pending when the loop
        ends are never yielded.
        """
        sample_count = len(self.wav2py[flag])
        order = list(range(sample_count))
        if shuffle:
            random.shuffle(order)
        pending_wavs, pending_labels = [], []

        for sample_idx in order:
            wav_path, pys = self.wav2py[flag][sample_idx]
            fbank = extract_feature(wav_path)
            label = np.array([self.py2id[py] for py in pys], dtype=np.int16)
            assert len(pending_wavs) == len(pending_labels)
            if len(pending_wavs) == self.batch_size:
                the_inputs, input_length = self.wav_padding(pending_wavs)
                the_labels, label_length = self.label_padding(pending_labels)
                for slot in range(self.batch_size):
                    # Overwrite any entry whose input length does not exceed
                    # the longest label in the batch with a copy of a randomly
                    # chosen entry (drawn from slots 1..batch_size-1).
                    # NOTE(review): this keeps re-drawing until the condition
                    # clears; confirm it always can for the data in use.
                    while input_length[slot] <= max(label_length):
                        donor = random.randint(1, self.batch_size - 1)
                        the_inputs[slot] = the_inputs[donor]
                        the_labels[slot] = the_labels[donor]
                        input_length[slot] = input_length[donor]
                        label_length[slot] = label_length[donor]
                assert min(input_length) > max(label_length), (
                    str(min(input_length)) + ":" + str(max(label_length)))
                batch_inputs = [
                    the_inputs,
                    the_labels,
                    input_length,
                    label_length,
                ]
                batch_outputs = np.zeros((self.batch_size, 1),
                                         dtype=np.float32)
                yield batch_inputs, batch_outputs
                pending_wavs, pending_labels = [], []
            pending_wavs.append(fbank)
            pending_labels.append(label)
Example #3
0
def check_input(wav_path, label):
    """Log a sample whose feature length // 8 is smaller than its label length.

    Args:
        wav_path: Path of the audio file, passed to ``extract_feature`` and
            written to the log as-is.
        label: Sized label sequence for the sample; only ``len(label)`` is used.

    When the check fails, one tab-separated line is appended to
    ``log/check_input.txt``. Every field is written as ``name:value``, the
    same layout ``report_data`` uses.
    """
    fbank = extract_feature(wav_path)
    fbank_len = fbank.shape[0]
    input_len = fbank_len // 8
    if input_len < len(label):
        # Plain append mode is enough: the file is only ever written here.
        with open('log/check_input.txt', 'a') as file:
            file.write(wav_path + '\t' + 'fbank_len:' + str(fbank_len) +
                       '\tinput_len:' + str(input_len) +
                       '\tlabel_len:' + str(len(label)) + '\n')
Example #4
0
 def create_batch_dev(self):
     """Load the features and label ids of every sample in the 'dev' split.

     Each sample of ``self.wav2py['dev']`` is read as a ``(wav_path, pys)``
     pair; the features come from ``extract_feature`` and the label ids
     from ``self.py2id``.

     NOTE(review): the original body was an unfinished stub. Nothing is
     padded, yielded or returned yet; the collected lists are only built.
     """
     data_num = len(self.wav2py['dev'])
     wavs = []
     labels = []
     # Iterate over sample indices. The previous ``enumerate(data_num)``
     # raised TypeError because ``data_num`` is an int, not an iterable.
     for i in range(data_num):
         wav_path, pys = self.wav2py['dev'][i]
         fbank = extract_feature(wav_path)
         label = [self.py2id[py] for py in pys]
         wavs.append(fbank)
         labels.append(label)
     # TODO: turn ``wavs`` / ``labels`` into batches once the intended
     # output format for the dev split is decided.
Example #5
0
 def create_online(self, wav_path):
     """Yield the padded features and length info for a single audio file.

     Args:
         wav_path: Path of the audio file; it must exist on disk.

     Yields:
         One ``(wav, wav_len)`` pair, as returned by ``self.wav_padding``
         for a one-element list holding the file's features.

     Asserts that the file exists and that its feature length is below
     ``self.audio_len``.
     """
     assert os.path.exists(wav_path)
     features = extract_feature(wav_path)
     frame_count = features.shape[0]
     assert frame_count < self.audio_len
     padded, padded_len = self.wav_padding([features])
     yield padded, padded_len
Example #6
0
def report_data(wav_path, label):
    """Append one tab-separated summary line for a sample to the data report.

    The line holds the path, the feature length (``fbank_len``), that length
    floor-divided by 8 (``input_len``) and ``len(label)`` (``label_len``),
    and is appended to ``log/data_report.txt``.
    """
    features = extract_feature(wav_path)
    with open('log/data_report.txt', '+a') as report:
        line = wav_path + '\t' + 'fbank_len:' + str(features.shape[0])
        line += '\tinput_len:' + str(features.shape[0] // 8)
        line += '\tlabel_len:' + str(len(label))
        line += '\n'
        report.write(line)
Example #7
0
def check_audio(wav_path):
    """Record an audio file whose feature length exceeds ``config.audio_len``.

    The path is appended on its own line to ``log/check_audio.txt``; files
    within the limit leave the log untouched.
    """
    frame_count = extract_feature(wav_path).shape[0]
    exceeds_limit = frame_count > config.audio_len
    if not exceeds_limit:
        return
    with open('log/check_audio.txt', '+a') as log_file:
        log_file.write(wav_path + '\n')