Esempio n. 1
0
 def _populate_batch_queue(self, session, coord):
     '''
     Queue thread routine: keep running the enqueue op with one sample
     per iteration until the coordinator asks to stop or the enqueue
     is cancelled.

     Raises ValueError when the feature vector of a sample is shorter
     than its encoded transcript.
     '''
     total_files = len(self._data_set.files)
     position = -1
     while True:
         if coord.should_stop():
             break
         position = self._data_set.next_index(position) % total_files
         wav_file, transcript = self._data_set.files[position]
         feeder = self._model_feeder
         features = audiofile_to_input_vector(
             wav_file, feeder.numcep, feeder.numcontext)
         features_len = len(features)
         # TODO: change this step so the target is fetched in diphone form.
         labels = text_to_char_array(transcript, self._alphabet)
         labels_len = len(labels)
         if labels_len > features_len:
             message = 'Error: Audio file {} is too short for transcription.'
             raise ValueError(message.format(wav_file))
         feed = {
             feeder.ph_x: features,
             feeder.ph_x_length: features_len,
             feeder.ph_y: labels,
             feeder.ph_y_length: labels_len,
         }
         try:
             session.run(self._enqueue_op, feed_dict=feed)
         except tf.errors.CancelledError:
             # The queue was closed underneath us; end the thread quietly.
             return
Esempio n. 2
0
 def _populate_batch_queue(self, session, coord):
     '''
     Queue thread routine: enqueue one (features, transcript) sample per
     iteration until the coordinator asks to stop or the enqueue op is
     cancelled.
     '''
     total_files = len(self._data_set.files)
     position = -1
     while not coord.should_stop():
         position = self._data_set.next_index(position) % total_files
         wav_file, transcript = self._data_set.files[position]
         feeder = self._model_feeder
         features = audiofile_to_input_vector(
             wav_file, feeder.numcep, feeder.numcontext)
         features_len = len(features)
         labels = text_to_char_array(transcript)
         labels_len = len(labels)
         feed = {
             feeder.ph_x: features,
             feeder.ph_x_length: features_len,
             feeder.ph_y: labels,
             feeder.ph_y_length: labels_len,
         }
         try:
             session.run(self._enqueue_op, feed_dict=feed)
         except tf.errors.CancelledError:
             # The queue was closed underneath us; end the thread quietly.
             return
Esempio n. 3
0
def process_single_file(row):
    """Turn one (index, Series) row into features and an encoded transcript.

    Returns a 4-tuple: the feature vector, its length, the encoded
    transcript, and the transcript length.
    """
    # Only the Series half of the pair is used; the index is discarded.
    _index, record = row
    feature_vector = audiofile_to_input_vector(
        record.wav_filename, N_FEATURES, N_CONTEXT)
    encoded = text_to_char_array(record.transcript, alphabet)
    return feature_vector, len(feature_vector), encoded, len(encoded)
Esempio n. 4
0
 def generate_values():
     """Yield (sample_id, audio, first audio_format entry, sparse transcript)
     for every sample read from the enclosing scope's sources."""
     samples = samples_from_files(sources, buffering=buffering)
     # Fall back to two batches of look-ahead when none was configured.
     lookahead = process_ahead if process_ahead is not None else 2 * batch_size
     converted = change_audio_types(samples, AUDIO_TYPE_NP, process_ahead=lookahead)
     for sample in converted:
         encoded = text_to_char_array(
             sample.transcript, Config.alphabet, context=sample.sample_id)
         sparse_transcript = to_sparse_tuple(encoded)
         yield sample.sample_id, sample.audio, sample.audio_format[0], sparse_transcript
Esempio n. 5
0
def process_single_file(row, numcep, numcontext, alphabet):
    """Build the feature vector and encoded transcript for one data row.

    Args:
        row: an (index, Series) pair; the Series must expose
            ``wav_filename`` and ``transcript``.
        numcep: forwarded to ``audiofile_to_input_vector``.
        numcontext: forwarded to ``audiofile_to_input_vector``; twice this
            value is subtracted from the reported feature length.
        alphabet: forwarded to ``text_to_char_array``.

    Returns:
        (features, features_len, transcript, transcript_len)

    Raises:
        ValueError: if the reported feature length is smaller than the
            transcript length.
    """
    _index, record = row
    wav_path = record.wav_filename
    features = audiofile_to_input_vector(wav_path, numcep, numcontext)
    # NOTE(review): presumably discounts context frames on both sides - confirm.
    usable_len = len(features) - 2 * numcontext
    encoded = text_to_char_array(record.transcript, alphabet)
    encoded_len = len(encoded)

    if encoded_len > usable_len:
        message = 'Error: Audio file {} is too short for transcription.'
        raise ValueError(message.format(wav_path))

    return features, usable_len, encoded, encoded_len
Esempio n. 6
0
def process_single_file(row, numcep, numcontext, alphabet):
    """Convert one (index, Series) row into model inputs.

    The row's ``wav_filename`` is turned into a feature vector and its
    ``transcript`` is encoded with ``alphabet``. The returned feature length
    is ``len(features) - 2 * numcontext``.

    Returns:
        (features, features_len, transcript, transcript_len)

    Raises:
        ValueError: if that feature length is smaller than the length of the
            encoded transcript.
    """
    _unused_index, entry = row
    audio_path = entry.wav_filename
    feature_vector = audiofile_to_input_vector(audio_path, numcep, numcontext)
    feature_count = len(feature_vector) - 2 * numcontext
    char_ids = text_to_char_array(entry.transcript, alphabet)
    char_count = len(char_ids)

    too_short = feature_count < char_count
    if too_short:
        raise ValueError(
            'Error: Audio file {} is too short for transcription.'.format(audio_path))

    return feature_vector, feature_count, char_ids, char_count
Esempio n. 7
0
    def _compute_source_target(self):
        """Build the (features, length, target, length) tuple for the first
        transcript file and the ``.wav`` file that shares its base name."""
        txt_file = self._txt_files[0]
        base_name, _extension = path.splitext(txt_file)
        wav_file = base_name + ".wav"

        audio_waves = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)

        with open(txt_file) as transcript_handle:
            words = transcript_handle.read().strip().lower().split(' ')

        # Drop the first two space-separated tokens and remove all periods.
        original = ' '.join(words[2:]).replace('.', '')
        target = text_to_char_array(original)

        return audio_waves, len(audio_waves), target, len(target)
Esempio n. 8
0
 def _populate_batch_queue(self, session):
     """Enqueue one (features, transcript) sample per (txt, wav) file pair.

     For every pair yielded by ``self._files_circular_list`` the wav file is
     converted to a feature vector, the transcript is reduced to ASCII and
     encoded, and both are fed to ``self._enqueue_op`` through ``session``.
     """
     for txt_file, wav_file in self._files_circular_list:
         source = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
         source_len = len(source)
         with codecs.open(txt_file, encoding="utf-8") as open_txt_file:
             raw_text = open_txt_file.read()
         # encode() returns bytes on Python 3, while text_to_char_array
         # expects a string, so decode straight back after dropping the
         # non-ASCII characters.
         target = unicodedata.normalize("NFKD", raw_text) \
                             .encode("ascii", "ignore") \
                             .decode("ascii", "ignore")
         target = text_to_char_array(target)
         target_len = len(target)
         session.run(self._enqueue_op, feed_dict={
             self._x: source,
             self._x_length: source_len,
             self._y: target,
             self._y_length: target_len})
Esempio n. 9
0
 def _populate_batch_queue(self, session):
     """Enqueue one sample per (wav_file, transcript) pair from
     ``self._indices()``, stopping as soon as the enqueue op is cancelled."""
     for wav_file, transcript in self._indices():
         features = audiofile_to_input_vector(
             wav_file, self._numcep, self._numcontext)
         features_len = len(features)
         labels = text_to_char_array(transcript)
         labels_len = len(labels)
         feed = {
             self._x: features,
             self._x_length: features_len,
             self._y: labels,
             self._y_length: labels_len,
         }
         try:
             session.run(self._enqueue_op, feed_dict=feed)
         except tf.errors.CancelledError:
             # The queue was closed underneath us; stop feeding.
             return
Esempio n. 10
0
 def _populate_batch_queue(self, session):
     """Enqueue one sample per (txt, wav) file pair until the coordinator
     asks to stop or the enqueue op is cancelled."""
     for txt_file, wav_file in self._files_circular_list:
         # Check for a stop request before doing any work on the next file.
         if self._coord.should_stop():
             return
         features = audiofile_to_input_vector(
             wav_file, self._numcep, self._numcontext)
         features_len = len(features)
         with codecs.open(txt_file, encoding="utf-8") as transcript_handle:
             normalized = unicodedata.normalize("NFKD", transcript_handle.read())
             labels = text_to_char_array(normalized)
         labels_len = len(labels)
         feed = {
             self._x: features,
             self._x_length: features_len,
             self._y: labels,
             self._y_length: labels_len,
         }
         try:
             session.run(self._enqueue_op, feed_dict=feed)
         except tf.errors.CancelledError:
             return
Esempio n. 11
0
 def _populate_batch_queue(self, session):
     """Enqueue one sample per (txt, wav) file pair from ``self._indices()``,
     stopping as soon as the enqueue op is cancelled."""
     for txt_file, wav_file in self._indices():
         features = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
         features_len = len(features)
         with codecs.open(txt_file, encoding="utf-8") as transcript_handle:
             decomposed = unicodedata.normalize("NFKD", transcript_handle.read())
             # encode() yields a bytes object on Python 3 but
             # text_to_char_array wants a string, hence the round trip
             # through encode and decode.
             ascii_bytes = decomposed.encode("ascii", "ignore")
             ascii_text = ascii_bytes.decode("ascii", "ignore")
             labels = text_to_char_array(ascii_text)
         labels_len = len(labels)
         feed = {
             self._x: features,
             self._x_length: features_len,
             self._y: labels,
             self._y_length: labels_len,
         }
         try:
             session.run(self._enqueue_op, feed_dict=feed)
         except tf.errors.CancelledError:
             return
Esempio n. 12
0
 def _populate_batch_queue(self, session, coord):
     '''
     Queue thread routine: enqueue one sample per iteration until the
     coordinator asks to stop or the enqueue op is cancelled.

     Raises ValueError when the feature vector of a sample is shorter
     than its encoded transcript.
     '''
     n_files = len(self._data_set.files)
     cursor = -1
     while not coord.should_stop():
         cursor = self._data_set.next_index(cursor) % n_files
         wav_file, transcript = self._data_set.files[cursor]
         feeder = self._model_feeder
         inputs = audiofile_to_input_vector(wav_file, feeder.numcep, feeder.numcontext)
         inputs_len = len(inputs)
         encoded = text_to_char_array(transcript, self._alphabet)
         encoded_len = len(encoded)
         if encoded_len > inputs_len:
             raise ValueError(
                 'Error: Audio file {} is too short for transcription.'.format(wav_file))
         placeholders = {
             feeder.ph_x: inputs,
             feeder.ph_x_length: inputs_len,
             feeder.ph_y: encoded,
             feeder.ph_y_length: encoded_len,
         }
         try:
             session.run(self._enqueue_op, feed_dict=placeholders)
         except tf.errors.CancelledError:
             # The queue was closed underneath us; end the thread quietly.
             return
        .replace('á'.decode("utf-8"), 'a') \
        .replace('é'.decode("utf-8"), 'e') \
        .replace('í'.decode("utf-8"), 'i') \
        .replace('ó'.decode("utf-8"), 'o') \
        .replace('ú'.decode("utf-8"), 'u') \
        .replace('ö'.decode("utf-8"), 'o') \
        .replace('ü'.decode("utf-8"), 'u') \
        .replace('ý'.decode("utf-8"), 'y') \
        .replace('–'.decode("utf-8"), ' ') \
        .replace('‼'.decode("utf-8"), ' ') \
        .replace("mp3", "eme pe hiru")
    text = punct.sub(' ', text)
    text = " " + text + " "

    text = text.strip().lower()
    return text


# Walk the dataset tree and try to encode every *.txt transcript, printing
# the error and the file path for each transcript that fails.
for root, dirnames, filenames in os.walk("/home/ubuntu/datasets/tempo/"):
    for filename in fnmatch.filter(filenames, "*.txt"):
        trans_file = os.path.join(root, filename)
        try:
            # Close the transcript deterministically instead of leaking
            # one open handle per file.
            with codecs.open(trans_file, 'r', 'utf-8') as trans_handle:
                raw_transcript = trans_handle.read()
            text.text_to_char_array(clean_non_ascii(raw_transcript), alphabet)
        except Exception as err:
            # Deliberately broad: this is a best-effort scan that must keep
            # going after a bad file. The print call is kept exactly as
            # written so the output format does not change.
            print(err, )
            print(trans_file)