예제 #1
0
def read_data_thread_(
        set_id,
        sess,
        input_data,
        input_length,
        output_data,
        output_length,
        enqueue_op_,
        close_op_,
        mean_speaker,
        var_speaker,
        fst):
    '''Enqueue data to queue for the target domain.

    Reads the CSV transcription file named by ``FLAGS.dann_file``, shuffles
    its lines, and for every row belonging to ``set_id`` runs ``enqueue_op_``
    with the normalized features and the encoded transcription. Runs
    ``close_op_`` once all rows have been processed.

    Each row is expected to hold four fields, in order: text, set id,
    speaker, audio file.

    Args:
        set_id: Only rows whose set id field equals this value are enqueued.
        sess: Session object whose ``run`` method executes the ops.
        input_data: feed_dict key that receives the normalized features.
        input_length: feed_dict key that receives ``feat.shape[0]``.
        output_data: feed_dict key that receives the encoded transcription.
        output_length: feed_dict key that receives the transcription length.
        enqueue_op_: Op run once per selected row.
        close_op_: Op run once after the loop finishes.
        mean_speaker: Mapping indexed by speaker; subtracted from the
            features.
        var_speaker: Mapping indexed by speaker; the features are divided by
            its square root.
        fst: Passed to ``in_fst`` together with the encoded transcription
            when ``FLAGS.use_train_lm`` is set.
    '''

    # Read inside a context manager so the file handle is released right
    # after the lines are loaded instead of waiting for garbage collection.
    with tf.gfile.FastGFile(FLAGS.dann_file) as trans_file:
        trans = trans_file.readlines()
    random.shuffle(trans)
    for text, set_id_trans, speaker, audio_file in csv.reader(trans):
        try:
            text = [VOCAB_TO_INT[c] for c in text] + [VOCAB_TO_INT['</s>']]
        except KeyError:
            # Rows containing a character missing from the vocabulary are
            # skipped.
            continue

        if (set_id == set_id_trans and
                ((not FLAGS.use_train_lm) or in_fst(fst, text))):
            feat = get_features(audio_file)
            # Per-speaker mean/variance normalization.
            feat = feat - mean_speaker[speaker]
            feat = feat / np.sqrt(var_speaker[speaker])
            sess.run(enqueue_op_, feed_dict={
                input_data: feat,
                input_length: feat.shape[0],
                output_data: text,
                output_length: len(text)})
    sess.run(close_op_)
예제 #2
0
def read_data_thread(set_id, sess, input_data, input_length, output_data,
                     output_length, enqueue_op, close_op, mean_speaker,
                     var_speaker, fst):
    """Feed the queue with the utterances that belong to ``set_id``.

    The lines of ``FLAGS.trans_file`` are shuffled and each one is split on
    a backslash into text, set id, speaker and audio file. Lines whose text
    contains a character missing from ``VOCAB_TO_INT`` are skipped, as are
    lines from another set and, when ``FLAGS.use_train_lm`` is set, lines
    rejected by ``in_fst``. For every remaining line ``enqueue_op`` is run
    with the speaker-normalized features and the encoded text. ``close_op``
    is run once after the last line.
    """

    records = FileOpen(FLAGS.trans_file).readlines()
    random.shuffle(records)
    for record in records:
        transcript, record_set, speaker, audio_file = (
            record.strip().split('\\'))

        # Encode the transcript; drop the record on any unknown symbol.
        try:
            labels = [VOCAB_TO_INT[ch] for ch in transcript]
            labels.append(VOCAB_TO_INT['</s>'])
        except KeyError:
            continue

        if set_id != record_set:
            continue
        if FLAGS.use_train_lm and not in_fst(fst, labels):
            continue

        raw = get_features(audio_file)
        # Per-speaker mean/variance normalization.
        normalized = (raw - mean_speaker[speaker]) / np.sqrt(
            var_speaker[speaker])
        feed = {
            input_data: normalized,
            input_length: normalized.shape[0],
            output_data: labels,
            output_length: len(labels),
        }
        sess.run(enqueue_op, feed_dict=feed)
    sess.run(close_op)
예제 #3
0
def read_data_thread(
        set_id,
        sess,
        input_data,
        input_length,
        output_data,
        output_length,
        enqueue_op,
        close_op,
        mean_speaker,
        var_speaker,
        fst):
    '''Enqueue data to queue.

    Reads the CSV transcription file named by ``FLAGS.trans_file``, shuffles
    its lines, and for every row belonging to ``set_id`` runs ``enqueue_op``
    with the normalized features and the encoded transcription. Runs
    ``close_op`` once all rows have been processed.

    Each row is expected to hold four fields, in order: text, set id,
    speaker, audio file. The encoded text always ends with the space symbol
    followed by ``</s>``.

    Args:
        set_id: Only rows whose set id field equals this value are enqueued.
        sess: Session object whose ``run`` method executes the ops.
        input_data: feed_dict key that receives the normalized features.
        input_length: feed_dict key that receives ``feat.shape[0]``.
        output_data: feed_dict key that receives the encoded transcription.
        output_length: feed_dict key that receives the transcription length.
        enqueue_op: Op run once per selected row.
        close_op: Op run once after the loop finishes.
        mean_speaker: Mapping indexed by speaker; subtracted from the
            features.
        var_speaker: Mapping indexed by speaker; the features are divided by
            its square root.
        fst: Passed to ``in_fst`` together with the encoded transcription
            when ``FLAGS.use_train_lm`` is set.

    Raises:
        KeyError: If a transcription contains a character that is not in
            ``VOCAB_TO_INT``.
    '''

    # Read inside a context manager so the file handle is released right
    # after the lines are loaded instead of waiting for garbage collection.
    with tf.gfile.FastGFile(FLAGS.trans_file) as trans_file:
        trans = trans_file.readlines()
    random.shuffle(trans)
    space = VOCAB_TO_INT[' ']
    for text, set_id_trans, speaker, audio_file in csv.reader(trans):
        text = [VOCAB_TO_INT[c] for c in text]
        # A space is required after the sentence due to the way FST is set up.
        # The emptiness check keeps an empty transcription from raising
        # IndexError on text[-1].
        if not text or text[-1] != space:
            text.append(space)
        text.append(VOCAB_TO_INT['</s>'])
        if (set_id == set_id_trans and
                ((not FLAGS.use_train_lm) or in_fst(fst, text))):
            feat = get_features(audio_file)
            # Per-speaker mean/variance normalization.
            feat = feat - mean_speaker[speaker]
            feat = feat / np.sqrt(var_speaker[speaker])
            sess.run(enqueue_op, feed_dict={
                input_data: feat,
                input_length: feat.shape[0],
                output_data: text,
                output_length: len(text)})
    sess.run(close_op)