Ejemplo n.º 1
0
            batches.append((x, y))

        return batches

    def test(self):
        """Run evaluation repeatedly until the data generator offset passes the test set.

        Every pass builds a fresh set of evaluation batches with
        ``self.make_eval_batch_data()`` and feeds them to ``self.runner.run``.
        The loop stops once ``self.dg.offset`` is greater than the last valid
        index of ``self.dg.test_texts``.
        """
        exhausted = False
        while not exhausted:
            batch_data = self.make_eval_batch_data()
            self.runner.run(batch_data)
            # The offset is re-read after each run to decide whether to continue.
            exhausted = self.dg.offset > len(self.dg.test_texts) - 1


if __name__ == '__main__':
    # Both YAML paths are mandatory command-line options.
    cli = argparse.ArgumentParser()
    for option, description in (
        ('--data_config', 'the lm data config path'),
        ('--model_config', 'the lm model config path'),
    ):
        cli.add_argument(option, type=str, required=True, help=description)
    options = cli.parse_args()

    # Build the config from both files, then run the evaluation loop.
    LM_Tester(UserConfig(options.data_config, options.model_config)).test()
Ejemplo n.º 2
0
            am_result = self.decode_am_result(am_result[1:-1])
        else:
            am_result = self.decode_am_result(am_result[0])
        return am_result

    def lm_test(self, txt):
        """Convert ``txt`` to pinyin, run the language model on it and decode the output.

        Args:
            txt: Text handed to ``pypinyin.pinyin``.

        Returns:
            Whatever ``self.lm.decode`` produces for the first prediction.
        """
        # Keep only the first element of every entry returned by pypinyin.
        syllables = [entry[0] for entry in pypinyin.pinyin(txt)]
        # The prediction holds token ids at this point.
        token_ids = self.lm.predict(syllables)
        # Map the token ids of the first result back to vocabulary entries.
        return self.lm.decode(token_ids[0].numpy(), self.lm.word_featurizer)


if __name__ == '__main__':
    # Expose only GPU index 2 to this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'

    acoustic_cfg = UserConfig(r'./conformer-transducer-logs/am_data.yml',
                              r'./conformer-transducer-logs/conformer.yml')
    language_cfg = UserConfig(r'./transformer-logs/lm_data.yml',
                              r'./transformer-logs/transformer.yml')
    asr = ASR(acoustic_cfg, language_cfg)

    wav_file = 'BAC009S0724W0121.wav'

    # stt returns a pair; print each element on its own line.
    first, second = asr.stt(wav_file)
    for item in (first, second):
        print(item)

    # Exercise the acoustic model and the language model separately.
    print(asr.am_test(wav_file))
    print(asr.lm_test('中介协会'))
Ejemplo n.º 3
0
        """Load checkpoint."""

        self.checkpoint_dir = os.path.join(
            config['learning_config']['running_config']["outdir"],
            "checkpoints")
        files = os.listdir(self.checkpoint_dir)
        files.sort(key=lambda x: int(x.split('_')[-1].replace('.h5', '')))
        self.model.load_weights(os.path.join(self.checkpoint_dir, files[-1]))
        self.init_steps = int(files[-1].split('_')[-1].replace('.h5', ''))


if __name__ == '__main__':
    from utils.user_config import UserConfig
    import tensorflow as tf

    # Expose only GPU index 1 to this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    acoustic = AM(UserConfig(r'D:\TF2-ASR\configs\am_data.yml',
                             r'D:\TF2-ASR\configs\conformer.yml'))
    print('load model')
    acoustic.load_model(False)

    print('convert here')
    acoustic.model.return_pb_function(80, 4)
    concrete_fn = acoustic.model.recognize_pb.get_concrete_function()
    lite_converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [concrete_fn])
    lite_converter.experimental_new_converter = True
    # Optional: lite_converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Allow both TFLite builtin ops and selected TensorFlow ops.
    lite_converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS,
    ]
    # The converted model is not stored; this only checks that conversion runs.
    lite_converter.convert()
    # Alternative export: am.convert_to_pb('./test_model')
Ejemplo n.º 4
0
        """
        with tf.name_scope("index_to_unicode_points"):

            def map_fn(arr):
                def sub_map_fn(index):
                    return self.index_to_unicode_points[index]

                return tf.map_fn(sub_map_fn, arr, dtype=tf.int32)

            # filter -1 value to avoid outofrange
            minus_one = -1 * tf.ones_like(feat, dtype=tf.int32)
            blank_like = self.blank * tf.ones_like(feat, dtype=tf.int32)
            feat = tf.where(feat == minus_one, blank_like, feat)
            return tf.map_fn(
                map_fn,
                feat,
                dtype=tf.int32,
            )


if __name__ == '__main__':
    from utils.user_config import UserConfig
    import pypinyin
    import numpy as np

    # The same YAML file is passed for both config arguments.
    config = UserConfig('../config.yml', '../config.yml', False)
    print(config)

    featurizer = TextFeaturizer(config['decoder_config'])
    print(featurizer.num_classes, featurizer.vocab_array)
    # Example kept for reference:
    # print(featurizer.extract(pypinyin.lazy_pinyin('我爱你',1)))

    # Feed a random 4 x num_classes array through iextract.
    random_input = np.random.random([4, featurizer.num_classes])
    print(featurizer.iextract(tf.constant(random_input)))
Ejemplo n.º 5
0
            while i < step:
                enc = tf.reshape(x[:, i], [1, 1, -1])
                y = self.am.model.predict_net(inputs=tf.reshape(self.decoded, [1, -1]),
                                              p_memory_states=None,
                                              training=False)
                y = y[:, -1:]
                z = self.am.model.joint_net([enc, y], training=False)
                logits = tf.squeeze(tf.nn.log_softmax(z))
                pred = tf.argmax(logits, axis=-1, output_type=tf.int32)
                pred = tf.reshape(pred, [1])
                if pred != 0 and pred != self.text_featurizer.blank:
                    self.decoded = tf.concat([self.decoded, pred], axis=0)
                    print("buffer_step: {}, "
                          "step: {}, "
                          "pred: {}".format(j,
                                            i,
                                            self.text_featurizer.index_to_token[pred.numpy().tolist()[0]]))
                i += 1
            j += 1
        print(1)


if __name__ == "__main__":
    # Alternative configuration kept for reference:
    # UserConfig(r'./configs/am_data_back.yml', r'./configs/tdnn.yml')
    streaming_config = UserConfig(r'./pre_train/rnnt/am_data.yml',
                                  r'./pre_train/rnnt/conformer.yml')
    recognizer = StreamingASR(streaming_config)
    recognizer.stream_detect('CppInference/test.wav')



Ejemplo n.º 6
0
    def punc_test(self, txt):
        """Split ``txt`` into a list of its elements and pass it to ``self.lm.punc_predict``.

        Returns:
            The value returned by ``self.lm.punc_predict``.
        """
        characters = list(txt)
        return self.lm.punc_predict(characters)


# Demo entry point: builds an ASR instance from three configs and times the
# first call to asr.stt on a sample wav file.
if __name__ == '__main__':
    import time
    # Optional device settings (uncomment as needed).
    # Use the CPU only:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # Use a single GPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Limit TensorFlow to one CPU thread:
    # import tensorflow as tf
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)

    # Each UserConfig takes two YAML paths; the punctuation config passes the
    # same file for both.
    am_config = UserConfig(r'./conformerCTC(M)/am_data.yml',
                           r'./conformerCTC(M)/conformerM.yml')
    lm_config = UserConfig(r'./transformer-logs/lm_data.yml',
                           r'./transformer-logs/transformerO2OE.yml')
    punc_config = UserConfig(r'./punc_model/punc_settings.yml',
                             r'./punc_model/punc_settings.yml')
    asr = ASR(am_config, lm_config, punc_config)

    # The first inference is expected to be slow; this is normal.
    s = time.time()
    a, b = asr.stt(r'BAC009S0764W0121.wav')
    e = time.time()
    # stt returns a pair; print both elements, then the elapsed wall-clock time.
    print(a)
    print(b)
    print('asr.stt first infenrence cost time:', e - s)

    # now it's OK
Ejemplo n.º 7
0
            _, _, stop_flag, decoded = tf.while_loop(
                _cond,
                _body,
                loop_vars=(b_i, B, stop_flag, decoded),
                shape_invariants=(tf.TensorShape([]), tf.TensorShape([]),
                                  tf.TensorShape([None]),
                                  tf.TensorShape([None, None])))

            return decoded


if __name__ == '__main__':
    from utils.user_config import UserConfig
    from utils.text_featurizers import TextFeaturizer
    import time

    config = UserConfig(r'D:\TF2-ASR\configs\lm_data.yml',
                        r'D:\TF2-ASR\configs\transformer.yml')
    vocab_featurizer = TextFeaturizer(config['lm_vocab'])
    word_featurizer = TextFeaturizer(config['lm_word'])

    # Vocabulary sizes are taken from the featurizers, not from the YAML file.
    model_config = config['model_config']
    model_config['input_vocab_size'] = vocab_featurizer.num_classes
    model_config['target_vocab_size'] = word_featurizer.num_classes

    model = Transformer(**model_config)
    model._build()

    # First call is untimed and its result is discarded.
    model.recognize(np.ones([2, 10]))

    # Time a second call on an input of the same shape.
    started = time.time()
    result = model.recognize(np.ones([2, 10]))
    finished = time.time()
    print(result, finished - started)
Ejemplo n.º 8
0
        # token to vocab
        lm_result = self.lm.decode(lm_result[0].numpy(), self.lm.lm_featurizer)
        return lm_result


# Demo entry point: builds an ASR instance from an acoustic and a language
# model config and times calls to asr.stt on a sample wav file.
if __name__ == '__main__':
    import time
    # Optional device settings (uncomment as needed).
    # Use the CPU only:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # Use a single GPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Limit TensorFlow to one CPU thread:
    # import tensorflow as tf
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)
    am_config = UserConfig(r'./conformerCTC(M)/am_data.yml',
                           r'./conformerCTC(M)/conformerM.yml')
    lm_config = UserConfig(r'./transformer-logs/lm_data.yml',
                           r'./transformer-logs/transformerO2OE.yml')
    asr = ASR(am_config, lm_config)

    # The first inference is expected to be slow; this is normal.
    s = time.time()
    a, b = asr.stt(r'BAC009S0764W0121.wav')
    e = time.time()
    # stt returns a pair; print both elements, then the elapsed wall-clock time.
    print(a)
    print(b)
    print('asr.stt first infenrence cost time:', e - s)

    # Second call on the same file; the start time is captured again first.
    s = time.time()
    a, b = asr.stt(r'BAC009S0764W0121.wav')
Ejemplo n.º 9
0
            if self.runner._finished():
                self.runner.save_checkpoint()
                logging.info('Finish training!')
                break
            if self.runner.steps % self.config['running_config'][
                    'save_interval_steps'] == 0:
                self.dg.save_state(self.config['running_config']['outdir'])


if __name__ == '__main__':
    import argparse

    # Command-line options; every option has a default path under ./configs.
    parse = argparse.ArgumentParser()
    parse.add_argument('--data_config',
                       type=str,
                       default='./configs/lm_data.yml',
                       help='the lm data config path')
    # NOTE(review): 'transfomer.yml' may be a misspelling of 'transformer.yml'
    # -- confirm against the files in ./configs before changing the default.
    parse.add_argument('--lm_config',
                       type=str,
                       default='./configs/transfomer.yml',
                       help='the lm model config path')
    parse.add_argument('--punc_config',
                       type=str,
                       default='./configs/punc_settings.yml',
                       help='the punc model config path')
    args = parse.parse_args()

    # argparse stores '--lm_config' as args.lm_config; no 'model_config'
    # option is defined, so reading args.model_config raised AttributeError.
    lm_config = UserConfig(args.data_config, args.lm_config)
    # The punctuation settings file is passed for both UserConfig arguments.
    punc_config = UserConfig(args.punc_config, args.punc_config)
    train = Punc_Trainer(lm_config, punc_config)
    train.train()
Ejemplo n.º 10
0
    # Set CPU 1 core
    # import tensorflow as tf
    # import os
    # os.environ['CUDA_VISIBLE_DEVICES']='-1'
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)
    # Command-line options; every option has a default path under ./configs.
    parse = argparse.ArgumentParser()
    parse.add_argument('--common_config',
                       type=str,
                       default='./configs/common.yml',
                       help='the am data config path')
    parse.add_argument('--acoustic_config',
                       type=str,
                       default='./configs/fastspeech.yml',
                       help='the am model config path')
    parse.add_argument('--vocoder_config',
                       type=str,
                       default='./configs/vocoder.yml',
                       help='the vocoder model config path')
    args = parse.parse_args()

    # The common config is shared by the acoustic model and the vocoder.
    acoustic_config = UserConfig(args.common_config, args.acoustic_config)
    vocoder_config = UserConfig(args.common_config, args.vocoder_config)
    tts = TTS(acoustic_config, vocoder_config)

    # Sample rate used both for writing the file and for the duration below.
    sample_rate = 8000

    # First call is untimed; only the second synthesis is measured.
    wav = tts.tts('来一句长一点的话儿试一试呢。', 'spk1')
    s = time.time()
    wav = tts.tts('来一句常一点的话儿试一试呢。', 'spk1')
    e = time.time()
    sf.write('test.wav', wav, sample_rate)
    # wav.shape is a tuple, so dividing it by a number raises TypeError; the
    # duration in seconds is the length of the first axis over the sample rate.
    print('wav length:', wav.shape[0] / sample_rate, 'tts cost time:', e - s)
Ejemplo n.º 11
0
            old_epoch = self.dg.epochs
            for batch in train_datasets:
                print(
                    '**********************************************3**********************************************************'
                )
                try:
                    self.strategy.run(self._train_step, args=(batch, ))
                except tf.errors.OutOfRangeError:
                    continue
                new_epoch = self.dg.epochs
                print(
                    '**********************************************4**********************************************************'
                )
                if new_epoch - old_epoch >= 1:
                    break
            loss = self.train_metrics['loss'].result().numpy()
            self._save_model(epoch, loss)
            print('epoch:{} loss:{}'.format(epoch, loss))
            self.train_metrics['loss'].reset_states()


if __name__ == '__main__':
    # Single optional argument: path to the common YAML config.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--common_config',
        type=str,
        default='/home/jiangjing/project/my/config/common.yml',
    )
    options = cli.parse_args()

    # Build the trainer from the config and start training.
    trainer = Sub_LM_Trainer(UserConfig(options.common_config))
    trainer.train()
Ejemplo n.º 12
0
    def am_test(self, wav_path):
        """Run the acoustic model on ``wav_path`` and decode its prediction.

        Args:
            wav_path: Value forwarded unchanged to ``self.am.predict``.

        Returns:
            The result of ``self.decode_am_result`` on the selected part of
            the prediction.
        """
        # The raw prediction holds token ids.
        token_ids = self.am.predict(wav_path)
        # Transducer models: drop the first and last entries.
        # Any other model type: use the first element of the prediction.
        if self.am.model_type == 'Transducer':
            selected = token_ids[1:-1]
        else:
            selected = token_ids[0]
        return self.decode_am_result(selected)


if __name__ == '__main__':
    import time

    # Optional device settings (uncomment as needed).
    #   CPU only:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    #   Single GPU:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    #   Limit TensorFlow to one CPU thread:
    #     import tensorflow as tf
    #     tf.config.threading.set_inter_op_parallelism_threads(1)
    #     tf.config.threading.set_intra_op_parallelism_threads(1)

    am_config = UserConfig(r'./streaming-logs/am_data.yml',
                           r'./streaming-logs/Streaming_ConformerS.yml')
    asr = ASR(am_config)

    # The first inference is expected to be slow; this is normal.
    transcript = asr.am_test(r'BAC009S0764W0121.wav')
    print(transcript)