Example #1
import sys

import frontend  # project-local module providing Frontend and default_reader


def main():
    # Read from the file named on the command line, or fall back to the
    # module's default reader
    if len(sys.argv) >= 2:
        f = open(sys.argv[1], 'rb')
    else:
        f = frontend.default_reader()

    # Print each message the frontend yields, separated by a rule
    for msg in frontend.Frontend(f):
        print('---------------------------------')
        sys.stdout.write(str(msg))
Example #2
def main(**settings):
    """
    Configure and start the Pyramid WSGI application.
    """
    listen()  # project-local hook, not shown in this example

    # Load dragon configuration
    conf.load("jarvis")

    # Setup logging
    #logging.config.dictConfig(jarvis.LOGGING)

    # Start the frontend greenlets and create the WSGI application;
    # forever=True presumably blocks until the server shuts down
    _frontend = frontend.Frontend()
    _frontend.start(start_wsgi_server=True, forever=True)
Example #3
    def Text2Speech(self, text, req_id):

        time_start = time.time()

        # 1. front end
        idseq = frontend.Frontend(self.phone2id, self.idim, text)
        mat = torch.LongTensor(idseq).view(-1).to(device)
        time_frontend = time.time()

        # 2. acoustic model inference
        c, _, _ = self.model.inference(mat, self.inference_args)
        features = c.cpu().detach().numpy()

        logging.info("{} Pass Acoustic Model".format(__file__))
        time_acoustic = time.time()

        req_dir = os.path.join(self.tmp_dir, req_id)

        os.makedirs(req_dir, exist_ok=True)

        # 3. vocoder
        batch_feats = Split(self.overlap, features, self.num_chunk_frame)
        logging.info("{} Split {} done".format(__file__, len(batch_feats)))

        # One worker process per chunk
        executor = ProcessPoolExecutor(max_workers=len(batch_feats))
        futures = []

        num_frame = 0
        for i, s1, s2, a1, a2, sub_feats in batch_feats:
            num_frame += a2 - a1
            futures.append(
                executor.submit(
                    partial(run_vocoder, self.overlap, self.num_chunk_frame,
                            self.tmp_dir, i, s1, s2, a1, a2, sub_feats)))

        batch_pcm = [future.result() for future in futures]
        executor.shutdown()

        # 160 samples per 10 ms frame at 16 kHz
        pcm = np.zeros((num_frame, 160), dtype="int16")

        # Stitch the per-chunk PCM back together in submission order
        index = 0
        for i, sub_pcm in batch_pcm:
            num = sub_pcm.shape[0]
            pcm[index:index + num] = sub_pcm
            index += num

        # Flatten to a 1-D sample stream
        pcm = np.reshape(pcm, -1)[:num_frame * 160]

        wav_path = os.path.join(req_dir, req_id + "-wave.wav")

        with wave.open(wav_path, 'wb') as fp:
            # 1 channel, 16 bits, 16000 sample rate
            fp.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))
            fp.writeframes(pcm)

        audio_dur = num_frame * 0.01
        logging.info("{} Wave {:0.3f}s {}".format(__file__, audio_dur,
                                                  wav_path))

        if self.debug_mode:
            # numpy type feature
            np.save(os.path.join(req_dir, req_id + "-feats.npy"), features)
            # plot
            plt.figure()
            plt.matshow(np.flip(features.T))
            plt.savefig(os.path.join(req_dir, req_id + "-demo.png"),
                        format="png")
            plt.close()

        time_vocoder = time.time()

        time_count_frontend = time_frontend - time_start
        time_count_acoustic = time_acoustic - time_frontend
        time_count_vocoder = time_vocoder - time_acoustic

        logging.info(
            "{} Frontend:{:0.3f}s Acoustic:{:0.3f}s Vocoder:{:0.3f}s "
            "Total:{:0.3f}s".format(__file__, time_count_frontend,
                                    time_count_acoustic, time_count_vocoder,
                                    time_vocoder - time_start))

        return wav_path
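
Note: Split and run_vocoder are project-local helpers not shown above. The call pattern suggests the feature matrix is cut into fixed-size chunks with some overlap so each worker process can vocode its piece independently. A minimal sketch of such a splitter, with hypothetical semantics for the returned indices (s1/s2 the padded slice, a1/a2 the frames the chunk is responsible for):

def split(overlap, features, num_chunk_frame):
    # Hypothetical stand-in for the Split helper used above
    chunks = []
    total = features.shape[0]
    for i, a1 in enumerate(range(0, total, num_chunk_frame)):
        a2 = min(a1 + num_chunk_frame, total)  # frames owned by this chunk
        s1 = max(a1 - overlap, 0)              # padded slice start
        s2 = min(a2 + overlap, total)          # padded slice end
        chunks.append((i, s1, s2, a1, a2, features[s1:s2]))
    return chunks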
Example #4
import frontend  # project-local module


def main():
    # Feed each message from the frontend to the native handler
    native = NinjaNativeFrontend()  # defined elsewhere in the project
    for msg in frontend.Frontend():
        native.handle(msg)
Example #5
import os
import sys
from subprocess import call

import frontend  # project-local module that bootstraps the X session


class GUI:  # partial: only the methods used below are shown
    def restart(self, obj):
        call(["halt", "--reboot"])

    def quit(self, widget, *event):
        gtk.main_quit()


def main():
    app = GUI()
    app.window.show()
    gtk.main()


if __name__ == "__main__":
    rontend = None  # "rontend" presumably avoids shadowing the frontend module
    if 'DISPLAY' not in os.environ:
        # No X server yet: bring one up before pygtk is imported below
        rontend = frontend.Frontend()
        rontend.set_lang()
        rontend.startx()
        rontend.init_gtk()
        rontend.start_wm()
        rontend.merge_xres()
    try:
        import gtk
    except ImportError:
        sys.exit("pygtk not found.")

    sys.exit(main())
Example #6
    def Text2Speech(self, text, req_id):

        time_start = time.time()

        # 1. front end
        idseq = frontend.Frontend(self.phone2id, self.idim, text)
        mat = torch.LongTensor(idseq).view(-1).to(device)
        time_frontend = time.time()

        # 2. acoustic model inference
        c, _, _ = self.model.inference(mat, self.inference_args)
        features = c.cpu().detach().numpy()

        logging.info("{} Pass Acoustic Model".format(__file__))
        time_acoustic = time.time()

        req_dir = os.path.join(self.tmp_dir, req_id)

        os.makedirs(req_dir, exist_ok=True)

        # 3. vocoder
        f32_batch_path = self.SaveNPartAsF32(self.overlap, features,
                                             self.num_chunk_frame, req_dir,
                                             req_id)
        logging.info("{} Save F32 Done".format(__file__))

        # One worker process per feature part
        executor = ProcessPoolExecutor(max_workers=len(f32_batch_path))
        futures = []

        # Each worker presumably shells out to the LPCNet vocoder binary
        for sub_req_id, sub_f32_path, s1, s2 in f32_batch_path:
            futures.append(
                executor.submit(
                    partial(ShellRunLPCNet, self.overlap, self.num_chunk_frame,
                            self.vocoder_path, self.tmp_dir, sub_f32_path,
                            req_id, sub_req_id, s1, s2)))

        wav_batch_path = [future.result() for future in futures]
        executor.shutdown()

        wav_path = os.path.join(req_dir, req_id + "-wave.wav")
        CombineAudio(wav_batch_path, wav_path)

        logging.info("{} Wave {}".format(__file__, wav_path))

        if self.debug_mode:
            # numpy type feature
            np.save(os.path.join(req_dir, req_id + "-feats.npy"), features)
            # plot
            plt.figure()
            plt.matshow(np.flip(features.T))
            plt.savefig(os.path.join(req_dir, req_id + "-demo.png"),
                        format="png")
            plt.close()

        time_vocoder = time.time()

        time_count_frontend = time_frontend - time_start
        time_count_acoustic = time_acoustic - time_frontend
        time_count_vocoder = time_vocoder - time_acoustic

        logging.info(
            "{} Frontend:{:0.3f}s Acoustic:{:0.3f}s Vocoder:{:0.3f}s "
            "Total:{:0.3f}s".format(__file__, time_count_frontend,
                                    time_count_acoustic, time_count_vocoder,
                                    time_vocoder - time_start))

        return wav_path
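
Note: SaveNPartAsF32, ShellRunLPCNet and CombineAudio are also project-local helpers not shown above. Assuming each future returns the path of a mono 16-bit/16 kHz WAV part, a minimal sketch of a CombineAudio-style helper using the standard wave module:

import wave

def combine_audio(part_paths, out_path):
    # Hypothetical stand-in for CombineAudio: concatenate WAV parts in order
    with wave.open(out_path, 'wb') as out:
        for i, path in enumerate(part_paths):
            with wave.open(path, 'rb') as part:
                if i == 0:
                    out.setparams(part.getparams())  # copy params of first part
                out.writeframes(part.readframes(part.getnframes()))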