예제 #1
0
파일: eval.py 프로젝트: keithito/tacotron
def run_eval(args):
  """Synthesize every entry of ``sentences`` and store each result as a wav file.

  Files are named ``<base>-<index>.wav`` where ``<base>`` is whatever
  ``get_output_base_path(args.checkpoint)`` returns.
  """
  print(hparams_debug_string())
  model = Synthesizer()
  model.load(args.checkpoint)
  prefix = get_output_base_path(args.checkpoint)
  for index, sentence in enumerate(sentences):
    wav_path = '%s-%d.wav' % (prefix, index)
    print('Synthesizing: %s' % wav_path)
    # The output file is opened before synthesis starts.
    with open(wav_path, 'wb') as wav_file:
      wav_bytes = model.synthesize(sentence)
      wav_file.write(wav_bytes)
예제 #2
0
#      raise falcon.HTTPBadRequest('String too long',
#                                  'String length shorter than 150 is accepted.')
    res.data = synthesizer.synthesize(req.params.get('text'))
    res.content_type = 'audio/wav'

# One shared synthesizer instance; which models it loads is decided below.
synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())

if __name__ != '__main__':
  # Imported as a module: load this fixed pair of model files.
  synthesizer.load('./models/upc_ona2_tacotron2.pt', './models/melgan_onapau_tacotronSTFT.pt')
else:
  # Executed as a script: parse options, load models, serve with wsgiref.
  from wsgiref import simple_server
  parser = argparse.ArgumentParser()
  parser.add_argument('--t_checkpoint', help='Full path to tacotron2 checkpoint')
  parser.add_argument('--v_checkpoint', help='Full path to melgan checkpoint')
  parser.add_argument('--port', type=int, default=8000)
  parser.add_argument('--hparams', default='',
    help='Hyperparameter overrides as a comma-separated list of name=value pairs')
  args = parser.parse_args()
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
  if not (args.t_checkpoint and args.v_checkpoint):
    # At least one checkpoint option is missing: use the models under PROJECT_PATH.
    t_model_path = os.path.join(PROJECT_PATH, 'models/upc_pau_tacotron2.pt')
    v_model_path = os.path.join(PROJECT_PATH, 'models/melgan_onapau_catotron.pt')
    synthesizer.load(t_model_path, v_model_path)
  else:
    synthesizer.load(args.t_checkpoint, args.v_checkpoint)
  print('Serving on port %d' % args.port)
  httpd = simple_server.make_server('0.0.0.0', args.port, api)
  httpd.serve_forever()
예제 #3
0
    global global_config

    model_name = os.path.basename(global_config.load_path)
    relative_path = os.path.join(AUDIO_PATH, model_name)
    real_path = os.path.join(BASE_PATH, relative_path)

    return send_from_directory(real_path, path)


if __name__ == '__main__':
    # Command-line options of the demo server.
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_path', required=True)
    parser.add_argument('--checkpoint_step', type=int, default=None)
    parser.add_argument('--num_speakers', type=int, default=1)
    parser.add_argument('--speaker_id', type=int, default=1)
    parser.add_argument('--port', type=int, default=51000)
    parser.add_argument('--debug', type=str2bool, default=False)
    parser.add_argument('--is_korean', type=str2bool, default=True)
    config = parser.parse_args()

    if not os.path.exists(config.load_path):
        print(" [!] load_path not found: {}".format(config.load_path))
    else:
        prepare_dirs(config, hparams)

        # Publish the parsed options under the module-level name.
        global_config = config
        synthesizer.load(config.load_path,
                         config.num_speakers,
                         config.checkpoint_step)

    # The app is started whether or not the model could be loaded.
    app.run(debug=config.debug, port=config.port, host='0.0.0.0')
예제 #4
0
            res.body = html_file.read()


class SynthesisResource:
    """Falcon resource that turns the ``text`` query parameter into WAV audio."""

    def on_get(self, req, res):
        """Handle GET: synthesize ``text`` and send it back as ``audio/wav``.

        Args:
            req: incoming request; ``req.params['text']`` holds the text.
            res: outgoing response; ``data`` and ``content_type`` are set.

        Raises:
            falcon.HTTPBadRequest: if ``text`` is missing or empty, or is
                longer than 150 characters.
        """
        # Look the parameter up once instead of three separate times.
        text = req.params.get('text')
        if not text:
            raise falcon.HTTPBadRequest()
        if len(text) > 150:
            # Keyword arguments: passing title/description positionally is
            # deprecated as of Falcon 3.0.
            raise falcon.HTTPBadRequest(
                title='String too long',
                description='String length shorter than 150 is accepted.')
        res.data = synthesizer.synthesize(text)
        res.content_type = 'audio/wav'


# Shared synthesizer; its two model files are resolved against PROJECT_PATH.
synthesizer = Synthesizer()
# TODO load via config
t_model_path = os.path.join(PROJECT_PATH, 'models/upc_ona_tacotron2.pt')
v_model_path = os.path.join(PROJECT_PATH, 'models/melgan_onapau_catotron.pt')
synthesizer.load(t_model_path, v_model_path)

# Falcon application object with its two routes.
app = falcon.API()
app.add_route('/synthesize', SynthesisResource())
app.add_route('/', UIResource())

if __name__ == '__main__':
    # Run directly: serve the app with wsgiref on a fixed port.
    from wsgiref import simple_server
    port = 9000
    print('Serving on port %d' % port)
    httpd = simple_server.make_server('0.0.0.0', port, app)
    httpd.serve_forever()
예제 #5
0
if __name__ == '__main__':
    # Script entry point: parse the options, configure the environment,
    # load the checkpoint and start the app.
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint',
                        required=True,
                        help='Full path to model checkpoint')
    parser.add_argument('--port', type=int, default=3000)
    parser.add_argument('--ip', type=str, default='0.0.0.0')
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    parser.add_argument('--gpu_assignment',
                        default='0',
                        help='Set the gpu the model should run on')
    # store_true: the option is off unless the flag is passed. It used to be
    # store_false combined with default=False, so it could never become True.
    parser.add_argument('--synthezier_helper',
                        default=False,
                        action="store_true",
                        help='uses the synthesize helper during sythesis')
    args = parser.parse_args()

    use_synthesize_helper = args.synthezier_helper

    # Both variables are set before the checkpoint is loaded.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_assignment
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)
    print(hparams_debug_string())
    synthesizer.load(args.checkpoint)
    app.run(host=args.ip, port=args.port)
예제 #6
0
        return json.dumps(res)


@app.route('/datas', methods=['GET', 'POST'])
def updata():
    """Fetch data through ``getdatas``, echo it to stdout and return it as JSON text."""
    payload = getdatas(input_url, re_url, input_url)
    print(payload)
    serialized = json.dumps(payload)
    return serialized


# Shared synthesizer instance; the checkpoint comes from the command line when
# run as a script, or from the CHECKPOINT environment variable when imported.
synthesizer = Synthesizer()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', required=True, help='')
    parser.add_argument('--port', type=int, default=5000)
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    args = parser.parse_args()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)
    print(hparams_debug_string())
    synthesizer.load(args.checkpoint)
    print('Serving on port %d' % args.port)
    # Hand the parsed port to the server; a bare app.run() ignored --port, so
    # the message above could name a port that was not the one being served.
    app.run(port=args.port)
else:
    synthesizer.load(os.environ['CHECKPOINT'])
from synthesizer import Synthesizer
from playsound import playsound
from hparams import hparams, hparams_debug_string

# Parse an empty override string, i.e. no hyperparameter overrides are given.
hparams.parse('')
checkpoint = 'new/model.ckpt-517000'
synth = Synthesizer()
# Print the summary; calling hparams_debug_string() alone discarded its result.
print(hparams_debug_string())
synth.load(checkpoint)


def generate_voice(text):
    """Synthesize ``text`` into ``test.wav`` (overwriting it) and play that file."""
    wav_path = 'test.wav'
    with open(wav_path, 'wb') as wav_file:
        wav_bytes = synth.synthesize(text)
        wav_file.write(wav_bytes)
    playsound(wav_path)


# Ask for four sentences, speaking each one as soon as it has been entered.
for _ in range(4):
    sentence = input('please enter the sentance')
    generate_voice(sentence)
예제 #8
0
# Checkpoint to load; the default is an absolute path on one specific machine.
parser.add_argument(
    '--checkpoint',
    default='/home/spurs/tacotron/logs-tacotron/model.ckpt-274000',
    help='Path to model checkpoint')
#parser.add_argument('--reference_audio', default='/home/spurs/p264_071.wav', help='Reference audio path')
# NOTE(review): args.mel_targets is not read anywhere in the visible part of
# this script — confirm it is used further down.
parser.add_argument(
    '--mel_targets',
    default=None,
    help='Mel-targets path, used when use teacher_force generation')
#parser.add_argument('--text', required=True, default=None, help='Single test text sentence')

args = parser.parse_args()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Build the synthesizer with teacher forcing switched off and load the checkpoint.
synth = Synthesizer(teacher_forcing_generating=False)
synth.load(args.checkpoint, reference_mel=True)
'''
ref_wav = audio.load_wav(args.reference_audio)
reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
'''

# Text after the last '-' of the checkpoint path ('274000' for the default).
iteration = args.checkpoint.split('-')[-1]
#speaker = args.reference_audio.split('/')[-1].split('_')[0]
# Hard-coded absolute directory; presumably where results are written — TODO confirm.
base = '/home/spurs/website/tornado/static/res/'

# Maps a label to the absolute path of a .npy file.
# NOTE(review): the keys look like speaker ids and the files like stored mel
# spectrograms — confirm against the code that consumes this dict.
mels = {
    "p225": "/home/spurs/website/tornado/static/wav/p225.npy",
    "p285": "/home/spurs/website/tornado/static/wav/p285.npy",
    "p300": "/home/spurs/website/tornado/static/wav/p300.npy",
    "p360": "/home/spurs/website/tornado/static/wav/p360.npy"
}
예제 #9
0
if __name__ == '__main__':
  from wsgiref import simple_server
  parser = argparse.ArgumentParser()
  parser.add_argument('--t_checkpoint', help='Full path to tts checkpoint')
  parser.add_argument('--v_checkpoint', help='Full path to melgan checkpoint')
  parser.add_argument('--t_config', help='Full path to tts config')
  parser.add_argument('--v_config', help='Full path to vocoder config')
  parser.add_argument('--port', type=int, default=8000)
  parser.add_argument('--hparams', default='',
    help='Hyperparameter overrides as a comma-separated list of name=value pairs')
  args = parser.parse_args()
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
  if args.t_checkpoint and args.v_checkpoint and not args.t_config:
      print('No tts config given, assuming nvidia tacotron2')
      synthesizer.load(args.t_checkpoint, args.v_checkpoint)
  elif args.t_checkpoint and args.v_checkpoint and args.t_config and args.v_config:
      print('Loading custom coqui tts')
      synthesizer.load(args.t_checkpoint, args.v_checkpoint,
                       args.t_config, args.v_config)
  else:
      print('Loading default coqui tts models')
      t_model_path = os.path.join(PROJECT_PATH,
                                  'models/upc_pau_coqui_speedy_speech.pth.tar')
      t_config_path = os.path.join(PROJECT_PATH,
                                  'models/config_pau_speedy_speech.json')
      v_model_path = os.path.join(PROJECT_PATH,
                                  'models/melgan_onapau_catotron.pt')
      v_config_path = os.path.join(PROJECT_PATH,
                                   'models/config_coqui_vocoder.json')
      synthesizer.load(t_model_path, v_model_path,
예제 #10
0
    return send_from_directory(
            os.path.join(static_path, 'basic_source'), path)

@app.route('/images/<path:path>')
def send_images(path):
    """Serve ``path`` from the ``images`` folder located under ``static_path``."""
    images_dir = os.path.join(static_path, 'images')
    return send_from_directory(images_dir, path)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_path', required=False)
    parser.add_argument('--checkpoint_step', type=int, default=None)
    parser.add_argument('--num_speakers', type=int, default=5)
    parser.add_argument('--port', type=int, default=51000)
    parser.add_argument('--debug', type=str2bool, default=False)
    parser.add_argument('--is_korean', type=str2bool, default=True)
    config = parser.parse_args()

    # Loading from --load_path is switched off; the disabled code is kept below.
    #if os.path.exists(config.load_path):
    #    prepare_dirs(config, hparams)

    #    global_config = config
    #    synthesizer.load(config.load_path, config.num_speakers, config.checkpoint_step)
    #else:
    #   print(" [!] load_path not found: {}".format(config.load_path))

    # Resolve this machine's address; MY_IP is assigned before change_IP_in_HTML() runs.
    host_name = socket.gethostname()
    MY_IP = socket.gethostbyname(host_name)
    change_IP_in_HTML()  # main.js has to be changed separately
    synthesizer.load('logs/backup_log/new_inna+kss+leejh', 3)
    # Passing debug=True (or debug=config.debug) makes edits take effect live.
    app.run(port=8888, host=MY_IP)
예제 #11
0
def _str2bool(value):
    """Convert a command-line token such as 'true' or 'false' to a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value (including 'False') ends up as True. This converter
    interprets the spelling instead.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised spelling.
    """
    token = value.lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, which is what ``type=bool`` produced for it.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)


def main():
    """Synthesize every entry of ``sentences`` once per sub-directory of --wav_path.

    For each directory found in ``--wav_path`` the wav files inside it are
    converted to mel spectrograms, which are passed to the synthesizer together
    with each sentence. The resulting audio and alignment plot are written to
    ``<output_dir>/<output_prefix><directory>/<sentence>.wav`` and ``.png``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--wav_path',
                        default='./wav_files',
                        help='the wav files to be minic')
    parser.add_argument('--output_dir',
                        default='./synthesis',
                        help='the output dir')
    parser.add_argument('--output_prefix',
                        default=' ',
                        help='the prefix of the name of the output')
    parser.add_argument('--model_path',
                        default=' ',
                        help='path of the trained model')
    # These five options were all described as 'batch_size' (copy-paste); the
    # help text now names the hparams field each value is copied into.
    parser.add_argument('--prenet_layer1',
                        default=256,
                        type=int,
                        help='overrides hparams.prenet_layer1')
    parser.add_argument('--prenet_layer2',
                        default=128,
                        type=int,
                        help='overrides hparams.prenet_layer2')
    parser.add_argument('--gru_size',
                        default=256,
                        type=int,
                        help='overrides hparams.gru_size')
    parser.add_argument('--attention_size',
                        default=256,
                        type=int,
                        help='overrides hparams.attention_size')
    parser.add_argument('--rnn_size',
                        default=256,
                        type=int,
                        help='overrides hparams.rnn_size')
    # _str2bool instead of bool, so that '--enable_fv1 False' really disables it.
    parser.add_argument('--enable_fv1',
                        default=True,
                        type=_str2bool,
                        help='enable_fv1')
    parser.add_argument('--enable_fv2',
                        default=True,
                        type=_str2bool,
                        help='enable_fv2')

    args = parser.parse_args()

    # Copy the command-line values onto the shared hparams object.
    hparams.prenet_layer1 = args.prenet_layer1
    hparams.prenet_layer2 = args.prenet_layer2
    hparams.gru_size = args.gru_size
    hparams.attention_size = args.attention_size
    hparams.rnn_size = args.rnn_size
    hparams.enable_fv1 = args.enable_fv1
    hparams.enable_fv2 = args.enable_fv2

    synthesizer = Synthesizer(hparams)
    synthesizer.load(args.model_path)

    for person_id in os.listdir(args.wav_path):
        person_dir = os.path.join(args.wav_path, person_id)
        if not os.path.isdir(person_dir):
            # Stray files next to the per-person folders would make the inner
            # os.listdir() fail, so they are skipped.
            continue

        current_dir = os.path.join(args.output_dir,
                                   args.output_prefix + person_id)
        os.makedirs(current_dir, exist_ok=True)

        mel_spectrograms = []
        for wav_file in os.listdir(person_dir):
            # Load the audio and compute its mel-scale spectrogram (transposed).
            wav = audio.load_wav(os.path.join(person_dir, wav_file))
            mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T
            mel_spectrograms.append(mel_spectrogram)
            print(wav_file)
            print(np.shape(mel_spectrogram))

        try:
            print(np.shape(mel_spectrograms))
        except ValueError:
            # Spectrograms of different shapes form a ragged list, which
            # NumPy >= 1.24 refuses to turn into an array; print the count.
            print((len(mel_spectrograms),))

        mel_spectrograms = _prepare_targets(mel_spectrograms, 1)

        for text in sentences:
            wav, alignment = synthesizer.synthesize(text=text,
                                                    mel_spec=mel_spectrograms)

            print(alignment.shape)

            # The sentence itself is used as the file name of both outputs.
            plot.plot_alignment(alignment,
                                os.path.join(current_dir, text + '.png'))
            audio.save_wav(wav, os.path.join(current_dir, text + '.wav'))
from synthesizer import Synthesizer

# Build the synthesizer and load the multi-speaker checkpoint directory.
synthesizer = Synthesizer()
synthesizer.load(
    checkpoint_path='logdir-tacotron2/moon+son+kss_2020-05-12_02-13-33',
    num_speakers=3,
    checkpoint_step=None,
    inference_prenet_dropout=False)

# Only the first line of the input file is read. The context manager closes
# the handle, which was previously left open.
with open('gen/Inputs/test.txt', mode='rt', encoding='utf-8') as f:
    text = f.readline()

# synthesize() is indexed with [0]; the first element is kept as ``audio``.
audio = synthesizer.synthesize(texts=text,
                               base_path="gen/Outputs",
                               speaker_ids=[2],
                               attention_trim=True,
                               base_alignment_path=None,
                               isKorean=True)[0]
예제 #13
0

@app.route('/audio/<path:path>')
def send_audio(path):
    """Serve ``path`` from the ``audio`` folder located under ``static_path``."""
    audio_dir = os.path.join(static_path, 'audio')
    return send_from_directory(audio_dir, path)


@app.route('/uploads/<path:path>')
def send_uploads(path):
    """Serve ``path`` from the ``uploads`` folder located under ``static_path``."""
    uploads_dir = os.path.join(static_path, 'uploads')
    return send_from_directory(uploads_dir, path)


if __name__ == '__main__':
    # Command-line options of the server.
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint_path', required=True)
    parser.add_argument('--waveglow_path', required=True)
    parser.add_argument('--port', type=int, default=51000)
    parser.add_argument('--debug', type=str2bool, default=False)
    parser.add_argument('--is_korean', type=str2bool, default=True)
    config = parser.parse_args()

    if not os.path.exists(config.checkpoint_path):
        print(" [!] load_path not found: {}".format(config.checkpoint_path))
    else:
        synthesizer.load(config.checkpoint_path, config.waveglow_path)

    # The app is started whether or not the checkpoint was found.
    app.run(host='10.100.1.119',
            port=config.port,
            debug=config.debug,
            threaded=True)
예제 #14
0
    res.body = html_body


class SynthesisResource:
  """Falcon resource that answers GET requests with synthesized audio."""

  def on_get(self, req, res):
    """Synthesize the ``text`` query parameter and return it as ``audio/wav``.

    Raises falcon.HTTPBadRequest when ``text`` is missing or empty.
    """
    text = req.params.get('text')
    if not text:
      raise falcon.HTTPBadRequest()
    res.data = synthesizer.synthesize(text)
    res.content_type = 'audio/wav'


# Shared synthesizer and the falcon application with its two routes.
synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())


if __name__ != '__main__':
  # Imported as a module: only load the model.
  synthesizer.load()
else:
  # Executed as a script: parse options, load the model, serve with wsgiref.
  from wsgiref import simple_server
  parser = argparse.ArgumentParser()
  parser.add_argument('--port', type=int, default=9000)
  parser.add_argument('--hparams', default='',
    help='Hyperparameter overrides as a comma-separated list of name=value pairs')
  args = parser.parse_args()
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
  synthesizer.load()
  print('Serving on port %d' % args.port)
  httpd = simple_server.make_server('0.0.0.0', args.port, api)
  httpd.serve_forever()
예제 #15
0
import argparse
import falcon
from hparams import hparams, hparams_debug_string
import os
from synthesizer import Synthesizer

from wsgiref import simple_server

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint',
                    required=False,
                    default='/tmp/tacotron-20180906/model.ckpt')
parser.add_argument('--port', type=int, default=9000)
parser.add_argument(
    '--hparams',
    default='',
    help=
    'Hyperparameter overrides as a comma-separated list of name=value pairs')
args = parser.parse_args()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
hparams.parse(args.hparams)
# print(hparams_debug_string())
synthesizer = Synthesizer()

# NOTE(review): --checkpoint is parsed above, but this hard-coded relative path
# is what gets loaded — confirm which of the two is intended.
synthesizer.load('tacotron-20180906/model.ckpt')
res = synthesizer.synthesize('apple i eat where is Jim')  # so res is the encoded audio data

# Write the audio to disk so that the message below is true; the script used
# to announce the file without ever creating it. The file name is spelled
# exactly as in that message.
# NOTE(review): assumes synthesize() returns bytes — confirm.
with open('ouput.wav', 'wb') as wav_file:
    wav_file.write(res)

# The message reads: "The result is saved in ouput.wav".
print("结果保存在ouput.wav里面")
예제 #16
0
class SynthesisResource:
  """Falcon resource that converts the ``text`` query parameter to speech."""

  def on_get(self, req, res):
    """Fill ``res`` with WAV data for ``text``; a missing or empty ``text`` is a bad request."""
    requested_text = req.params.get('text')
    if not requested_text:
      raise falcon.HTTPBadRequest()
    res.data = synthesizer.synthesize(requested_text)
    res.content_type = 'audio/wav'


# Shared synthesizer and the falcon application with its two routes.
synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())


if __name__ != '__main__':
  # Imported as a module: the checkpoint path comes from the environment.
  synthesizer.load(os.environ['CHECKPOINT'])
else:
  # Executed as a script: the checkpoint path comes from the command line.
  from wsgiref import simple_server
  parser = argparse.ArgumentParser()
  parser.add_argument('--checkpoint', required=True, help='Full path to model checkpoint')
  parser.add_argument('--port', type=int, default=9000)
  parser.add_argument('--hparams', default='',
    help='Hyperparameter overrides as a comma-separated list of name=value pairs')
  args = parser.parse_args()
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
  hparams.parse(args.hparams)
  print(hparams_debug_string())
  synthesizer.load(args.checkpoint)
  print('Serving on port %d' % args.port)
  httpd = simple_server.make_server('0.0.0.0', args.port, api)
  httpd.serve_forever()
예제 #17
0
        fvoice.close()
        #res.data = stream_dir.encode()


# Shared synthesizer and the falcon application with its two routes.
synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())

if __name__ == '__main__':
    # Executed as a script: parse options, load the model, serve with wsgiref.
    from wsgiref import simple_server
    parser = argparse.ArgumentParser()
    #parser.add_argument('--checkpoint', required=True, help='Full path to model checkpoint')
    parser.add_argument('--port', type=int, default=9000)
    parser.add_argument(
        '--hparams',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs',
        default='')
    args = parser.parse_args()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)
    print(hparams_debug_string())
    # The model directory is fixed here rather than taken from an option.
    synthesizer.load("logdir-tacotron2/monika", 2, None,
                     inference_prenet_dropout=False)
    print('Serving on port %d' % args.port)
    httpd = simple_server.make_server('0.0.0.0', args.port, api)
    httpd.serve_forever()