Example #1
import os
import time

from tqdm import tqdm

# Project-local names (Tacotron2, log, hparams_debug_string) are assumed to be
# importable from the surrounding codebase.
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, "eval")
    log_dir = os.path.join(output_dir, "logs-eval")
    
    # Create the output directories if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, "wavs"), exist_ok=True)
    os.makedirs(os.path.join(log_dir, "plots"), exist_ok=True)
    
    log(hparams_debug_string())
    synth = Tacotron2(checkpoint_path, hparams)
    
    # Split the inputs into batches
    sentences = [sentences[i:i + hparams.tacotron_synthesis_batch_size]
                 for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]
    
    log("Starting Synthesis")
    with open(os.path.join(eval_dir, "map.txt"), "w") as file:
        for i, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = ["batch_{}_sentence_{}".format(i, j) for j in range(len(texts))]
            mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None)
            
            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write("|".join([str(x) for x in elems]) + "\n")
    log("synthesized mel spectrograms at {}".format(eval_dir))
    return eval_dir
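
For context, here is a minimal, hypothetical driver for run_eval. The checkpoint path, output directory, and sentences are placeholders; hparams is assumed to be the project's hyperparameter object with tacotron_synthesis_batch_size set:

# Hypothetical usage sketch for run_eval; all paths and sentences are placeholders.
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "Speech synthesis from text is now fast enough to run in real time.",
]
eval_dir = run_eval(args=None,
                    checkpoint_path="taco_pretrained/tacotron_model.ckpt-100000",
                    output_dir="tacotron_output",
                    hparams=hparams,  # project hparams object, assumed in scope
                    sentences=sentences)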
Example #2
 def load(self):
     """
     Effectively loads the model to GPU memory given the weights file that was passed in the
     constructor.
     """
     if self._low_mem:
         raise Exception("Cannot load the synthesizer permanently in low mem mode")
     tf.reset_default_graph()
     self._model = Tacotron2(self.checkpoint_fpath, hparams)
Example #3
    def _one_shot_synthesize_spectrograms(checkpoint_fpath, embeddings, texts):
        # Load the model and forward the inputs
        tf.reset_default_graph()
        model = Tacotron2(checkpoint_fpath, hparams)
        specs, alignments = model.my_synthesize(embeddings, texts)
        
        # Detach the outputs (not doing so will cause the process to hang)
        specs, alignments = [spec.copy() for spec in specs], alignments.copy()
        
        model.session.close()

        return specs, alignments
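
The copy-then-close pattern above only makes sense when the function runs in a short-lived worker process: copying detaches the outputs from the TensorFlow session before it is closed, and ending the worker releases the GPU memory. A minimal sketch of such a dispatch, assuming the function is picklable at its call site (the Pool usage is illustrative, not the project's actual code):

from multiprocessing import Pool

def synthesize_spectrograms_one_shot(checkpoint_fpath, embeddings, texts):
    # Run the synthesis in a throwaway worker so GPU memory is freed on exit.
    with Pool(processes=1) as pool:
        return pool.apply(_one_shot_synthesize_spectrograms,
                          (checkpoint_fpath, embeddings, texts))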
Example #4
import os

import tensorflow as tf
from tqdm import tqdm

# Project-local names (Tacotron2, hparams_debug_string) are assumed to be in scope.
def run_synthesis(in_dir, out_dir, model_dir, hparams):
    synth_dir = os.path.join(out_dir, "mels_gta")
    os.makedirs(synth_dir, exist_ok=True)
    metadata_filename = os.path.join(in_dir, "train.txt")
    print(hparams_debug_string())

    # Load the model in memory
    weights_dir = os.path.join(model_dir, "taco_pretrained")
    checkpoint_fpath = tf.train.get_checkpoint_state(
        weights_dir).model_checkpoint_path
    # Strip the machine-specific prefix and the "logs-" marker that were baked
    # into the stored checkpoint path, so it resolves on this machine
    checkpoint_fpath = checkpoint_fpath.replace(
        '/ssd_scratch/cvit/rudra/SV2TTS/', '')
    checkpoint_fpath = checkpoint_fpath.replace('logs-', '')
    synth = Tacotron2(checkpoint_fpath, hparams, gta=True)

    # Load the metadata
    with open(metadata_filename, encoding="utf-8") as f:
        metadata = [line.strip().split("|") for line in f][:149736]
        # hop_size / sample_rate is the frame shift in seconds (despite the "ms" name)
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        print("Loaded metadata for {} examples ({:.2f} hours)".format(
            len(metadata), hours))

    # Split the inputs into batches
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]
    # Quick and dirty fix to make sure that all batches have the same size
    # TODO: handle the final, smaller batch instead of dropping it
    metadata = metadata[:-1]

    print("Starting Synthesis")
    mel_dir = os.path.join(in_dir, "mels")
    embed_dir = os.path.join(in_dir, "embeds")
    meta_out_fpath = os.path.join(out_dir, "synthesized.txt")
    with open(meta_out_fpath, "w") as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            embed_filenames = [os.path.join(embed_dir, m[2]) for m in meta]
            basenames = [
                os.path.basename(m).replace(".npy", "").replace("mel-", "")
                for m in mel_filenames
            ]
            synth.synthesize(texts, basenames, synth_dir, None, mel_filenames,
                             embed_filenames)

            for elems in meta:
                file.write("|".join([str(x) for x in elems]) + "\n")

    print("Synthesized mel spectrograms at {}".format(synth_dir))
    return meta_out_fpath
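
The batch-splitting above slices the metadata into fixed-size chunks and then drops the final chunk, which may be smaller than the rest. A standalone illustration of that logic (the batch size of 2 is arbitrary):

# Standalone illustration of the slice-and-drop-last batching used above.
items = ["a", "b", "c", "d", "e"]
batch_size = 2
batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
batches = batches[:-1]  # drop the last, possibly incomplete batch
print(batches)  # [['a', 'b'], ['c', 'd']]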
Example #5
 def _one_shot_synthesize_spectrograms(checkpoint_fpath, embeddings, texts, seed):
     # Load the model and forward the inputs
     tf.compat.v1.reset_default_graph()
     model = Tacotron2(checkpoint_fpath, hparams, seed=seed)
     specs, alignments = model.my_synthesize(embeddings, texts)
     
     # Detach the outputs (not doing so will cause the process to hang)
     specs, alignments = [spec.copy() for spec in specs], alignments.copy()
     
     # Release the CUDA context held by this worker process
     model.session.close()
     numba.cuda.select_device(0)
     numba.cuda.close()
     
     return specs, alignments
Example #6
    def __init__(self, checkpoints_dir: Path, verbose=True, low_mem=False):
        """
        Creates a synthesizer ready for inference. The actual model isn't loaded in memory until
        needed or until load() is called.
        
        :param checkpoints_dir: path to the directory containing the checkpoint file as well as
        the weight files (.data, .index and .meta files)
        :param verbose: if False, only TensorFlow's outputs will be printed (TODO: suppress them
        too)
        :param low_mem: if True, the model will be loaded in a separate process and its resources
        will be released after each usage. This adds a large overhead and is only recommended if
        your GPU memory is low (<= 2 GB)
        """
        self.verbose = verbose
        self._low_mem = low_mem

        # Prepare the model
        self._model = None  # type: Tacotron2
        checkpoint_state = tf.train.get_checkpoint_state(checkpoints_dir)
        if checkpoint_state is None:
            raise Exception("Could not find any synthesizer weights under %s" %
                            checkpoints_dir)
        self.checkpoint_fpath = checkpoint_state.model_checkpoint_path

        if not self._low_mem:
            tf.reset_default_graph()
            self._model = Tacotron2(self.checkpoint_fpath,
                                    hparams,
                                    session=None)

        if verbose:
            model_name = checkpoints_dir.parent.name.replace("logs-", "")
            step = int(self.checkpoint_fpath[self.checkpoint_fpath.rfind('-') +
                                             1:])
            print("Found synthesizer \"%s\" trained to step %d" %
                  (model_name, step))
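
A hedged construction sketch for this class. The class name Synthesizer and the checkpoint directory are assumptions; the constructor signature is taken from the snippet above:

from pathlib import Path

# Hypothetical usage; the class name and the path are placeholders.
synthesizer = Synthesizer(Path("synthesizer/saved_models/logs-pretrained/taco_pretrained"),
                          verbose=True, low_mem=False)
# With low_mem=False the model is loaded eagerly; load() (Example #2) would also work.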