def fastspeech_load(path, s3_path, model, name, normalizer, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    output_nodes = ['decoder_output', 'post_mel_outputs']
    outputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in output_nodes}
    return Fastspeech(
        X=g.get_tensor_by_name('import/Placeholder:0'),
        speed_ratios=g.get_tensor_by_name('import/speed_ratios:0'),
        f0_ratios=g.get_tensor_by_name('import/f0_ratios:0'),
        energy_ratios=g.get_tensor_by_name('import/energy_ratios:0'),
        logits=outputs,
        normalizer=normalizer,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=name,
    )
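# The newer loaders below call a shared `nodes_session` helper instead of
# repeating the `get_tensor_by_name` comprehension used above. A minimal sketch
# of what such a helper could look like, assuming the frozen graphs are
# imported under the default 'import' prefix as elsewhere in this file; the
# real `nodes_session` may differ, and some variants also return an eager-mode
# wrapper (`eager_g`), which is omitted here.
def _nodes_session_sketch(g, inputs, outputs):
    # map plain node names to their tensors in the imported graph
    input_nodes = {n: g.get_tensor_by_name(f'import/{n}:0') for n in inputs}
    output_nodes = {n: g.get_tensor_by_name(f'import/{n}:0') for n in outputs}
    return input_nodes, output_nodes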
def tacotron_load(path, s3_path, model, name, normalizer, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder', 'Placeholder_1']
    outputs = [
        'decoder_output',
        'post_mel_outputs',
        'alignment_histories',
    ]
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return Tacotron(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        normalizer=normalizer,
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        name=name,
    )
def load(model, module, quantized=False, **kwargs):
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb'},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    inputs = ['mel', 'ori_vector', 'target_vector', 'mel_lengths']
    outputs = ['mel_before', 'mel_after']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    # the speaker-vector backbone is encoded in the tail of the model name
    speaker_vector_model = '-'.join(model.split('-')[2:])
    speaker_model = speaker_vector.deep_model(speaker_vector_model, **kwargs)
    magnitudes = {
        'vggvox-v2': lambda x: x * 30 - 3.5,
        'speakernet': lambda x: x * 3,
    }
    return FastVC(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        waveform_to_mel=universal_mel,
        speaker_vector=speaker_model,
        magnitude=magnitudes[speaker_vector_model],
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=module,
    )
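# Illustration of the backbone lookup above, using hypothetical model names:
# everything after the second '-' is kept as the speaker-vector model.
assert '-'.join('fastvc-32-vggvox-v2'.split('-')[2:]) == 'vggvox-v2'
assert '-'.join('fastvc-32-speakernet'.split('-')[2:]) == 'speakernet'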
def fastspeech_load(
    path, s3_path, model, name, normalizer, quantized = False, **kwargs
):
    check_file(path[model], s3_path[model], quantized = quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder', 'speed_ratios', 'f0_ratios', 'energy_ratios']
    outputs = ['decoder_output', 'post_mel_outputs']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    stats = np.load(path[model]['stats'])
    return Fastspeech(
        input_nodes = input_nodes,
        output_nodes = output_nodes,
        normalizer = normalizer,
        stats = stats,
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = name,
    )
def load_stft(path, s3_path, model, name, instruments, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder']
    outputs = [f'logits_{i}' for i in range(len(instruments))]
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return UNETSTFT(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        instruments=instruments,
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        name=name,
    )
def ctc_load(path, s3_path, model, name, quantized = False, **kwargs):
    check_file(path[model], s3_path[model], quantized = quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    with open(path[model]['vocab']) as fopen:
        vocab = json.load(fopen) + ['{', '}', '[']
    featurizer = STTFeaturizer(normalize_per_feature = True)
    return STT(
        X = g.get_tensor_by_name('import/Placeholder:0'),
        X_len = g.get_tensor_by_name('import/Placeholder_1:0'),
        logits = g.get_tensor_by_name('import/logits:0'),
        seq_lens = g.get_tensor_by_name('import/seq_lens:0'),
        featurizer = featurizer,
        vocab = vocab,
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = name,
    )
def load(path, s3_path, model, name, extra, label, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    vectorizer_mapping = {
        'vggvox-v1': featurization.vggvox_v1,
        'vggvox-v2': featurization.vggvox_v2,
        'deep-speaker': featurization.deep_speaker,
        'speakernet': featurization.SpeakerNetFeaturizer(
            **{**speakernet_config, **extra}
        ),
    }
    if name == 'speaker-vector':
        if model == 'speakernet':
            model_class = Speakernet
        else:
            model_class = Speaker2Vec
    else:
        if model == 'speakernet':
            model_class = SpeakernetClassification
        else:
            model_class = Classification

    if model == 'speakernet':
        inputs = ['Placeholder', 'Placeholder_1']
        outputs = ['logits']
    else:
        inputs = ['Placeholder']
        outputs = ['logits']
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return model_class(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        vectorizer=vectorizer_mapping[model],
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        extra=extra,
        label=label,
        name=name,
    )
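# A compact restatement of the class selection above, for reference only
# (hypothetical helper, not part of the module): 'speaker-vector' loaders
# return embedding classes, everything else returns classification classes,
# and 'speakernet' gets its own variants because its graph exposes a second
# input placeholder.
def _resolve_model_class_sketch(name, model):
    if name == 'speaker-vector':
        return Speakernet if model == 'speakernet' else Speaker2Vec
    return SpeakernetClassification if model == 'speakernet' else Classification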
def transducer_load(path, s3_path, model, name, quantized = False, **kwargs):
    check_file(path[model], s3_path[model], quantized = quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    vocab = subword_load(path[model]['vocab'].replace('.subwords', ''))
    featurizer = STTFeaturizer(normalize_per_feature = True)
    time_reduction_factor = {'small-conformer': 4, 'conformer': 4}
    input_nodes = [
        'X_placeholder',
        'X_len_placeholder',
        'encoded_placeholder',
        'predicted_placeholder',
        'states_placeholder',
    ]
    output_nodes = [
        'encoded',
        'ytu',
        'new_states',
        'padded_features',
        'padded_lens',
        'initial_states',
    ]
    inputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in input_nodes}
    outputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in output_nodes}
    return Transducer(
        X_placeholder = inputs['X_placeholder'],
        X_len_placeholder = inputs['X_len_placeholder'],
        encoded_placeholder = inputs['encoded_placeholder'],
        predicted_placeholder = inputs['predicted_placeholder'],
        states_placeholder = inputs['states_placeholder'],
        padded_features = outputs['padded_features'],
        padded_lens = outputs['padded_lens'],
        encoded = outputs['encoded'],
        ytu = outputs['ytu'],
        new_states = outputs['new_states'],
        initial_states = outputs['initial_states'],
        featurizer = featurizer,
        vocab = vocab,
        time_reduction_factor = time_reduction_factor[model],
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = name,
    )
def load(model, module, extra, label, quantized = False, **kwargs):
    path = check_file(
        file = model,
        module = module,
        keys = {'model': 'model.pb'},
        quantized = quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    vectorizer_mapping = {
        'vggvox-v1': featurization.vggvox_v1,
        'vggvox-v2': featurization.vggvox_v2,
        'deep-speaker': featurization.deep_speaker,
        'speakernet': featurization.SpeakerNetFeaturizer(
            **{**speakernet_config, **extra}
        ),
    }
    if module == 'speaker-vector':
        if model == 'speakernet':
            model_class = Speakernet
        else:
            model_class = Speaker2Vec
    else:
        if model == 'speakernet':
            model_class = SpeakernetClassification
        else:
            model_class = Classification

    if model == 'speakernet':
        inputs = ['Placeholder', 'Placeholder_1']
        outputs = ['logits']
    else:
        inputs = ['Placeholder']
        outputs = ['logits']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return model_class(
        input_nodes = input_nodes,
        output_nodes = output_nodes,
        vectorizer = vectorizer_mapping[model],
        sess = generate_session(graph = g, **kwargs),
        model = model,
        extra = extra,
        label = label,
        name = module,
    )
def transducer_load(model, module, quantized = False, **kwargs):
    path = check_file(
        file = model,
        module = module,
        keys = {'model': 'model.pb', 'vocab': TRANSDUCER_VOCAB},
        quantized = quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    vocab = subword_load(path['vocab'].replace('.subwords', ''))
    featurizer = STTFeaturizer(normalize_per_feature = True)
    time_reduction_factor = {
        'small-conformer': 4,
        'conformer': 4,
        'large-conformer': 4,
        'alconformer': 4,
    }
    inputs = [
        'X_placeholder',
        'X_len_placeholder',
        'encoded_placeholder',
        'predicted_placeholder',
        'states_placeholder',
    ]
    outputs = [
        'encoded',
        'ytu',
        'new_states',
        'padded_features',
        'padded_lens',
        'initial_states',
        'greedy_decoder',
        'non_blank_transcript',
        'non_blank_stime',
    ]
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return Transducer(
        input_nodes = input_nodes,
        output_nodes = output_nodes,
        featurizer = featurizer,
        vocab = vocab,
        time_reduction_factor = time_reduction_factor.get(model, 4),
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = module,
    )
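# Illustration of the time-reduction lookup above: the listed conformer
# variants map to 4, and .get() falls back to 4 for any model name missing
# from the table (the second name below is hypothetical).
_factors = {
    'small-conformer': 4,
    'conformer': 4,
    'large-conformer': 4,
    'alconformer': 4,
}
assert _factors.get('conformer', 4) == 4
assert _factors.get('tiny-conformer', 4) == 4  # hypothetical, not in the table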
def load_1d(model, module, quantized=False, **kwargs):
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb'},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    inputs = ['Placeholder']
    outputs = ['logits']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return UNET1D(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=module,
    )
def load(path, s3_path, model, name, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder']
    outputs = ['logits']
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return UNET(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        name=name,
    )
def load_stft(model, module, instruments, quantized=False, **kwargs):
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb'},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    inputs = ['Placeholder']
    outputs = [f'logits_{i}' for i in range(len(instruments))]
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    return UNETSTFT(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        instruments=instruments,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=module,
    )
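# Illustration of the per-instrument output naming above, using a hypothetical
# instruments list; each stem maps to an indexed logits node in the graph.
_instruments = ['vocals', 'drums', 'bass', 'other']  # hypothetical stems
assert [f'logits_{i}' for i in range(len(_instruments))] == [
    'logits_0', 'logits_1', 'logits_2', 'logits_3'
]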
def load(path, s3_path, model, name, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'
    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['mel', 'ori_vector', 'target_vector', 'mel_lengths']
    outputs = ['mel_before', 'mel_after']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)
    speaker_model = speaker_vector.deep_model('vggvox-v2', **kwargs)
    return FastVC(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        waveform_to_mel=universal_mel,
        speaker_vector=speaker_model,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=name,
    )