Example #1
def fastspeech_load(path,
                    s3_path,
                    model,
                    name,
                    normalizer,
                    quantized=False,
                    **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    output_nodes = ['decoder_output', 'post_mel_outputs']
    outputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in output_nodes}
    return Fastspeech(
        X=g.get_tensor_by_name('import/Placeholder:0'),
        speed_ratios=g.get_tensor_by_name('import/speed_ratios:0'),
        f0_ratios=g.get_tensor_by_name('import/f0_ratios:0'),
        energy_ratios=g.get_tensor_by_name('import/energy_ratios:0'),
        logits=outputs,
        normalizer=normalizer,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=name,
    )
Example #2
def tacotron_load(path,
                  s3_path,
                  model,
                  name,
                  normalizer,
                  quantized=False,
                  **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder', 'Placeholder_1']
    outputs = [
        'decoder_output', 'post_mel_outputs', 'alignment_histories'
    ]
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return Tacotron(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        normalizer=normalizer,
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        name=name,
    )
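Most of the loaders in these examples delegate tensor lookup to nodes_session. A minimal sketch of what that helper presumably does, inferred from the explicit dict comprehensions in Examples #1 and #8; the real helper may also return an eager wrapper, as in the three-value unpacking used here:

def nodes_session_sketch(graph, inputs, outputs):
    # Hypothetical reconstruction: map each node name to its tensor in the
    # imported graph, mirroring the f'import/{n}:0' lookups in Examples #1 and #8.
    input_nodes = {n: graph.get_tensor_by_name(f'import/{n}:0') for n in inputs}
    output_nodes = {n: graph.get_tensor_by_name(f'import/{n}:0') for n in outputs}
    return input_nodes, output_nodes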
Example #3
def load(model, module, quantized=False, **kwargs):
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb'},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    inputs = ['mel', 'ori_vector', 'target_vector', 'mel_lengths']
    outputs = ['mel_before', 'mel_after']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    # Derive the speaker-vector backbone from the model name,
    # e.g. '<family>-<size>-vggvox-v2' -> 'vggvox-v2'.
    speaker_vector_model = '-'.join(model.split('-')[2:])

    speaker_model = speaker_vector.deep_model(speaker_vector_model, **kwargs)

    magnitudes = {
        'vggvox-v2': lambda x: x * 30 - 3.5,
        'speakernet': lambda x: x * 3,
    }

    return FastVC(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        waveform_to_mel=universal_mel,
        speaker_vector=speaker_model,
        magnitude=magnitudes[speaker_vector_model],
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=module,
    )
Example #4
def fastspeech_load(
    path, s3_path, model, name, normalizer, quantized = False, **kwargs
):
    check_file(path[model], s3_path[model], quantized = quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder', 'speed_ratios', 'f0_ratios', 'energy_ratios']
    outputs = ['decoder_output', 'post_mel_outputs']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    stats = np.load(path[model]['stats'])

    return Fastspeech(
        input_nodes = input_nodes,
        output_nodes = output_nodes,
        normalizer = normalizer,
        stats = stats,
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = name,
    )
Example #5
def load_stft(path,
              s3_path,
              model,
              name,
              instruments,
              quantized=False,
              **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder']
    outputs = [f'logits_{i}' for i in range(len(instruments))]
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return UNETSTFT(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        instruments=instruments,
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        name=name,
    )
Example #6
def ctc_load(path, s3_path, model, name, quantized = False, **kwargs):
    check_file(path[model], s3_path[model], quantized = quantized, **kwargs)

    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)

    with open(path[model]['vocab']) as fopen:
        vocab = json.load(fopen) + ['{', '}', '[']

    featurizer = STTFeaturizer(normalize_per_feature = True)

    return STT(
        X = g.get_tensor_by_name('import/Placeholder:0'),
        X_len = g.get_tensor_by_name('import/Placeholder_1:0'),
        logits = g.get_tensor_by_name('import/logits:0'),
        seq_lens = g.get_tensor_by_name('import/seq_lens:0'),
        featurizer = featurizer,
        vocab = vocab,
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = name,
    )
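The older-style loaders (Examples #1, #2, #5, #6, #8, #12, #14) index path[model][...] directly, so the caller is expected to pass nested dictionaries. A hypothetical shape, inferred only from those indexing patterns; the model name, keys, and file locations below are assumptions:

path = {
    'some-model': {                              # assumed model name
        'model': 'some-model/model.pb',          # frozen graph
        'quantized': 'some-model/quantized.pb',  # quantized variant
        'vocab': 'some-model/vocab.json',        # only used by the STT loaders
        'stats': 'some-model/stats.npy',         # only used by Example #4
    },
}
# s3_path mirrors the same nesting with the remote locations check_file downloads from.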
Example #7
def load(path, s3_path, model, name, extra, label, quantized=False, **kwargs):

    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)

    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)

    vectorizer_mapping = {
        'vggvox-v1': featurization.vggvox_v1,
        'vggvox-v2': featurization.vggvox_v2,
        'deep-speaker': featurization.deep_speaker,
        'speakernet': featurization.SpeakerNetFeaturizer(
            **{**speakernet_config, **extra}
        ),
    }

    if name == 'speaker-vector':
        if model == 'speakernet':
            model_class = Speakernet
        else:
            model_class = Speaker2Vec
    else:
        if model == 'speakernet':
            model_class = SpeakernetClassification
        else:
            model_class = Classification

    if model == 'speakernet':
        inputs = ['Placeholder', 'Placeholder_1']
    else:
        inputs = ['Placeholder']
    outputs = ['logits']

    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return model_class(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        vectorizer=vectorizer_mapping[model],
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        extra=extra,
        label=label,
        name=name,
    )
Example #8
def transducer_load(path, s3_path, model, name, quantized = False, **kwargs):
    check_file(path[model], s3_path[model], quantized = quantized, **kwargs)

    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    vocab = subword_load(path[model]['vocab'].replace('.subwords', ''))
    featurizer = STTFeaturizer(normalize_per_feature = True)

    time_reduction_factor = {'small-conformer': 4, 'conformer': 4}

    input_nodes = [
        'X_placeholder',
        'X_len_placeholder',
        'encoded_placeholder',
        'predicted_placeholder',
        'states_placeholder',
    ]
    output_nodes = [
        'encoded',
        'ytu',
        'new_states',
        'padded_features',
        'padded_lens',
        'initial_states',
    ]

    inputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in input_nodes}
    outputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in output_nodes}

    return Transducer(
        X_placeholder = inputs['X_placeholder'],
        X_len_placeholder = inputs['X_len_placeholder'],
        encoded_placeholder = inputs['encoded_placeholder'],
        predicted_placeholder = inputs['predicted_placeholder'],
        states_placeholder = inputs['states_placeholder'],
        padded_features = outputs['padded_features'],
        padded_lens = outputs['padded_lens'],
        encoded = outputs['encoded'],
        ytu = outputs['ytu'],
        new_states = outputs['new_states'],
        initial_states = outputs['initial_states'],
        featurizer = featurizer,
        vocab = vocab,
        time_reduction_factor = time_reduction_factor[model],
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = name,
    )
Example #9
def load(model, module, extra, label, quantized = False, **kwargs):

    path = check_file(
        file = model,
        module = module,
        keys = {'model': 'model.pb'},
        quantized = quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)

    vectorizer_mapping = {
        'vggvox-v1': featurization.vggvox_v1,
        'vggvox-v2': featurization.vggvox_v2,
        'deep-speaker': featurization.deep_speaker,
        'speakernet': featurization.SpeakerNetFeaturizer(
            **{**speakernet_config, **extra}
        ),
    }

    if module == 'speaker-vector':
        if model == 'speakernet':
            model_class = Speakernet
        else:
            model_class = Speaker2Vec
    else:
        if model == 'speakernet':
            model_class = SpeakernetClassification
        else:
            model_class = Classification

    if model == 'speakernet':
        inputs = ['Placeholder', 'Placeholder_1']
    else:
        inputs = ['Placeholder']
    outputs = ['logits']

    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return model_class(
        input_nodes = input_nodes,
        output_nodes = output_nodes,
        vectorizer = vectorizer_mapping[model],
        sess = generate_session(graph = g, **kwargs),
        model = model,
        extra = extra,
        label = label,
        name = module,
    )
Example #10
def transducer_load(model, module, quantized = False, **kwargs):
    path = check_file(
        file = model,
        module = module,
        keys = {'model': 'model.pb', 'vocab': TRANSDUCER_VOCAB},
        quantized = quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    vocab = subword_load(path['vocab'].replace('.subwords', ''))
    featurizer = STTFeaturizer(normalize_per_feature = True)

    time_reduction_factor = {
        'small-conformer': 4,
        'conformer': 4,
        'large-conformer': 4,
        'alconformer': 4,
    }

    inputs = [
        'X_placeholder',
        'X_len_placeholder',
        'encoded_placeholder',
        'predicted_placeholder',
        'states_placeholder',
    ]
    outputs = [
        'encoded',
        'ytu',
        'new_states',
        'padded_features',
        'padded_lens',
        'initial_states',
        'greedy_decoder',
        'non_blank_transcript',
        'non_blank_stime',
    ]
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return Transducer(
        input_nodes = input_nodes,
        output_nodes = output_nodes,
        featurizer = featurizer,
        vocab = vocab,
        time_reduction_factor = time_reduction_factor.get(model, 4),
        sess = generate_session(graph = g, **kwargs),
        model = model,
        name = module,
    )
Example #11
def load_1d(model, module, quantized=False, **kwargs):
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb'},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    inputs = ['Placeholder']
    outputs = ['logits']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return UNET1D(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=module,
    )
Example #12
def load(path, s3_path, model, name, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['Placeholder']
    outputs = ['logits']
    eager_g, input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return UNET(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=g, **kwargs),
        eager_g=eager_g,
        model=model,
        name=name,
    )
Example #13
def load_stft(model, module, instruments, quantized=False, **kwargs):
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb'},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)
    inputs = ['Placeholder']
    outputs = [f'logits_{i}' for i in range(len(instruments))]
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return UNETSTFT(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        instruments=instruments,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=module,
    )
Example #14
def load(path, s3_path, model, name, quantized=False, **kwargs):
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)
    inputs = ['mel', 'ori_vector', 'target_vector', 'mel_lengths']
    outputs = ['mel_before', 'mel_after']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    speaker_model = speaker_vector.deep_model('vggvox-v2', **kwargs)
    return FastVC(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        waveform_to_mel=universal_mel,
        speaker_vector=speaker_model,
        sess=generate_session(graph=g, **kwargs),
        model=model,
        name=name,
    )
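The newer-style loaders (Examples #3, #9, #10, #11, #13) resolve files themselves through check_file(file=model, module=module, ...), so the caller only passes names. A hypothetical invocation of the transducer loader from Example #10; the module string is an illustrative assumption, while 'conformer' is taken from its time_reduction_factor mapping:

stt = transducer_load(
    model='conformer',        # a key of time_reduction_factor in Example #10
    module='speech-to-text',  # assumed module name
    quantized=False,
)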