def load(path, s3_path, model, encoder, model_class, quantized=False, **kwargs):
    """Fetch a (possibly quantized) frozen seq2seq graph and wrap it in `model_class`.

    Parameters
    ----------
    path : dict
        Local path mapping; `path[model]` holds 'model', 'quantized' and 'vocab' entries.
    s3_path : dict
        Remote path mapping used by `check_file` to download missing artifacts.
    model : str
        Key selecting which model entry to load from `path` / `s3_path`.
    encoder : str or object
        'subword' or 'yttm' to build the matching encoder from the vocab file,
        otherwise passed through unchanged.
    model_class : callable
        Wrapper class receiving the graph tensors, session and encoder.
    quantized : bool, optional (default=False)
        If True, load the 8-bit quantized graph instead of the full one.

    Returns
    -------
    model_class instance
    """
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    # Pick which serialized graph to read based on the quantization flag.
    graph_key = 'quantized' if quantized else 'model'
    graph = load_graph(path[model][graph_key], **kwargs)
    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])
    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path[model]['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)
    return model_class(
        X=graph.get_tensor_by_name('import/Placeholder:0'),
        greedy=graph.get_tensor_by_name('import/greedy:0'),
        beam=graph.get_tensor_by_name('import/beam:0'),
        sess=generate_session(graph=graph, **kwargs),
        encoder=encoder,
    )
def multinomial(path, s3_path, module, label, sigmoid=False, **kwargs):
    """Load a pickled multinomial Naive-Bayes model plus its vectorizer and BPE encoder.

    Parameters
    ----------
    path : dict
        Local path mapping; `path['multinomial']` holds 'model', 'vector' and 'bpe'.
    s3_path : dict
        Remote path mapping used by `check_file` to download missing artifacts.
    module : str
        Module name, used only to build the cache-clearing hint in the error message.
    label : list
        Class labels; its length decides binary vs multiclass when `sigmoid` is False.
    sigmoid : bool, optional (default=False)
        If True, wrap as a multilabel (sigmoid) model regardless of label count.

    Returns
    -------
    MultilabelBayes, MulticlassBayes or BinaryBayes instance

    Raises
    ------
    Exception
        If the pickled artifacts are corrupted and cannot be loaded.
    """
    path = check_file(path['multinomial'], s3_path['multinomial'], **kwargs)
    try:
        with open(path['model'], 'rb') as fopen:
            multinomial = pickle.load(fopen)
        with open(path['vector'], 'rb') as fopen:
            vectorize = pickle.load(fopen)
    # Was `except BaseException`, which also swallowed KeyboardInterrupt /
    # SystemExit; `Exception` is wide enough for any pickle/IO corruption.
    except Exception:
        path = os.path.normpath(f'{module}/multinomial')
        raise Exception(
            f"model corrupted due to some reasons, please run `malaya.clear_cache('{path}')` and try again"
        )
    bpe = YTTMEncoder(vocab_file=path['bpe'])
    stemmer = naive()
    cleaning = partial(_classification_textcleaning_stemmer, stemmer=stemmer)
    # sigmoid => multilabel; otherwise pick multiclass/binary from label count.
    if sigmoid:
        selected_model = MultilabelBayes
    elif len(label) > 2:
        selected_model = MulticlassBayes
    else:
        selected_model = BinaryBayes
    return selected_model(
        multinomial=multinomial,
        label=label,
        vectorize=vectorize,
        bpe=bpe,
        cleaning=cleaning,
    )
def load(module, model, encoder, model_class, quantized=False, **kwargs):
    """Fetch a frozen seq2seq graph via `check_file` and wrap it in `model_class`.

    Parameters
    ----------
    module : str
        Module name used to resolve the vocab file through `LM_VOCAB`.
    model : str
        Model identifier passed to `check_file`.
    encoder : str or object
        'subword' or 'yttm' to build the matching encoder from the vocab file,
        otherwise passed through unchanged.
    model_class : callable
        Wrapper class receiving the resolved input/output nodes, session and encoder.
    quantized : bool, optional (default=False)
        If True, load the 8-bit quantized graph.

    Returns
    -------
    model_class instance
    """
    file_keys = {'model': 'model.pb', 'vocab': LM_VOCAB[module]}
    path = check_file(
        file=model,
        module=module,
        keys=file_keys,
        quantized=quantized,
        **kwargs,
    )
    graph = load_graph(path['model'], **kwargs)
    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path['vocab'])
    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)
    # Resolve the graph's single input plus the two decoding outputs.
    input_nodes, output_nodes = nodes_session(
        graph, ['Placeholder'], ['greedy', 'beam']
    )
    return model_class(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=graph, **kwargs),
        encoder=encoder,
    )
def deep_model(quantized: bool = False, **kwargs):
    """
    Load deep learning language detection model.
    Original size is 51.2MB, Quantized size 12.8MB.

    Parameters
    ----------
    quantized : bool, optional (default=False)
        if True, will load 8-bit quantized model.
        Quantized model not necessary faster, totally depends on the machine.

    Returns
    -------
    result : malaya.model.tf.DeepLang class
    """
    path = check_file(
        file='lang-32',
        module='language-detection',
        keys={
            'model': 'model.pb',
            'vector': LANGUAGE_DETECTION_BOW,
            'bpe': LANGUAGE_DETECTION_VOCAB,
        },
        quantized=quantized,
        **kwargs,
    )
    graph = load_graph(path['model'], **kwargs)
    bpe = YTTMEncoder(vocab_file=path['bpe'])
    # Pretrained bag-of-words vectorizer pickled alongside the graph.
    with open(path['vector'], 'rb') as fopen:
        vector = pickle.load(fopen)
    # Two sparse placeholders (X and W), each given as shape/values/indices.
    placeholder_parts = ['shape', 'values', 'indices']
    input_names = [
        f'{prefix}_Placeholder/{part}'
        for prefix in ('X', 'W')
        for part in placeholder_parts
    ]
    input_nodes, output_nodes = nodes_session(graph, input_names, ['logits'])
    return DeepLang(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=graph, **kwargs),
        vectorizer=vector,
        bpe=bpe,
        label=lang_labels,
    )
def deep_model(quantized: bool = False, **kwargs):
    """
    Load LSTM + Bahdanau Attention stemming model, this also include lemmatization.
    Original size 41.6MB, quantized size 10.6MB .

    Parameters
    ----------
    quantized : bool, optional (default=False)
        if True, will load 8-bit quantized model.
        Quantized model not necessary faster, totally depends on the machine.

    Returns
    -------
    result: malaya.stem.DeepStemmer class
    """
    path = check_file(
        file='lstm-bahdanau',
        module='stem',
        keys={'model': 'model.pb', 'vocab': STEMMER_VOCAB},
        quantized=quantized,
        **kwargs,
    )
    graph = load_graph(path['model'], **kwargs)
    bpe = YTTMEncoder(vocab_file=path['vocab'], id_mode=True)
    # Decoding outputs live under non-standard node names, so they are
    # supplied through `extra` instead of the plain outputs list.
    input_nodes, output_nodes = nodes_session(
        graph,
        ['Placeholder'],
        [],
        extra={
            'greedy': 'import/decode_1/greedy:0',
            'beam': 'import/decode_2/beam:0',
        },
    )
    return DeepStemmer(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=graph, **kwargs),
        bpe=bpe,
        tokenizer=Tokenizer().tokenize,
    )
def load(path, s3_path, model, encoder, model_class, **kwargs):
    """Fetch a frozen seq2seq graph and wrap it in `model_class` (positional API).

    Parameters
    ----------
    path : dict
        Local path mapping; `path[model]` holds 'model' and 'vocab' entries.
    s3_path : dict
        Remote path mapping used by `check_file` to download missing artifacts.
    model : str
        Key selecting which model entry to load from `path` / `s3_path`.
    encoder : str or object
        'subword' or 'yttm' to build the matching encoder from the vocab file,
        otherwise passed through unchanged.
    model_class : callable
        Wrapper class called positionally with (X, greedy, beam, sess, encoder).

    Returns
    -------
    model_class instance
    """
    check_file(path[model], s3_path[model], **kwargs)
    graph = load_graph(path[model]['model'], **kwargs)
    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])
    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path[model]['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)
    # Resolve tensors up front; model_class takes them positionally.
    input_tensor = graph.get_tensor_by_name('import/Placeholder:0')
    greedy_tensor = graph.get_tensor_by_name('import/greedy:0')
    beam_tensor = graph.get_tensor_by_name('import/beam:0')
    session = generate_session(graph=graph, **kwargs)
    return model_class(input_tensor, greedy_tensor, beam_tensor, session, encoder)