def load(path, s3_path, model, encoder, model_class, quantized=False, **kwargs):
    """Download (if needed) a frozen graph for *model* and wrap it.

    Parameters
    ----------
    path : dict
        Per-model mapping of local file paths (keys include 'model',
        'quantized' and 'vocab').
    s3_path : dict
        Per-model mapping of remote file paths, passed to ``check_file``.
    model : str
        Key selecting the entry inside ``path`` / ``s3_path``.
    encoder : str
        Either ``'subword'`` or ``'yttm'``; replaced below by the
        corresponding loaded encoder object.
    model_class : callable
        Wrapper class receiving the graph tensors, session and encoder.
    quantized : bool, optional (default=False)
        When True, load the quantized graph instead of the full one.

    Returns
    -------
    model_class instance
    """
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)

    # Quantized checkpoints live under a different key in the path mapping.
    graph_key = 'quantized' if quantized else 'model'
    g = load_graph(path[model][graph_key], **kwargs)

    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])
    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path[model]['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)

    return model_class(
        X=g.get_tensor_by_name('import/Placeholder:0'),
        greedy=g.get_tensor_by_name('import/greedy:0'),
        beam=g.get_tensor_by_name('import/beam:0'),
        sess=generate_session(graph=g, **kwargs),
        encoder=encoder,
    )
def load(module, model, encoder, model_class, quantized=False, **kwargs):
    """Fetch the frozen graph for *module*/*model*, resolve its IO nodes and wrap it.

    Parameters
    ----------
    module : str
        Module name; also keys into ``LM_VOCAB`` for the vocab filename.
    model : str
        Model name forwarded to ``check_file``.
    encoder : str
        Either ``'subword'`` or ``'yttm'``; replaced below by the
        corresponding loaded encoder object.
    model_class : callable
        Wrapper class receiving input/output nodes, session and encoder.
    quantized : bool, optional (default=False)
        When True, download/load the quantized graph.

    Returns
    -------
    model_class instance
    """
    path = check_file(
        file=model,
        module=module,
        keys={'model': 'model.pb', 'vocab': LM_VOCAB[module]},
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)

    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path['vocab'])
    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)

    # Single placeholder in, greedy + beam decodes out.
    input_nodes, output_nodes = nodes_session(g, ['Placeholder'], ['greedy', 'beam'])

    return model_class(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=g, **kwargs),
        encoder=encoder,
    )
def load(path, s3_path, model, encoder, model_class, **kwargs):
    """Download (if needed) a frozen graph for *model* and wrap it (no quantized variant).

    Parameters
    ----------
    path : dict
        Per-model mapping of local file paths ('model', 'vocab').
    s3_path : dict
        Per-model mapping of remote file paths, passed to ``check_file``.
    model : str
        Key selecting the entry inside ``path`` / ``s3_path``.
    encoder : str
        Either ``'subword'`` or ``'yttm'``; replaced below by the
        corresponding loaded encoder object.
    model_class : callable
        Wrapper class receiving the graph tensors, session and encoder
        as positional arguments.

    Returns
    -------
    model_class instance
    """
    check_file(path[model], s3_path[model], **kwargs)
    g = load_graph(path[model]['model'], **kwargs)

    vocab = path[model]['vocab']
    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(vocab)
    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(vocab, True)
        encoder = YTTMEncoder(bpe, subword_mode)

    # Positional order expected by model_class: X, greedy, beam, sess, encoder.
    return model_class(
        g.get_tensor_by_name('import/Placeholder:0'),
        g.get_tensor_by_name('import/greedy:0'),
        g.get_tensor_by_name('import/beam:0'),
        generate_session(graph=g, **kwargs),
        encoder,
    )
def transformer(model='base', **kwargs):
    """
    Load transformer encoder-decoder model to translate MS-to-EN.

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'small'`` - transformer Small parameters.
        * ``'base'`` - transformer Base parameters.
        * ``'large'`` - transformer Large parameters.

    Returns
    -------
    result: malaya.model.tf.TRANSLATION class
    """
    model = model.lower()
    if model not in _transformer_availability:
        raise Exception(
            'model not supported, please check supported models from malaya.translation.ms_en.available_transformer()'
        )

    # Resolve local/remote paths for the ms-en translation bundle.
    path, s3_path = PATH_TRANSLATION['ms-en'], S3_PATH_TRANSLATION['ms-en']
    check_file(path[model], s3_path[model], **kwargs)
    g = load_graph(path[model]['model'], **kwargs)

    # Imported lazily so the heavy deps load only when a model is requested.
    from malaya.text.t2t import text_encoder
    from malaya.model.tf import TRANSLATION

    subword = text_encoder.SubwordTextEncoder(path[model]['vocab'])
    return TRANSLATION(
        g.get_tensor_by_name('import/Placeholder:0'),
        g.get_tensor_by_name('import/greedy:0'),
        g.get_tensor_by_name('import/beam:0'),
        generate_session(graph=g, **kwargs),
        subword,
    )
def transformer(model='base', **kwargs):
    """
    Load transformer encoder-decoder model to generate a paraphrase given a string.

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'base'`` - transformer Base parameters.
        * ``'tiny'`` - transformer Tiny parameters.
        * ``'tiny-bert'`` - BERT-BERT Tiny parameters.
        * ``'bert'`` - BERT-BERT Base parameters.

    Returns
    -------
    result: malaya.model.tf.PARAPHRASE class
    """
    model = model.lower()
    if model not in _transformer_availability:
        raise Exception(
            'model not supported, please check supported models from malaya.paraphrase.available_transformer()'
        )
    if 'bert' in model:
        path = PATH_PARAPHRASE[model]
        s3_path = S3_PATH_PARAPHRASE[model]
        check_file(path, s3_path, **kwargs)
        # Fix: forward **kwargs to load_graph / generate_session, consistent with
        # the other loaders in this file — previously graph/session options passed
        # by the caller were silently dropped here.
        g = load_graph(path['model'], **kwargs)
        if model in ['bert', 'tiny-bert']:
            from malaya.text.bpe import sentencepiece_tokenizer_bert

            tokenizer = sentencepiece_tokenizer_bert(
                path['tokenizer'], path['vocab']
            )
            from malaya.model.bert import PARAPHRASE_BERT

            return PARAPHRASE_BERT(
                X=g.get_tensor_by_name('import/Placeholder:0'),
                segment_ids=g.get_tensor_by_name('import/Placeholder_1:0'),
                input_masks=g.get_tensor_by_name('import/Placeholder_2:0'),
                logits=g.get_tensor_by_name('import/greedy:0'),
                sess=generate_session(graph=g, **kwargs),
                tokenizer=tokenizer,
            )
    else:
        path = PATH_PARAPHRASE['transformer']
        s3_path = S3_PATH_PARAPHRASE['transformer']
        check_file(path[model], s3_path[model], **kwargs)
        g = load_graph(path[model]['model'], **kwargs)
        from malaya.text.t2t import text_encoder
        from malaya.model.tf import PARAPHRASE

        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])
        # Positional order expected by PARAPHRASE: X, greedy, beam, sess, encoder.
        return PARAPHRASE(
            g.get_tensor_by_name('import/Placeholder:0'),
            g.get_tensor_by_name('import/greedy:0'),
            g.get_tensor_by_name('import/beam:0'),
            generate_session(graph=g, **kwargs),
            encoder,
        )