def test_shape_on_random_data(self):
    """Check the shapes of all four model outputs on random input ids."""
    set_seed(42)

    bs = 3
    src_len = 5
    tgt_len = 7

    encoder_config = transformers.BertConfig(
        hidden_size=11,
        intermediate_size=44,
        vocab_size=17,
        num_hidden_layers=1,
        num_attention_heads=1,
    )
    encoder = transformers.BertModel(encoder_config)

    # decoder accepts vocabulary of schema vocab + pointer embeddings
    decoder_config = transformers.BertConfig(
        hidden_size=11,
        intermediate_size=44,
        vocab_size=23,
        is_decoder=True,
        num_hidden_layers=1,
        num_attention_heads=1,
    )
    decoder = transformers.BertModel(decoder_config)

    # logits are projected into schema vocab and combined with pointer scores
    max_pointer = src_len + 3
    model = EncoderDecoderWPointerModel(encoder=encoder,
                                        decoder=decoder,
                                        max_src_len=max_pointer)

    x_enc = torch.randint(0, encoder_config.vocab_size, size=(bs, src_len))
    x_dec = torch.randint(0, decoder_config.vocab_size, size=(bs, tgt_len))

    out = model(input_ids=x_enc, decoder_input_ids=x_dec)

    # different encoders return different number of outputs
    # e.g. BERT returns two, but DistillBERT only one
    self.assertGreaterEqual(len(out), 4)

    schema_vocab = decoder_config.vocab_size - max_pointer

    combined_logits = out[0]
    expected_shape = (bs, tgt_len, schema_vocab + src_len)
    self.assertEqual(combined_logits.shape, expected_shape)

    decoder_hidden = out[1]
    expected_shape = (bs, tgt_len, decoder_config.hidden_size)
    self.assertEqual(decoder_hidden.shape, expected_shape)

    # BUGFIX: this variable was previously also named `combined_logits`,
    # shadowing the logits checked above. The (bs, hidden_size) shape
    # suggests a pooled decoder state — TODO confirm against the model.
    decoder_pooled = out[2]
    expected_shape = (bs, decoder_config.hidden_size)
    self.assertEqual(decoder_pooled.shape, expected_shape)

    encoder_hidden = out[3]
    expected_shape = (bs, src_len, encoder_config.hidden_size)
    self.assertEqual(encoder_hidden.shape, expected_shape)
Beispiel #2
0
    def __init__(self, code_token_counter, query_token_counter):
        """Build untrained BERT encoders for code tokens and query tokens.

        Vocab sizes come from the token counters; the pad token id is looked
        up from the counter map of each vocabulary.
        """
        self.code_token_counter = code_token_counter
        # BUGFIX: the original called get_counter_map() an extra time and
        # discarded the result; compute each map once and reuse it.
        code_map = get_counter_map(code_token_counter)
        self.code_config = transformers.BertConfig(
            vocab_size=len(code_token_counter),
            pad_token_id=code_map["[PAD]"])
        self.code_model = transformers.BertModel(self.code_config)

        self.query_token_counter = query_token_counter
        query_map = get_counter_map(query_token_counter)
        self.query_config = transformers.BertConfig(
            vocab_size=len(query_token_counter),
            pad_token_id=query_map["[PAD]"])
        self.query_model = transformers.BertModel(self.query_config)
    def test_loss_computation(self):
        """Loss for a labelled batch must be a positive float scalar."""
        torch.manual_seed(42)
        src_vocab_size = 17
        tgt_vocab_size = 23
        max_position = 7

        encoder = transformers.BertModel(transformers.BertConfig(
            hidden_size=11,
            intermediate_size=44,
            vocab_size=src_vocab_size,
            num_hidden_layers=1,
            num_attention_heads=1,
        ))

        decoder = transformers.BertModel(transformers.BertConfig(
            hidden_size=11,
            intermediate_size=44,
            vocab_size=tgt_vocab_size + max_position,
            is_decoder=True,
            num_hidden_layers=1,
            num_attention_heads=1,
        ))

        model = EncoderDecoderWPointerModel(encoder=encoder,
                                            decoder=decoder,
                                            max_src_len=7)

        # inputs shaped like real parsing data
        src_seq = torch.LongTensor([[1, 6, 12, 15, 2, 0, 0],
                                    [1, 6, 12, 15, 5, 3, 2]])
        tgt_seq = torch.LongTensor([
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7, 0, 0],
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 13, 14, 7, 7],
        ])
        mask = torch.FloatTensor([[0, 1, 1, 1, 0, 0, 0],
                                  [0, 1, 1, 1, 1, 1, 0]])

        outputs = model(
            input_ids=src_seq,
            decoder_input_ids=tgt_seq,
            pointer_mask=mask,
            labels=tgt_seq,
        )
        loss = outputs[0]

        self.assertEqual(loss.shape, torch.Size([]))
        self.assertEqual(loss.dtype, torch.float32)
        self.assertGreater(loss, 0)
Beispiel #4
0
def generate_onnx_model(model_name: str, filename: str, seq_len: int,
                        batch_size: int, backend: str):
    """Export a randomly initialized transformer model to an ONNX file.

    Args:
        model_name: one of "bert", "albert", "roberta".
        filename: output path for the ONNX file.
        seq_len: sequence length of the dummy input used for export.
        batch_size: batch size of the dummy input used for export.
        backend: "GPU" places model and input on cuda:0, otherwise cpu:0.

    Returns:
        The model's vocabulary size.

    Raises:
        ValueError: if `model_name` is not supported.
    """
    import transformers
    import torch

    test_device = torch.device('cuda:0') if backend == "GPU" else torch.device(
        'cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        # BUGFIX: `raise (f"...")` raises a plain str, which is itself a
        # TypeError at runtime; raise a proper exception type instead.
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        outf.flush()
    return cfg.vocab_size
Beispiel #5
0
 def __init__(self,
              vocab_size,
              hidden_size,
              dropout,
              n_layers=1,
              vocab_file='./data/vocab.txt'):
     """Untrained BERT encoder with its own embedding layer and tokenizer.

     Args:
         vocab_size: embedding vocabulary size.
         hidden_size: BERT hidden dimension.
         dropout: dropout probability for hidden states and attention.
         n_layers: number of transformer layers.
         vocab_file: path to the tokenizer vocabulary file.
     """
     super(UntrainedEncoderBERT, self).__init__()
     self.vocab_size = vocab_size
     self.hidden_size = hidden_size
     self.dropout = dropout
     self.dropout_layer = nn.Dropout(dropout)
     self.embedding = nn.Embedding(vocab_size,
                                   hidden_size,
                                   padding_idx=PAD_token)
     self.embedding.weight.data.normal_(0, 0.1)
     # BUGFIX: the keyword is `attention_probs_dropout_prob`; the misspelled
     # `attention_probs_dropout` was not a recognized BertConfig field, so
     # attention dropout silently stayed at its default value.
     self.config = transformers.BertConfig(
         vocab_size=self.vocab_size,
         hidden_size=self.hidden_size,
         num_hidden_layers=n_layers,
         hidden_dropout_prob=dropout,
         attention_probs_dropout_prob=dropout,
         num_attention_heads=16,
         output_hidden_states=True,
         max_position_embeddings=1024)
     self.tokenizer = transformers.BertTokenizer(vocab_file,
                                                 pad_token='PAD',
                                                 unk_token='UNK',
                                                 sep_token='EOS')
     self.BERT = transformers.BertModel(self.config)
     self.training = True
Beispiel #6
0
def dataset():
    """
    Check if the data in two instances is different
    """
    args = "--config demo_tiny_128".split()
    config = transformers.BertConfig(**(vars(parse_bert_args(args))))
    opts = get_options(config)
    # BUGFIX: a TFRecordPretrainingDataset(...) assignment here was
    # immediately overwritten by get_dataloader and never used — removed.
    loader = get_dataloader(config, opts)

    # Save part of the data as a numpy array for comparison
    loader_list = list(loader)[0][0][0].numpy()

    # MPI: broadcast the data held by rank 1 to all ranks (incl. rank 0)
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    loader_list_copy = np.copy(loader_list)
    comm.Bcast(loader_list, root=1)

    # On rank 0, the broadcast data should differ from the local data
    if comm.Get_rank() == 0 and not np.all(loader_list_copy == loader_list):
        print('Passed test: instances have different data')

    # Wait until both ranks are finished
    time.sleep(2)
Beispiel #7
0
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    num_threads: int):
    """Benchmark a randomly initialized HuggingFace model with plain PyTorch.

    Args:
        model_name: one of "bert", "albert", "roberta".
        seq_len: input sequence length.
        batch_size: input batch size.
        n: number of timed iterations.
        num_threads: torch intra-op thread count.

    Raises:
        ValueError: if `model_name` is not supported.
    """
    import torch
    import transformers
    import benchmark_helper
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        # BUGFIX: `raise (f"...")` raises a plain str (a TypeError at
        # runtime); raise a proper exception type instead.
        raise ValueError(f"benchmark does not support {model_name}")
    model.eval()
    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    benchmark_helper.run_model(lambda: model(input_ids), False, n, batch_size,
                               seq_len, "torch", num_threads)
Beispiel #8
0
    def __init__(self, hp: Optional[ModelParams] = None, max_seq_len=1024):
        """Embedding-only BERT (zero hidden layers) with configurable grads.

        Args:
            hp: model hyper-parameters; a fresh ModelParams() is used when
                omitted. BUGFIX: the old `hp=ModelParams()` default was
                evaluated once at definition time and shared by every call
                that omitted the argument.
            max_seq_len: maximum position-embedding length.
        """
        super().__init__()
        self.hp = hp if hp is not None else ModelParams()

        config = transformers.BertConfig(
            hidden_size=self.hp.dim,
            num_hidden_layers=0,
            num_attention_heads=1,
            intermediate_size=0,
            max_position_embeddings=max_seq_len,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True,
        )

        # Only the embedding tables are used, so skip the pooler.
        self.e = transformers.BertModel(config, add_pooling_layer=False)
        for name, param in self.e.named_parameters():
            # param names
            #   embeddings.word_embeddings.weight
            #   embeddings.position_embeddings.weight
            #   embeddings.token_type_embeddings.weight
            #   embeddings.LayerNorm.weight
            #   embeddings.LayerNorm.bias
            if 'position_embeddings' in name:
                requires_grad = self.hp.position_embedding_requires_grad
            else:
                requires_grad = self.hp.requires_grad
            param.requires_grad = requires_grad
Beispiel #9
0
    def __init__(
        self,
        train_dataloader,
        val_dataloader,
        args,
        fold,
        baselines=None,
        gamma=0.99,
        beta=0.01,
        lr=1e-5,
        device="cpu",
    ):
        """Set up the policy network, optimizer and training state.

        Args:
            train_dataloader: training-data iterator.
            val_dataloader: validation-data iterator.
            args: parsed CLI args; `args.decoder` selects LSTM vs transformer.
            fold: cross-validation fold index.
            baselines: optional reward baselines.
            gamma: discount factor.
            beta: entropy-regularization weight.
            lr: Adam learning rate.
            device: torch device string.
        """
        if args.decoder == "lstm":
            self.policy = PolicyNet(args=args)
        else:
            config = transformers.BertConfig()
            config.hidden_size = 2048
            config.num_attention_heads = 8
            config.num_hidden_layers = 4
            config.max_position_embeddings = 1500
            self.policy = Transformer(config, device)

        self.policy.to(device)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr)

        self.baselines = baselines

        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader

        self.device = device
        self.args = args
        self.fold = fold
        self.beta = beta
        # BUGFIX: `gamma` was accepted but never stored; keep it on the
        # instance (like `beta`) so discounting code can read it.
        self.gamma = gamma
Beispiel #10
0
def get_torch_model(
    model_name: str,
    input_shape: Tuple[int, ...],
    output_shape: Tuple[int, int],  # pylint: disable=unused-argument
    dtype: str = "float32",
) -> Tuple[IRModule, Dict[str, NDArray]]:
    """Load model from torch model zoo
    Parameters
    ----------
    model_name : str
        The name of the model to load
    input_shape: Tuple[int, ...]
        Tuple for input shape
    output_shape: Tuple[int, int]
        Tuple for output shape
    dtype: str
        Tensor data type
    """

    assert dtype == "float32"

    import torch  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    from torchvision import models  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    import transformers  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    import os  # type: ignore # pylint: disable=import-error,import-outside-toplevel

    model_type = MODEL_TYPES[model_name]
    if model_type == MODEL_TYPE.TEXT_CLASSIFICATION:
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        bert_config = transformers.BertConfig(
            num_hidden_layers=12,
            hidden_size=768,
            intermediate_size=3072,
            num_attention_heads=12,
            return_dict=False,
        )
        model = transformers.BertModel(bert_config)
        model.eval()
        example_input = torch.randint(10000, input_shape)
        shape_list = [("input_ids", input_shape)]
        # strict=False: tracing tolerates non-tensor (tuple) outputs
        scripted_model = torch.jit.trace(model, [example_input], strict=False)
    elif model_type == MODEL_TYPE.IMAGE_CLASSIFICATION:
        model = getattr(models, model_name)()
        example_input = torch.randn(input_shape).type(torch.float32)
        shape_list = [("input0", input_shape)]
        # Trace in eval mode; some torchvision models behave differently
        # in train mode.
        model.eval()
        scripted_model = torch.jit.trace(model, example_input)
        scripted_model.eval()
    else:
        raise ValueError("Unsupported model in Torch model zoo.")

    # Convert the traced torch model into a relay module + parameter dict
    return relay.frontend.from_pytorch(scripted_model, shape_list)
def generate_onnx_model(model_name: str,
                        use_gpu: bool,
                        filename: str,
                        seq_len: int,
                        batch_size: int,
                        backend: str,
                        use_dynamic_axes: bool = False):
    """Export a transformer model to ONNX, optionally with dynamic axes.

    Args:
        model_name: one of "bert", "albert", "roberta".
        use_gpu: combined with backend == "GPU" to place model on cuda:0.
        filename: output path for the ONNX file.
        seq_len: sequence length of the dummy export input.
        batch_size: batch size of the dummy export input.
        backend: "GPU" or CPU backend selector.
        use_dynamic_axes: export with variable batch/sequence dimensions.

    Returns:
        (vocab_size, cfg) of the exported model.

    Raises:
        ValueError: if `model_name` is not supported.
    """
    import transformers
    import torch

    test_device = torch.device(
        'cuda:0') if backend == "GPU" and use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        # use a real model to check the correctness
        if checkonnxrest:
            model = transformers.BertModel.from_pretrained("bert-base-uncased")
        else:
            cfg = transformers.BertConfig()
            model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        # BUGFIX: `raise (f"...")` raises a plain str (a TypeError at
        # runtime); raise a proper exception type instead.
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    with open(filename, 'wb') as outf:
        if not use_dynamic_axes:
            torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        else:
            torch.onnx.export(model=model,
                              args=(input_ids, ),
                              f=outf,
                              input_names=['input'],
                              output_names=['output'],
                              dynamic_axes={
                                  'input': [0, 1],
                                  'output': [0, 1]
                              })
        # If not intended to make onnxruntime support variable batch size and sequence length,
        # you can unset the parameter `dynamic_axes`.
        # For some model, you have to try `opset_version=12`
        outf.flush()
    return cfg.vocab_size, cfg
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool,
                                 enable_mem_opt: bool):
    """Benchmark turbo-transformers on a randomly initialized model.

    Args:
        model_name: "bert", "albert", "roberta" or "distilbert".
        seq_len, batch_size: fixed input shape (when enable_random is False).
        n: number of timed iterations.
        enable_random: benchmark variable lengths in [min_seq_len, max_seq_len]
            instead of a fixed shape.
        max_seq_len, min_seq_len: bounds for variable-length mode.
        num_threads: turbo thread count.
        use_gpu: run on cuda:0 instead of cpu:0.
        enable_mem_opt: use the "model-aware" allocator schema in random mode.

    Raises:
        ValueError: if `model_name` is not supported.
    """
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    cfg = None
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model, backend="turbo")
    elif model_name == "albert":
        cfg = transformers.AlbertConfig(hidden_size=768,
                                        num_attention_heads=12,
                                        intermediate_size=3072)
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.DistilBertModel.from_torch(model)
    else:
        # BUGFIX: `raise (f"...")` raises a plain str (a TypeError at
        # runtime); raise a proper exception type instead.
        raise ValueError(f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("model-aware")
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg, enable_mem_opt, model_name)
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("naive")
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads,
                                   enable_mem_opt, model_name)
    def test_shape_on_real_data_batched(self):
        """Batched realistic inputs yield logits of the expected shape."""
        set_seed(42)
        src_vocab_size = 17
        tgt_vocab_size = 23
        max_position = 7

        encoder = transformers.BertModel(transformers.BertConfig(
            hidden_size=11,
            intermediate_size=44,
            vocab_size=src_vocab_size,
            num_hidden_layers=1,
            num_attention_heads=1,
        ))

        decoder = transformers.BertModel(transformers.BertConfig(
            hidden_size=11,
            intermediate_size=44,
            vocab_size=tgt_vocab_size + max_position,
            is_decoder=True,
            num_hidden_layers=1,
            num_attention_heads=1,
        ))

        model = EncoderDecoderWPointerModel(encoder=encoder,
                                            decoder=decoder,
                                            max_src_len=max_position)

        # inputs shaped like real parsing data
        src_seq = torch.LongTensor([[1, 6, 12, 15, 2, 0, 0],
                                    [1, 6, 12, 15, 5, 3, 2]])
        tgt_seq = torch.LongTensor([
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7, 0, 0],
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 13, 14, 7, 7],
        ])
        mask = torch.FloatTensor([[0, 1, 1, 1, 0, 0, 0],
                                  [0, 1, 1, 1, 1, 1, 0]])

        logits = model(input_ids=src_seq,
                       decoder_input_ids=tgt_seq,
                       pointer_mask=mask)[0]

        self.assertEqual(
            logits.shape,
            (2, tgt_seq.shape[1], tgt_vocab_size + src_seq.shape[1]))
    def test_shape_on_real_data(self):
        """A single realistic example yields logits of the expected shape."""
        set_seed(42)
        src_vocab_size = 17
        tgt_vocab_size = 23
        max_position = 5

        encoder = transformers.BertModel(transformers.BertConfig(
            hidden_size=11,
            intermediate_size=44,
            vocab_size=src_vocab_size,
            num_hidden_layers=1,
            num_attention_heads=1,
        ))

        decoder = transformers.BertModel(transformers.BertConfig(
            hidden_size=11,
            intermediate_size=44,
            vocab_size=tgt_vocab_size + max_position,
            is_decoder=True,
            num_hidden_layers=1,
            num_attention_heads=1,
        ))

        model = EncoderDecoderWPointerModel(encoder=encoder,
                                            decoder=decoder,
                                            max_src_len=max_position)

        # e.g. '[CLS] Directions to Lowell [SEP]'
        src_seq = torch.LongTensor([[1, 6, 12, 15, 2]])
        # e.g. '[IN:GET_DIRECTIONS Directions to [SL:DESTINATION Lowell]]'
        tgt_seq = torch.LongTensor([[8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7]])
        mask = torch.FloatTensor([[0, 1, 1, 1, 0]])

        logits = model(input_ids=src_seq,
                       decoder_input_ids=tgt_seq,
                       pointer_mask=mask)[0]

        self.assertEqual(
            logits.shape,
            (1, tgt_seq.shape[1], tgt_vocab_size + src_seq.shape[1]))
Beispiel #15
0
    def test_run_with_no_exception(self):
        """End-to-end smoke test: training completes without raising."""
        # Arrange
        train_data_file = os.path.join(os.path.dirname(__file__),
                                       "sample_sst2.csv")
        tempdir = tempfile.mkdtemp()
        batch_size = 3

        # Tiny BERT config so the run stays fast
        vocab_size = 20000
        sequence_len = 20
        num_classes = 14
        bert_config = transformers.BertConfig(vocab_size=vocab_size,
                                              hidden_size=10,
                                              num_hidden_layers=1,
                                              num_attention_heads=1,
                                              num_labels=num_classes)

        # Tokeniser stub: whitespace split, token ids by position
        mock_tokenisor = MagicMock()
        mock_tokenisor.tokenize.side_effect = lambda text: text.split(" ")
        mock_tokenisor.convert_tokens_to_ids = lambda tokens: list(
            range(len(tokens)))

        # Wire up the builder with the stubbed tokeniser and tiny config
        builder = Builder(train_data=train_data_file,
                          val_data=train_data_file,
                          dataset_factory_name=
                          "datasets.sst2_dataset_factory.SST2DatasetFactory",
                          checkpoint_dir=tempdir,
                          epochs=2,
                          grad_accumulation_steps=1,
                          early_stopping_patience=2,
                          batch_size=batch_size,
                          max_seq_len=sequence_len,
                          model_dir=tempdir)
        builder.set_tokensior(mock_tokenisor)
        builder.set_bert_config(bert_config)

        trainer = builder.get_trainer()
        train_dataloader, val_dataloader = builder.get_train_val_dataloader()

        # Act: run training end to end
        trainer.run_train(train_iter=train_dataloader,
                          validation_iter=val_dataloader,
                          model_network=builder.get_network(),
                          loss_function=builder.get_loss_function(),
                          optimizer=builder.get_optimiser(),
                          pos_label=builder.get_pos_label_index())
def test_smart_batch(use_cuda: bool):
    """Compare serial torch inference with turbo smart-batched inference."""
    test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')
    cfg = transformers.BertConfig(attention_probs_dropout_prob=0.0,
                                  hidden_dropout_prob=0.0)
    torch_model = transformers.BertModel(cfg)

    # model_id = "bert-base-uncased"
    # torch_model = transformers.BertModel.from_pretrained(model_id)
    torch_model.eval()
    torch_model.to(test_device)
    torch.set_grad_enabled(False)

    cfg = torch_model.config
    # use 4 threads for computing
    if not use_cuda:
        turbo_transformers.set_num_threads(4)

    # Initialize a turbo BertModel with smart batching from torch model.
    turbo_model = turbo_transformers.BertModelSmartBatch.from_torch(
        torch_model)

    # a batch of random-content queries with different lengths
    query_seq_len_list = [18, 2, 3, 51]
    input_list = [
        torch.randint(low=0,
                      high=cfg.vocab_size - 1,
                      size=(1, query_seq_len),
                      dtype=torch.long,
                      device=test_device)
        for query_seq_len in query_seq_len_list
    ]

    # correctness: batched turbo result must match serial torch inference
    s_res = serial_bert_inference(torch_model, input_list)
    b_res = batch_bert_inference(turbo_model, input_list, query_seq_len_list)
    print(torch.max(torch.abs(b_res - s_res)))
    assert (torch.max(torch.abs(b_res - s_res)) < 1e-2)

    # rough wall-clock comparison over 10 repetitions each
    start_time = time.time()
    for _ in range(10):
        serial_bert_inference(torch_model, input_list)
    end_time = time.time()
    print("\ntorch time consum: {}".format(end_time - start_time))

    start_time = time.time()
    for _ in range(10):
        batch_bert_inference(turbo_model, input_list, query_seq_len_list)
    end_time = time.time()
    print("\nturbo time consum: {}".format(end_time - start_time))
def init_model(num):
    """Load a fine-tuned connective-prediction model and its tokenizer.

    Args:
        num: 1 selects model1 (20 connective classes); any other value
            selects model2 (12 connective classes).

    Returns:
        (model, tokenizer, idx_to_token) where idx_to_token maps class
        indices to connective strings.
    """
    # BUGFIX: the original built `transformers.BertConfig("./model/config.json")`
    # here; BertConfig's first positional argument is vocab_size, not a config
    # path, and the resulting object was never used — removed.
    model_class = transformers.BertForSequenceClassification
    tokenizer_class = transformers.BertTokenizer
    if num == 1:
        pretrained_weights = './model/model1/'
        idx_to_token = {
            0: "[no connective]",
            1: "and",
            2: "by contrast",
            3: "by then",
            4: "finally",
            5: "for example",
            6: "however",
            7: "in other words",
            8: "in particular",
            9: "indeed",
            10: "instead",
            11: "meanwhile",
            12: "moreover",
            13: "nevertheless",
            14: "on the other hand",
            15: "otherwise",
            16: "overall",
            17: "rather",
            18: "then",
            19: "therefore"
        }
    else:
        pretrained_weights = './model/model2/'
        idx_to_token = {
            0: "[no connective]",
            1: "although",
            2: "and",
            3: "because",
            4: "but",
            5: "for example",
            6: "however",
            7: "or",
            8: "so",
            9: "so that",
            10: "unless",
            11: "while"
        }
    pretrained_tokenizer = './tokenizer'

    # load weights for tokenizer and model
    tokenizer = tokenizer_class.from_pretrained(pretrained_tokenizer)
    model = model_class.from_pretrained(pretrained_weights)

    model.eval()
    return model, tokenizer, idx_to_token
Beispiel #18
0
    def __post_init__(self):
        """Apply the "small" size preset and build the BertConfig."""
        # Validate explicitly: `assert` statements are stripped when Python
        # runs with -O, silently skipping the check.
        if self.size not in ("small", "large"):
            raise ValueError(
                f'size must be "small" or "large", got {self.size!r}')
        if self.size == "small":
            self.hidden_size = 256
            self.num_attention_heads = 4
            self.num_hidden_layers = 2

        self.model_config = transformers.BertConfig(
            hidden_size=self.hidden_size,
            num_attention_heads=self.num_attention_heads,
            num_hidden_layers=self.num_hidden_layers,
            max_position_embeddings=self.max_position_embeddings,
        )
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool):
    """Benchmark turbo-transformers on a randomly initialized model.

    Args:
        model_name: one of "bert", "albert", "roberta".
        seq_len, batch_size: fixed input shape (when enable_random is False).
        n: number of timed iterations.
        enable_random: benchmark variable lengths in [min_seq_len, max_seq_len]
            instead of a fixed shape.
        max_seq_len, min_seq_len: bounds for variable-length mode.
        num_threads: turbo thread count.
        use_gpu: run on cuda:0 instead of cpu:0.

    Raises:
        ValueError: if `model_name` is not supported.
    """
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    if use_gpu:
        print("using GPU")
    else:
        print("using CPU")
    cfg = None
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        # BUGFIX: `raise (f"...")` raises a plain str (a TypeError at
        # runtime); raise a proper exception type instead.
        raise ValueError(f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)

        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads)
Beispiel #20
0
 def __init__(self, decode_fn):
     """Wrap a BLEURT checkpoint as an embedding-based discriminator."""
     super(BleurtEmbDiscriminator, self).__init__()
     self.decode_fn = decode_fn

     from BleurtTorch import BleurtModel, encode_batch
     import transformers

     # Load the converted BLEURT weights into a BERT-shaped model.
     checkpoint = "bleurt/bleurt-base-128-torch.pb"
     self.bleurt_model = BleurtModel(transformers.BertConfig())
     self.bleurt_model.load_state_dict(torch.load(checkpoint))
     self.bleurt_model.eval()
     self.tokenizer = transformers.BertTokenizerFast.from_pretrained(
         "bert-base-uncased")
     self.encode_batch = encode_batch
def test_wikipedia_dataset():
    """Validate tokens, masks and labels of the wikipedia pretraining data."""
    args = "--config demo_tiny_128".split()
    config = transformers.BertConfig(**(vars(parse_bert_args(args))))
    config.vocab_size = 30522
    config.input_files = ["data/wikipedia/128/wiki_000.tfrecord"]

    num_tokens = 0
    replacement_counts = Counter({"103": 0, "same": 0, "random": 0})

    dataset = get_dataset(config)
    opts = get_options(config)
    loader = DataLoader(opts,
                        dataset,
                        batch_size=config.batch_size,
                        num_workers=config.dataloader_workers)

    for batch in tqdm(loader):
        (tokens, attn_mask, types, mask_lm_pos, labels, nsp) = (
            field.numpy() for field in batch)
        for idx in range(config.batch_size):
            check_dimensions(config, tokens[idx], attn_mask[idx], types[idx],
                             mask_lm_pos[idx], labels[idx], nsp[idx])
            check_tokens(config, tokens[idx], mask_lm_pos[idx], labels[idx])
            check_attention_mask(attn_mask[idx], tokens[idx])
            check_mask_lm_positions(config, mask_lm_pos[idx])
            check_labels(config, tokens[idx], mask_lm_pos[idx], labels[idx])
            check_token_type(types[idx])
            check_nsp(nsp[idx])

            replacement_counts += mask_type_count(tokens[idx],
                                                  mask_lm_pos[idx],
                                                  labels[idx])

            # Count non-padding tokens only
            num_tokens += attn_mask[idx, attn_mask[idx] == 1].shape[0]

    # Convert raw counts into proportions of all masked tokens
    total = sum(replacement_counts.values())
    for key in replacement_counts:
        replacement_counts[key] /= total

    assert (0.79 < replacement_counts["103"] < 0.81)
    assert (0.09 < replacement_counts["same"] < 0.11)
    assert (0.09 < replacement_counts["random"] < 0.11)
    assert (0.14 < total / num_tokens < 0.16)  # should be ~0.15
Beispiel #22
0
def benchmark_torch_jit(model_name: str, seq_len: int, batch_size: int, n: int,
                        enable_random: bool, max_seq_len: int,
                        min_seq_len: int, num_threads: int, use_gpu: bool,
                        enable_mem_opt: bool):
    """Benchmark a TorchScript-traced HuggingFace encoder and print QPS as JSON.

    Traces the chosen model ("bert", "albert" or "roberta") with a fixed
    random input of shape (batch_size, seq_len), performs one warm-up call,
    then times `n` forward passes and prints a one-line JSON summary.

    NOTE(review): enable_random, max_seq_len, min_seq_len, use_gpu and
    enable_mem_opt are accepted but unused here — presumably kept so all
    benchmark entry points share one signature; confirm against callers.

    Raises:
        ValueError: if `model_name` is not one of the supported models.
    """
    import transformers
    import contexttimer
    import torch.jit
    torch.set_num_threads(num_threads)
    # Inference-only benchmark: disable autograd bookkeeping.
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        # BUG FIX: the original `raise (f"...")` tried to raise a plain str,
        # which itself fails with "exceptions must derive from BaseException"
        # and loses the intended message.
        raise ValueError(f"benchmark does not support {model_name}")
    model.eval()
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)

    model = torch.jit.trace(model, (input_ids, ))

    with torch.jit.optimized_execution(True):
        model(input_ids)  # warm-up call, excluded from the timed loop
        with contexttimer.Timer() as t:
            for _ in range(n):
                model(input_ids)

    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "batch_size": batch_size,
            "seq_len": seq_len,
            "framework": "torch_jit",
            "n_threads": num_threads,
            "model_name": model_name
        }))
Beispiel #23
0
    def __init__(self,
                 vocab: nnlp.Vocab,
                 n_embd: int = 256,
                 n_layer: int = 4,
                 n_head: int = 4,
                 n_position: int = 128,
                 n_ctx: int = 128):
        """Build a masked-LM BERT model sized to the given vocabulary.

        Args:
            vocab: vocabulary object; its length defines the embedding table.
            n_embd: hidden size of the transformer.
            n_layer: number of transformer layers.
            n_head: number of attention heads.
            n_position, n_ctx: accepted but unused here — presumably kept
                for signature parity with sibling models; confirm with callers.
        """
        super().__init__()

        bert_config = transformers.BertConfig(
            vocab_size=len(vocab),
            hidden_size=n_embd,
            num_hidden_layers=n_layer,
            num_attention_heads=n_head,
            output_hidden_states=True,
        )

        self.vocab = vocab
        self.bert_model = transformers.BertForMaskedLM(bert_config)
        # Fraction of tokens masked for the MLM objective (standard BERT value).
        self.mlm_probability = 0.15
Beispiel #24
0
def test(use_cuda: bool):
    """Run a turbo_transformers BERT loaded from an npz file and print outputs.

    Loads weights from the npz path given as the single CLI argument (or a
    hard-coded default path), runs 10 forward passes on a fixed 2x4 input,
    and prints the sequence output, pooler output and elapsed time.

    Args:
        use_cuda: run on 'cuda:0' when True, otherwise on 'cpu:0'.
    """
    test_device_name = "GPU" if use_cuda else "CPU"

    test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')

    cfg = transformers.BertConfig()
    # use 4 threads for computing
    turbo_transformers.set_num_threads(4)

    # Fixed token ids: batch of 2 sequences of length 4.
    input_ids = np.array(
        ([12166, 10699, 16752, 4454], [5342, 16471, 817, 16022]),
        dtype=np.int64)
    segment_ids = np.array(([1, 1, 1, 0], [1, 0, 0, 0]), dtype=np.int64)

    input_ids_tensor = turbo_transformers.nparray2tensor(
        input_ids, test_device_name)
    segment_ids_tensor = turbo_transformers.nparray2tensor(
        segment_ids, test_device_name)
    # 3. load model from npz
    if len(sys.argv) == 2:
        try:
            print(sys.argv[1])
            in_file = sys.argv[1]
        except Exception:
            # BUG FIX: sys.exit() accepts a single argument; the original
            # passed two, which raised TypeError instead of exiting with
            # the message. The bare `except:` is also narrowed.
            sys.exit("ERROR. can not open " + sys.argv[1])
    else:
        in_file = "/home/jiaruifang/codes/TurboTransformers/bert.npz"
    # 255 MiB

    tt_model = turbo_transformers.BertModel.from_npz(in_file, cfg, test_device)

    # 1169 MiB
    start_time = time.time()
    for _ in range(10):
        res = tt_model(input_ids_tensor,
                       token_type_ids=segment_ids_tensor,
                       return_type=turbo_transformers.ReturnType.NUMPY
                       )  # sequence_output, pooled_output
    end_time = time.time()

    print("turbo bert sequence output:", res[0][:, 0, :])
    print("turbo bert pooler output: ", res[1])  # pooled_output
    print("\nturbo time consum: {}".format(end_time - start_time))
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    enable_random: bool, max_seq_len: int, min_seq_len: int,
                    num_threads: int, use_gpu: bool, enable_mem_opt: bool):
    """Benchmark an eager-mode HuggingFace encoder via benchmark_helper.

    Builds the requested model ("bert", "albert", "roberta" or "distilbert")
    with its default config and delegates timing to benchmark_helper, either
    with variable-length inputs (enable_random) or a fixed
    (batch_size, seq_len) random input.

    Raises:
        ValueError: if `model_name` is not one of the supported models.
    """
    import torch
    import transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    # Inference-only benchmark: disable autograd bookkeeping.
    torch.set_grad_enabled(False)
    torch.set_num_threads(num_threads)

    cfg = None
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
    else:
        # BUG FIX: the original `raise (f"...")` tried to raise a plain str,
        # which itself fails with "exceptions must derive from BaseException"
        # and loses the intended message.
        raise ValueError(f"benchmark does not support {model_name}")
    model.eval()
    model.to(test_device)

    # cfg = model.config  # type: transformers.BertConfig
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "torch", num_threads,
                                            cfg, enable_mem_opt, model_name)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "torch", num_threads,
                                   enable_mem_opt, model_name)
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int):
    """Benchmark a turbo_transformers model converted from a torch model.

    GPU-only: returns early (with a message) when CUDA is unavailable.
    Builds the requested model ("bert", "albert" or "roberta"), converts it
    with turbo_transformers.*.from_torch, and times `n` forward passes on a
    fixed (batch_size, seq_len) random input via benchmark_helper.

    Raises:
        ValueError: if `model_name` is not one of the supported models.
    """
    import torch
    import transformers
    import contexttimer
    import turbo_transformers
    import benchmark_helper

    if not torch.cuda.is_available():
        print("cuda is not available for torch")
        return
    test_device = torch.device('cuda:0')

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        # BUG FIX: the original `raise (f"...")` tried to raise a plain str,
        # which itself fails with "exceptions must derive from BaseException"
        # and loses the intended message.
        raise ValueError(f"benchmark does not support {model_name}")

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    benchmark_helper.run_model(lambda: model(input_ids), True, n, batch_size,
                               seq_len, "turbo")
Beispiel #27
0
def build_ner_biobert_model(hparams):
    """Assemble a BioBERT-backed NER tagger and wrap it for inference.

    Reads dataset/device choices from `hparams`, builds the dataset manager
    from ./ner/<dataset>/<dataset>.{train,dev,test}, loads the pretrained
    BioBERT encoder, stacks an RNN+CRF tagger on top, and returns a
    SequenceLabellingInference bound to the saved best checkpoint.
    """
    exp_dirpath = pathlib.Path(dirname)

    # Hoist repeated hparams lookups; one dataset name drives all three files.
    dataset = hparams.get("dataset")
    train_filename = "./ner/%s/%s.train" % (dataset, dataset)
    dev_filename = "./ner/%s/%s.dev" % (dataset, dataset)
    test_filename = "./ner/%s/%s.test" % (dataset, dataset)

    data_manager = BioNERDatasetManager(
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=test_filename,
        column_names=["NER"],
        train_only="ner",
    )

    device = torch.device(hparams.get("device"))

    # Expose all hidden states so the sequence encoder can pool over layers;
    # 28996 matches the BioBERT cased vocabulary size.
    config = transformers.BertConfig(output_hidden_states=True, vocab_size=28996)
    model = AutoModelWithLMHead.from_pretrained("monologg/biobert_v1.1_pubmed", config=config)
    tokenizer = AutoTokenizer.from_pretrained("monologg/biobert_v1.1_pubmed")

    # TODO: Specifying the max length
    biobert2seqencoder = Biobert2SeqEncoder(
        tokenizer=tokenizer,
        model=model,
        device=device,
    )

    tagger = RnnSeqCrfTagger(
        rnn2seqencoder=biobert2seqencoder,
        encoding_dim=768,
        device=device,
        datasets_manager=data_manager,
    )

    return SequenceLabellingInference(
        model=tagger,
        model_filepath=str(exp_dirpath.joinpath("checkpoints", "best_model.pt")),
        datasets_manager=data_manager,
    )
Beispiel #28
0
  def __init__(self, p):
    """Build a BERT encoder with a conv + linear head from hyper-params `p`.

    `p` carries input_dim, n_layers, n_heads, ffn_dim, dropout_p, seq_length
    and last_layer. vocab_size=1 because inputs are presumably continuous
    features fed via inputs_embeds rather than token ids — TODO confirm
    against this class's forward().
    """
    super().__init__()

    encoder_kwargs = dict(
      vocab_size=1,
      hidden_size=p.input_dim,
      num_hidden_layers=p.n_layers,
      num_attention_heads=p.n_heads,
      intermediate_size=p.ffn_dim,
      hidden_dropout_prob=p.dropout_p,
      attention_probs_dropout_prob=p.dropout_p,
      max_position_embeddings=p.seq_length,
      position_embedding_type='absolute',
    )
    self.bert_config = transformers.BertConfig(**encoder_kwargs)
    self.bert = BertModel(self.bert_config)

    # Head: a (1, input_dim) conv collapses the feature axis, plus a
    # per-feature linear projection; which one is used depends on last_layer.
    self.conv = nn.Conv2d(1, 1, kernel_size=(1, p.input_dim))
    self.fc = nn.Linear(in_features=p.input_dim, out_features=1)
    self.relu = nn.ReLU()
    self.last_layer = p.last_layer
Beispiel #29
0
    def __init__(self, hp: Optional[ModelParams] = ModelParams()):
        """Build a minimal BERT embedding encoder (no transformer layers).

        Args:
            hp: model hyper-parameters (dim, max_seq_len, requires_grad).
                NOTE(review): the mutable default ModelParams() is evaluated
                once at definition time and shared across calls; kept as-is
                for interface compatibility.
        """
        super().__init__()
        self.hp = hp

        # num_hidden_layers=0: only the embedding layer is used.
        config = transformers.BertConfig(
            hidden_size=self.hp.dim,
            num_hidden_layers=0,
            num_attention_heads=1,
            intermediate_size=0,
            max_position_embeddings=self.hp.max_seq_len,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True,
        )

        self.e = transformers.BertModel(config, add_pooling_layer=False)
        for name, param in self.e.named_parameters():
            # BUG FIX: named_parameters() yields dotted names such as
            # "embeddings.position_embeddings.weight", so the original exact
            # comparison `name == 'position_embeddings'` never matched and
            # the position embeddings were never frozen. Substring matching
            # restores the intended freeze.
            if 'position_embeddings' in name:
                param.requires_grad = False
            else:
                param.requires_grad = self.hp.requires_grad
def test_constant_lrschedule():
    """Verify that the "constant" LR schedule leaves the learning rate fixed."""
    import warnings
    warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)

    cli_args = """
    --config unit_test
    --lr-schedule constant
    """.split()
    config = transformers.BertConfig(**(vars(parse_bert_args(cli_args))))
    opts = get_options(config)

    # Build the IPU training model with its optimizer and "constant" schedule.
    model = PipelinedBertWithLoss(config).half().train()
    optimizer = get_optimizer(config, model)
    scheduler = get_lr_scheduler(optimizer, "constant")
    poptorch_model = poptorch.trainingModel(model, opts, optimizer=optimizer)

    def mock_data():
        return get_generated_datum(config)

    poptorch_model.compile(*mock_data())

    # The schedule must start at the configured learning rate...
    initial_lr = poptorch_model._dict_optimizer["groups"][0]["learningRate"][0]
    assert initial_lr == config.learning_rate

    # ...and stepping it several times must not change the rate.
    for _ in range(5):
        poptorch_model(*mock_data())
        scheduler.step()
        poptorch_model.setOptimizer(optimizer)

    final_lr = poptorch_model._dict_new_optimizer["groups"][0]["learningRate"][0]
    assert final_lr == config.learning_rate