def test_shape_on_random_data(self):
        """Verify the shape of the first four model outputs for random token ids."""
        set_seed(42)

        batch_size, source_len, target_len = 3, 5, 7

        # sizes shared by the single-layer, single-head encoder and decoder
        tiny_bert = dict(
            hidden_size=11,
            intermediate_size=44,
            num_hidden_layers=1,
            num_attention_heads=1,
        )

        enc_cfg = transformers.BertConfig(vocab_size=17, **tiny_bert)
        bert_encoder = transformers.BertModel(enc_cfg)

        # decoder accepts vocabulary of schema vocab + pointer embeddings
        dec_cfg = transformers.BertConfig(vocab_size=23,
                                          is_decoder=True,
                                          **tiny_bert)
        bert_decoder = transformers.BertModel(dec_cfg)

        # logits are projected into schema vocab and combined with pointer scores
        pointer_limit = source_len + 3
        model = EncoderDecoderWPointerModel(
            encoder=bert_encoder,
            decoder=bert_decoder,
            max_src_len=pointer_limit,
        )

        source_ids = torch.randint(0,
                                   enc_cfg.vocab_size,
                                   size=(batch_size, source_len))
        target_ids = torch.randint(0,
                                   dec_cfg.vocab_size,
                                   size=(batch_size, target_len))

        outputs = model(input_ids=source_ids, decoder_input_ids=target_ids)

        # different encoders return different number of outputs
        # e.g. BERT returns two, but DistilBERT only one
        self.assertGreaterEqual(len(outputs), 4)

        schema_vocab_size = dec_cfg.vocab_size - pointer_limit

        # expected shapes of outputs[0], outputs[1], outputs[2], outputs[3]
        expected_shapes = [
            # combined logits
            (batch_size, target_len, schema_vocab_size + source_len),
            # decoder hidden states
            (batch_size, target_len, dec_cfg.hidden_size),
            # one vector per example (presumably the pooled decoder output)
            (batch_size, dec_cfg.hidden_size),
            # encoder hidden states
            (batch_size, source_len, enc_cfg.hidden_size),
        ]
        for position, shape in enumerate(expected_shapes):
            self.assertEqual(outputs[position].shape, shape)
Beispiel #2
0
    def __init__(self, code_token_counter, query_token_counter):
        """Create one BERT model for code tokens and one for query tokens.

        For each counter the vocabulary size is ``len(counter)`` and the padding
        id is the ``"[PAD]"`` entry of ``get_counter_map(counter)``.
        """
        # --- code side ---
        self.code_token_counter = code_token_counter
        # result is intentionally unused here; the call is kept in case
        # get_counter_map has side effects (not visible from this block)
        get_counter_map(code_token_counter)
        code_vocab_size = len(code_token_counter)
        code_pad_id = get_counter_map(code_token_counter)["[PAD]"]
        self.code_config = transformers.BertConfig(vocab_size=code_vocab_size,
                                                   pad_token_id=code_pad_id)
        self.code_model = transformers.BertModel(self.code_config)

        # --- query side ---
        self.query_token_counter = query_token_counter
        query_vocab_size = len(query_token_counter)
        query_pad_id = get_counter_map(query_token_counter)["[PAD]"]
        self.query_config = transformers.BertConfig(
            vocab_size=query_vocab_size, pad_token_id=query_pad_id)
        self.query_model = transformers.BertModel(self.query_config)
    def test_loss_computation(self):
        """Check that passing labels yields a positive scalar float32 loss."""
        torch.manual_seed(42)
        src_vocab_size, tgt_vocab_size = 17, 23

        # sizes shared by the single-layer, single-head encoder and decoder
        tiny_bert = dict(
            hidden_size=11,
            intermediate_size=44,
            num_hidden_layers=1,
            num_attention_heads=1,
        )

        bert_encoder = transformers.BertModel(
            transformers.BertConfig(vocab_size=src_vocab_size, **tiny_bert))

        max_position = 7
        bert_decoder = transformers.BertModel(
            transformers.BertConfig(vocab_size=tgt_vocab_size + max_position,
                                    is_decoder=True,
                                    **tiny_bert))

        model = EncoderDecoderWPointerModel(
            encoder=bert_encoder,
            decoder=bert_decoder,
            max_src_len=7,
        )

        # similar to real data
        source_ids = torch.LongTensor([
            [1, 6, 12, 15, 2, 0, 0],
            [1, 6, 12, 15, 5, 3, 2],
        ])
        target_ids = torch.LongTensor([
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7, 0, 0],
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 13, 14, 7, 7],
        ])
        pointer_mask = torch.FloatTensor([
            [0, 1, 1, 1, 0, 0, 0],
            [0, 1, 1, 1, 1, 1, 0],
        ])

        outputs = model(
            input_ids=source_ids,
            decoder_input_ids=target_ids,
            pointer_mask=pointer_mask,
            labels=target_ids,
        )
        # with labels supplied, the first output is the loss
        loss = outputs[0]

        self.assertEqual(loss.shape, torch.Size([]))
        self.assertEqual(loss.dtype, torch.float32)
        self.assertGreater(loss, 0)
Beispiel #4
0
def generate_onnx_model(model_name: str, filename: str, seq_len: int,
                        batch_size: int, backend: str):
    """Export a freshly constructed transformer model to an ONNX file.

    The model is built from its default ``transformers`` configuration (no
    pretrained weights are loaded), switched to eval mode and exported with a
    random ``(batch_size, seq_len)`` batch of token ids as the example input.
    Gradient tracking is disabled globally as a side effect.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        filename: Path of the ONNX file to write.
        seq_len: Sequence length of the example input.
        batch_size: Batch size of the example input.
        backend: ``"GPU"`` selects ``cuda:0``; any other value selects the CPU.

    Returns:
        The vocabulary size of the model configuration.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # (config class, model class) names in `transformers` per supported model
    supported = {
        "bert": ("BertConfig", "BertModel"),
        "albert": ("AlbertConfig", "AlbertModel"),
        "roberta": ("RobertaConfig", "RobertaModel"),
    }
    if model_name not in supported:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import transformers
    import torch

    test_device = torch.device('cuda:0') if backend == "GPU" else torch.device(
        'cpu:0')
    torch.set_grad_enabled(False)

    config_cls_name, model_cls_name = supported[model_name]
    cfg = getattr(transformers, config_cls_name)()
    model = getattr(transformers, model_cls_name)(cfg)

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    # torch.randint's upper bound is exclusive
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        outf.flush()
    return cfg.vocab_size
Beispiel #5
0
    def __init__(self, hp: Optional[ModelParams] = ModelParams(), max_seq_len=1024):
        """Build an embedding-only BERT and set which parameters are trainable.

        The BERT model is configured with zero hidden layers and no pooling
        layer, so only its embedding parameters remain.

        Args:
            hp: Hyper-parameters; ``dim``, ``requires_grad`` and
                ``position_embedding_requires_grad`` are read from it.
                ``None`` falls back to a default ``ModelParams()``.
            max_seq_len: Value passed as ``max_position_embeddings``.
        """
        super().__init__()
        # The annotation allows None, but the attribute reads below would fail
        # on it, so substitute default hyper-parameters.
        if hp is None:
            hp = ModelParams()
        self.hp = hp

        config = transformers.BertConfig(
            hidden_size=self.hp.dim,
            num_hidden_layers=0,
            num_attention_heads=1,
            intermediate_size=0,
            max_position_embeddings=max_seq_len,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True,
        )

        self.e = transformers.BertModel(config, add_pooling_layer=False)
        for name, param in self.e.named_parameters():
            # param names
            #   embeddings.word_embeddings.weight
            #   embeddings.position_embeddings.weight
            #   embeddings.token_type_embeddings.weight
            #   embeddings.LayerNorm.weight
            #   embeddings.LayerNorm.bias
            # position embeddings have their own trainability switch
            if 'position_embeddings' in name:
                requires_grad = self.hp.position_embedding_requires_grad
            else:
                requires_grad = self.hp.requires_grad
            param.requires_grad = requires_grad
Beispiel #6
0
def get_torch_model(
    model_name: str,
    input_shape: Tuple[int, ...],
    output_shape: Tuple[int, int],  # pylint: disable=unused-argument
    dtype: str = "float32",
) -> Tuple[IRModule, Dict[str, NDArray]]:
    """Build a torch model, trace it and convert it to a relay module.

    Text-classification names produce a BERT model constructed from a config;
    image-classification names are looked up by name in ``torchvision.models``.

    Parameters
    ----------
    model_name : str
        The name of the model to load; must be a key of ``MODEL_TYPES``
    input_shape: Tuple[int, ...]
        Tuple for input shape
    output_shape: Tuple[int, int]
        Tuple for output shape (unused)
    dtype: str
        Tensor data type; only "float32" is accepted

    Returns
    -------
    The ``(mod, params)`` pair produced by ``relay.frontend.from_pytorch``.
    """

    assert dtype == "float32"

    import torch  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    from torchvision import models  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    import transformers  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    import os  # type: ignore # pylint: disable=import-error,import-outside-toplevel

    model_type = MODEL_TYPES[model_name]

    if model_type == MODEL_TYPE.TEXT_CLASSIFICATION:
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        bert_config = transformers.BertConfig(
            num_hidden_layers=12,
            hidden_size=768,
            intermediate_size=3072,
            num_attention_heads=12,
            return_dict=False,
        )
        model = transformers.BertModel(bert_config)
        model.eval()
        # random token ids drawn from [0, 10000)
        input_data = torch.randint(10000, input_shape)
        shape_list = [("input_ids", input_shape)]
        scripted_model = torch.jit.trace(model, [input_data], strict=False)
    elif model_type == MODEL_TYPE.IMAGE_CLASSIFICATION:
        # instantiate the torchvision constructor of the same name
        model = getattr(models, model_name)()
        # Setup input
        input_data = torch.randn(input_shape).type(torch.float32)
        shape_list = [("input0", input_shape)]
        # Trace in eval mode and keep the traced module in eval mode as well
        model.eval()
        scripted_model = torch.jit.trace(model, input_data)
        scripted_model.eval()
    else:
        raise ValueError("Unsupported model in Torch model zoo.")

    # Convert torch model to relay module
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    return mod, params
Beispiel #7
0
 def __init__(self,
              vocab_size,
              hidden_size,
              dropout,
              n_layers=1,
              vocab_file='./data/vocab.txt'):
     """Create an untrained BERT encoder with its own embedding and tokenizer.

     Args:
         vocab_size: Vocabulary size for the embedding and the BERT config.
         hidden_size: Embedding dimension and BERT hidden size.
         dropout: Dropout probability used for the dropout layer and for both
             the hidden and the attention dropout of the BERT config.
         n_layers: Number of BERT hidden layers.
         vocab_file: Vocabulary file handed to ``BertTokenizer``.
     """
     super(UntrainedEncoderBERT, self).__init__()
     self.vocab_size = vocab_size
     self.hidden_size = hidden_size
     self.dropout = dropout
     self.dropout_layer = nn.Dropout(dropout)
     self.embedding = nn.Embedding(vocab_size,
                                   hidden_size,
                                   padding_idx=PAD_token)
     self.embedding.weight.data.normal_(0, 0.1)
     # The config field is `attention_probs_dropout_prob`; the misspelt
     # `attention_probs_dropout` kwarg left attention dropout at its default.
     self.config = transformers.BertConfig(vocab_size=self.vocab_size,
                                           hidden_size=self.hidden_size,
                                           num_hidden_layers=n_layers,
                                           hidden_dropout_prob=dropout,
                                           attention_probs_dropout_prob=dropout,
                                           num_attention_heads=16,
                                           output_hidden_states=True,
                                           max_position_embeddings=1024)
     self.tokenizer = transformers.BertTokenizer(vocab_file,
                                                 pad_token='PAD',
                                                 unk_token='UNK',
                                                 sep_token='EOS')
     # built from the config only, so no pretrained weights are loaded
     self.BERT = transformers.BertModel(self.config)
     self.training = True
    def __init__(self, L=30, model_state=None):
        """Set up the key encoder and the classifier.

        Args:
            L: Number of local adaptation steps.
            model_state: ``None`` to start from the pretrained
                ``bert-base-uncased`` weights, or a mapping with
                ``'classifier'`` and ``'key_encoder'`` state dicts to restore.
                Only in the latter case is ``base_weights`` populated.
        """
        super(MbPA, self).__init__()

        if model_state is None:
            # Key network to find key representation of content
            self.key_encoder = transformers.BertModel.from_pretrained(
                'bert-base-uncased')
            # Bert model for text classification
            self.classifier = transformers.BertForSequenceClassification.from_pretrained(
                'bert-base-uncased', num_labels=33)

        else:

            cls_config = transformers.BertConfig.from_pretrained(
                'bert-base-uncased', num_labels=33)
            self.classifier = transformers.BertForSequenceClassification(
                cls_config)
            self.classifier.load_state_dict(model_state['classifier'])
            key_config = transformers.BertConfig.from_pretrained(
                'bert-base-uncased')
            self.key_encoder = transformers.BertModel(key_config)
            self.key_encoder.load_state_dict(model_state['key_encoder'])
            # load base model weights
            # parameters() yields references to the live parameters (and is a
            # generator with no .clone()), so snapshot each tensor separately
            device = "cuda" if torch.cuda.is_available() else "cpu"
            self.base_weights = [
                param.clone().detach().to(device)
                for param in self.classifier.parameters()
            ]
        # local adaptation learning rate - 1e-3 or 5e-3
        self.loc_adapt_lr = 1e-3
        # Number of local adaptation steps
        self.L = L
Beispiel #9
0
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    num_threads: int):
    """Benchmark a freshly constructed transformer model with plain PyTorch.

    The model is built from its default ``transformers`` configuration and run
    through ``benchmark_helper.run_model`` on a random batch of token ids.
    Gradient tracking is disabled and the torch thread count is set globally.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length of the random input.
        batch_size: Batch size of the random input.
        n: Forwarded to ``benchmark_helper.run_model``.
        num_threads: Number of torch threads to use.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # (config class, model class) names in `transformers` per supported model
    supported = {
        "bert": ("BertConfig", "BertModel"),
        "albert": ("AlbertConfig", "AlbertModel"),
        "roberta": ("RobertaConfig", "RobertaModel"),
    }
    if model_name not in supported:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import benchmark_helper
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    config_cls_name, model_cls_name = supported[model_name]
    cfg = getattr(transformers, config_cls_name)()
    model = getattr(transformers, model_cls_name)(cfg)
    model.eval()
    cfg = model.config  # type: transformers.BertConfig
    # torch.randint's upper bound is exclusive
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    benchmark_helper.run_model(lambda: model(input_ids), False, n, batch_size,
                               seq_len, "torch", num_threads)
def generate_onnx_model(model_name: str,
                        use_gpu: bool,
                        filename: str,
                        seq_len: int,
                        batch_size: int,
                        backend: str,
                        use_dynamic_axes: bool = False):
    """Export a transformer model to an ONNX file.

    The model is built from its default ``transformers`` configuration, except
    that ``"bert"`` loads the pretrained ``bert-base-uncased`` weights when the
    module-level ``checkonnxrest`` flag is truthy. Gradient tracking is
    disabled globally as a side effect.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        use_gpu: Together with ``backend == "GPU"`` selects ``cuda:0``.
        filename: Path of the ONNX file to write.
        seq_len: Sequence length of the example input.
        batch_size: Batch size of the example input.
        backend: ``"GPU"`` (with ``use_gpu``) exports on CUDA, otherwise CPU.
        use_dynamic_axes: Mark axes 0 and 1 of the input and output as dynamic.

    Returns:
        A ``(vocab_size, config)`` tuple for the exported model.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # (config class, model class) names in `transformers` per supported model
    supported = {
        "bert": ("BertConfig", "BertModel"),
        "albert": ("AlbertConfig", "AlbertModel"),
        "roberta": ("RobertaConfig", "RobertaModel"),
    }
    if model_name not in supported:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import transformers
    import torch

    test_device = torch.device(
        'cuda:0') if backend == "GPU" and use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert" and checkonnxrest:
        # use a real model to check the correctness
        model = transformers.BertModel.from_pretrained("bert-base-uncased")
    else:
        config_cls_name, model_cls_name = supported[model_name]
        cfg = getattr(transformers, config_cls_name)()
        model = getattr(transformers, model_cls_name)(cfg)

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    # torch.randint's upper bound is exclusive
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    with open(filename, 'wb') as outf:
        if not use_dynamic_axes:
            torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        else:
            torch.onnx.export(model=model,
                              args=(input_ids, ),
                              f=outf,
                              input_names=['input'],
                              output_names=['output'],
                              dynamic_axes={
                                  'input': [0, 1],
                                  'output': [0, 1]
                              })
        # If not intended to make onnxruntime support variable batch size and sequence length,
        # you can unset the parameter `dynamic_axes`.
        # For some model, you have to try `opset_version=12`
        outf.flush()
    return cfg.vocab_size, cfg
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool,
                                 enable_mem_opt: bool):
    """Benchmark a transformer model converted to turbo_transformers.

    A model is built from a ``transformers`` config, converted with the
    matching ``turbo_transformers`` class and handed to ``benchmark_helper``.
    Gradient tracking is disabled globally as a side effect.

    Args:
        model_name: ``"bert"``, ``"albert"``, ``"roberta"`` or ``"distilbert"``.
        seq_len: Sequence length of the fixed-size random input.
        batch_size: Batch size of the fixed-size random input.
        n: Forwarded to the benchmark helper.
        enable_random: Use ``run_variable_model`` instead of a fixed input.
        max_seq_len: Forwarded to ``run_variable_model``.
        min_seq_len: Forwarded to ``run_variable_model``.
        num_threads: Thread count for turbo_transformers.
        use_gpu: Run on ``cuda:0`` instead of the CPU.
        enable_mem_opt: With ``enable_random``, switch the allocator schema to
            ``"model-aware"`` for the run and back to ``"naive"`` afterwards.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # name -> (config class, config kwargs, model class, from_torch kwargs);
    # the model class name is the same in transformers and turbo_transformers
    model_specs = {
        "bert": ("BertConfig", {}, "BertModel", {"backend": "turbo"}),
        "albert": ("AlbertConfig", {
            "hidden_size": 768,
            "num_attention_heads": 12,
            "intermediate_size": 3072,
        }, "AlbertModel", {}),
        "roberta": ("RobertaConfig", {}, "RobertaModel", {}),
        "distilbert": ("DistilBertConfig", {}, "DistilBertModel", {}),
    }
    if model_name not in model_specs:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    config_name, config_kwargs, model_cls_name, convert_kwargs = model_specs[
        model_name]
    cfg = getattr(transformers, config_name)(**config_kwargs)
    model = getattr(transformers, model_cls_name)(cfg)
    model.to(test_device)
    model.eval()
    model = getattr(turbo_transformers,
                    model_cls_name).from_torch(model, **convert_kwargs)

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("model-aware")
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg, enable_mem_opt, model_name)
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("naive")
    else:
        # torch.randint's upper bound is exclusive
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads,
                                   enable_mem_opt, model_name)
    def test_shape_on_real_data_batched(self):
        """Check the combined-logits shape for a padded batch of two examples."""
        set_seed(42)
        src_vocab_size, tgt_vocab_size, max_position = 17, 23, 7

        # sizes shared by the single-layer, single-head encoder and decoder
        tiny_bert = dict(
            hidden_size=11,
            intermediate_size=44,
            num_hidden_layers=1,
            num_attention_heads=1,
        )

        bert_encoder = transformers.BertModel(
            transformers.BertConfig(vocab_size=src_vocab_size, **tiny_bert))

        bert_decoder = transformers.BertModel(
            transformers.BertConfig(vocab_size=tgt_vocab_size + max_position,
                                    is_decoder=True,
                                    **tiny_bert))

        model = EncoderDecoderWPointerModel(
            encoder=bert_encoder,
            decoder=bert_decoder,
            max_src_len=max_position,
        )

        # similar to real data
        source_ids = torch.LongTensor([
            [1, 6, 12, 15, 2, 0, 0],
            [1, 6, 12, 15, 5, 3, 2],
        ])
        target_ids = torch.LongTensor([
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7, 0, 0],
            [8, 6, 4, 10, 11, 8, 5, 1, 12, 13, 14, 7, 7],
        ])
        pointer_mask = torch.FloatTensor([
            [0, 1, 1, 1, 0, 0, 0],
            [0, 1, 1, 1, 1, 1, 0],
        ])

        outputs = model(input_ids=source_ids,
                        decoder_input_ids=target_ids,
                        pointer_mask=pointer_mask)
        logits = outputs[0]

        # last dimension: target vocabulary plus one slot per source position
        target_len = target_ids.shape[1]
        source_len = source_ids.shape[1]
        self.assertEqual(logits.shape,
                         (2, target_len, tgt_vocab_size + source_len))
Beispiel #13
0
    def __init__(self, config):
        """Assemble a BERT encoder, a dropout layer and a one-output linear head.

        Args:
            config: Model configuration; ``num_labels``,
                ``hidden_dropout_prob`` and ``hidden_size`` are read from it.
        """
        super(BertForSequenceRegression, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = ptt.BertModel(config)

        dropout_rate = config.hidden_dropout_prob
        self.dropout = nn.Dropout(dropout_rate)

        # the head always has a single output unit, independent of num_labels
        encoder_width = config.hidden_size
        self.classifier = nn.Linear(encoder_width, 1)

        # run the class's weight initialiser over every submodule
        self.apply(self.init_weights)
    def test_shape_on_real_data(self):
        """Check the combined-logits shape for a single realistic example."""
        set_seed(42)
        src_vocab_size, tgt_vocab_size, max_position = 17, 23, 5

        # sizes shared by the single-layer, single-head encoder and decoder
        tiny_bert = dict(
            hidden_size=11,
            intermediate_size=44,
            num_hidden_layers=1,
            num_attention_heads=1,
        )

        bert_encoder = transformers.BertModel(
            transformers.BertConfig(vocab_size=src_vocab_size, **tiny_bert))

        bert_decoder = transformers.BertModel(
            transformers.BertConfig(vocab_size=tgt_vocab_size + max_position,
                                    is_decoder=True,
                                    **tiny_bert))

        model = EncoderDecoderWPointerModel(
            encoder=bert_encoder,
            decoder=bert_decoder,
            max_src_len=max_position,
        )

        # similar to real data
        # e.g. '[CLS] Directions to Lowell [SEP]'
        source_ids = torch.LongTensor([[1, 6, 12, 15, 2]])
        # e.g. '[IN:GET_DIRECTIONS Directions to [SL:DESTINATION Lowell]]'
        target_ids = torch.LongTensor([[8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7]])
        pointer_mask = torch.FloatTensor([[0, 1, 1, 1, 0]])

        outputs = model(input_ids=source_ids,
                        decoder_input_ids=target_ids,
                        pointer_mask=pointer_mask)
        logits = outputs[0]

        # last dimension: target vocabulary plus one slot per source position
        target_len = target_ids.shape[1]
        source_len = source_ids.shape[1]
        self.assertEqual(logits.shape,
                         (1, target_len, tgt_vocab_size + source_len))
def test_smart_batch(use_cuda: bool):
    """Compare smart-batched turbo BERT inference with serial torch inference.

    Asserts that the largest absolute difference between the two results is
    below 1e-2, then prints the wall-clock time of ten runs of each variant.
    """
    if use_cuda:
        test_device = torch.device('cuda:0')
    else:
        test_device = torch.device('cpu:0')

    # dropout disabled in the config
    bert_config = transformers.BertConfig(attention_probs_dropout_prob=0.0,
                                          hidden_dropout_prob=0.0)
    torch_model = transformers.BertModel(bert_config)

    # model_id = "bert-base-uncased"
    # torch_model = transformers.BertModel.from_pretrained(model_id)
    torch_model.eval()
    torch_model.to(test_device)
    torch.set_grad_enabled(False)

    cfg = torch_model.config
    # use 4 threads for computing
    if not use_cuda:
        turbo_transformers.set_num_threads(4)

    # Initialize a turbo BertModel with smart batching from torch model.
    turbo_model = turbo_transformers.BertModelSmartBatch.from_torch(
        torch_model)

    # a batch of queries with different lengths.
    query_seq_len_list = [18, 2, 3, 51]

    # generate random inputs. Of course you can use real data.
    input_list = []
    for length in query_seq_len_list:
        input_list.append(
            torch.randint(low=0,
                          high=cfg.vocab_size - 1,
                          size=(1, length),
                          dtype=torch.long,
                          device=test_device))

    def run_serial():
        return serial_bert_inference(torch_model, input_list)

    def run_batched():
        return batch_bert_inference(turbo_model, input_list,
                                    query_seq_len_list)

    def seconds_for_ten_runs(run):
        # wall-clock time of ten back-to-back invocations
        begin = time.time()
        for _ in range(10):
            run()
        return time.time() - begin

    # start inference
    s_res = run_serial()
    b_res = run_batched()
    max_abs_diff = torch.max(torch.abs(b_res - s_res))
    print(max_abs_diff)
    assert (max_abs_diff < 1e-2)

    print("\ntorch time consum: {}".format(seconds_for_ten_runs(run_serial)))
    print("\nturbo time consum: {}".format(seconds_for_ten_runs(run_batched)))
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool):
    """Benchmark a transformer model converted to turbo_transformers.

    A model is built from its default ``transformers`` config, converted with
    the matching ``turbo_transformers`` class and handed to
    ``benchmark_helper``. Gradient tracking is disabled globally as a side
    effect.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length of the fixed-size random input.
        batch_size: Batch size of the fixed-size random input.
        n: Forwarded to the benchmark helper.
        enable_random: Use ``run_variable_model`` instead of a fixed input.
        max_seq_len: Forwarded to ``run_variable_model``.
        min_seq_len: Forwarded to ``run_variable_model``.
        num_threads: Thread count for turbo_transformers.
        use_gpu: Run on ``cuda:0`` instead of the CPU.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # name -> (config class, model class); the model class name is the same
    # in transformers and turbo_transformers
    supported = {
        "bert": ("BertConfig", "BertModel"),
        "albert": ("AlbertConfig", "AlbertModel"),
        "roberta": ("RobertaConfig", "RobertaModel"),
    }
    if model_name not in supported:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    if use_gpu:
        print("using GPU")
    else:
        print("using CPU")
    torch.set_grad_enabled(False)

    config_cls_name, model_cls_name = supported[model_name]
    cfg = getattr(transformers, config_cls_name)()
    model = getattr(transformers, model_cls_name)(cfg)
    model.to(test_device)
    model.eval()
    model = getattr(turbo_transformers, model_cls_name).from_torch(model)

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg)
    else:
        # torch.randint's upper bound is exclusive
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)

        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads)
    def __init__(self,
                 emb=False,
                 pretrained='bert-large-uncased',
                 finetune=True):
        """Load a BERT model and its tokenizer.

        Args:
            emb: When true, build the model from the pretrained configuration
                only; otherwise load the pretrained weights as well.
            pretrained: Name passed to the ``from_pretrained`` loaders.
            finetune: Not used inside this method.
        """
        super().__init__()
        if not emb:
            self.model = pt.BertModel.from_pretrained(pretrained)
        else:
            bert_config = pt.BertConfig.from_pretrained(pretrained)
            self.model = pt.BertModel(bert_config)
        self.tokenizer = pt.BertTokenizer.from_pretrained(pretrained)

        # width of the last encoder layer's output projection
        final_layer = self.model.encoder.layer[-1]
        self.hidden_dim = final_layer.output.dense.out_features
Beispiel #18
0
    def __init__(self, config):
        """Assemble BERT, dropout, a span extractor and a linear classifier.

        Args:
            config: Model configuration; ``num_labels``, ``num_spans``,
                ``hidden_dropout_prob`` and ``hidden_size`` are read from it.
        """
        super(BertForSpanComparisonClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.num_spans = config.num_spans

        self.bert = ptt.BertModel(config)

        dropout_rate = config.hidden_dropout_prob
        self.dropout = nn.Dropout(dropout_rate)

        encoder_width = config.hidden_size
        self.span_attention_extractor = SelfAttentiveSpanExtractor(
            encoder_width)
        # the classifier input is num_spans vectors of hidden_size each
        classifier_in = encoder_width * self.num_spans
        self.classifier = nn.Linear(classifier_in, self.num_labels)

        # run the class's weight initialiser over every submodule
        self.apply(self.init_weights)
Beispiel #19
0
 def __init__(self, config, initialize_wBERT=False):
     """Create the BERT model, the optional projection and the KL loss.

     Args:
         config: Model configuration; ``projection`` and
             ``indexing_dimension`` are read from it.
         initialize_wBERT: Load pretrained ``bert-base-uncased`` weights
             instead of building the model from ``config``.
     """
     super().__init__(config)
     # without a projection the index dimension must be 768
     assert config.projection or config.indexing_dimension == 768, \
         'If no projection then indexing dimension must be equal to 768'
     self.config = config

     if not initialize_wBERT:
         self.model = transformers.BertModel(config)
     else:
         self.model = transformers.BertModel.from_pretrained(
             'bert-base-uncased')

     if self.config.projection:
         # map hidden states to the indexing dimension, then layer-normalise
         hidden_width = self.model.config.hidden_size
         self.proj = nn.Linear(hidden_width, self.config.indexing_dimension)
         self.norm = nn.LayerNorm(self.config.indexing_dimension)

     self.loss_fct = torch.nn.KLDivLoss()
Beispiel #20
0
 def __init__(self, config):
     """Create BERT, a context projection and an entity embedding table.

     Args:
         config: Model configuration; ``hidden_size``,
             ``entity_embedding_dim``, ``hidden_dropout_prob``,
             ``entity_vocab_size``, ``use_batch_negatives`` and
             ``random_negatives`` are read from it.
     """
     super().__init__(config)
     self.bert = transformers.BertModel(config)

     # projects BERT hidden states to the entity embedding dimension
     self.context_projection = torch.nn.Linear(
         in_features=config.hidden_size,
         out_features=config.entity_embedding_dim)
     self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
     # learnable scalar, initialised to 1.0
     self.scaling_constant = torch.nn.Parameter(torch.tensor(1.0))

     entity_table = torch.nn.Embedding(
         num_embeddings=config.entity_vocab_size,
         embedding_dim=config.entity_embedding_dim)
     self.entity_embeddings = entity_table
     # Std. from Nick
     torch.nn.init.normal_(entity_table.weight, std=0.02)

     self.use_batch_negatives = config.use_batch_negatives
     self.random_negatives = config.random_negatives
Beispiel #21
0
def benchmark_torch_jit(model_name: str, seq_len: int, batch_size: int, n: int,
                        enable_random: bool, max_seq_len: int,
                        min_seq_len: int, num_threads: int, use_gpu: bool,
                        enable_mem_opt: bool):
    """Benchmark a traced (torch.jit) transformer model and print the result.

    The model is built from its default ``transformers`` configuration, traced
    on a random batch of token ids, run once untimed and then timed for ``n``
    runs. The result is printed as one JSON object. Gradient tracking is
    disabled and the torch thread count is set globally.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length of the random input.
        batch_size: Batch size of the random input.
        n: Number of timed forward passes.
        enable_random: Not used by this function.
        max_seq_len: Not used by this function.
        min_seq_len: Not used by this function.
        num_threads: Number of torch threads to use.
        use_gpu: Not used by this function.
        enable_mem_opt: Not used by this function.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # (config class, model class) names in `transformers` per supported model
    supported = {
        "bert": ("BertConfig", "BertModel"),
        "albert": ("AlbertConfig", "AlbertModel"),
        "roberta": ("RobertaConfig", "RobertaModel"),
    }
    if model_name not in supported:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import transformers
    import contexttimer
    import torch.jit
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    config_cls_name, model_cls_name = supported[model_name]
    cfg = getattr(transformers, config_cls_name)()
    model = getattr(transformers, model_cls_name)(cfg)
    model.eval()
    # torch.randint's upper bound is exclusive
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)

    model = torch.jit.trace(model, (input_ids, ))

    with torch.jit.optimized_execution(True):
        # one untimed call before the measurement starts
        model(input_ids)
        with contexttimer.Timer() as t:
            for _ in range(n):
                model(input_ids)

    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "batch_size": batch_size,
            "seq_len": seq_len,
            "framework": "torch_jit",
            "n_threads": num_threads,
            "model_name": model_name
        }))
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    enable_random: bool, max_seq_len: int, min_seq_len: int,
                    num_threads: int, use_gpu: bool, enable_mem_opt: bool):
    """Benchmark a freshly constructed transformer model with plain PyTorch.

    The model is built from its default ``transformers`` configuration and
    handed to ``benchmark_helper``. Gradient tracking is disabled and the torch
    thread count is set globally.

    Args:
        model_name: ``"bert"``, ``"albert"``, ``"roberta"`` or ``"distilbert"``.
        seq_len: Sequence length of the fixed-size random input.
        batch_size: Batch size of the fixed-size random input.
        n: Forwarded to the benchmark helper.
        enable_random: Use ``run_variable_model`` instead of a fixed input.
        max_seq_len: Forwarded to ``run_variable_model``.
        min_seq_len: Forwarded to ``run_variable_model``.
        num_threads: Number of torch threads to use.
        use_gpu: Run on ``cuda:0`` instead of the CPU.
        enable_mem_opt: Forwarded to the benchmark helper.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # (config class, model class) names in `transformers` per supported model
    supported = {
        "bert": ("BertConfig", "BertModel"),
        "albert": ("AlbertConfig", "AlbertModel"),
        "roberta": ("RobertaConfig", "RobertaModel"),
        "distilbert": ("DistilBertConfig", "DistilBertModel"),
    }
    if model_name not in supported:
        # Validate before the heavy imports so a bad name fails fast.
        # Raising a bare string is itself a TypeError, so use a real exception.
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)
    torch.set_num_threads(num_threads)

    config_cls_name, model_cls_name = supported[model_name]
    cfg = getattr(transformers, config_cls_name)()
    model = getattr(transformers, model_cls_name)(cfg)
    model.eval()
    model.to(test_device)

    # cfg = model.config  # type: transformers.BertConfig
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "torch", num_threads,
                                            cfg, enable_mem_opt, model_name)
    else:
        # torch.randint's upper bound is exclusive
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "torch", num_threads,
                                   enable_mem_opt, model_name)
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int):
    """Benchmark a ``turbo_transformers`` model on ``cuda:0``.

    A randomly initialised ``transformers`` model is built from its default
    configuration, moved to the GPU, put in eval mode and converted with the
    matching ``turbo_transformers.<Model>.from_torch``.  The converted model
    is then run through ``benchmark_helper.run_model`` on one random
    ``(batch_size, seq_len)`` batch of token ids.

    If CUDA is not available a message is printed and the function returns
    without running anything.

    Args:
        model_name: one of ``"bert"``, ``"albert"``, ``"roberta"``.
        seq_len: sequence length of the random input batch.
        batch_size: batch size of the random input batch.
        n: forwarded to ``benchmark_helper.run_model``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
            The name is checked first, so this is raised even on machines
            without CUDA.
    """
    # Prefix shared by ``transformers.<Prefix>Config``,
    # ``transformers.<Prefix>Model`` and ``turbo_transformers.<Prefix>Model``.
    class_prefixes = {
        "bert": "Bert",
        "albert": "Albert",
        "roberta": "Roberta",
    }
    # A real exception type is required: raising a plain string is itself a
    # TypeError and hides the intended message.
    if model_name not in class_prefixes:
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    if not torch.cuda.is_available():
        print("cuda is not available for torch")
        return
    test_device = torch.device('cuda:0')

    prefix = class_prefixes[model_name]
    cfg = getattr(transformers, prefix + "Config")()
    torch_model = getattr(transformers, prefix + "Model")(cfg)
    torch_model.to(test_device)
    torch_model.eval()
    model = getattr(turbo_transformers, prefix + "Model").from_torch(torch_model)

    cfg = model.config  # type: transformers.BertConfig
    # ``torch.randint`` excludes ``high``, so ids fall in
    # [0, vocab_size - 2]; kept as in the original benchmark.
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    benchmark_helper.run_model(lambda: model(input_ids), True, n, batch_size,
                               seq_len, "turbo")
Beispiel #24
0
    def __init__(self, hp: Optional[ModelParams] = None):
        """Build an embedding-only BERT module from the given hyper-parameters.

        The underlying ``transformers.BertModel`` is configured with zero
        hidden layers and no pooling layer.  Position-embedding parameters
        are always frozen; every other parameter follows
        ``hp.requires_grad``.

        Args:
            hp: hyper-parameters providing ``dim``, ``max_seq_len`` and
                ``requires_grad``.  When omitted or ``None`` a fresh
                ``ModelParams()`` is created for this instance.
        """
        super().__init__()
        # Build the default per instance: a default evaluated in the signature
        # is created once at definition time and shared by every object.
        self.hp = ModelParams() if hp is None else hp

        config = transformers.BertConfig(
            hidden_size=self.hp.dim,
            num_hidden_layers=0,
            num_attention_heads=1,
            intermediate_size=0,
            max_position_embeddings=self.hp.max_seq_len,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True,
        )

        self.e = transformers.BertModel(config, add_pooling_layer=False)
        for name, param in self.e.named_parameters():
            # ``named_parameters`` yields dotted paths such as
            # "embeddings.position_embeddings.weight", so an equality test
            # against the bare module name never matches; use containment.
            if 'position_embeddings' in name:
                param.requires_grad = False
            else:
                param.requires_grad = self.hp.requires_grad
Beispiel #25
0
    def __init__(self, device, cfg):
        """Set up the BERT tokenizer and encoder described by ``cfg``.

        Args:
            device: device the encoder is moved to; also stored on
                ``self.device``.
            cfg: settings object read for ``tokens_pretrained``,
                ``vocab_path``, ``merge_path``, ``embeddings_pretrained``,
                ``max_seq_len`` and ``embeddings_trainable``.
        """
        super().__init__()

        if cfg.tokens_pretrained:
            self.tokenizer = transformers.BertTokenizer.from_pretrained(
                'bert-base-uncased')
        else:
            # NOTE(review): the second positional argument of BertTokenizer
            # does not look like a merges file -- confirm ``merge_path`` is
            # what is intended here.
            self.tokenizer = transformers.BertTokenizer(
                cfg.vocab_path, cfg.merge_path)

        if cfg.embeddings_pretrained:
            self.model = transformers.BertModel.from_pretrained(
                'bert-base-uncased')
        else:
            # ``BertModel`` takes a config object, not a checkpoint name;
            # passing the string fails at construction.  Build a randomly
            # initialised model from the default configuration instead.
            # NOTE(review): with a custom vocabulary the default vocab_size
            # may need to match the tokenizer -- verify against callers.
            self.model = transformers.BertModel(transformers.BertConfig())
        self.model = self.model.to(device)

        self.pad_token = 'pad_token'
        self.device = device

        self.max_len = cfg.max_seq_len
        self.trainable = cfg.embeddings_trainable
Beispiel #26
0
def _get_network(
    args: Tuple[str, List[int]]
) -> Tuple[IRModule, bytearray, Tuple[str, List[int], str]]:
    """Build the Relay module, serialized params and input spec for a network.

    Args:
        args: ``(name, input_shape)``.  ``name`` selects a torchvision model,
            one of the BERT size presets, or ``"dcgan"``.

    Returns:
        ``(mod, params_bytearray, inputs)`` where ``inputs`` is
        ``(input_name, input_shape, dtype)``.

    Raises:
        ValueError: if ``name`` is not a known network.
    """
    name, input_shape = args

    # torchvision factory attribute and extra constructor keywords for every
    # 2D vision model; the video model is handled separately below.
    vision_factories = {
        "resnet_18": ("resnet18", {}),
        "resnet_50": ("resnet50", {}),
        "wide_resnet_50": ("wide_resnet50_2", {}),
        "resnext_50": ("resnext50_32x4d", {}),
        "mobilenet_v2": ("mobilenet_v2", {}),
        "mobilenet_v3": ("mobilenet_v3_large", {}),
        "inception_v3": ("inception_v3", {"aux_logits": False}),
        "densenet_121": ("densenet121", {}),
        "vgg_16": ("vgg16", {}),
    }
    # (num_hidden_layers, hidden_size, intermediate_size, num_attention_heads)
    bert_sizes = {
        "bert_tiny": (6, 512, 2048, 8),
        "bert_base": (12, 768, 3072, 12),
        "bert_medium": (12, 1024, 4096, 16),
        "bert_large": (24, 1024, 4096, 16),
    }

    if name in vision_factories or name == "resnet3d_18":
        import torch  # type: ignore
        from torchvision import models  # type: ignore

        if name == "resnet3d_18":
            model = models.video.r3d_18(pretrained=False)
        else:
            factory_name, extra_kwargs = vision_factories[name]
            model = getattr(models, factory_name)(pretrained=False,
                                                  **extra_kwargs)

        dtype = "float32"
        example = torch.randn(input_shape).type(torch.float32)  # pylint: disable=no-member
        scripted_model = torch.jit.trace(model, example).eval()
        input_name = "input0"
        mod, params = relay.frontend.from_pytorch(
            scripted_model, [(input_name, input_shape)])
        with tvm.transform.PassContext(opt_level=3):
            # Drop dead functions, then switch conv/pool ops to
            # channels-last layouts.
            desired_layouts = {
                "nn.conv2d": ["NHWC", "default"],
                "nn.conv3d": ["NDHWC", "default"],
                "nn.max_pool2d": ["NHWC", "default"],
                "nn.avg_pool2d": ["NHWC", "default"],
            }
            layout_passes = tvm.transform.Sequential([
                relay.transform.RemoveUnusedFunctions(),
                relay.transform.ConvertLayout(desired_layouts),
            ])
            mod = layout_passes(mod)
        inputs = (input_name, input_shape, dtype)
    elif name in bert_sizes:
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        # pip3 install transformers==3.5 torch==1.7
        import torch  # type: ignore
        import transformers  # type: ignore

        layers, hidden, intermediate, heads = bert_sizes[name]
        configuration = transformers.BertConfig(
            num_hidden_layers=layers,
            hidden_size=hidden,
            intermediate_size=intermediate,
            num_attention_heads=heads,
            return_dict=False,
        )
        model = transformers.BertModel(configuration)
        input_name = "input_ids"
        input_dtype = "int64"
        token_ids = torch.randint(10000, input_shape)  # pylint: disable=no-member
        model.eval()
        scripted_model = torch.jit.trace(model, [token_ids], strict=False)
        mod, params = relay.frontend.from_pytorch(
            scripted_model, [(input_name, input_shape)])
        mod = relay.transform.FastMath()(mod)
        mod = relay.transform.CombineParallelBatchMatmul()(mod)
        inputs = (input_name, input_shape, input_dtype)
    elif name == "dcgan":
        # For dcgan the given shape describes the output; the first entry is
        # the batch size and the rest is passed on as ``oshape``.
        batch_size, oshape = input_shape[0], input_shape[1:]
        mod, params = relay.testing.dcgan.get_workload(
            batch_size=batch_size,
            oshape=oshape,
            layout="NHWC",
        )
        inputs = ("data", [100], "float32")
    else:
        raise ValueError("Invalid name: " + name)

    params_bytearray: bytearray = save_param_dict(params)
    return mod, params_bytearray, inputs
 def __init__(self, config: transformers.BertConfig):
     """Initialise the parent with ``config`` and attach a new BERT encoder.

     Args:
         config: configuration passed to the parent constructor and used to
             instantiate the ``transformers.BertModel`` stored on
             ``self.bert``.
     """
     super().__init__(config)
     encoder = transformers.BertModel(config)
     self.bert = encoder
Beispiel #28
0
 def __init__(self, config):
     """Create the encoder and a single-output linear head.

     Args:
         config: configuration used to build the ``transformers.BertModel``
             (stored on ``self.roberta``); its ``hidden_size`` sets the
             input width of ``self.fc``.
     """
     super().__init__()
     # Encoder first, then the head, so parameter creation order is kept.
     encoder = transformers.BertModel(config)
     self.roberta = encoder
     self.fc = torch.nn.Linear(in_features=config.hidden_size,
                               out_features=1)
Beispiel #29
0
 def __init__(self, config: transformers.BertConfig):
     """Initialise the parent with ``config`` and add BERT plus dropout.

     Args:
         config: configuration passed to the parent constructor and used to
             instantiate the ``transformers.BertModel`` stored on
             ``self.bert``.
     """
     super().__init__(config)
     encoder = transformers.BertModel(config)
     self.bert = encoder
     self.dropout = torch.nn.Dropout(p=0.1)
import torch
import transformers
import turbo_transformers
from turbo_transformers.layers.utils import convert2tt_tensor, try_convert, convert_returns_as_type, ReturnType
import time


# Randomly initialised BERT built from the default configuration.
cfg = transformers.BertConfig()
model = transformers.BertModel(cfg)
# Inference only: eval mode, and autograd switched off globally.
model.eval()
torch.set_grad_enabled(False)

# Two variants of the first encoder layer's ``intermediate`` sub-module:
# one dynamically quantized by torch, one converted by turbo_transformers.
intermediate = torch.quantization.quantize_dynamic(model.encoder.layer[0].intermediate)
qintermediate = turbo_transformers.QBertIntermediate.from_torch(model.encoder.layer[0].intermediate)


# Sequence lengths to benchmark, and the number of timed calls per length.
lens = [10,20,40,60,80,100,200,300]
loops = 1

for l in lens:
    input = torch.rand(1, l, 768)
    print("seq length =", l)

    start = time.time()
    for i in range(loops):
        res = intermediate(input)
    end = time.time()
    print("torch int8 layer QPS =", loops/(end-start))

    start = time.time()
    for i in range(loops):