Example #1
        def init_data(self, use_cuda):
            self.test_device = torch.device('cuda:0') if use_cuda else \
                torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(4)
                turbo_transformers.set_num_threads(4)

            self.cfg = DistilBertConfig(attention_probs_dropout_prob=0.0,
                                        hidden_dropout_prob=0.0)

            torch.set_grad_enabled(False)
            self.torch_model = DistilBertModel(self.cfg)
            self.torch_model.eval()
            if use_cuda:
                self.torch_model.to(self.test_device)

            self.turbo_transformer = turbo_transformers.DistilBertModel.from_torch(
                self.torch_model)
            # input ids of shape (batch_size, input_len)
            self.inputs = torch.randint(low=0,
                                        high=self.cfg.vocab_size - 1,
                                        size=(batch_size, input_len),
                                        dtype=torch.long,
                                        device=self.test_device)
            self.attention_mask = torch.ones((batch_size, input_len),
                                             dtype=torch.long,
                                             device=self.test_device)
            self.head_mask = [None] * self.cfg.num_hidden_layers
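
The snippet above converts a randomly initialized DistilBertModel. For orientation, here is a minimal sketch of the same from_torch conversion driven from pretrained Hugging Face weights instead; the checkpoint name, tensor shapes, and variable names are illustrative assumptions, not part of the example.

import torch
import turbo_transformers
from transformers import DistilBertModel

# Sketch: convert a pretrained Hugging Face DistilBERT into a
# turbo_transformers model and run a single forward pass.
torch.set_grad_enabled(False)
torch_model = DistilBertModel.from_pretrained('distilbert-base-uncased')  # assumed checkpoint
torch_model.eval()

turbo_model = turbo_transformers.DistilBertModel.from_torch(torch_model)

input_ids = torch.randint(low=0,
                          high=torch_model.config.vocab_size - 1,
                          size=(2, 40),  # (batch_size, input_len), illustrative
                          dtype=torch.long)
attention_mask = torch.ones_like(input_ids)
head_mask = [None] * torch_model.config.num_hidden_layers
output = turbo_model(input_ids, attention_mask, head_mask=head_mask)
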
Example #2
    def __init__(self, config, weight=None):
        super(DistilBertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.weight = weight

        self.distilbert = DistilBertModel(config)
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.classifier = nn.Linear(config.dim, config.num_labels)
        self.dropout = nn.Dropout(config.seq_classif_dropout)

        self.init_weights()
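
The constructor above only wires up the layers. Below is a rough sketch of the matching forward pass, assuming it follows the stock DistilBertForSequenceClassification pattern (first-token pooling, pre_classifier, ReLU, dropout, classifier) and that self.weight is meant as a class weight for the cross-entropy loss; the argument list is trimmed and the body is not taken from the original repository.

    def forward(self, input_ids=None, attention_mask=None, labels=None):
        # Hypothetical forward pass for the layers defined in __init__ above.
        distilbert_output = self.distilbert(input_ids=input_ids,
                                            attention_mask=attention_mask)
        hidden_state = distilbert_output[0]      # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]       # first-token representation, (bs, dim)
        pooled_output = self.pre_classifier(pooled_output)
        pooled_output = nn.ReLU()(pooled_output)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)  # (bs, num_labels)

        loss = None
        if labels is not None:
            # self.weight allows a class-weighted cross entropy
            loss_fct = nn.CrossEntropyLoss(weight=self.weight)
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return (loss, logits) if loss is not None else (logits,)
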
Example #3
    def __init__(self, config, args, intent_label_lst, slot_label_lst):
        super(JointDistilBERT, self).__init__(config)
        self.args = args
        self.num_intent_labels = len(intent_label_lst)
        self.num_slot_labels = len(slot_label_lst)
        self.distilbert = DistilBertModel(
            config=config)  # DistilBERT backbone (weights are loaded later via from_pretrained)

        self.intent_classifier = IntentClassifier(config.hidden_size,
                                                  self.num_intent_labels,
                                                  args.dropout_rate)
        self.slot_classifier = SlotClassifier(
            config.hidden_size, self.num_intent_labels, self.num_slot_labels,
            self.args.use_intent_context_concat,
            self.args.use_intent_context_attention, self.args.max_seq_len,
            self.args.intent_embedding_size,
            self.args.attention_embedding_size, self.args.attention_type,
            args.dropout_rate)

        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
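
A sketch of how the two heads might be combined in a forward pass: the first-token representation feeds the intent classifier, the sequence output (plus intent context) feeds the slot classifier, and the optional CRF from the pytorch-crf package scores the slot tags. The argument names, the SlotClassifier call signature, and the loss wiring are assumptions, since those classes are defined elsewhere in the repository.

    def forward(self, input_ids, attention_mask,
                intent_label_ids=None, slot_labels_ids=None):
        # Hypothetical forward pass; DistilBERT takes no token_type_ids.
        outputs = self.distilbert(input_ids=input_ids,
                                  attention_mask=attention_mask)
        sequence_output = outputs[0]            # (bs, seq_len, hidden)
        pooled_output = sequence_output[:, 0]   # first-token representation

        intent_logits = self.intent_classifier(pooled_output)
        # Assumed signature: SlotClassifier consumes the sequence output and
        # the intent context when use_intent_context_* is enabled.
        slot_logits = self.slot_classifier(sequence_output, intent_logits)

        total_loss = 0.0
        if intent_label_ids is not None:
            total_loss += nn.CrossEntropyLoss()(
                intent_logits.view(-1, self.num_intent_labels),
                intent_label_ids.view(-1))
        if slot_labels_ids is not None:
            if self.args.use_crf:
                # pytorch-crf returns a log-likelihood; negate it for a loss
                total_loss += -self.crf(slot_logits, slot_labels_ids,
                                        mask=attention_mask.byte(),
                                        reduction='mean')
            else:
                # Padding positions are not masked out here for brevity.
                total_loss += nn.CrossEntropyLoss()(
                    slot_logits.view(-1, self.num_slot_labels),
                    slot_labels_ids.view(-1))
        return total_loss, intent_logits, slot_logits
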
Example #4
    class TestDistillBertModel(unittest.TestCase):
        def init_data(self, use_cuda):
            self.test_device = torch.device('cuda:0') if use_cuda else \
                torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(4)
                turbo_transformers.set_num_threads(4)

            self.cfg = DistilBertConfig(attention_probs_dropout_prob=0.0,
                                        hidden_dropout_prob=0.0)

            torch.set_grad_enabled(False)
            self.torch_model = DistilBertModel(self.cfg)
            self.torch_model.eval()
            if use_cuda:
                self.torch_model.to(self.test_device)

            self.turbo_transformer = turbo_transformers.DistilBertModel.from_torch(
                self.torch_model)
            # input ids of shape (batch_size, input_len)
            self.inputs = torch.randint(low=0,
                                        high=self.cfg.vocab_size - 1,
                                        size=(batch_size, input_len),
                                        dtype=torch.long,
                                        device=self.test_device)
            self.attention_mask = torch.ones((batch_size, input_len),
                                             dtype=torch.long,
                                             device=self.test_device)
            self.head_mask = [None] * self.cfg.num_hidden_layers

        def check_torch_and_turbo(self, use_cuda, num_iter=1):
            self.init_data(use_cuda)
            device = "GPU" if use_cuda else "CPU"

            torch_model = lambda: self.torch_model(
                self.inputs, self.attention_mask)
            torch_res, torch_qps, torch_time_consume = \
                test_helper.run_model(torch_model, use_cuda, num_iter)

            print(
                f"DistillBertModel \"({batch_size}, {input_len:03})\" ",
                f"{device} Torch QPS, {torch_qps}, time, {torch_time_consume}")

            turbo_model = lambda: self.turbo_transformer(
                self.inputs, self.attention_mask, head_mask=self.head_mask)
            with turbo_transformers.pref_guard("gpref_test") as perf:
                turbo_res, turbo_qps, turbo_time_consume = \
                    test_helper.run_model(turbo_model, use_cuda, num_iter)

            print(
                f"DistillBertModel \"({batch_size}, {input_len:03})\" ",
                f"{device} Turbo QPS, {turbo_qps}, time, {turbo_time_consume}")

            tolerance = 1e-2 if use_cuda else 1e-3
            self.assertTrue(
                torch.max(torch.abs(torch_res[0] - turbo_res[0])) < tolerance)

            with open(fname, "a") as fh:
                fh.write(
                    f"\"({batch_size},{input_len:03})\", {torch_qps}, {turbo_qps}\n"
                )

        def test_distrill_bert_model(self):
            self.check_torch_and_turbo(use_cuda=False)
            if torch.cuda.is_available() and \
                    turbo_transformers.config.is_compiled_with_cuda():
                self.check_torch_and_turbo(use_cuda=True)
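
The test class refers to module-level names (batch_size, input_len, fname, test_helper) that are defined elsewhere in the original test file; in that file they would appear before the class definition. A minimal sketch of the scaffolding that would let the snippet run on its own, with purely illustrative values:

import unittest
import torch
import turbo_transformers
from transformers import DistilBertConfig, DistilBertModel

import test_helper  # benchmark helper shipped with the turbo_transformers test suite

# Illustrative module-level globals assumed by the test class above.
batch_size = 2
input_len = 40
fname = "distilbert_benchmark.csv"

if __name__ == '__main__':
    unittest.main()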