Example 1
    def create_and_check_flaubert_sequence_classif(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = FlaubertForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        (logits,) = model(input_ids)  # without labels, the output tuple holds only the logits
        loss, logits = model(input_ids, labels=sequence_labels)  # with labels: (loss, logits)

        result = {
            "loss": loss,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["logits"].size()),
            [self.batch_size, self.type_sequence_label_size])
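This first tester targets the tuple-return convention of older transformers releases; the second example below uses the newer attribute-style `ModelOutput` access. A small hedged sketch of position-based access that behaves the same under both conventions, since `ModelOutput` also supports integer indexing:

# Works on both old tuple outputs and newer ModelOutput objects.
outputs = model(input_ids, labels=sequence_labels)
loss, logits = outputs[0], outputs[1]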
Example 2
    def create_and_check_flaubert_sequence_classif(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        model = FlaubertForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids)  # smoke test without labels
        result = model(input_ids, labels=sequence_labels)  # with labels, the output gains a scalar loss

        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
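For reference, a minimal standalone sketch of the forward pass these testers exercise; the checkpoint name and input sentence are illustrative assumptions, not part of the test suite:

import torch
from transformers import FlaubertForSequenceClassification, FlaubertTokenizer

tokenizer = FlaubertTokenizer.from_pretrained('flaubert/flaubert_base_cased')
model = FlaubertForSequenceClassification.from_pretrained(
    'flaubert/flaubert_base_cased', num_labels=2)
model.eval()

inputs = tokenizer("Le film était excellent.", return_tensors='pt')
with torch.no_grad():
    result = model(**inputs, labels=torch.tensor([1]))
print(result.loss.shape)    # torch.Size([]) -- a scalar loss
print(result.logits.shape)  # torch.Size([1, 2]) -- (batch_size, num_labels)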
Example 3
    def load_class(self):
        # Load the pretrained model matching self.model_name.
        if self.verbose:
            print('Loading {} class...'.format(self.model_name))
        if self.model_name == 'bert':
            # BertForSequenceClassification is the pretrained BERT model with a
            # single linear classification layer on top.
            self.model = BertForSequenceClassification.from_pretrained(
                self.model_type,  # e.g. the 12-layer BERT model with an uncased vocab
                num_labels=self.num_labels,  # increase this for multi-class tasks
                output_attentions=False,  # whether the model returns attention weights
                output_hidden_states=False,  # whether the model returns all hidden states
            )
        elif self.model_name == 'distilbert':
            self.model = DistilBertForSequenceClassification.from_pretrained(
                self.model_type,
                num_labels=self.num_labels,
                output_attentions=False,
                output_hidden_states=False,
            )
        elif self.model_name == 'albert':
            self.model = AlbertForSequenceClassification.from_pretrained(
                self.model_type,
                num_labels=self.num_labels,
                output_attentions=False,
                output_hidden_states=False,
            )
        elif self.model_name == 'bart':
            if self.task == 'classification':
                self.model = BartForSequenceClassification.from_pretrained(
                    self.model_type,
                    num_labels=self.num_labels,
                    output_attentions=False,
                    output_hidden_states=False,
                )
            elif self.task == 'summarize':
                self.model = BartForConditionalGeneration.from_pretrained(
                    self.model_type)
        elif self.model_name == 'xlnet':
            self.model = XLNetForSequenceClassification.from_pretrained(
                self.model_type,
                num_labels=self.num_labels,
                output_attentions=False,
                output_hidden_states=False,
            )
        elif self.model_name == 'roberta':
            self.model = RobertaForSequenceClassification.from_pretrained(
                self.model_type,
                num_labels=self.num_labels,
                output_attentions=False,
                output_hidden_states=False,
            )
        elif self.model_name == 'camembert':
            self.model = CamembertForSequenceClassification.from_pretrained(
                self.model_type,
                num_labels=self.num_labels,
                output_attentions=False,
                output_hidden_states=False,
            )
        elif self.model_name == 'flaubert':
            self.model = FlaubertForSequenceClassification.from_pretrained(
                self.model_type,
                num_labels=self.num_labels,
                output_attentions=False,
                output_hidden_states=False,
            )
        elif self.model_name == 'gpt2':
            self.model = GPT2LMHeadModel.from_pretrained(self.model_type)
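The chain of conditionals above could also be expressed as a lookup table. A minimal sketch of that design, assuming the same constructor arguments; the helper and dictionary names here are hypothetical:

from transformers import (AlbertForSequenceClassification,
                          BertForSequenceClassification,
                          CamembertForSequenceClassification,
                          DistilBertForSequenceClassification,
                          FlaubertForSequenceClassification,
                          RobertaForSequenceClassification,
                          XLNetForSequenceClassification)

# Map a short model name to its sequence-classification class.
SEQ_CLS_CLASSES = {
    'albert': AlbertForSequenceClassification,
    'bert': BertForSequenceClassification,
    'camembert': CamembertForSequenceClassification,
    'distilbert': DistilBertForSequenceClassification,
    'flaubert': FlaubertForSequenceClassification,
    'roberta': RobertaForSequenceClassification,
    'xlnet': XLNetForSequenceClassification,
}

def load_seq_cls_model(model_name, model_type, num_labels):
    """Hypothetical helper: instantiate the classification head for model_name."""
    model_class = SEQ_CLS_CLASSES[model_name]
    return model_class.from_pretrained(
        model_type,
        num_labels=num_labels,
        output_attentions=False,
        output_hidden_states=False,
    )

The bart and gpt2 branches, which pick a different head depending on the task, would stay as explicit conditionals.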
Example 4
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=config.batch_size,
                                           shuffle=False)

test_input_ids, test_attention_masks, test_labels = datatensor(
    listePhraseTEST, listeTagTEST)

test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=config.batch_size,
                                          shuffle=False)

print("data finit")

model = FlaubertForSequenceClassification.from_pretrained(
    model_id,  # use the Flaubert model
    num_labels=2  # binary classification: two output labels
)

optimizer = AdamW(model.parameters(), lr=5e-5)
#   lr = 1e-5,  # args.learning_rate; default is 5e-5
#   eps = 1e-8  # args.adam_epsilon; default is 1e-8, keeps the denominator from reaching zero
# )

# Total number of training steps: [number of batches] x [number of epochs].
total_steps = len(train_loader) * config.epochs

# Set up the learning-rate scheduler.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,  # Default value in run_glue.py
    num_training_steps=total_steps)
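
The snippet stops at the scheduler. A minimal sketch of the fine-tuning loop it is preparing, reusing the names defined above and assuming a transformers version whose outputs expose `.loss`; the device placement and gradient clipping are additions here, not taken from the original script:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(config.epochs):
    model.train()
    for input_ids, attention_masks, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(input_ids.to(device),
                        attention_mask=attention_masks.to(device),
                        labels=labels.to(device))
        outputs.loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # guard against exploding gradients
        optimizer.step()
        scheduler.step()  # advance the linear warmup/decay schedule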