Example #1
        def create_and_check_xlnet_token_classif(
                self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask,
                input_mask, target_mapping, segment_ids, lm_labels,
                sequence_labels, is_impossible_labels, token_labels):
            model = XLNetForTokenClassification(config)
            model.to(torch_device)
            model.eval()

            logits, mems_1 = model(input_ids_1)
            loss, logits, mems_1 = model(input_ids_1, labels=token_labels)

            result = {
                "loss": loss,
                "mems_1": mems_1,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(list(result["logits"].size()), [
                self.batch_size, self.seq_length, self.type_sequence_label_size
            ])
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] *
                self.num_hidden_layers)
Example #2
    def create_and_check_xlnet_token_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForTokenClassification(config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids_1)
        result = model(input_ids_1, labels=token_labels)

        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(
            result.logits.shape,
            (self.batch_size, self.seq_length, self.type_sequence_label_size))
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size, self.hidden_size)] *
            self.num_hidden_layers,
        )
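
Examples #1 and #2 exercise the same shape checks; only the model's return format changed between transformers releases (tuple outputs vs. a model output with .loss, .logits, and .mems). For reference, a minimal standalone sketch of what these helpers verify, using an illustrative small config; the sizes and variable names below are assumptions, not the values from the test suite.

import torch
from transformers import XLNetConfig, XLNetForTokenClassification

# Illustrative tiny config (assumed values, not the ones used by the tests).
config = XLNetConfig(vocab_size=128, d_model=32, n_layer=2, n_head=4,
                     d_inner=64, num_labels=3)
model = XLNetForTokenClassification(config)
model.eval()

batch_size, seq_length = 2, 8
input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_length))
labels = torch.randint(0, config.num_labels, (batch_size, seq_length))

with torch.no_grad():
    result = model(input_ids, labels=labels)

print(result.loss.shape)    # torch.Size([]), i.e. a scalar loss
print(result.logits.shape)  # (batch_size, seq_length, num_labels)
print(len(result.mems), result.mems[0].shape)  # n_layer mems of (seq_length, batch_size, d_model)
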
Example #3
    # Align character-level labels to sentencepiece tokens: each token takes the
    # maximum label over the span of characters it covers.
    for i, j in enumerate(sep_sentence[1:-2]):
        out[i + pad_num + 1] = max(char_label[current_idx:current_idx + len(j)])

        if j == "<unk>":
            current_idx = current_idx + 1
        else:
            current_idx = current_idx + len(j)

    return out.tolist()


config = AutoConfig.from_pretrained(model_path)
tokenizer = XLNetTokenizer.from_pretrained(model_path, unk_token=unk_token)
model = XLNetForTokenClassification.from_pretrained(model_path, num_labels=13)

if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

model.to(device)

train_input_ids = []
train_labels = []
train_masks = []
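
The snippet above stops before the lists are populated. A hedged sketch of one way they might be filled follows; train_sentences, train_token_labels, and MAX_LEN are illustrative names, not identifiers from the original script, and the per-token label ids are presumed to come from the alignment helper shown earlier.

MAX_LEN = 128  # assumed maximum sequence length

for sentence, label_ids in zip(train_sentences, train_token_labels):
    encoding = tokenizer(sentence, max_length=MAX_LEN,
                         padding="max_length", truncation=True)
    train_input_ids.append(encoding["input_ids"])
    train_masks.append(encoding["attention_mask"])
    # Pad/truncate labels to MAX_LEN; -100 is ignored by the token-classification loss.
    train_labels.append(label_ids[:MAX_LEN] + [-100] * max(0, MAX_LEN - len(label_ids)))

train_input_ids = torch.tensor(train_input_ids)
train_masks = torch.tensor(train_masks)
train_labels = torch.tensor(train_labels)

Note that XLNetTokenizer pads on the left by default, so in a real run the label padding would have to follow the same side as the input padding.
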
Example #4
        os.makedirs(output_path)

    args.output_file = os.path.join(output_path, suffix)
    if args.started_sentence_id == 1 and os.path.exists(args.output_file):
        os.remove(args.output_file)
    print('The output file is ', args.output_file)

    args.input_file = os.path.join(args.input_file, f'''{args.dataset}/{args.keywords}keywords.txt''')

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("device:", device)

    # Load the pre-trained XLNet token classifier unless random mode is requested.
    if args.random == 0:
        classifier_model_path = '../checkpoints/xlnet_classifier/{}'.format(args.dataset)
        args.classifier_model_path = classifier_model_path
        classifier_model = XLNetForTokenClassification.from_pretrained(
            classifier_model_path, num_labels=4)
        classifier_model_tokenizer = XLNetTokenizer.from_pretrained(classifier_model_path)

        logger.logger.info('Initialize backward XLNetForTokenClassification from checkpoint {}.'.format(classifier_model_path))
        classifier_model = classifier_model.to(device)
        classifier_model.eval()
    else:
        classifier_model = None
        classifier_model_tokenizer = None


    if args.model_name == 'LSTMLMGenerate':
        forward_lm_path = '../checkpoints/forward_lstm_lm/{}/best.pt'.format(args.dataset)
        backward_lm_path = '../checkpoints/backward_lstm_lm/{}/best.pt'.format(args.dataset)
        args.forward_lm_path = forward_lm_path
        args.backward_lm_path = backward_lm_path
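
For context, a hedged sketch of how the loaded classifier might be queried for a single keyword line; the variable line is illustrative, and the sketch assumes classifier_model, classifier_model_tokenizer, and device from the code above are in scope. The four output labels presumably correspond to the edit operations documented in Example #5.

if classifier_model is not None:
    line = "example keyword line"  # assumed input
    enc = classifier_model_tokenizer(line, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = classifier_model(**enc).logits       # shape (1, seq_len, 4)
    predicted_ops = logits.argmax(dim=-1).squeeze(0)  # one edit-operation id per token
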
Example #5
    args.model_path = model_path
    args.log_path = log_path

    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_file = '{}/{}.log'.format(log_path, args.dataset)
    print('The log file is ', log_file)
    logger = Logger(log_file)
    logger.logger.info(args)

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    try:
        # load the pre-trained model and tokenizer
        tokenizer = XLNetTokenizer.from_pretrained(args.model_path)
        model = XLNetForTokenClassification.from_pretrained(
            args.model_path, num_labels=args.num_labels)
        logger.logger.info('Initialize XLNet from checkpoint {}.'.format(
            args.model_path))
    except Exception:
        # Fall back to the public xlnet-base-cased checkpoint if no local one is found.
        tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
        model = XLNetForTokenClassification.from_pretrained(
            'xlnet-base-cased', num_labels=args.num_labels)
        logger.logger.info('Initialize XLNet with default parameters.')

    model = XLNetClassifier(model)
    """
    copy: 0
    replace: 1
    insert: 2
    delete: 3
    """