コード例 #1
0
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        """Return a deep copy of ``inputs_dict`` adapted to ``model_class``.

        For multiple-choice model classes, every tensor with more than one
        dimension gets a new axis inserted at position 1 and expanded to
        ``self.model_tester.num_choices``. When ``return_labels`` is true,
        constant ``torch.long`` label tensors matching the kind of head the
        model class is registered for are added to the copy.

        Args:
            inputs_dict: Mapping of input names to values; it is deep-copied,
                so the caller's object is left untouched.
            model_class: Model class looked up in the ``MODEL_FOR_*`` mappings.
            return_labels: Whether dummy label entries should be added.

        Returns:
            The prepared copy of the inputs.
        """
        prepared = copy.deepcopy(inputs_dict)
        is_multiple_choice = model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values()

        if is_multiple_choice:
            expanded = {}
            for name, value in prepared.items():
                if isinstance(value, torch.Tensor) and value.ndim > 1:
                    # Repeat the tensor along a freshly inserted "choice" axis.
                    value = value.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
                expanded[name] = value
            prepared = expanded

        if not return_labels:
            return prepared

        def constant_labels(factory, shape):
            # All dummy labels share dtype and device; only fill value and shape vary.
            return factory(shape, dtype=torch.long, device=torch_device)

        # The checks below are ordered: the first matching head type wins.
        if is_multiple_choice:
            prepared["labels"] = constant_labels(torch.ones, self.model_tester.batch_size)
            return prepared

        if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
            for key in ("start_positions", "end_positions"):
                prepared[key] = constant_labels(torch.zeros, self.model_tester.batch_size)
            return prepared

        if model_class in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values():
            prepared["labels"] = constant_labels(torch.zeros, self.model_tester.batch_size)
            return prepared

        per_token_mappings = (
            MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
            MODEL_FOR_CAUSAL_LM_MAPPING,
            MODEL_FOR_MASKED_LM_MAPPING,
            MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        )
        if any(model_class in mapping.values() for mapping in per_token_mappings):
            # These heads take one label per position: (batch_size, seq_length).
            prepared["labels"] = constant_labels(
                torch.zeros, (self.model_tester.batch_size, self.model_tester.seq_length)
            )
        return prepared
コード例 #2
0
ファイル: run_ner.py プロジェクト: Stanwang1210/HW4_1_1
    AutoModelForTokenClassification,
    AutoTokenizer,
    get_linear_schedule_with_warmup,
)
from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file


try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    from tensorboardX import SummaryWriter


# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Config classes that have an entry in the token-classification model mapping.
MODEL_CONFIG_CLASSES = [*MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()]
MODEL_TYPES = tuple([config_cls.model_type for config_cls in MODEL_CONFIG_CLASSES])

# Flat tuple of every key in each config class's pretrained_config_archive_map.
ALL_MODELS = tuple(
    archive_key
    for config_cls in MODEL_CONFIG_CLASSES
    for archive_key in config_cls.pretrained_config_archive_map.keys()
)

# NOTE(review): presumably the argument names forwarded to the tokenizer — confirm at the call site.
TOKENIZER_ARGS = ["do_lower_case", "strip_accents", "keep_accents", "use_fast"]


def set_seed(args):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        args: Object exposing ``seed`` (the seed value) and ``n_gpu``; when
            ``n_gpu`` is positive the CUDA generators are seeded as well.
    """
    seed_value = args.seed
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed_value)

    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(seed_value)

コード例 #3
0
from transformers import CONFIG_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_NAMES_MAPPING, TOKENIZER_MAPPING
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, BertConverter
from transformers.models.auto.modeling_auto import auto_class_factory

from .modeling.layoutlmv2 import (
    LayoutLMv2Config,
    LayoutLMv2ForTokenClassification,
    LayoutLMv2Tokenizer,
    LayoutLMv2TokenizerFast,
)

# Register LayoutLMv2 in the transformers lookup tables imported above.
# These calls run at import time and mutate objects owned by the transformers package.

# "layoutlmv2" key -> config class, and "layoutlmv2" key -> display name.
CONFIG_MAPPING.update([("layoutlmv2", LayoutLMv2Config)])
MODEL_NAMES_MAPPING.update([("layoutlmv2", "LayoutLMv2")])
# Config class -> (tokenizer, fast tokenizer) pair.
TOKENIZER_MAPPING.update([(LayoutLMv2Config, (LayoutLMv2Tokenizer,
                                              LayoutLMv2TokenizerFast))])
# Slow tokenizer class name -> converter; LayoutLMv2 uses BertConverter here.
SLOW_TO_FAST_CONVERTERS.update({"LayoutLMv2Tokenizer": BertConverter})
# Config class -> token-classification model class.
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update([
    (LayoutLMv2Config, LayoutLMv2ForTokenClassification)
])
# Build AutoModelForTokenClassification from the mapping after the LayoutLMv2 entry was added.
# NOTE(review): presumably auto_class_factory returns an auto class that dispatches on the
# config type — confirm against the installed transformers version.
AutoModelForTokenClassification = auto_class_factory(
    "AutoModelForTokenClassification",
    MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
    head_doc="token classification")
コード例 #4
0
    MarkupLMTokenizer,
    MarkupLMForQuestionAnswering,
    MarkupLMForTokenClassification,
    MarkupLMTokenizerFast,
)

# Register MarkupLM in the shared lookup tables. These calls run at import time
# and mutate the mapping objects in place.
# NOTE(review): the imports for these mappings are outside the visible part of the
# file; presumably they come from transformers — confirm.

# "markuplm" key -> config class.
CONFIG_MAPPING.update(
    [
        ("markuplm", MarkupLMConfig),
    ]
)
# "markuplm" key -> display name.
MODEL_NAMES_MAPPING.update([("markuplm", "MarkupLM")])

# Config class -> (tokenizer, fast tokenizer) pair.
TOKENIZER_MAPPING.update(
    [
        (MarkupLMConfig, (MarkupLMTokenizer, MarkupLMTokenizerFast)),
    ]
)

# Slow tokenizer class name -> converter; MarkupLM uses RobertaConverter here.
SLOW_TO_FAST_CONVERTERS.update(
    {"MarkupLMTokenizer": RobertaConverter}
)

# Config class -> question-answering model class.
MODEL_FOR_QUESTION_ANSWERING_MAPPING.update(
    [(MarkupLMConfig, MarkupLMForQuestionAnswering)]
)

# Config class -> token-classification model class.
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update(
    [(MarkupLMConfig, MarkupLMForTokenClassification)]
)