Code example #1
File: albert.py  Project: marmg/NER-Demo
    def __init__(self, path: str, device: str = 'cpu'):
        """ Initialize the ALBERT NER model from a local directory containing its config, tokenizer and weights. """
        if not os.path.exists(path):
            raise NotADirectoryError(
                f"{os.path.abspath(path)} must be a directory containing the model files: config, tokenizer, weights.")

        files = os.listdir(path)
        if CONFIG_JSON_FILE not in files:
            raise FileNotFoundError(f"{CONFIG_JSON_FILE} must be in {path}.")
        if WEIGHTS_FILE not in files:
            raise FileNotFoundError(f"{WEIGHTS_FILE} must be in {path}.")

        with open(os.path.join(path, CONFIG_JSON_FILE), "r") as f:
            config = json.load(f)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        weights = torch.load(
            os.path.join(path, WEIGHTS_FILE),
            map_location=lambda storage, loc: storage,
        )
        # Build the model from the loaded config and the saved weights
        config = AlbertConfig.from_dict(config)
        self.model = AlbertForTokenClassification(config)
        self.model.load_state_dict(weights)
        self.model = self.model.eval()
        self.args = albert_args_ner
        if device == "cuda":
            logger.debug("Setting model with CUDA")
            self.args['device'] = 'cuda'
            self.model.to('cuda')
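
A usage sketch for this constructor (the class name AlbertNER, the model directory path, and the predict call are assumptions for illustration, not taken from the NER-Demo project):

# Hypothetical usage of the constructor above; the names below are assumptions.
from albert import AlbertNER  # assumed class name exported by albert.py

ner = AlbertNER("models/albert-ner", device="cpu")  # raises if config/weights are missing
# entities = ner.predict("Angela Merkel visited Berlin.")  # assumed inference API
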
Code example #2
File: train.py  Project: gusalsdmlwlq/LegALBERT
        "attention_probs_dropout_prob": 0,
        "bos_token_id": 2,
        "classifier_dropout_prob": 0.1,
        "embedding_size": 128,
        "eos_token_id": 3,
        "hidden_act": "gelu_new",
        "hidden_dropout_prob": 0,
        "hidden_size": 4096,
        "initializer_range": 0.02,
        "inner_group_num": 1,
        "intermediate_size": 16384,
        "layer_norm_eps": 1e-12,
        "max_position_embeddings": 512,
        "model_type": "albert",
        "num_attention_heads": 64,
        "num_hidden_groups": 1,
        "num_hidden_layers": 12,
        "pad_token_id": 0,
        "type_vocab_size": 2,
        "vocab_size": 30000
    }
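    # Copy each ALBERT hyperparameter value from config_dict into albert_config,
    # then turn the completed dict into an AlbertConfig object.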
    for parameter in albert_config.keys():
        albert_config[parameter] = config_dict[parameter]
    albert_config = AlbertConfig.from_dict(albert_config)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    init_process(0, "gloo", config, albert_config, logger)
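
init_process is defined elsewhere in train.py; a minimal sketch of what such a helper typically does with rank 0 and the "gloo" backend (the body below is an assumption, not the LegALBERT implementation):

import os
import torch.distributed as dist

def init_process(rank, backend, config, albert_config, logger, world_size=1):
    # Assumed sketch: set up torch.distributed, then hand off to the training code.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group(backend=backend, rank=rank, world_size=world_size)
    logger.info("Rank %d initialized with backend %s", rank, backend)
    # ... build the ALBERT model from albert_config and run training here ...
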