def __init__(self, path: str, device: str = 'cpu'):
    """Load an Albert token-classification (NER) model from a directory.

    Args:
        path: Directory containing the model files: config, tokenizer, weights.
        device: ``'cpu'`` (default) or ``'cuda'``; when ``'cuda'``, the model
            is moved to the GPU after loading.

    Raises:
        NotADirectoryError: If ``path`` is not an existing directory.
        FileNotFoundError: If the config or weights file is missing from ``path``.
    """
    # isdir (not exists): a plain file at `path` used to pass the guard and
    # then fail inside os.listdir with a less helpful error.
    if not os.path.isdir(path):
        raise NotADirectoryError(
            f"{os.path.abspath(path)} must be a directory containing the model files: config, tokenizer, weights.")
    files = os.listdir(path)
    if CONFIG_JSON_FILE not in files:
        raise FileNotFoundError(f"{CONFIG_JSON_FILE} must be in {path}.")
    if WEIGHTS_FILE not in files:
        raise FileNotFoundError(f"{WEIGHTS_FILE} must be in {path}.")

    with open(os.path.join(path, CONFIG_JSON_FILE), "r") as f:
        config = json.load(f)

    self.tokenizer = AutoTokenizer.from_pretrained(path)
    # Deserialize the checkpoint onto CPU regardless of where it was saved;
    # it is moved to CUDA below only if requested.
    weights = torch.load(os.path.join(path, WEIGHTS_FILE),
                         map_location=lambda storage, loc: storage)

    # Load pretrained model/tokenizer
    config = AlbertConfig.from_dict(config)
    self.model = AlbertForTokenClassification(config)
    self.model.load_state_dict(weights)
    self.model = self.model.eval()

    # Copy the shared module-level defaults: the original aliased
    # `albert_args_ner` directly, so setting 'device' below mutated the
    # shared dict for every other consumer/instance.
    self.args = dict(albert_args_ner)
    if device == "cuda":
        logger.debug("Setting model with CUDA")
        self.args['device'] = 'cuda'
        self.model.to('cuda')
"attention_probs_dropout_prob": 0,
"bos_token_id": 2,
"classifier_dropout_prob": 0.1,
"embedding_size": 128,
"eos_token_id": 3,
"hidden_act": "gelu_new",
"hidden_dropout_prob": 0,
"hidden_size": 4096,
"initializer_range": 0.02,
"inner_group_num": 1,
"intermediate_size": 16384,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "albert",
"num_attention_heads": 64,
"num_hidden_groups": 1,
"num_hidden_layers": 12,
"pad_token_id": 0,
"type_vocab_size": 2,
"vocab_size": 30000
}
# NOTE(review): the opening of this dict literal (its `name = {` line) is not
# visible in this chunk — presumably it is `albert_config`, given the loop
# below iterates albert_config.keys(); confirm against the full file.

# Overwrite every default hyperparameter with the corresponding value from
# config_dict (raises KeyError if config_dict is missing any of these keys).
for parameter in albert_config.keys():
    albert_config[parameter] = config_dict[parameter]
albert_config = AlbertConfig.from_dict(albert_config)

# Configure the root logger to emit INFO-level records to stderr.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
stream_handler = logging.StreamHandler()
logger.addHandler(stream_handler)

# Kick off processing as rank 0 with the "gloo" (CPU) distributed backend.
# NOTE(review): `init_process` and `config` are defined elsewhere in the file.
init_process(0, "gloo", config, albert_config, logger)