def __init__(
    self,
    config,
    class_labels,
    pretrained_model_path,
    dropout=0.1,
    freeze_pretrained_part=True,
    reinitialize=False,
    n_layers=6,
):
    super().__init__(config, class_labels)

    if reinitialize:
        logger.info('resetting model weights')
        config = GPT2Config.from_json_file(pretrained_model_path + '/config.json')
        config = config.to_dict()
        config['n_layer'] = n_layers
        config = GPT2Config.from_dict(config)
        self.gpt2 = GPT2Model(config)
    else:
        self.gpt2 = GPT2Model.from_pretrained(pretrained_model_path)

    self.dropout = torch.nn.Dropout(dropout)
    self.fc = torch.nn.Linear(self.gpt2.config.n_embd, self.output_dim)

    if freeze_pretrained_part:
        for param in self.gpt2.parameters():
            param.requires_grad = False
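# Hedged companion sketch (not part of the snippet above): one plausible forward
# pass for this classifier head, assuming the parent class provides self.output_dim
# and that mean pooling over GPT-2 hidden states is acceptable. All names here are
# illustrative assumptions, not the original project's code.
def forward(self, input_ids, attention_mask=None):
    # GPT2Model returns the last hidden state first: [batch, seq_len, n_embd]
    hidden_states = self.gpt2(input_ids, attention_mask=attention_mask)[0]
    pooled = hidden_states.mean(dim=1)          # [batch, n_embd]
    return self.fc(self.dropout(pooled))        # [batch, output_dim]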
def __init__(self, model_path, generation_type, use_finetuned=True):
    self.model_path = model_path
    self.batch_size = int(args["--batch-size"])
    self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    self.MAX_LEN = {
        GENERATION_TYPE_SMALL: 20,
        GENERATION_TYPE_LARGE: 500
    }[generation_type]
    logger.info(
        f"Using {generation_type} for decoding, MAX_LEN={self.MAX_LEN}")

    if use_finetuned:
        logger.info("Using a finetuned model")
        self.config = GPT2Config.from_pretrained(self.model_path)
        model = GPT2LMHeadModel.from_pretrained(self.model_path)
        with open(f"{self.model_path}/special_tokens_map.json", "r") as f:
            special_tokens = json.load(f)
        self.tokenizer.add_special_tokens(special_tokens)
    else:
        logger.info("NOT using a finetuned model")
        model = GPT2LMHeadModel(config=GPT2Config.from_pretrained(
            pretrained_model_name_or_path=self.model_path))

    self.model = model.cuda()
    self.model.eval()
def __init__(self, max_output_length=25, max_input_length=300, device='cpu',
             tokenizer_type='gpt2', bpe_model="", starter_model=None):
    if tokenizer_type == "gpt2":
        self.tokenizer = utils_tokenizer.GPT2Tokenizer()
        config = GPT2Config.from_pretrained("gpt2")
    elif tokenizer_type == "bpecap":
        self.tokenizer = utils_tokenizer.BPETokenizer(bpe_model)
        config = GPT2Config.from_dict({
            "finetuning_task": None,
            "initializer_range": 0.02,
            "layer_norm_epsilon": 1e-05,
            "n_ctx": 1024,
            "n_embd": 768,
            "n_head": 12,
            "n_layer": 12,
            "n_positions": 1024,
            "num_labels": 1,
            "resid_pdrop": 0.1,
            "use_bfloat16": False,
            "vocab_size": self.tokenizer.vocab_size
        })
    else:
        print("Tokenizer unrecognized. Should be gpt2 or bpecap.")
        exit()

    self.model = GPT2LMHeadModel(config)
    self.model.to(device)
    self.device = device

    if starter_model is not None:
        self.reload(starter_model)

    self.max_output_length = max_output_length
    self.max_input_length = max_input_length

    self.model.train()
    self.mode = "train"
def build_model(args):
    if args.pretrained_path == '':
        config = GPT2Config.from_json_file(args.model_config)
        model = GPT2LMHeadModel(config)
        tokenizer = BertTokenizerFast(args.vocab)
        # XXX: must add this, otherwise a special token inside a string is not
        # tokenized to a single token
        tokenizer.sanitize_special_tokens()
        info = None
    else:
        config = GPT2Config.from_pretrained(args.pretrained_path)
        model, info = GPT2LMHeadModel.from_pretrained(args.pretrained_path,
                                                      config=config,
                                                      output_loading_info=True)
        tokenizer = BertTokenizerFast.from_pretrained(args.pretrained_path)
    return model, tokenizer, info
def __init__(self, config, dataset):
    super(GPT2Seq, self).__init__(config, dataset)

    self.pretrained_model_path = config['pretrained_model_path']
    self.tokenizer = GPT2TokenizerFast.from_pretrained(
        self.pretrained_model_path, pad_token='[PAD]')

    self.configuration = GPT2Config.from_pretrained(
        self.pretrained_model_path, pad_token_id=self.padding_token_idx)

    self.model = GPT2LMHeadModel.from_pretrained(
        self.pretrained_model_path, config=self.configuration)
    self.model.resize_token_embeddings(len(self.tokenizer))

    if config['task_type'] == "summarization":
        self.task_text = "TL;DR:"
    elif config['task_type'] == "translation":
        self.task_text = "story:"
    elif config['task_type'] == "multi_dialog":
        self.task_text = "question:"
    else:
        raise NotImplementedError(
            "Only summarization, translation and multi_dialog are supported.")

    self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_token_idx,
                                    reduction='none')
def main():
    config = GPT2Config(
        vocab_size=30000,
        n_positions=1024,
        n_ctx=1024,
        n_embd=2560,
        n_layer=32,
        n_head=32,
        n_inner=4 * 2560,
        activation_function="gelu_new",
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        bos_token_id=30000,
        eos_token_id=30000,
        gradient_checkpointing=False,
    )
    print("initializing model")
    model = GPT2LMHeadModel(config)
    convert(
        model=model,
        m0_path="model-v1/80000/mp_rank_00_model_states.pt",
        m1_path="model-v1/80000/mp_rank_01_model_states.pt",
        save_path="model/CPM/",
    )
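# Hedged sanity check (an assumption, not part of the original conversion script):
# the config above (n_embd=2560, n_layer=32, vocab_size=30000) implies roughly
# 2.6B parameters, which can be verified before running the conversion.
def count_parameters(model):
    # total number of parameters, including embeddings
    return sum(p.numel() for p in model.parameters())

# Example: print(f"{count_parameters(model) / 1e9:.2f}B parameters")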
def load_model(train_steps, num_warmup_steps):
    try:
        # try to load the finetuned model from the local path first
        tokenizer = load_tokenizer()
        config = GPT2Config.from_pretrained(configs.model_path,
                                            return_dict=False)
        model = TFGPT2LMHeadModel.from_pretrained(configs.model_path,
                                                  return_dict=False)
        print("model loaded from local!")
    except Exception:
        # fall back to the remote pretrained checkpoint
        tokenizer = BertTokenizer.from_pretrained(
            "mymusise/gpt2-medium-chinese")
        model = TFGPT2LMHeadModel.from_pretrained(
            "mymusise/gpt2-medium-chinese", return_dict=False)
        print("model loaded from remote!")

    loss = model.compute_loss
    optimizer = nlp.optimization.create_optimizer(
        5e-5, num_train_steps=train_steps, num_warmup_steps=num_warmup_steps)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    model.compile(
        optimizer=optimizer,
        loss=[loss, *[None] * model.config.n_layer],
        # metrics=[metric]
    )
    return model
def __init__(self, model_path):
    config = GPT2Config.from_pretrained(model_path)
    config.output_hidden_states = True
    config.output_attentions = True
    self.model = GPT2LMHeadModel.from_pretrained(model_path, config=config)
    self.model.eval()
    self.context = ''
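# Hedged usage sketch (assumptions: `wrapper` is an instance of the class above,
# `model_path` also holds a matching GPT-2 tokenizer, and a transformers version
# that returns ModelOutput objects). It illustrates why output_hidden_states and
# output_attentions were switched on in the config.
import torch
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained(model_path)
input_ids = torch.tensor([tokenizer.encode("Hello world")])
with torch.no_grad():
    outputs = wrapper.model(input_ids)
logits = outputs[0]                    # [1, seq_len, vocab_size]
hidden_states = outputs.hidden_states  # tuple of n_layer + 1 tensors
attentions = outputs.attentions        # tuple of n_layer tensors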
def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
    """
    Load a pretrained model by supplying

    * the name of a remote model on s3 ("gpt2" ...)
    * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
    * OR a local path of a model trained via FARM ("some_dir/farm_model")

    :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
    :type pretrained_model_name_or_path: str
    """
    gpt2 = cls()
    if "farm_lm_name" in kwargs:
        gpt2.name = kwargs["farm_lm_name"]
    else:
        gpt2.name = pretrained_model_name_or_path

    # We need to differentiate between loading a model in FARM format and in Pytorch-Transformers format
    farm_lm_config = Path(
        pretrained_model_name_or_path) / "language_model_config.json"
    if os.path.exists(farm_lm_config):
        # FARM style
        gpt2_config = GPT2Config.from_pretrained(farm_lm_config)
        farm_lm_model = Path(
            pretrained_model_name_or_path) / "language_model.bin"
        gpt2.model = GPT2Model.from_pretrained(farm_lm_model,
                                               config=gpt2_config,
                                               **kwargs)
        gpt2.language = gpt2.model.config.language
    else:
        # Pytorch-Transformers style
        gpt2.model = GPT2Model.from_pretrained(
            str(pretrained_model_name_or_path), **kwargs)
        gpt2.language = cls._get_or_infer_language_from_name(
            language, pretrained_model_name_or_path)
    return gpt2
def __init__(
    self,
    batch_size,
    epochs,
    t_total=100000,
    config_path="config/model_config.json",
    data_path="data/train.json",
    valid_examples=100,
    vocab_path="vocab/vocab.txt",
    max_length=1024,
    warm_up_steps=0,
    lr=1e-4,
):
    super(Net, self).__init__()
    self.batch_size = batch_size
    self.epochs = epochs
    self.t_total = t_total
    self.warm_up_steps = warm_up_steps
    self.lr = lr
    self.model_name = "bert_pretrained_model"
    self.config = GPT2Config.from_json_file(config_path)
    self.model = GPT2LMHeadModel(config=self.config)
    self.data = [json.loads(line.strip()) for line in open(data_path)]
    self.dataset_train = DS(self.data[:-valid_examples],
                            vocab_path=vocab_path,
                            max_length=max_length)
    self.dataset_valid = DS(self.data[-valid_examples:],
                            vocab_path=vocab_path,
                            max_length=max_length)
def __init__(self, config):
    medium_config = GPT2Config(n_embd=1024, n_layer=24, n_head=16)
    model = GPT2LMHeadModel(medium_config)

    print("Step 1/3: Downloading weights [823 MB]...")
    wget.download(
        "https://convaisharables.blob.core.windows.net/lsp/multiref/medium_ft.pkl",
        "/tmp/medium_ft.pkl",
    )

    print("Step 2/3: Loading weights...")
    weights = torch.load("/tmp/medium_ft.pkl")
    weights["lm_head.weight"] = weights["lm_head.decoder.weight"]
    weights.pop("lm_head.decoder.weight", None)

    print("Step 3/3: Loading a model...")
    model.load_state_dict(weights)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"using device: {device}")
    model.to(device)
    model.eval()

    self.device = device
    self.model = model
    self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    print("Model is ready!")
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--config_path",
                        default="../../models/gpt2/gpt2-config.json",
                        type=str,
                        required=False)
    parser.add_argument("--model_path",
                        default="../../models/gpt2/gpt2-pytorch_model.bin",
                        type=str,
                        required=False)
    parser.add_argument("--vocab_path",
                        default="../../models/gpt2/gpt2-vocab.json",
                        type=str,
                        required=False)
    parser.add_argument("--merges_path",
                        default="../../models/gpt2/gpt2-merges.txt",
                        type=str,
                        required=False)
    parser.add_argument(
        "--sentence",
        default="In this article, I am excited to take you through",
        type=str,
        required=False)
    args = parser.parse_args()

    config = GPT2Config.from_pretrained(args.config_path)
    model = GPT2LMHeadModel.from_pretrained(args.model_path, config=config)
    tokenizer = GPT2Tokenizer(args.vocab_path, args.merges_path)

    # logging.basicConfig(filename="default.txt", level=logging.DEBUG, filemode='w')
    # gpt2_generate_greedy(model, tokenizer, sentence=sys.argv[1])
    gpt2_generate_beam_search(model, tokenizer, sentence=args.sentence)
def build_model_classifier(model_dir, device1, device2):
    config = GPT2Config()
    config = config.from_pretrained('gpt2')  # config.from_pretrained('gpt2-medium')
    config.summary_first_dropout = 0.2
    config.summary_type = "cls_index"

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")  # torch.load(tokenizer_dir)
    tokenizer.add_special_tokens({'cls_token': '[CLS]'})

    # device1 = torch.device("cuda:0")
    # device2 = torch.device("cuda:1")
    model_A, model_B = load_model(cfg, "small", tokenizer, device1, device2)
    # pdb.set_trace()
    print("model_clf device\n\n\n\n\n\n")
    print(model_A.device)
    print(model_B.device)
    print("here\n\n\n")

    which_to_train = ["A", "B", "TF"]
    model_clf = ModelClassifier(config=config,
                                which_to_train=which_to_train,
                                model_A=model_A,
                                model_B=model_B,
                                tokenizer=tokenizer,
                                device1=device1,
                                device2=device2)
    model_clf.load_model(all_model_dir=model_dir)

    return model_clf
def __init__(self, start_index):
    super().__init__()
    config = GPT2Config(output_hidden_states=True)
    self.gpt2 = GPT2LMHeadModel.from_pretrained('gpt2', config=config)
    self.vocab_size = self.gpt2.config.vocab_size
    self.start_index = start_index
def __init__(self, train_dataloader, val_dataloader=None):
    """
    Initialises Trainer by defining the model and GPU.

    Args:
        train_dataloader: torch.utils.data.DataLoader
            Dataloader to train the model on, obtained from the Dataloader class
        val_dataloader: Optional torch.utils.data.DataLoader
            Dataloader to validate the model on, obtained from the Dataloader class;
            not required if Trainer is only used for final training
    """
    # Create GPT2 config
    config = GPT2Config.from_pretrained("gpt2")

    # Load the language-modelling head model with the default config
    model = GPT2LMHeadModel.from_pretrained("gpt2", config=config)

    # Recreate the tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2',
                                              bos_token='<|startoftext|>',
                                              eos_token='<|endoftext|>',
                                              pad_token='<|pad|>')

    # Tell the model we have added bos, eos and pad tokens
    model.resize_token_embeddings(len(tokenizer))

    # Tell PyTorch to run this model on the GPU
    device = torch.device("cuda")
    model.cuda()

    self.model = model
    self.device = device
    self.train_dataloader = train_dataloader
    self.val_dataloader = val_dataloader
def __init__(self, args, task):
    super().__init__(task.target_dictionary)

    if not has_hf:
        raise ImportError(
            '\n\nPlease install huggingface/transformers with:'
            '\n\n  pip install transformers'
            '\n\nOr to make local edits, install the submodule:'
            '\n\n  git submodule update --init '
            'fairseq/models/huggingface/transformers')

    config = GPT2Config(
        vocab_size=len(task.target_dictionary),
        n_positions=args.max_target_positions + 1,
        n_ctx=args.max_target_positions,
        n_embd=args.embed_dim,
        n_layer=args.num_layers,
        n_head=args.num_attention_heads,
        resid_pdrop=args.dropout,
        embd_pdrop=args.dropout,
        attn_pdrop=args.attention_dropout,
        layer_norm_epsilon=1e-6,
    )
    self.model = GPT2LMHeadModel(config)

    # set zero embedding for padding symbol
    self.pad_idx = task.target_dictionary.pad()
    self.model.transformer.wte.weight.data[self.pad_idx].zero_()
    self.model.transformer.wpe.weight.data[0].zero_()
def get_config(self,
               gradient_checkpointing=False,
               scale_attn_by_inverse_layer_idx=False,
               reorder_and_upcast_attn=False):
    return GPT2Config(
        vocab_size=self.vocab_size,
        n_embd=self.hidden_size,
        n_layer=self.num_hidden_layers,
        n_head=self.num_attention_heads,
        n_inner=self.intermediate_size,
        activation_function=self.hidden_act,
        resid_pdrop=self.hidden_dropout_prob,
        attn_pdrop=self.attention_probs_dropout_prob,
        n_positions=self.max_position_embeddings,
        n_ctx=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        initializer_range=self.initializer_range,
        use_cache=True,
        bos_token_id=self.bos_token_id,
        eos_token_id=self.eos_token_id,
        pad_token_id=self.pad_token_id,
        gradient_checkpointing=gradient_checkpointing,
        scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
        reorder_and_upcast_attn=reorder_and_upcast_attn,
    )
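# Hedged usage note (assumption: `tester` is the test-helper object that defines
# get_config above): the returned GPT2Config can instantiate a small, randomly
# initialized model for unit tests without downloading pretrained weights.
from transformers import GPT2LMHeadModel

config = tester.get_config(gradient_checkpointing=False)
model = GPT2LMHeadModel(config)
assert model.config.n_embd == tester.hidden_size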
def prepare_config_and_inputs(self):
    input_ids = ids_tensor([self.batch_size, self.seq_length],
                           self.vocab_size)

    input_mask = None
    if self.use_input_mask:
        input_mask = ids_tensor([self.batch_size, self.seq_length],
                                vocab_size=2)

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                    self.type_vocab_size)

    mc_token_ids = None
    if self.use_mc_token_ids:
        mc_token_ids = ids_tensor([self.batch_size, self.num_choices],
                                  self.seq_length)

    sequence_labels = None
    token_labels = None
    choice_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size],
                                     self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length],
                                  self.num_labels)
        choice_labels = ids_tensor([self.batch_size], self.num_choices)

    config = GPT2Config(
        vocab_size=self.vocab_size,
        n_embd=self.hidden_size,
        n_layer=self.num_hidden_layers,
        n_head=self.num_attention_heads,
        # intermediate_size=self.intermediate_size,
        # hidden_act=self.hidden_act,
        # hidden_dropout_prob=self.hidden_dropout_prob,
        # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        n_positions=self.max_position_embeddings,
        n_ctx=self.max_position_embeddings,
        # type_vocab_size=self.type_vocab_size,
        # initializer_range=self.initializer_range
        bos_token_id=self.bos_token_id,
        eos_token_id=self.eos_token_id,
    )

    head_mask = ids_tensor(
        [self.num_hidden_layers, self.num_attention_heads], 2)

    return (
        config,
        input_ids,
        input_mask,
        head_mask,
        token_type_ids,
        mc_token_ids,
        sequence_labels,
        token_labels,
        choice_labels,
    )
def __init__(self, args, task):
    try:
        from transformers import GPT2Config, GPT2LMHeadModel
    except ImportError:
        raise ImportError(
            "\n\nPlease install huggingface/transformers with:"
            "\n\n  pip install transformers")

    super().__init__(task.target_dictionary)

    config = GPT2Config(
        vocab_size=len(task.target_dictionary),
        n_positions=args.max_target_positions + 1,
        n_ctx=args.max_target_positions,
        n_embd=args.embed_dim,
        n_layer=args.num_layers,
        n_head=args.num_attention_heads,
        resid_pdrop=args.dropout,
        embd_pdrop=args.dropout,
        attn_pdrop=args.attention_dropout,
        layer_norm_epsilon=1e-6,
    )
    self.model = GPT2LMHeadModel(config)

    # set zero embedding for padding symbol
    self.pad_idx = task.target_dictionary.pad()
    self.model.transformer.wte.weight.data[self.pad_idx].zero_()
    self.model.transformer.wpe.weight.data[0].zero_()
def __init__(self, config, dataset):
    super(GPT2, self).__init__(config, dataset)

    self.pretrained_model_path = config['pretrained_model_path']
    self.tokenizer = GPT2Tokenizer.from_pretrained(
        self.pretrained_model_path,
        bos_token=dataset.sos_token,
        eos_token=dataset.eos_token,
        pad_token=dataset.padding_token)

    self.sos_token = self.tokenizer.bos_token
    self.eos_token = self.tokenizer.eos_token
    self.sos_token_idx = self.tokenizer.bos_token_id
    self.eos_token_idx = self.tokenizer.eos_token_id
    self.padding_token_idx = self.tokenizer.pad_token_id
    self.max_seq_length = config['max_seq_length']

    self.configuration = GPT2Config.from_pretrained(
        self.pretrained_model_path,
        bos_token_id=self.sos_token_idx,
        eos_token_id=self.eos_token_idx,
        pad_token_id=self.padding_token_idx)

    self.decoder = GPT2LMHeadModel.from_pretrained(
        self.pretrained_model_path, config=self.configuration)
    self.decoder.resize_token_embeddings(len(self.tokenizer))

    self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_token_idx,
                                    reduction='none')
def test_TFGPT2(self):
    if enable_full_transformer_test:
        from transformers import GPT2Config, TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel
        model_list = [
            TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel
        ]
    else:
        from transformers import GPT2Config, TFGPT2Model
        model_list = [TFGPT2Model]

    # pretrained_weights = 'gpt2'
    tokenizer_file = 'gpt2_gpt2.pickle'
    tokenizer = self._get_tokenzier(tokenizer_file)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    config = GPT2Config()

    for model_instance_ in model_list:
        keras.backend.clear_session()
        model = model_instance_(config)
        model._set_inputs(inputs)
        predictions_original = model(inputs)
        predictions = [predictions_original[0]] + list(
            v_.numpy() for v_ in predictions_original[1])
        onnx_model = mock_keras2onnx.convert_keras(model, model.name)
        self.assertTrue(
            run_onnx_runtime(onnx_model.graph.name,
                             onnx_model,
                             inputs_onnx,
                             predictions,
                             self.model_files,
                             rtol=1.e-2,
                             atol=1.e-4))
def build_model(self):
    """Build the GPT-2 generation model."""
    # Use a BERT tokenizer
    # Initialize the tokenizer
    self.tokenizer = BertTokenizer(vocab_file=self.args.vocab_path)
    # temp = self.tokenizer.convert_tokens_to_ids('')
    # print(self.tokenizer.convert_ids_to_tokens(temp))

    # Size of the tokenizer's vocabulary
    self.vocab_size = len(self.tokenizer)
    self.pad_id = self.tokenizer.convert_tokens_to_ids(PAD)

    if self.args.pretrained_model:
        # A pretrained GPT-2 model was specified
        model = GPT2LMHeadModel.from_pretrained(self.args.pretrained_model)
    else:
        # No pretrained model specified, so initialize the model from a config file
        model_config = GPT2Config.from_json_file(self.args.model_config)
        model = GPT2LMHeadModel(config=model_config)

    # Resize the GPT-2 vocabulary to match the tokenizer's vocabulary
    model.resize_token_embeddings(self.vocab_size)
    print('model config:\n{}'.format(model.config.to_json_string()))

    return model, model.config.to_dict().get("n_ctx")
def model_fn(model_dir):
    logger.info('Loading the model.')
    vocab_file_path = os.path.join(model_dir, 'vocab.json')
    merge_file_path = os.path.join(model_dir, 'merges.txt')
    model_file_path = os.path.join(model_dir, 'lyric_model.bin')

    tokenizer = MyTokenizer(vocab_file_path, merge_file_path)
    bos = tokenizer.convert_tokens_to_ids('<s>')
    eos = tokenizer.convert_tokens_to_ids('</s>')
    pad = tokenizer.convert_tokens_to_ids('<pad>')
    unk = tokenizer.convert_tokens_to_ids('<unk>')

    config = GPT2Config(vocab_size=52003,
                        resid_pdrop=0,
                        embd_pdrop=0,
                        attn_pdrop=0,
                        summary_first_dropout=0)
    model = GPT2LMHeadModel(config)

    model.load_state_dict(torch.load(model_file_path, map_location=device),
                          strict=False)
    model.to(device)

    return model, tokenizer
def load_model(target_folder, config):
    # Parse parameters
    model_size = config.get('model', 'model_size')
    no_cuda = config.getboolean('model', 'no_cuda')

    logger.info("Loading the model...")
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not no_cuda else "cpu")

    # Tokenizer
    tokenizer = GPT2Tokenizer(os.path.join(target_folder, 'vocab.json'),
                              os.path.join(target_folder, 'merges.txt'))

    # Config
    config = GPT2Config.from_json_file(
        os.path.join(target_folder, 'config.json'))

    # Weights
    state_dict_path = glob(os.path.join(target_folder, '*.pkl'))[0]
    state_dict = torch.load(state_dict_path, map_location=device)
    if model_size == 'small':
        for key in list(state_dict.keys()):
            state_dict[key.replace('module.', '')] = state_dict.pop(key)
    state_dict['lm_head.weight'] = state_dict['lm_head.decoder.weight']
    state_dict.pop("lm_head.decoder.weight", None)

    # Model
    model = GPT2LMHeadModel(config)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    return model, tokenizer
def __init__(self, hparams):
    super().__init__()
    self.hparams = hparams
    self.d = None
    self.tokenizer = None

    # hotfixes
    if 'unfreeze' not in hparams:
        self.hparams.unfreeze = False
    if 'lang' not in hparams:
        self.hparams.lang = 'nld'
    autofix_paths(self.hparams)

    # GPT with LM head and correct embedding size
    with open(Path('data') / self.hparams.lang / 'config.json') as f:
        cfg = json.load(f)

    if self.hparams.unfreeze:
        self.n_unfreeze = 0
        if self.hparams.resume_from_checkpoint is not None:
            print('Resuming from checkpoint: unfreezing all layers')
            self.n_unfreeze = None

    config = GPT2Config.from_pretrained(self.hparams.pretrained_path, **cfg)
    if self.hparams.unfreeze and self.n_unfreeze is not None:
        config.torchscript = True
    self.m = GPT2LMHeadModel.from_pretrained(self.hparams.pretrained_path,
                                             config=config)

    # Resize vocab
    self.m.resize_token_embeddings(self.hparams.vocab_size)
def _create_model(self, precision):
    """Construct the model for benchmarking.

    Args:
        precision (Precision): precision of the model and input data, such as float32, float16.
    """
    self._config = GPT2Config(n_embd=self._args.hidden_size,
                              n_layer=self._args.num_hidden_layers,
                              n_head=self._args.num_attention_heads)

    try:
        self._model = GPT2BenchmarkModel(self._config,
                                         self._args.num_classes)
        self._model = self._model.to(dtype=getattr(torch, precision.value))
        if self._gpu_available:
            self._model = self._model.cuda()
    except BaseException as e:
        logger.error(
            'Create model with specified precision failed - model: {}, precision: {}, message: {}.'
            .format(self._name, precision, str(e)))
        return False

    self._target = torch.LongTensor(self._args.batch_size).random_(
        self._args.num_classes)
    if self._gpu_available:
        self._target = self._target.cuda()

    return True
def __init__(self, config: Munch):
    r""" Init a new GPT2 synapse module.

        Args:
            config (:obj:`munch.Munch`, `required`):
                munched config class.
    """
    super(GPT2LMSynapse, self).__init__(config=config)
    if config is None:
        config = GPT2LMSynapse.build_config()

    # Build hugging face config.
    huggingface_config = GPT2Config(
        vocab_size=bittensor.__vocab_size__,
        n_embd=bittensor.__network_dim__,
        n_layer=config.synapse.n_layer,
        n_head=config.synapse.n_head,
        n_inner=config.synapse.n_inner,
        activation_function=config.synapse.activation_function,
        resid_pdrop=config.synapse.resid_pdrop,
        embd_pdrop=config.synapse.embd_pdrop,
        attn_pdrop=config.synapse.attn_pdrop,
        layer_norm_epsilon=config.synapse.layer_norm_epsilon,
        initializer_range=config.synapse.initializer_range,
        summary_type=config.synapse.summary_type,
        summary_use_proj=config.synapse.summary_use_proj,
        summary_activation=config.synapse.summary_activation,
        summary_proj_to_labels=config.synapse.summary_proj_to_labels,
        summary_first_dropout=config.synapse.summary_first_dropout,
    )

    # encoder_layer: encodes tokenized sequences to network dim.
    # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
    self.transformer = GPT2Model(huggingface_config)

    # pooler_layer: pools the hidden units for use by the pkm dendrite rpc query.
    # [batch_size, bittensor.__network_dim__, sequence_len] -> [batch_size, bittensor.__network_dim__]
    self.pooler = GPT2Pooler(huggingface_config)

    # router: (PKM layer) queries network using pooled embeddings as context.
    # [batch_size, bittensor.__network_dim__] -> topk * [batch_size, bittensor.__network_dim__]
    self.router = PKMRouter(config, query_dim=bittensor.__network_dim__)

    # hidden_layer: transforms context and encoding to network_dim hidden units.
    # [batch_size, sequence_dim, 2 * bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
    self.hidden_layer = nn.Linear(bittensor.__network_dim__,
                                  bittensor.__network_dim__)

    # target_layer: maps from hidden layer to vocab dimension for each token. Used by MLM loss.
    # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__vocab_size__]
    self.target_layer = nn.Linear(bittensor.__network_dim__,
                                  bittensor.__vocab_size__,
                                  bias=False)

    # Loss function: MLM cross-entropy loss.
    # predicted: [batch_size, sequence_len, 1], targets: [batch_size, sequence_len, 1] -> [1]
    self.loss_fct = nn.CrossEntropyLoss()

    self.to(self.device)
def __init__(self):
    super().__init__()
    self.tokenizer = BertTokenizer(vocab_file=FLAGS.vocab_path)
    self.config = GPT2Config.from_json_file(FLAGS.model_config)
    self.model = GPT2LMHeadModel(config=self.config)
def gpt2_model(freeze=True, configuration=None):
    if configuration is None:
        configuration = GPT2Config()
    model = TFGPT2Model.from_pretrained('gpt2', config=configuration)
    if freeze:
        # Freeze all pretrained GPT-2 layers
        for layer in model.layers:
            layer.trainable = False
    return model
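# Hedged usage sketch (assumptions, not from the original source): wrapping the
# frozen TFGPT2Model returned by gpt2_model() with a small Keras classification
# head; max_len and num_labels are illustrative values.
import tensorflow as tf

def build_classifier(max_len=128, num_labels=2):
    base = gpt2_model(freeze=True)
    input_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32)
    hidden = base(input_ids)[0]                              # [batch, max_len, 768]
    pooled = tf.keras.layers.GlobalAveragePooling1D()(hidden)
    logits = tf.keras.layers.Dense(num_labels)(pooled)
    return tf.keras.Model(inputs=input_ids, outputs=logits)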
def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, full,
                                       gpt2_config_file,
                                       pytorch_dump_folder_path):
    # putting requirements here so users can see usage info before it errors
    # out on missing modules
    from io import open
    from shutil import copyfile
    import logging
    logging.basicConfig(level=logging.INFO)
    from pathlib import Path
    import torch

    # WEIGHTS_NAME = "pytorch_model.bin"
    # CONFIG_NAME = "config.json"
    from transformers import (
        CONFIG_NAME,
        WEIGHTS_NAME,
        GPT2Config,
        GPT2Model,
        load_tf_weights_in_gpt2,
    )

    gpt2_checkpoint_path = Path(gpt2_checkpoint_path)
    print(gpt2_checkpoint_path.name)

    if pytorch_dump_folder_path == '':
        prefix = '32BIT-' if full else '16BIT-'
        pytorch_dump_folder_path = 'pytorch-' + prefix + gpt2_checkpoint_path.name
    pytorch_dump_folder_path = Path(pytorch_dump_folder_path)
    pytorch_dump_folder_path.mkdir(exist_ok=True)

    # Construct model
    if gpt2_config_file == "":
        # This doesn't seem to work. We will use the hparams.json file that
        # seems to be included in the checkpoint instead.
        # config = GPT2Config()
        gpt2_config_file = gpt2_checkpoint_path / 'hparams.json'
    config = GPT2Config.from_json_file(gpt2_config_file)

    model = GPT2Model(config)

    # Load weights from numpy
    load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path)

    if not full:
        model.half()

    # Save pytorch-model
    pytorch_weights_dump_path = pytorch_dump_folder_path / WEIGHTS_NAME
    pytorch_config_dump_path = pytorch_dump_folder_path / CONFIG_NAME

    print("Save PyTorch model to {}".format(str(pytorch_weights_dump_path)))
    torch.save(model.state_dict(), pytorch_weights_dump_path)

    print("Save configuration file to: " + str(pytorch_config_dump_path))
    with pytorch_config_dump_path.open("w", encoding="utf-8") as f:
        f.write(config.to_json_string())

    copyfile(gpt2_checkpoint_path / 'vocab.bpe',
             pytorch_dump_folder_path / 'merges.txt')
    copyfile(gpt2_checkpoint_path / 'encoder.json',
             pytorch_dump_folder_path / 'vocab.json')