def prepare_config_and_inputs(self):
    # Random token ids for a batch of sequences.
    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

    input_mask = None
    if self.use_input_mask:
        # Binary attention mask (vocab_size=2 yields random 0/1 entries).
        input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

    mc_token_ids = None
    if self.use_mc_token_ids:
        mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

    sequence_labels = None
    token_labels = None
    choice_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
        choice_labels = ids_tensor([self.batch_size], self.num_choices)

    config = CTRLConfig(
        vocab_size=self.vocab_size,
        n_embd=self.hidden_size,
        n_layer=self.num_hidden_layers,
        n_head=self.num_attention_heads,
        # intermediate_size=self.intermediate_size,
        # hidden_act=self.hidden_act,
        # hidden_dropout_prob=self.hidden_dropout_prob,
        # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        n_positions=self.max_position_embeddings,
        n_ctx=self.max_position_embeddings,
        # type_vocab_size=self.type_vocab_size,
        # initializer_range=self.initializer_range,
        return_dict=True,
    )

    # Random per-layer, per-head mask (0/1 entries).
    head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

    return (
        config,
        input_ids,
        input_mask,
        head_mask,
        token_type_ids,
        mc_token_ids,
        sequence_labels,
        token_labels,
        choice_labels,
    )
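# For context: `ids_tensor` above comes from the transformers test utilities.
# The sketch below is an assumption, not the library's exact code; it shows the
# typical behaviour of that helper (a random torch.long tensor with values in
# [0, vocab_size)), under a different name to avoid shadowing the real import.
def _ids_tensor_sketch(shape, vocab_size, rng=None):
    import random

    import torch

    rng = rng or random.Random()
    total = 1
    for dim in shape:
        total *= dim
    values = [rng.randint(0, vocab_size - 1) for _ in range(total)]
    return torch.tensor(values, dtype=torch.long).view(shape).contiguous()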
def get_config(self):
    return CTRLConfig(
        vocab_size=self.vocab_size,
        n_embd=self.hidden_size,
        n_layer=self.num_hidden_layers,
        n_head=self.num_attention_heads,
        # intermediate_size=self.intermediate_size,
        # hidden_act=self.hidden_act,
        # hidden_dropout_prob=self.hidden_dropout_prob,
        # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        n_positions=self.max_position_embeddings,
        # type_vocab_size=self.type_vocab_size,
        # initializer_range=self.initializer_range,
        pad_token_id=self.pad_token_id,
    )
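# Usage sketch (an addition for illustration, not part of the test suite): the
# config returned above is small enough to build a throwaway CTRLModel for
# quick shape checks. `tester` below is a hypothetical instance of the
# surrounding tester class.
def _demo_tiny_ctrl_forward(tester):
    import torch

    from transformers import CTRLModel

    config = tester.get_config()
    model = CTRLModel(config).eval()
    input_ids = torch.randint(0, config.vocab_size, (2, 7))
    with torch.no_grad():
        hidden = model(input_ids).last_hidden_state
    # One hidden state per input token, with width n_embd.
    assert hidden.shape == (2, 7, config.n_embd)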
parser.add_argument(
    '--tf_checkpoint',
    type=str,
    required=True,
    help='location of the .data file of the TensorFlow checkpoint. This is NOT the model folder. '
         'This could be <path>/seqlen256_v1.ckpt/model.ckpt-413000.data-00000-of-00001',
)
parser.add_argument(
    '--pytorch_checkpoint',
    type=str,
    default='pytorch_model.bin',
    help='location of where to write the PyTorch checkpoint',
)
parser.add_argument(
    '--num_layers',
    type=int,
    default=48,
    help='number of layers in the model being converted',
)
args = parser.parse_args()

model = CTRLLMHeadModel(CTRLConfig())

if os.path.isfile(args.tf_checkpoint):
    print('INFO :: Found TensorFlow checkpoint')
else:
    print(
        'INFO :: TensorFlow checkpoint not found. Please verify the location of the .data file '
        'or raise a GitHub issue if the problem persists.'
    )
    sys.exit(1)

if os.path.isfile(args.pytorch_checkpoint):
    print(
        'PyTorch model already exists. Will not overwrite. Please delete the old checkpoint or '
        'specify a different file name.'
    )
    sys.exit(1)

# Strip the trailing ".data-00000-of-00001" shard suffix so the remaining
# prefix can be handed to the TensorFlow checkpoint reader.
chkpt_for_reader = '.'.join(args.tf_checkpoint.split('.')[:-1])
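# Sanity-check sketch (an addition for illustration, not part of the original
# script): the stripped prefix is the form the TF2 checkpoint reader expects.
# Listing the stored variables this way is a quick way to see what the
# conversion logic that follows has to map onto the PyTorch model.
import tensorflow as tf  # may already be imported at the top of the script

reader = tf.train.load_checkpoint(chkpt_for_reader)
for name, shape in sorted(reader.get_variable_to_shape_map().items()):
    # reader.get_tensor(name) would return the numpy array for this variable.
    print('TF variable:', name, shape)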