Example #1
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length],
                                    vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                        self.type_vocab_size)

        mc_token_ids = None
        if self.use_mc_token_ids:
            mc_token_ids = ids_tensor([self.batch_size, self.num_choices],
                                      self.seq_length)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size],
                                         self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length],
                                      self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = CTRLConfig(
            vocab_size=self.vocab_size,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            n_ctx=self.max_position_embeddings,
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range,
            return_dict=True,
        )

        head_mask = ids_tensor(
            [self.num_hidden_layers, self.num_attention_heads], 2)

        return (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        )

    def get_config(self):
        return CTRLConfig(
            vocab_size=self.vocab_size,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range,
            pad_token_id=self.pad_token_id,
        )
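
For reference, here is a minimal standalone sketch (not part of the original example) of how a small CTRLConfig like the one built above can be exercised; the sizes and shapes used here are illustrative assumptions, not values taken from the tester.

import torch
from transformers import CTRLConfig, CTRLModel

# Build a deliberately tiny config, mirroring the fields set in get_config() above.
config = CTRLConfig(vocab_size=99, n_embd=32, n_layer=2, n_head=4,
                    n_positions=512, return_dict=True)
model = CTRLModel(config)
model.eval()

# Random token ids shaped (batch_size, seq_length), as ids_tensor() would produce.
input_ids = torch.randint(0, config.vocab_size, (2, 7))
with torch.no_grad():
    outputs = model(input_ids)

# last_hidden_state comes back as (batch_size, seq_length, n_embd).
assert outputs.last_hidden_state.shape == (2, 7, config.n_embd)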
Example #3
import argparse
import os
import sys

from transformers import CTRLConfig, CTRLLMHeadModel

parser = argparse.ArgumentParser()
parser.add_argument(
    '--tf_checkpoint',
    type=str,
    required=True,
    help=
    'location of the .data file of the TensorFlow checkpoint. This is NOT the model folder. This could be <path>/seqlen256_v1.ckpt/model.ckpt-413000.data-00000-of-00001'
)
parser.add_argument('--pytorch_checkpoint',
                    type=str,
                    default='pytorch_model.bin',
                    help='location of where to write the PyTorch checkpoint')
parser.add_argument('--num_layers',
                    type=int,
                    default=48,
                    help='number of layers in the model being converted')

args = parser.parse_args()

model = CTRLLMHeadModel(CTRLConfig())

if os.path.isfile(args.tf_checkpoint):
    print('INFO :: Found TensorFlow checkpoint')
else:
    print(
        'INFO :: TensorFlow checkpoint not found. Please verify location of the .data file or raise GitHub issue if problem persists.'
    )

if os.path.isfile(args.pytorch_checkpoint):
    print(
        'PyTorch model already exists. Will not over-write. Please delete old checkpoint or specify different file name'
    )
    sys.exit(1)

chkpt_for_reader = '.'.join(args.tf_checkpoint.split('.')[:-1])
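
The final line strips the .data-00000-of-00001 shard suffix, leaving the checkpoint prefix (e.g. model.ckpt-413000) that TensorFlow's checkpoint reader expects. As a hypothetical follow-on sketch (the actual variable-to-parameter mapping performed by the conversion script is not shown here), the prefix could be inspected like this:

import tensorflow as tf

# Open the checkpoint prefix and list the stored variables; each tensor could
# then be fetched with reader.get_tensor(name) and copied into the model's parameters.
reader = tf.train.load_checkpoint(chkpt_for_reader)
for name, shape in reader.get_variable_to_shape_map().items():
    print(name, shape)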