コード例 #1
0
        def prepare_config_and_inputs(self):
            """Build the model configuration and the input tensors for a test.

            Every tensor comes from ``OpenAIGPTModelTest.ids_tensor``, called
            with a shape list and an integer taken from this object's
            attributes.  Optional tensors stay ``None`` when the matching
            ``use_*`` flag is falsy.

            Returns:
                The 7-tuple ``(config, input_ids, token_type_ids,
                position_ids, mc_labels, lm_labels, mc_token_ids)``.  Note
                that ``token_type_ids`` comes before ``position_ids`` in the
                tuple even though ``position_ids`` is generated first.
            """
            make_ids = OpenAIGPTModelTest.ids_tensor

            def per_token_ids(limit):
                # Fresh [batch_size, n_choices, seq_length] shape list on
                # every call, paired with the given second argument.
                return make_ids(
                    [self.batch_size, self.n_choices, self.seq_length], limit)

            # Generation order is kept as input -> position -> token type ->
            # labels, in case ids_tensor draws from a shared random state.
            input_ids = per_token_ids(self.vocab_size)

            position_ids = (
                per_token_ids(self.n_positions) if self.use_position_ids else None
            )

            if self.use_token_type_ids:
                # Token type ids are requested against vocab_size + n_special.
                token_type_ids = per_token_ids(self.vocab_size + self.n_special)
            else:
                token_type_ids = None

            if self.use_labels:
                mc_labels = make_ids([self.batch_size], self.type_sequence_label_size)
                lm_labels = per_token_ids(self.num_labels)
                mc_token_ids = make_ids(
                    [self.batch_size, self.n_choices], self.seq_length)
            else:
                mc_labels = lm_labels = mc_token_ids = None

            config_kwargs = dict(
                vocab_size_or_config_json_file=self.vocab_size,
                n_positions=self.n_positions,
                n_special=self.n_special,
                n_embd=self.n_embd,
                n_layer=self.n_layer,
                n_head=self.n_head,
                afn=self.afn,
                resid_pdrop=self.resid_pdrop,
                attn_pdrop=self.attn_pdrop,
                embd_pdrop=self.embd_pdrop,
                initializer_range=self.initializer_range,
            )
            config = OpenAIGPTConfig(**config_kwargs)

            return (config, input_ids, token_type_ids, position_ids,
                    mc_labels, lm_labels, mc_token_ids)
コード例 #2
0
 def test_config_to_json_file(self):
     """Check that a config saved with ``to_json_file`` reloads unchanged.

     The config is written to a uniquely named temporary file instead of a
     fixed ``/tmp/config.json`` path, so the test does not depend on a
     ``/tmp`` directory existing and cannot collide with another run using
     the same path.  The file is removed in a ``finally`` clause so it is
     not left behind when saving or loading raises.
     """
     import tempfile  # local import: only this test needs it

     config_first = OpenAIGPTConfig(vocab_size_or_config_json_file=99, n_embd=37)

     # mkstemp returns an open descriptor; close it right away so the config
     # code can reopen the path by name (required on Windows).
     file_descriptor, json_file_path = tempfile.mkstemp(suffix=".json")
     os.close(file_descriptor)
     try:
         config_first.to_json_file(json_file_path)
         config_second = OpenAIGPTConfig.from_json_file(json_file_path)
     finally:
         os.remove(json_file_path)

     self.assertEqual(config_second.to_dict(), config_first.to_dict())
コード例 #3
0
 def test_config_to_json_string(self):
     """Check that ``to_json_string`` emits the values given to the constructor.

     The JSON produced by the config is parsed back and its ``vocab_size``
     and ``n_embd`` entries are compared with the constructor arguments.
     """
     expected_fields = (("vocab_size", 99), ("n_embd", 37))

     serialized = OpenAIGPTConfig(
         vocab_size_or_config_json_file=99, n_embd=37
     ).to_json_string()
     parsed = json.loads(serialized)

     # Same checks, in the same order: vocab_size first, then n_embd.
     for field_name, expected_value in expected_fields:
         self.assertEqual(parsed[field_name], expected_value)
コード例 #4
0
        # Initialize the weight of the linear classifier layer with xavier normal values
        nn.init.xavier_normal_(self.classifier.weight).to(device)

    # Define the forward function taking in the necessary arguments
    def forward(self, input_ids, token_type_ids=None):
        """Compute classification logits for the given token ids.

        Args:
            input_ids: token ids, passed to the GPT model as its first
                argument.
            token_type_ids: optional token type ids, forwarded to the GPT
                model by keyword.

        Returns:
            The result of ``self.classifier`` applied to the dropout of the
            GPT model's output.
        """
        # Pass token_type_ids by keyword: as a second positional argument it
        # would land in whatever parameter the GPT model declares second,
        # which for OpenAIGPTModel.forward is not token_type_ids.
        # NOTE(review): assumes self.openai_gpt is an OpenAIGPTModel (it is
        # built outside this view) whose forward accepts a `token_type_ids`
        # keyword - confirm against the constructor.
        output = self.openai_gpt(input_ids, token_type_ids=token_type_ids)
        # Apply dropout to the GPT output
        output = self.dropout(output)
        # Feed the result through the linear classifier to get the logits
        logits = self.classifier(output)
        return logits


# Create the configuration from the OpenAIGPTConfig class
# NOTE(review): `config` is not passed to the model below - confirm it is
# used later in the file.
config = OpenAIGPTConfig()
num_labels = 2
# Instantiate the custom GPT model
model = OpenAIGPT_Classification(num_labels)

# Create a tokenizer object from the OpenAIGPTTokenizer with the weights 'openai-gpt'
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')

# Download the IMDB review dataset as CSV text.  The timeout keeps the script
# from hanging forever on a stalled connection.
response = requests.get(
    'https://raw.githubusercontent.com/sugi-chan/custom_bert_pipeline/master/IMDB%20Dataset.csv',
    timeout=60,
)
# Fail here on an HTTP error instead of saving an error page as the dataset.
response.raise_for_status()
r = response.content.decode('utf-8')

# Write with the same encoding that was used to decode, so the saved file
# does not depend on the platform's default text encoding.
with open('imdb_review.csv', 'w', encoding='utf-8') as write_file:
    write_file.write(r)

# Read the csv file and make pandas dataframe from it