def __init__(
        self,
        pretrained_bert_model,
        language,
        name,
        prediction_type,
        output_hidden_states,
        output_attentions,
        attention_length_before=1,
        attention_length_after=1,
        config_path=None,
        max_length=512,
        number_of_sentence=1,
        number_of_sentence_before=0,
        number_of_sentence_after=0,
        seed=1111,
        hidden_dropout_prob=0.,
        attention_probs_dropout_prob=0.,
        stop_attention_at_sent_before=None,
        stop_attention_before_sent=0,
):
    super(BertExtractor, self).__init__()
    # Load pre-trained model tokenizer (vocabulary)
    # Crucially, do not do basic tokenization; PTB is tokenized. Just do wordpiece tokenization.
    if config_path is None:
        configuration = BertConfig()
        configuration.hidden_dropout_prob = hidden_dropout_prob
        configuration.attention_probs_dropout_prob = attention_probs_dropout_prob
        configuration.output_hidden_states = output_hidden_states
        configuration.output_attentions = output_attentions
        self.model = BertModel.from_pretrained(pretrained_bert_model, config=configuration)
    else:
        self.model = BertModel.from_pretrained(pretrained_bert_model)
    self.tokenizer = AutoTokenizer.from_pretrained(pretrained_bert_model)

    self.language = language
    self.attention_length_before = attention_length_before
    self.attention_length_after = attention_length_after
    self.pretrained_bert_model = pretrained_bert_model
    self.NUM_HIDDEN_LAYERS = self.model.config.num_hidden_layers
    self.FEATURE_COUNT = self.model.config.hidden_size
    self.NUM_ATTENTION_HEADS = self.model.config.num_attention_heads
    self.name = name

    self.config = {
        'max_length': max_length,
        'seed': seed,
        'number_of_sentence': number_of_sentence,
        'number_of_sentence_before': number_of_sentence_before,
        'number_of_sentence_after': number_of_sentence_after,
        'attention_length_before': attention_length_before,
        'attention_length_after': attention_length_after,
        'stop_attention_at_sent_before': stop_attention_at_sent_before,
        'stop_attention_before_sent': stop_attention_before_sent,
        'output_hidden_states': output_hidden_states,
        'output_attentions': output_attentions,
        'model_type': 'bert',
        'hidden_size': self.model.config.hidden_size,
        'hidden_act': self.model.config.hidden_act,
        'initializer_range': self.model.config.initializer_range,
        'vocab_size': self.model.config.vocab_size,
        'hidden_dropout_prob': self.model.config.hidden_dropout_prob,
        'num_attention_heads': self.model.config.num_attention_heads,
        'type_vocab_size': self.model.config.type_vocab_size,
        'max_position_embeddings': self.model.config.max_position_embeddings,
        'num_hidden_layers': self.model.config.num_hidden_layers,
        'intermediate_size': self.model.config.intermediate_size,
        'attention_probs_dropout_prob': self.model.config.attention_probs_dropout_prob,
    }
    if config_path is not None:
        with open(config_path, 'r') as f:
            self.config.update(json.load(f))

    self.prediction_type = prediction_type  # ['sentence', 'token-level']
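# --- Minimal usage sketch (not part of the source) --------------------------
# Shows the third-party imports the constructor above relies on and how the
# surrounding BertExtractor class might be instantiated. The checkpoint name
# and all argument values below are illustrative assumptions; only the
# parameter names come from the signature above.
import json

from transformers import AutoTokenizer, BertConfig, BertModel

extractor = BertExtractor(
    pretrained_bert_model='bert-base-cased',  # assumed checkpoint name
    language='english',
    name='bert-base-cased',
    prediction_type='sentence',               # per the trailing comment: 'sentence' or 'token-level'
    output_hidden_states=True,
    output_attentions=False,
)
num_layers = extractor.NUM_HIDDEN_LAYERS      # e.g. 12 for a bert-base checkpoint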
def test_ipu_cpu_match(recompute_checkpoint, embedding_serialization):
    """
    Test that the BERT model run on the IPU approximately matches the same model run on the CPU.
    """
    import warnings
    warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)

    # Config
    args = """
    --config unit_test
    --lr-schedule constant
    --layers-per-ipu 0 3
    --vocab-size 30400
    --batch-size 10
    --batches-per-step 1
    --gradient-accumulation 10
    --enable-half-partials False
    --optimizer AdamW
    --learning-rate 0.001
    """.split()
    config = BertConfig(**(vars(parse_bert_args(args))))
    config.hidden_dropout_prob = 0.0
    config.attention_probs_dropout_prob = 0.0
    config.recompute_checkpoint_every_layer = recompute_checkpoint
    config.embedding_serialization = embedding_serialization

    # Models and options
    opts = get_options(config)
    opts.anchorMode(poptorch.AnchorMode.Final)
    model_cpu = PipelinedBertWithLoss(config).train()
    model_ipu = PipelinedBertWithLoss(config).train()
    model_ipu.load_state_dict(model_cpu.state_dict())

    # Check that the copy was successful
    assert model_ipu is not model_cpu
    assert all((a == b).all() for a, b in zip(model_cpu.parameters(), model_ipu.parameters()))

    optimizer_cpu = torch.optim.AdamW(model_cpu.parameters(), lr=0.001)
    optimizer_ipu = poptorch.optim.AdamW(model_ipu.parameters(), lr=0.001, loss_scaling=1.0)
    poptorch_model = poptorch.trainingModel(model_ipu, opts, optimizer=optimizer_ipu)

    # Input
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    inputs = tokenizer(
        "Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute yo"
        "Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute yo"
        "Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute yo"
        "Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute Hello, my dog is cute",
        return_tensors="pt")
    inputs['labels'] = torch.randint(0, config.vocab_size, [1, config.mask_tokens], dtype=torch.long)
    # Random next-sentence-prediction label in {0, 1}
    inputs['next_sentence_label'] = torch.randint(0, 2, [1], dtype=torch.long)
    inputs['masked_lm_positions'] = torch.randint(0, config.sequence_length, [1, config.mask_tokens], dtype=torch.long)

    batch_size = config.batch_size
    batch = (inputs['input_ids'].repeat(batch_size, 1),
             inputs['attention_mask'].repeat(batch_size, 1),
             inputs['token_type_ids'].repeat(batch_size, 1),
             inputs['masked_lm_positions'].repeat(batch_size, 1),
             inputs['labels'].repeat(batch_size, 1),
             inputs['next_sentence_label'].repeat(batch_size, 1))

    batch_cpu = (inputs['input_ids'].repeat(1, 1),
                 inputs['attention_mask'].repeat(1, 1),
                 inputs['token_type_ids'].repeat(1, 1),
                 inputs['masked_lm_positions'].repeat(1, 1),
                 inputs['labels'].repeat(1, 1),
                 inputs['next_sentence_label'].repeat(1, 1))

    # Training loop
    for step in range(10):
        # Step the CPU model, accumulating gradients over batch_size micro-batches
        optimizer_cpu.zero_grad()
        for b in range(batch_size):
            cpu_output = model_cpu(*batch_cpu)
            cpu_loss = cpu_output[0]
            cpu_loss.div(batch_size).backward()
        optimizer_cpu.step()

        # Step the IPU model (accumulation is handled by poptorch via the options)
        ipu_output = poptorch_model(*batch)
        ipu_loss = ipu_output[0]

        with torch.no_grad():
            print(f"CPU Loss: {cpu_loss}, IPU Loss: {ipu_loss}")
            # Check the losses are approximately equal
            assert np.allclose(cpu_loss.numpy(), ipu_loss.numpy(), atol=1e-6)
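# --- Hedged usage sketch (not part of the source) ----------------------------
# The test above receives its two arguments from pytest parametrization in the
# original file; the decorator values below are assumptions, as is the exact
# set of third-party imports. parse_bert_args, get_options, PipelinedBertWithLoss
# and BertConfig are repo-specific helpers whose module paths are not shown here.
import numpy as np
import pytest
import torch
import poptorch
from transformers import BertTokenizer


@pytest.mark.parametrize("recompute_checkpoint", [False, True])
@pytest.mark.parametrize("embedding_serialization", [1, 5])
def test_ipu_cpu_match_sketch(recompute_checkpoint, embedding_serialization):
    # Delegates to the test defined above; the parameter grids are illustrative.
    test_ipu_cpu_match(recompute_checkpoint, embedding_serialization)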