Example #1
import torch
from torch.optim import RMSprop
from transformers import BertConfig, BertForSequenceClassification

# The opening of this call is truncated in the source; the configuration is
# reconstructed to mirror Example #2 (hidden_size matches the 512-dimensional
# embedding rows zeroed below).
model = BertForSequenceClassification(
    BertConfig(
        max_position_embeddings=512,
        intermediate_size=1024,
        hidden_size=512,
        num_attention_heads=8,
        num_hidden_layers=6,
        type_vocab_size=5,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        num_labels=2
    )
)
model.to(DEVICE)

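# Vocabulary indices of the padding and unknown tokens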
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

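# Zero-initialize the <pad> and <unk> embedding rows (hidden_size = 512)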
model.bert.embeddings.word_embeddings.weight.data[PAD_IDX] = torch.zeros(512)
model.bert.embeddings.word_embeddings.weight.data[UNK_IDX] = torch.zeros(512)

print(
    f'Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}'
)

optimizer = RMSprop(model.parameters(), lr=5e-6)

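# Running counters for iteration count, loss, and accuracy bookkeeping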
itr = 1
epochs = 20
total_loss = 0
total_len = 0
total_correct = 0

for epoch in range(epochs):
    model.train()
    print(f"Epoch {epoch + 1}/{epochs}")
    for batch in train_iterator:
        optimizer.zero_grad()

        outputs = model(batch.text, labels=batch.label)
        loss, logits = outputs  # older transformers (or return_dict=False) returns a (loss, logits) tuple
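Example #1 breaks off mid-loop here. A minimal sketch of how the loop would typically continue, assuming the running counters above are meant for this bookkeeping (the per-epoch report format is an assumption):

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate loss and accuracy statistics in the running counters
        total_loss += loss.item() * batch.label.size(0)
        preds = torch.argmax(logits, dim=1)
        total_correct += (preds == batch.label).sum().item()
        total_len += batch.label.size(0)
        itr += 1

    # Report the average loss and accuracy accumulated so far
    print(f'loss: {total_loss / total_len:.4f}, acc: {total_correct / total_len:.4f}')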
Example #2
import torch
from torch.optim import RMSprop
from transformers import BertConfig, BertForSequenceClassification

# The opening of this call is truncated in the source; the class is implied
# by the (loss, logits) usage below.
model = BertForSequenceClassification(
    BertConfig(
        max_position_embeddings=512,
        intermediate_size=1024,
        hidden_size=512,
        num_attention_heads=8,
        num_hidden_layers=6,
        type_vocab_size=5,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        num_labels=2
    )
)
model.to(device)

print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters.')

optimizer = RMSprop(model.parameters(), lr=1e-6)

itr = 1
epochs = 30
total_loss = 0
total_len = 0
total_correct = 0

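# Training mode is set once here, outside the epoch loop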
model.train()
for epoch in range(epochs):
    for batch in train_iterator:
        optimizer.zero_grad()

        outputs = model(batch.text, labels=batch.label)
        loss, logits = outputs