Example #1
# load dataset
dataset = Task71Dataset("train", tokenizer=tokenizer)

collator_fn = Task71aCollatorFeatures(device='cpu')
loader = DataLoader(dataset,
                    batch_size=options.batch_size,
                    drop_last=False,
                    shuffle=False,  # keep dataset order so the saved features stay aligned
                    collate_fn=collator_fn)

# create model
encoder = BertModel.from_pretrained('bert-base-uncased')

# change config if you want
# encoder.config.output_hidden_states = True
model = BertClassificationHead(encoder,
                               encoder.config.hidden_size,
                               num_classes=2,
                               drop=0.2)
if options.modelckpt is not None:
    state_dict = torch.load(options.modelckpt, map_location='cpu')
    model.load_state_dict(state_dict)

model.to(DEVICE)

res_dict = get_features(loader, model, DEVICE)
os.makedirs('./features_train', exist_ok=True)
with open('./features_train/bert_features.pkl', 'wb') as f:
    pickle.dump(res_dict, f)
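None of the snippets on this page define `BertClassificationHead`. The minimal sketch below is only an assumption consistent with how the class is called here (encoder, hidden size, `num_classes`, `drop`, optional `act`); the project's actual pooling and activation choices may differ.

import torch.nn as nn

class BertClassificationHead(nn.Module):
    """Hypothetical classification head over a BERT encoder (sketch, not the project code)."""

    def __init__(self, encoder, hidden_size, num_classes=2, drop=0.2, act=None):
        super().__init__()
        self.encoder = encoder
        self.drop = nn.Dropout(drop)
        self.clf = nn.Linear(hidden_size, num_classes)
        # assumed mapping for the `act` strings seen in these examples
        self.act = {'relu': nn.ReLU(), 'sigmoid': nn.Sigmoid()}.get(act, nn.Identity())

    def forward(self, input_ids, attention_mask=None):
        # assumption: classify from the encoder's pooled [CLS] output
        outputs = self.encoder(input_ids, attention_mask=attention_mask)
        pooled = outputs[1]
        return self.act(self.clf(self.drop(pooled)))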
Example #2
options = parser.parse_args()

# build the dataset transforms with the BERT tokenizer only
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# load dataset
test_dataset = Task723Dataset("dev", tokenizer=tokenizer)

collator_fn = Task723CollatorTest(device='cpu')
test_loader = DataLoader(test_dataset,
                         batch_size=options.batch_size,
                         drop_last=False,
                         shuffle=False,  # keep test order fixed so predictions line up with example ids
                         collate_fn=collator_fn)

# create model
model = BertModel.from_pretrained('bert-base-uncased')
model = BertClassificationHead(model,
                               model.config.hidden_size,
                               num_classes=2,
                               drop=0.2,
                               act='sigmoid')

if options.modelckpt is not None:
    state_dict = torch.load(options.modelckpt, map_location='cpu')
    model.load_state_dict(state_dict)

model.to(DEVICE)

create_submition_file(options.outfolder, model, test_loader, DEVICE)
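`create_submition_file` (spelling as in the source) is not shown anywhere on this page. A hypothetical sketch of what such a helper might do follows; the batch layout and the CSV output format are assumptions, not the project's actual code.

import csv
import os
import torch

def create_submition_file(outfolder, model, loader, device):
    """Hypothetical sketch of the submission writer used above."""
    model.eval()
    os.makedirs(outfolder, exist_ok=True)
    rows = []
    with torch.no_grad():
        for input_ids, attention_mask, example_ids in loader:  # assumed batch layout
            logits = model(input_ids.to(device), attention_mask.to(device))
            rows.extend(zip(example_ids, logits.argmax(dim=-1).tolist()))
    with open(os.path.join(outfolder, 'submission.csv'), 'w', newline='') as f:
        csv.writer(f).writerows(rows)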
Example #3
# train loader (opening lines assumed here, mirroring val_loader below)
train_loader = DataLoader(train_dataset,
                          batch_size=options.batch_size,
                          drop_last=False,
                          shuffle=True,
                          collate_fn=collator_fn)
val_loader = DataLoader(val_dataset,
                        batch_size=options.batch_size,
                        drop_last=False,
                        shuffle=True,
                        collate_fn=collator_fn)

# create model
encoder = BertModel.from_pretrained('bert-base-uncased')

# change config if you want
# encoder.config.output_hidden_states = True
model = BertClassificationHead(encoder,
                               encoder.config.hidden_size,
                               act='relu',
                               num_classes=1,
                               drop=0.2)
if options.modelckpt is not None:
    state_dict = torch.load(options.modelckpt, map_location='cpu')
    model.load_state_dict(state_dict)

model.to(DEVICE)

# params and optimizer
numparams = sum([p.numel() for p in model.parameters()])
train_numparams = sum(
    [p.numel() for p in model.parameters() if p.requires_grad])
print('Total Parameters: {}'.format(numparams))
print('Trainable Parameters: {}'.format(train_numparams))
optimizer = Adam([p for p in model.parameters() if p.requires_grad],
                 lr=options.lr,
                 weight_decay=1e-6)
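The example ends once the optimizer is built (the `lr` and `weight_decay` arguments match the identical call in Example #4). A minimal epoch loop that could follow is sketched here; the batch layout, the `options.epochs` flag, and the MSE criterion (suggested by `num_classes=1`) are all assumptions.

import torch.nn as nn

criterion = nn.MSELoss()  # assumption: a single output unit suggests a regression target

for epoch in range(options.epochs):  # `epochs` is an assumed parser option
    model.train()
    for input_ids, attention_mask, labels in train_loader:  # assumed batch layout
        optimizer.zero_grad()
        preds = model(input_ids.to(DEVICE), attention_mask.to(DEVICE)).squeeze(-1)
        loss = criterion(preds, labels.to(DEVICE).float())
        loss.backward()
        optimizer.step()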
Example #4
val_loader = DataLoader(val_dataset,
                        batch_size=options.batch_size,
                        drop_last=False,
                        shuffle=True,
                        collate_fn=collator_fn)

# create model
if options.modelckpt is not None:
    model = BertModel.from_pretrained(options.modelckpt)
else:
    model = BertModel.from_pretrained('bert-base-uncased')

# change config if you want
# model.config.output_hidden_states = True
model = BertClassificationHead(model,
                               model.config.hidden_size,
                               num_classes=2,
                               drop=0.2)

model.to(DEVICE)

# params and optimizer
numparams = sum([p.numel() for p in model.parameters()])
train_numparams = sum(
    [p.numel() for p in model.parameters() if p.requires_grad])
print('Total Parameters: {}'.format(numparams))
print('Trainable Parameters: {}'.format(train_numparams))
optimizer = Adam([p for p in model.parameters() if p.requires_grad],
                 lr=options.lr,
                 weight_decay=1e-6)
criterion = nn.CrossEntropyLoss(ignore_index=-100)
metrics = ['f1-score', 'accuracy']
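The `metrics` list implies an evaluation step that is not shown. Below is a hedged sketch of how `f1-score` and `accuracy` could be computed over the validation loader with scikit-learn, under the same assumed batch layout as above.

import torch
from sklearn.metrics import accuracy_score, f1_score

def evaluate(model, loader, device):
    """Hypothetical evaluation: collect argmax predictions and score them."""
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for input_ids, attention_mask, labels in loader:  # assumed batch layout
            logits = model(input_ids.to(device), attention_mask.to(device))
            y_pred.extend(logits.argmax(dim=-1).tolist())
            y_true.extend(labels.tolist())
    return {'f1-score': f1_score(y_true, y_pred),
            'accuracy': accuracy_score(y_true, y_pred)}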