def __init__(self, dataset, config):
    self.config = config
    self.device = get_device()
    self.dataset = dataset
    self.test_loader = self.dataset.get_test_data_loaders()
    self.retrieval_config = config["Retrieval"]
    print("test dataset len: ", len(self.dataset.test_dataset))

    # Build the SimCLR encoder requested in the config, then load its pre-trained weights.
    if config['model_name'] == 'ViT':
        model = VisionTransformerSimCLR(config).to(self.device)
    else:
        model = BiTSimCLR(config).to(self.device)
    self.model = self._load_pre_trained_weights(model)
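# The constructor above calls self._load_pre_trained_weights(model), which is not shown
# in this snippet. Below is a minimal sketch of such a helper, assuming the checkpoint is
# stored under config['fine_tune_from']/checkpoints/model.pth; the config key and file
# name are assumptions for illustration, not confirmed by the source.
import os
import torch

def _load_pre_trained_weights(self, model):
    try:
        # Assumed checkpoint location; adjust to the actual config layout.
        checkpoints_folder = os.path.join(self.config['fine_tune_from'], 'checkpoints')
        state_dict = torch.load(os.path.join(checkpoints_folder, 'model.pth'),
                                map_location=self.device)
        model.load_state_dict(state_dict)
        print("Loaded pre-trained model with success.")
    except FileNotFoundError:
        print("Pre-trained weights not found. Using the randomly initialized model.")
    return model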
    time_elapsed = time.time() - since
    print('Training {} complete in {:.0f}m {:.0f}s'.format(
        model_name, time_elapsed // 60, time_elapsed % 60))
    print('Best test Top-1 Acc: {:.4f}'.format(best_top1_acc))
    print('Best test Top-5 Acc: {:.4f}'.format(best_top5_acc))

    # load best model weights
    model.load_state_dict(best_model_weights)
    return model, loss_dict, top1_acc_dict, top5_acc_dict


if __name__ == '__main__':
    flops_params()
    device = util.get_device()
    # device = 'cpu'
    data_loaders, data_sizes = load_data('../data/pascal-voc')
    print(data_loaders)
    print(data_sizes)

    res_loss = dict()
    res_top1_acc = dict()
    res_top5_acc = dict()
    num_classes = 20
    num_epochs = 100
    # Train each ResNet variant in turn and collect its loss/accuracy curves.
    for name in ['resnet-50', 'resnet-34', 'resnet-18']:
        if name == 'resnet-50':
            model = res_net.resnet50(num_classes=num_classes)
        elif name == 'resnet-34':
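# The training function above reports best top-1 and top-5 accuracy, but the metric
# computation itself is not part of this snippet. A minimal, self-contained sketch of
# such a computation; the helper name topk_accuracy is hypothetical, not from the source.
import torch

def topk_accuracy(output, target, ks=(1, 5)):
    # output: (N, num_classes) logits; target: (N,) ground-truth class indices.
    with torch.no_grad():
        maxk = max(ks)
        _, pred = output.topk(maxk, dim=1)       # indices of the maxk highest logits
        correct = pred.eq(target.view(-1, 1))    # (N, maxk) boolean hit matrix
        # For each k, a sample counts as correct if the target appears in the top-k predictions.
        return [correct[:, :k].any(dim=1).float().mean().item() for k in ks]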
def main(config, bert_vocab_file, do_prediction=False):
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    # --gpu_ids: [1,2,3]--
    gpu_ids = [int(device_id) for device_id in config.gpu_ids.split(',')]
    print("gpu_ids:{}".format(gpu_ids))
    device, n_gpu = get_device(gpu_ids[0])
    if n_gpu > 1:
        n_gpu = len(gpu_ids)

    # label_list = ["0", "1"]
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    if not do_prediction:
        # Data preparation
        train_file = os.path.join(config.data_dir, "train.csv")
        dev_file = os.path.join(config.data_dir, "valid.csv")

        train_dataloader, train_len = load_data(train_file, config.batch_size, train=True)
        print("Num train_set: {}".format(train_len))

        valid_train_dataloader, valid_train_len = load_data(train_file, config.batch_size)
        print("Num valid_train_set: {}".format(valid_train_len))

        dev_dataloader, dev_len = load_data(dev_file, config.batch_size)
        print("Num dev_set: {}".format(dev_len))

        num_train_steps = int(train_len / config.batch_size /
                              config.gradient_accumulation_steps * config.num_train_epochs)

        if config.model_name == "BertOrigin":
            from BertOrigin.BertOrigin import BertOrigin
            model = BertOrigin(config, num_classes=2)

        model.to(device)
        if n_gpu > 1:
            model = nn.DataParallel(model, device_ids=gpu_ids)

        # Exclude bias and LayerNorm (gamma/beta) parameters from weight decay.
        no_decay = ['bias', 'gamma', 'beta']
        optimizer_parameters = [
            {
                'params': [p for n, p in model.named_parameters()
                           if not any(nd in n for nd in no_decay)],
                'weight_decay_rate': 0.01
            },
            {
                'params': [p for n, p in model.named_parameters()
                           if any(nd in n for nd in no_decay)],
                'weight_decay_rate': 0.0
            },
        ]
        optimizer = AdamW(optimizer_parameters,
                          lr=config.learning_rate,
                          betas=(0.9, 0.999),
                          weight_decay=1e-8,
                          correct_bias=False)

        # BERT training trick: the learning rate is not constant. It first warms up and
        # then decays linearly; this scheduler implements that schedule.
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=config.num_warmup_steps,
            num_training_steps=num_train_steps)

        best_model_state_dict = train(
            config.num_train_epochs, n_gpu, train_dataloader, dev_dataloader,
            valid_train_dataloader, model, optimizer, criterion,
            config.gradient_accumulation_steps, config.max_grad_norm, device,
            scheduler, config.output_dir)
        torch.save(best_model_state_dict, config.best_model_file)
    else:
        print('---**Enter Test**---')
        # dev_dataloader, dev_examples, dev_features, dev_labels = dev[:-1]
        test_file = os.path.join(config.data_dir, "test.csv")
        test_dataloader, test_len = load_data(test_file, config.batch_size)
        print('Num test_set: {}'.format(test_len))

        if config.model_name == "BertOrigin":
            from BertOrigin.BertOrigin import BertOrigin
            test_model = BertOrigin(config, num_classes=2)

        # Load the best checkpoint saved during training.
        pretrained_model_dict = torch.load(config.best_model_file)
        new_state_dict = OrderedDict()
        for k, value in pretrained_model_dict.items():
            # name = k[7:]  # remove `module.`
            new_state_dict[k] = value
        test_model.load_state_dict(new_state_dict, strict=True)

        test_model.to(device)
        if n_gpu > 1:
            test_model = nn.DataParallel(test_model, device_ids=gpu_ids)

        test_acc, test_f1 = evaluate(test_model, test_dataloader, device)
        print(f'\t Acc: {test_acc*100: .3f}% | f1: {test_f1*100: .3f}%')
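# The commented-out line `name = k[7:]  # remove 'module.'` above refers to the fact that
# checkpoints saved from an nn.DataParallel-wrapped model prefix every parameter name with
# 'module.'. A minimal sketch of a helper for that case; strip_module_prefix is a
# hypothetical name, not part of the source.
from collections import OrderedDict

def strip_module_prefix(state_dict):
    # Drop the 'module.' prefix added by nn.DataParallel so the state dict
    # can be loaded into an unwrapped model with strict=True.
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        new_state_dict[key[7:] if key.startswith('module.') else key] = value
    return new_state_dict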