def __init__(self, dataset, config):
    self.config = config
    self.device = get_device()
    self.dataset = dataset
    self.test_loader = self.dataset.get_test_data_loaders()
    self.retrieval_config = config["Retrieval"]
    print("test dataset len: ", len(self.dataset.test_dataset))

    if config['model_name'] == 'ViT':
        model = VisionTransformerSimCLR(config).to(self.device)
    else:
        model = BiTSimCLR(config).to(self.device)

    self.model = self._load_pre_trained_weights(model)
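
The _load_pre_trained_weights helper is project-local and not shown here. A minimal sketch of what such a loader typically does, assuming checkpoints were saved with torch.save(model.state_dict(), ...) under a run directory named by config['fine_tune_from'] (the config key and the paths are assumptions, not taken from this example):

import os
import torch

def _load_pre_trained_weights(self, model):
    try:
        # './runs/<run>/checkpoints/model.pth' is an assumed checkpoint layout
        checkpoints_folder = os.path.join(
            './runs', self.config['fine_tune_from'], 'checkpoints')
        state_dict = torch.load(os.path.join(checkpoints_folder, 'model.pth'),
                                map_location=self.device)
        model.load_state_dict(state_dict)
        print('Loaded pre-trained model with success.')
    except FileNotFoundError:
        print('Pre-trained weights not found. Using random initialization.')
    return model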
Example #2
    time_elapsed = time.time() - since
    print('Training {} complete in {:.0f}m {:.0f}s'.format(
        model_name, time_elapsed // 60, time_elapsed % 60))
    print('Best test Top-1 Acc: {:.4f}'.format(best_top1_acc))
    print('Best test Top-5 Acc: {:.4f}'.format(best_top5_acc))

    # load best model weights
    model.load_state_dict(best_model_weights)
    return model, loss_dict, top1_acc_dict, top5_acc_dict
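
This function reports Top-1 and Top-5 accuracy, but the metric computation itself is not shown. A minimal sketch of how such accuracies are typically computed from a batch of logits (the helper name is hypothetical):

import torch

def topk_accuracy(outputs, targets, ks=(1, 5)):
    # outputs: (N, num_classes) logits; targets: (N,) ground-truth class indices
    _, pred = outputs.topk(max(ks), dim=1)    # (N, max_k) top predictions
    correct = pred.eq(targets.unsqueeze(1))   # (N, max_k) hit matrix
    return [correct[:, :k].any(dim=1).float().mean().item() for k in ks]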


if __name__ == '__main__':
    flops_params()

    device = util.get_device()
    # device = 'cpu'

    data_loaders, data_sizes = load_data('../data/pascal-voc')
    print(data_loaders)
    print(data_sizes)

    res_loss = dict()
    res_top1_acc = dict()
    res_top5_acc = dict()
    num_classes = 20
    num_epochs = 100
    for name in ['resnet-50', 'resnet-34', 'resnet-18']:
        if name == 'resnet-50':
            model = res_net.resnet50(num_classes=num_classes)
        elif name == 'resnet-34':
            model = res_net.resnet34(num_classes=num_classes)
        else:
            model = res_net.resnet18(num_classes=num_classes)

Example #3

import os
from collections import OrderedDict

import torch
import torch.nn as nn
from transformers import AdamW, get_linear_schedule_with_warmup
# get_device, load_data, train, evaluate and BertOrigin are assumed to come
# from the surrounding project.
def main(config, bert_vocab_file, do_prediction=False):

    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    # config.gpu_ids is a comma-separated string of device ids, e.g. "1,2,3"
    gpu_ids = [int(device_id) for device_id in config.gpu_ids.split(',')]
    print("gpu_ids:{}".format(gpu_ids))

    device, n_gpu = get_device(gpu_ids[0])

    if n_gpu > 1:
        n_gpu = len(gpu_ids)

    # binary classification: the labels are "0" and "1"

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    if not do_prediction:
        # Prepare the data
        train_file = os.path.join(config.data_dir, "train.csv")
        dev_file = os.path.join(config.data_dir, "valid.csv")

        train_dataloader, train_len = load_data(train_file,
                                                config.batch_size,
                                                train=True)
        print("Num train_set: {}".format(train_len))

        valid_train_dataloader, valid_train_len = load_data(
            train_file, config.batch_size)
        print("Num valid_train_set: {}".format(valid_train_len))

        dev_dataloader, dev_len = load_data(dev_file, config.batch_size)
        print("Num dev_set: {}".format(dev_len))

        num_train_steps = int(train_len / config.batch_size /
                              config.gradient_accumulation_steps *
                              config.num_train_epochs)
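        # e.g. train_len=10000, batch_size=32, gradient_accumulation_steps=2,
        # num_train_epochs=3 -> int(10000 / 32 / 2 * 3) = 468 optimizer steps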

        if config.model_name == "BertOrigin":
            from BertOrigin.BertOrigin import BertOrigin
            model = BertOrigin(config, num_classes=2)

        model.to(device)
        if n_gpu > 1:
            model = nn.DataParallel(model, device_ids=gpu_ids)
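            # nn.DataParallel wraps the model, so any state_dict saved from it
            # carries `module.`-prefixed keys (handled when reloading for test).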

        no_decay = ['bias', 'gamma', 'beta']

        # No weight decay for biases or LayerNorm (gamma/beta) parameters;
        # `weight_decay` is the per-group key that AdamW actually reads.
        optimizer_parameters = [
            {
                'params': [p for n, p in model.named_parameters()
                           if not any(nd in n for nd in no_decay)],
                'weight_decay': 0.01,
            },
            {
                'params': [p for n, p in model.named_parameters()
                           if any(nd in n for nd in no_decay)],
                'weight_decay': 0.0,
            },
        ]

        optimizer = AdamW(optimizer_parameters,
                          lr=config.learning_rate,
                          betas=(0.9, 0.999),
                          correct_bias=False)

        # BERT training trick: the learning rate first warms up, then decays;
        # this scheduler implements that linear warmup/decay schedule.
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=config.num_warmup_steps,
            num_training_steps=num_train_steps)
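        # e.g. with num_warmup_steps=100 and num_training_steps=1000, the LR
        # multiplier climbs linearly from 0 to 1 over steps 0-100, then decays
        # linearly back to 0 by step 1000.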

        best_model_state_dict = train(
            config.num_train_epochs, n_gpu, train_dataloader, dev_dataloader,
            valid_train_dataloader, model, optimizer, criterion,
            config.gradient_accumulation_steps, config.max_grad_norm, device,
            scheduler, config.output_dir)

        torch.save(best_model_state_dict, config.best_model_file)

    else:
        print('---**Enter Test**---')

        test_file = os.path.join(config.data_dir, "test.csv")
        test_dataloader, test_len = load_data(test_file, config.batch_size)

        print('Num test_set: {}'.format(test_len))

        if config.model_name == "BertOrigin":
            from BertOrigin.BertOrigin import BertOrigin
            test_model = BertOrigin(config, num_classes=2)

        pretrained_model_dict = torch.load(config.best_model_file)
        new_state_dict = OrderedDict()
        for k, value in pretrained_model_dict.items():
            # strip the `module.` prefix that nn.DataParallel adds to saved keys
            name = k[7:] if k.startswith('module.') else k
            new_state_dict[name] = value

        test_model.load_state_dict(new_state_dict, strict=True)
        test_model.to(device)

        if n_gpu > 1:
            test_model = nn.DataParallel(test_model, device_ids=gpu_ids)

        test_acc, test_f1 = evaluate(test_model, test_dataloader, device)

        print(f'\t  Acc: {test_acc * 100:.3f}% | F1: {test_f1 * 100:.3f}%')
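
The evaluate helper is also project-local and not shown. A minimal sketch under assumed conventions (batches of (input_ids, attention_mask, labels), scikit-learn for the metrics; all of these are assumptions):

import torch
from sklearn.metrics import accuracy_score, f1_score

def evaluate(model, dataloader, device):
    # assumed batch layout: (input_ids, attention_mask, labels)
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for input_ids, attention_mask, labels in dataloader:
            logits = model(input_ids.to(device), attention_mask.to(device))
            all_preds.extend(logits.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.tolist())
    return (accuracy_score(all_labels, all_preds),
            f1_score(all_labels, all_preds))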