Example #1
def main():
    torch.manual_seed(2020)
    # model
    model = Network(cfg)
    print('=> Load model')

    model.cuda()
    print('=> Cuda used')


    test_dataset = MyDataLoader(root=cfg.dataset, split="test")

    test_loader = DataLoader(test_dataset, batch_size=1,
                             num_workers=1, drop_last=True, shuffle=False)

    if args.mode == "test":
        assert isfile(cfg.resume), "No checkpoint found at '{}'".format(cfg.resume)

        model.load_checkpoint()
        test(cfg, model, test_loader, save_dir=join(TMP_DIR, "test", "single_scale_test"))

        if cfg.multi_aug:
            multiscale_test(model, test_loader, save_dir=join(TMP_DIR, "test", "multi_scale_test"))

    else:
        train_dataset = MyDataLoader(root=cfg.dataset, split="train", transform=True)

        train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size,
                                  num_workers=1, drop_last=True, shuffle=True)

        model.init_weight()

        if cfg.resume:
            model.load_checkpoint()

        model.train()

        # optimizer
        optim, scheduler = Optimizer(cfg)(model)

        # log
        log = Logger(join(TMP_DIR, "%s-%d-log.txt" % ("sgd", cfg.lr)))
        sys.stdout = log

        train_loss = []
        train_loss_detail = []

        for epoch in range(0, cfg.max_epoch):

            tr_avg_loss, tr_detail_loss = train(cfg,
                train_loader, model, optim, scheduler, epoch,
                save_dir=join(TMP_DIR, "train", "epoch-%d-training-record" % epoch))

            test(cfg, model, test_loader, save_dir=join(TMP_DIR, "train", "epoch-%d-testing-record-view" % epoch))

            log.flush()

            train_loss.append(tr_avg_loss)
            train_loss_detail += tr_detail_loss
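
A note on the Logger used throughout these examples: sys.stdout is replaced by a Logger object and log.flush() is called each epoch, so the class must behave like a file that tees output. A minimal sketch of such a tee logger, assuming this is roughly what the repositories implement:

import sys

class Logger(object):
    # Hypothetical sketch: mirror everything printed to stdout into a
    # log file, so `sys.stdout = Logger(path)` captures print() output.
    def __init__(self, path, mode='w'):
        self.terminal = sys.stdout
        self.file = open(path, mode)

    def write(self, message):
        self.terminal.write(message)
        self.file.write(message)

    def flush(self):
        self.terminal.flush()
        self.file.flush()

    def close(self):
        self.file.close()
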
Example #2
def main():
    model = Net()
    if torch.cuda.is_available():
        model.cuda()
    model.apply(weights_init)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'"
                  .format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data processing
    # handled directly inside train()
    # dataParser = DataParser(batch_size)
    loss_function = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    # train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log
    train_loss = []
    train_loss_detail = []

    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            # left empty for now

        tr_avg_loss, tr_detail_loss = train(model=model, optimizer=optimizer, epoch=epoch,
                                            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test()

        log.flush()
        # Save checkpoint
        save_file = os.path.join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        filename=save_file)

        scheduler.step()  # adjust the learning rate automatically
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
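
The save_checkpoint helper called above is not shown in the snippet; a minimal sketch consistent with how it is invoked here (an assumption, not the original implementation):

import torch

def save_checkpoint(state, filename='checkpoint.pth'):
    # Hypothetical helper: persist the epoch number, model weights and
    # optimizer state in a single file.
    torch.save(state, filename)
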
Example #3
def main():
    args.cuda = True
    model = NRCNN(4, 64)
    model.cuda()
    if isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(args.resume))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

    net = model

    log = Logger('test_result.txt')
    sys.stdout = log
    test(model)
    log.flush()
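
If a saved NRCNN checkpoint does not exactly match the constructed model (for example a different block count), load_state_dict supports a non-strict mode. A sketch, not part of the original example:

# Tolerate missing/unexpected keys when architectures differ slightly.
checkpoint = torch.load(args.resume, map_location='cpu')
model.load_state_dict(checkpoint['state_dict'], strict=False)
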
Example #4
def main():
    # model
    model = Extened_NRCNN(args.res_block, 64)
    model.cuda()
    #model.apply(weights_init)
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    #tune lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.stepsize,
                                    gamma=args.gamma)

    # log
    if not isdir(args.save_path):
        os.makedirs(args.save_path)
    log = Logger(join(args.save_path, '%s-%d-log.txt' % ('adam', args.lr)))
    sys.stdout = log

    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")

        train(trainloader,
              model,
              optimizer,
              epoch,
              save_dir=join(args.save_path,
                            'epoch-%d-training-record' % epoch))
        log.flush()  # write log
        scheduler.step()  # will adjust learning rate

    writer.close()
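
The writer closed at the end of this example is created outside the snippet. Assuming it is a TensorBoard SummaryWriter, a minimal sketch of how it would be set up and fed (names are assumptions):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir=args.save_path)  # assumed location
writer.add_scalar('train/lr', scheduler.get_last_lr()[0], epoch)
writer.close()
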
Example #5
def main():
    args.cuda = True
    # dataset
    train_dataset = BSDSLoader(root=args.dataset, split="train")
    test_dataset = BSDSLoader(root=args.dataset, split="test")
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=8,
                              drop_last=True,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             num_workers=8,
                             drop_last=True,
                             shuffle=False)
    with open('data/HED-BSDS/test.lst', 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list),
                                                             len(test_loader))

    # model
    model = HED()
    model.cuda()
    model.apply(weights_init)
    load_vgg16pretrain(model)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    #tune lr
    net_parameters_id = {}
    net = model
    for pname, p in net.named_parameters():
        if pname in [
                'conv1_1.weight', 'conv1_2.weight', 'conv2_1.weight',
                'conv2_2.weight', 'conv3_1.weight', 'conv3_2.weight',
                'conv3_3.weight', 'conv4_1.weight', 'conv4_2.weight',
                'conv4_3.weight'
        ]:
            #print(pname, 'lr:1 de:1')
            if 'conv1-4.weight' not in net_parameters_id:
                net_parameters_id['conv1-4.weight'] = []
            net_parameters_id['conv1-4.weight'].append(p)
        elif pname in [
                'conv1_1.bias', 'conv1_2.bias', 'conv2_1.bias', 'conv2_2.bias',
                'conv3_1.bias', 'conv3_2.bias', 'conv3_3.bias', 'conv4_1.bias',
                'conv4_2.bias', 'conv4_3.bias'
        ]:
            #print(pname, 'lr:2 de:0')
            if 'conv1-4.bias' not in net_parameters_id:
                net_parameters_id['conv1-4.bias'] = []
            net_parameters_id['conv1-4.bias'].append(p)
        elif pname in ['conv5_1.weight', 'conv5_2.weight', 'conv5_3.weight']:
            #print(pname, 'lr:100 de:1')
            if 'conv5.weight' not in net_parameters_id:
                net_parameters_id['conv5.weight'] = []
            net_parameters_id['conv5.weight'].append(p)
        elif pname in ['conv5_1.bias', 'conv5_2.bias', 'conv5_3.bias']:
            #print(pname, 'lr:200 de:0')
            if 'conv5.bias' not in net_parameters_id:
                net_parameters_id['conv5.bias'] = []
            net_parameters_id['conv5.bias'].append(p)

        elif pname in [
                'score_dsn1.weight', 'score_dsn2.weight', 'score_dsn3.weight',
                'score_dsn4.weight', 'score_dsn5.weight'
        ]:
            #print(pname, 'lr:0.01 de:1')
            if 'score_dsn_1-5.weight' not in net_parameters_id:
                net_parameters_id['score_dsn_1-5.weight'] = []
            net_parameters_id['score_dsn_1-5.weight'].append(p)
        elif pname in [
                'score_dsn1.bias', 'score_dsn2.bias', 'score_dsn3.bias',
                'score_dsn4.bias', 'score_dsn5.bias'
        ]:
            #print(pname, 'lr:0.02 de:0')
            if 'score_dsn_1-5.bias' not in net_parameters_id:
                net_parameters_id['score_dsn_1-5.bias'] = []
            net_parameters_id['score_dsn_1-5.bias'].append(p)
        elif pname in ['score_final.weight']:
            #print(pname, 'lr:0.001 de:1')
            if 'score_final.weight' not in net_parameters_id:
                net_parameters_id['score_final.weight'] = []
            net_parameters_id['score_final.weight'].append(p)
        elif pname in ['score_final.bias']:
            #print(pname, 'lr:0.002 de:0')
            if 'score_final.bias' not in net_parameters_id:
                net_parameters_id['score_final.bias'] = []
            net_parameters_id['score_final.bias'].append(p)

    optimizer = torch.optim.SGD([
        {
            'params': net_parameters_id['conv1-4.weight'],
            'lr': args.lr * 1,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv1-4.bias'],
            'lr': args.lr * 2,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['conv5.weight'],
            'lr': args.lr * 100,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv5.bias'],
            'lr': args.lr * 200,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['score_dsn_1-5.weight'],
            'lr': args.lr * 0.01,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['score_dsn_1-5.bias'],
            'lr': args.lr * 0.02,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['score_final.weight'],
            'lr': args.lr * 0.001,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['score_final.bias'],
            'lr': args.lr * 0.002,
            'weight_decay': 0.
        },
    ],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.stepsize,
                                    gamma=args.gamma)

    # log
    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        #if epoch == 0:
        #   print("Performing initial testing...")
        #  test(model, test_loader, epoch=epoch, test_list=test_list,
        #      save_dir = join(TMP_DIR, 'initial-testing-record'))

        tr_avg_loss, tr_detail_loss = train(
            train_loader,
            model,
            optimizer,
            epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test(model,
             test_loader,
             epoch=epoch,
             test_list=test_list,
             save_dir=join(TMP_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log
        # Save checkpoint
        save_file = os.path.join(TMP_DIR,
                                 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            filename=save_file)
        scheduler.step()  # will adjust learning rate
        # save train/val loss/accuracy, save every epoch in case of early stop
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
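
The long if/elif chain above implements the HED learning-rate policy: each (layer family, weight/bias) pair gets its own multiplier and weight decay. The same grouping can be written more compactly; a sketch assuming the same layer names, for illustration only:

from collections import defaultdict

GROUP_PREFIXES = [
    (('conv1_', 'conv2_', 'conv3_', 'conv4_'), 'conv1-4'),
    (('conv5_',), 'conv5'),
    (('score_dsn',), 'score_dsn_1-5'),
    (('score_final',), 'score_final'),
]

def group_parameters(net):
    # Bucket parameters by layer family and weight/bias kind, mirroring
    # the explicit name lists above.
    groups = defaultdict(list)
    for pname, p in net.named_parameters():
        kind = pname.rsplit('.', 1)[-1]  # 'weight' or 'bias'
        for prefixes, key in GROUP_PREFIXES:
            if pname.startswith(prefixes):
                groups['{}.{}'.format(key, kind)].append(p)
                break
    return groups
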
Example #6
def main():
    parser = argparse.ArgumentParser()

    # Settings
    parser.add_argument('-d',
                        '--dataset',
                        choices=dataset_attributes.keys(),
                        required=True)
    parser.add_argument('-s',
                        '--shift_type',
                        choices=shift_types,
                        required=True)
    # Confounders
    parser.add_argument('-t', '--target_name')
    parser.add_argument('-c', '--confounder_names', nargs='+')
    # Resume?
    parser.add_argument('--resume', default=False, action='store_true')
    # Label shifts
    parser.add_argument('--minority_fraction', type=float)
    parser.add_argument('--imbalance_ratio', type=float)
    # Data
    parser.add_argument('--fraction', type=float, default=1.0)
    parser.add_argument('--root_dir', default=None)
    parser.add_argument('--subsample_to_minority',
                        action='store_true',
                        default=False)
    parser.add_argument('--reweight_groups',
                        action='store_true',
                        default=False)
    parser.add_argument('--augment_data', action='store_true', default=False)
    parser.add_argument('--val_fraction', type=float, default=0.1)
    # Objective
    parser.add_argument('--robust', default=False, action='store_true')
    parser.add_argument('--alpha', type=float, default=0.2)
    parser.add_argument('--generalization_adjustment', default="0.0")
    parser.add_argument('--automatic_adjustment',
                        default=False,
                        action='store_true')
    parser.add_argument('--robust_step_size', default=0.01, type=float)
    parser.add_argument('--use_normalized_loss',
                        default=False,
                        action='store_true')
    parser.add_argument('--btl', default=False, action='store_true')
    parser.add_argument('--hinge', default=False, action='store_true')

    # Model
    parser.add_argument('--model',
                        choices=model_attributes.keys(),
                        default='resnet50')
    parser.add_argument('--train_from_scratch',
                        action='store_true',
                        default=False)
    parser.add_argument('--resnet_width', type=int, default=None)

    # Optimization
    parser.add_argument('--n_epochs', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--scheduler', action='store_true', default=False)
    parser.add_argument('--weight_decay', type=float, default=5e-5)
    parser.add_argument('--gamma', type=float, default=0.1)
    parser.add_argument('--minimum_variational_weight', type=float, default=0)
    # Misc
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--show_progress', default=False, action='store_true')
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int, default=10)
    parser.add_argument('--save_best', action='store_true', default=False)
    parser.add_argument('--save_last', action='store_true', default=False)
    parser.add_argument('--model_test', type=str)
    parser.add_argument('--gpu', type=str)

    args = parser.parse_args()
    check_args(args)
    model_test = args.model_test
    gpu = args.gpu
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # BERT-specific configs copied over from run_glue.py
    if args.model == 'bert':
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    logger = Logger(os.path.join(args.log_dir, model_test + '_log.txt'), mode)
    # Record args
    log_args(args, logger)

    set_seed(args.seed)

    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == 'confounder':
        train_data, val_data, test_data = prepare_data(args, train=True)
    elif args.shift_type == 'label_shift_step':
        train_data, val_data = prepare_data(args, train=True)

    loader_kwargs = {
        'batch_size': args.batch_size,
        'num_workers': 12,
        'pin_memory': True
    }
    train_loader = train_data.get_loader(train=True,
                                         reweight_groups=args.reweight_groups,
                                         **loader_kwargs)
    val_loader = val_data.get_loader(train=False,
                                     reweight_groups=None,
                                     **loader_kwargs)
    if test_data is not None:
        test_loader = test_data.get_loader(train=False,
                                           reweight_groups=None,
                                           **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes

    log_data(data, logger)

    ## Initialize model
    pretrained = not args.train_from_scratch
    if resume:
        model = torch.load(os.path.join(args.log_dir, model_test))
        d = train_data.input_size()[0]
    elif model_attributes[args.model]['feature_type'] in ('precomputed',
                                                          'raw_flattened'):
        assert pretrained
        # Load precomputed features
        d = train_data.input_size()[0]
        model = nn.Linear(d, n_classes)
        model.has_aux_logits = False
    elif args.model == 'resnet50':
        model = torchvision.models.resnet50(pretrained=pretrained)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
    elif args.model == 'resnet34':
        model = torchvision.models.resnet34(pretrained=pretrained)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
    elif args.model == 'wideresnet50':
        model = torchvision.models.wide_resnet50_2(pretrained=pretrained)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
    elif args.model == 'resnet50vw':
        assert not pretrained
        assert args.resnet_width is not None
        model = resnet50vw(args.resnet_width, num_classes=n_classes)
    elif args.model == 'resnet18vw':
        assert not pretrained
        assert args.resnet_width is not None
        model = resnet18vw(args.resnet_width, num_classes=n_classes)
    elif args.model == 'resnet10vw':
        assert not pretrained
        assert args.resnet_width is not None
        model = resnet10vw(args.resnet_width, num_classes=n_classes)
    elif args.model == 'bert':
        assert args.dataset == 'MultiNLI'

        from pytorch_transformers import BertConfig, BertForSequenceClassification
        config_class = BertConfig
        model_class = BertForSequenceClassification

        config = config_class.from_pretrained('bert-base-uncased',
                                              num_labels=3,
                                              finetuning_task='mnli')
        model = model_class.from_pretrained('bert-base-uncased',
                                            from_tf=False,
                                            config=config)
    else:
        raise ValueError('Model not recognized.')

    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ['CelebA', 'CUB']  # Only supports binary

        def hinge_loss(yhat, y):
            # The torch loss takes in three arguments so we need to split yhat
            # It also expects classes in {+1.0, -1.0} whereas by default we give them in {0, 1}
            # Furthermore, if y = 1 it expects the first input to be higher instead of the second,
            # so we need to swap yhat[:, 0] and yhat[:, 1]...
            torch_loss = torch.nn.MarginRankingLoss(margin=1.0,
                                                    reduction='none')
            y = (y.float() * 2.0) - 1.0
            return torch_loss(yhat[:, 1], yhat[:, 0], y)

        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction='none')

    if resume:
        df = pd.read_csv(os.path.join(args.log_dir, 'test.csv'))
        epoch_offset = df.loc[len(df) - 1, 'epoch'] + 1
        logger.write(f'starting from epoch {epoch_offset}')
    else:
        epoch_offset = 0
    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'),
                                      train_data.n_groups,
                                      mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'),
                                    train_data.n_groups,
                                    mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'),
                                     train_data.n_groups,
                                     mode=mode)

    train(model,
          criterion,
          data,
          logger,
          train_csv_logger,
          val_csv_logger,
          test_csv_logger,
          args,
          epoch_offset=epoch_offset)

    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
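
The hinge loss above leans on torch.nn.MarginRankingLoss: labels are remapped from {0, 1} to {-1, +1} and the two logits are ranked with margin 1. A quick sanity check of that behaviour (a sketch; hinge_loss as defined above, logits of shape [batch, 2]):

import torch

yhat = torch.tensor([[2.0, -1.0],   # confidently class 0
                     [-1.0, 2.0]])  # confidently class 1
y = torch.tensor([0, 1])
# Both rows are correct with a margin greater than 1, so the
# per-example loss is zero.
print(hinge_loss(yhat, y))  # tensor([0., 0.])
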
Example #7
def main():
    ################################################
    # I. Miscellaneous.
    ################################################
    # Create the output directory.
    current_dir = abspath(dirname(__file__))
    output_dir = join(current_dir, args.output)
    if not isdir(output_dir):
        os.makedirs(output_dir)

    # Set logger.
    now_str = datetime.now().strftime('%y%m%d-%H%M%S')
    log = Logger(join(output_dir, 'log-{}.txt'.format(now_str)))
    sys.stdout = log  # Overwrite the standard output.

    ################################################
    # II. Datasets.
    ################################################
    # Datasets and dataloaders.
    train_dataset = BsdsDataset(dataset_dir=args.dataset, split='train')
    test_dataset = BsdsDataset(dataset_dir=args.dataset, split='test')
    train_loader = DataLoader(train_dataset,
                              batch_size=args.train_batch_size,
                              num_workers=4,
                              drop_last=True,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.test_batch_size,
                             num_workers=4,
                             drop_last=False,
                             shuffle=False)

    ################################################
    # III. Network and optimizer.
    ################################################
    # Create the network in GPU.
    net = nn.DataParallel(HED(device))
    net.to(device)

    # Initialize the weights for HED model.
    def weights_init(m):
        """ Weight initialization function. """
        if isinstance(m, nn.Conv2d):
            # Initialize: m.weight.
            if m.weight.data.shape == torch.Size([1, 5, 1, 1]):
                # Constant initialization for fusion layer in HED network.
                torch.nn.init.constant_(m.weight, 0.2)
            else:
                # Zero initialization following official repository.
                # Reference: hed/docs/tutorial/layers.md
                m.weight.data.zero_()
            # Initialize: m.bias.
            if m.bias is not None:
                # Zero initialization.
                m.bias.data.zero_()

    net.apply(weights_init)

    # Optimizer settings.
    net_parameters_id = defaultdict(list)
    for name, param in net.named_parameters():
        if name in [
                'module.conv1_1.weight', 'module.conv1_2.weight',
                'module.conv2_1.weight', 'module.conv2_2.weight',
                'module.conv3_1.weight', 'module.conv3_2.weight',
                'module.conv3_3.weight', 'module.conv4_1.weight',
                'module.conv4_2.weight', 'module.conv4_3.weight'
        ]:
            print('{:26} lr:    1 decay:1'.format(name))
            net_parameters_id['conv1-4.weight'].append(param)
        elif name in [
                'module.conv1_1.bias', 'module.conv1_2.bias',
                'module.conv2_1.bias', 'module.conv2_2.bias',
                'module.conv3_1.bias', 'module.conv3_2.bias',
                'module.conv3_3.bias', 'module.conv4_1.bias',
                'module.conv4_2.bias', 'module.conv4_3.bias'
        ]:
            print('{:26} lr:    2 decay:0'.format(name))
            net_parameters_id['conv1-4.bias'].append(param)
        elif name in [
                'module.conv5_1.weight', 'module.conv5_2.weight',
                'module.conv5_3.weight'
        ]:
            print('{:26} lr:  100 decay:1'.format(name))
            net_parameters_id['conv5.weight'].append(param)
        elif name in [
                'module.conv5_1.bias', 'module.conv5_2.bias',
                'module.conv5_3.bias'
        ]:
            print('{:26} lr:  200 decay:0'.format(name))
            net_parameters_id['conv5.bias'].append(param)
        elif name in [
                'module.score_dsn1.weight', 'module.score_dsn2.weight',
                'module.score_dsn3.weight', 'module.score_dsn4.weight',
                'module.score_dsn5.weight'
        ]:
            print('{:26} lr: 0.01 decay:1'.format(name))
            net_parameters_id['score_dsn_1-5.weight'].append(param)
        elif name in [
                'module.score_dsn1.bias', 'module.score_dsn2.bias',
                'module.score_dsn3.bias', 'module.score_dsn4.bias',
                'module.score_dsn5.bias'
        ]:
            print('{:26} lr: 0.02 decay:0'.format(name))
            net_parameters_id['score_dsn_1-5.bias'].append(param)
        elif name in ['module.score_final.weight']:
            print('{:26} lr:0.001 decay:1'.format(name))
            net_parameters_id['score_final.weight'].append(param)
        elif name in ['module.score_final.bias']:
            print('{:26} lr:0.002 decay:0'.format(name))
            net_parameters_id['score_final.bias'].append(param)

    # Create optimizer.
    opt = torch.optim.SGD([
        {
            'params': net_parameters_id['conv1-4.weight'],
            'lr': args.lr * 1,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv1-4.bias'],
            'lr': args.lr * 2,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['conv5.weight'],
            'lr': args.lr * 100,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv5.bias'],
            'lr': args.lr * 200,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['score_dsn_1-5.weight'],
            'lr': args.lr * 0.01,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['score_dsn_1-5.bias'],
            'lr': args.lr * 0.02,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['score_final.weight'],
            'lr': args.lr * 0.001,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['score_final.bias'],
            'lr': args.lr * 0.002,
            'weight_decay': 0.
        },
    ],
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # Note: In train_val.prototxt and deploy.prototxt, the learning rates of score_final.weight/bias are different.

    # Learning rate scheduler.
    lr_schd = lr_scheduler.StepLR(opt,
                                  step_size=args.lr_stepsize,
                                  gamma=args.lr_gamma)

    ################################################
    # IV. Pre-trained parameters.
    ################################################
    # Load parameters from pre-trained VGG-16 Caffe model.
    if args.vgg16_caffe:
        load_vgg16_caffe(net, args.vgg16_caffe)

    # Resume the checkpoint.
    if args.checkpoint:
        load_checkpoint(net, opt, args.checkpoint)  # Omit the returned values.

    # Resume the HED Caffe model.
    if args.caffe_model:
        load_pretrained_caffe(net, args.caffe_model)

    ################################################
    # V. Training / testing.
    ################################################
    if args.test:
        # Only test.
        test(test_loader, net, save_dir=join(output_dir, 'test'))
    else:
        # Train.
        train_epoch_losses = []
        for epoch in range(args.max_epoch):
            # Initial test.
            if epoch == 0:
                print('Initial test...')
                test(test_loader,
                     net,
                     save_dir=join(output_dir, 'initial-test'))
            # Epoch training and test.
            train_epoch_loss = \
                train(train_loader, net, opt, lr_schd, epoch, save_dir=join(output_dir, 'epoch-{}-train'.format(epoch)))
            test(test_loader,
                 net,
                 save_dir=join(output_dir, 'epoch-{}-test'.format(epoch)))
            # Write log.
            log.flush()
            # Save checkpoint.
            save_checkpoint(state={
                'net': net.state_dict(),
                'opt': opt.state_dict(),
                'epoch': epoch
            },
                            path=os.path.join(
                                output_dir,
                                'epoch-{}-checkpoint.pt'.format(epoch)))
            # Collect losses.
            train_epoch_losses.append(train_epoch_loss)
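
The load_checkpoint helper used at the resume step above is assumed; a minimal sketch that matches the keys this example writes with save_checkpoint ('net', 'opt', 'epoch'):

import torch

def load_checkpoint(net, opt, path):
    # Hypothetical counterpart of the save_checkpoint call above.
    state = torch.load(path)
    net.load_state_dict(state['net'])
    opt.load_state_dict(state['opt'])
    return state['epoch']
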
Example #8
def runTrain(sess, d, rnn, msg):
    sess.run(tf.global_variables_initializer())

    experiment = '{}_{}_{}'.format(rnn.name, datanum, Time.now())
    model_path = "model/{}".format(experiment)
    log_path = "SAVE_Logs/{}.txt".format(experiment)
    stat_path = "SAVE_Logs/{}.stat".format(experiment)
        
    logger = Logger(log_path)
    stat = {"tests": 0}
    stat_lowAbs = {"dist": 100}

    total_number_of_batch = 0
    for number in trainRange:
        total_number_of_batch += d[number].numberBatch

    total_number_of_batch_test = 0
    for number in testRange:
        total_number_of_batch_test += d[number].numberBatch
            
    num_epoch = 100
    totalTime = Time()
    for curr_epoch in range(num_epoch):
        cost_sum = 0
        test_cost_sum = 0
        trainTime = Time()
        for number in trainRange:
            for index in range(d[number].numberBatch):
                cost, _ = rnn.Train(d[number]._MFCC[index], d[number]._LABEL[index], 0.8)
                cost_sum += cost

        avg_cost = cost_sum / total_number_of_batch
        acc1 = 0.0
        acc0 = 0.0
        for number in trainRange:
            for index in range(d[number].numberBatch):
                ac1, ac0 = rnn.Accuracy(d[number]._MFCC[index], d[number]._LABEL[index])
                acc1 += ac1
                acc0 += ac0
        avg_train_accuracy = (acc1 / total_number_of_batch + acc0 / total_number_of_batch) / 2
       
        acc1 = 0.0
        acc0 = 0.0
        test_cost_sum = 0
        resultMatrix = np.zeros([2, 2], int)
        for number in testRange:
            for index in range(d[number].numberBatch):
                ac1, ac0 = rnn.Accuracy(d[number]._MFCC[index], d[number]._LABEL[index])
                test_cost_sum += rnn.Cost(d[number]._MFCC[index], d[number]._LABEL[index])
                resultMatrix += rnn.return_ResultMatrix(d[number]._MFCC[index], d[number]._LABEL[index])
                acc1 += ac1
                acc0 += ac0
        avg_test_accuracy = (acc1 / total_number_of_batch_test + acc0 / total_number_of_batch_test) / 2
        test_distance = np.abs(acc1 / total_number_of_batch_test - acc0 / total_number_of_batch_test)
        avg_test_cost = test_cost_sum / total_number_of_batch_test
        
        if avg_test_accuracy > stat["tests"]:
            stat['tests'] = avg_test_accuracy
            stat['trains'] = avg_train_accuracy
            stat['epoch'] = curr_epoch
            stat['cost'] = avg_cost
            stat['testcost'] = avg_test_cost
            stat['resultMatrix'] = resultMatrix
            stat['dist'] = test_distance
            rnn.Save(model_path)

        if test_distance < stat_lowAbs['dist']:
            stat_lowAbs['tests'] = avg_test_accuracy
            stat_lowAbs['trains'] = avg_train_accuracy
            stat_lowAbs['epoch'] = curr_epoch
            stat_lowAbs['cost'] = avg_cost
            stat_lowAbs['testcost'] = avg_test_cost
            stat_lowAbs['resultMatrix'] = resultMatrix
            stat_lowAbs['dist'] = test_distance
            rnn.Save(model_path + 'lowdist')

        log = ("Epoch {}/{}, l_rate:{:.10f}, train cost = {:>7.4f}, test cost = {:>7.4f}, "
               "accuracy(train,test/best):({:.4f}, {:.4f}/{:.4f}), test_distance = {:.4f}, time = {}/{}\n").format(
            curr_epoch, num_epoch, rnn.learning_rate, avg_cost, avg_test_cost,
            avg_train_accuracy, avg_test_accuracy, stat['tests'], test_distance, trainTime.duration(), totalTime.duration())
        logger.write(log)
    summary ="""
    {}.{}.{}
            learning_rate : {} train_data_ratio : {}  num_epoch : {}  batch_size : {}   windowsize : {} windowshift : {}		
            Best evaulation based on test_data  :  Accuracy_train  : {}    Accuracy_test :  {}  at epoch :{}
            Best evaulation based on test_data at lowest distance : Accuracy_train  : {}    Accuracy_test :  {} at epoch :{} \n
            best Result Matrix : \n{}{}\n
            best Reuslt Matrix at lowest distance : \n{}{}\n
            """.format(
        	rnn.name,experiment,msg,
        	rnn.learning_rate, train_rate, num_epoch,a.batch_size,a.windowsize,a.windowstep,		
        					stat["trains"],stat["tests"],stat['epoch'],stat_lowAbs['trains'],stat_lowAbs['tests'],stat_lowAbs['epoch'],
                            stat['resultMatrix'],matrixAccuracy(stat['resultMatrix']),stat_lowAbs['resultMatrix'],matrixAccuracy(stat_lowAbs['resultMatrix']))
    print(summary)
    logger.flush()
    logger.close()

    plot_static(log_path)

    with open("SAVE_Logs/log.txt", "a") as f:
        f.write(summary)
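
Time is another helper this example assumes, used both as a stopwatch (duration()) and as a timestamp source (Time.now()). A plausible minimal sketch:

import time

class Time:
    # Hypothetical stopwatch/timestamp helper assumed by runTrain().
    def __init__(self):
        self.start = time.time()

    def duration(self):
        secs = int(time.time() - self.start)
        return '{:02d}:{:02d}:{:02d}'.format(secs // 3600, (secs % 3600) // 60, secs % 60)

    @staticmethod
    def now():
        return time.strftime('%Y%m%d-%H%M%S')
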
Example #9
def main():
    args.cuda = True
    # dataset
    train_dataset = BSDS_RCFLoader(root=args.dataset, split="train")
    test_dataset = BSDS_RCFLoader(root=args.dataset + "/HED-BSDS",
                                  split="test")
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=8,
                              drop_last=True,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             num_workers=8,
                             drop_last=True,
                             shuffle=False)

    # model
    model = RCF()
    model.cuda()
    model.apply(weights_init)
    load_vgg16pretrain(model)
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            raise Exception("=> no checkpoint found at '{}'".format(args.resume))
    else:
        raise Exception("this example requires a checkpoint passed via --resume")

    #tune lr
    net_parameters_id = {}
    net = model
    for pname, p in net.named_parameters():
        if pname in [
                'conv1_1.weight', 'conv1_2.weight', 'conv2_1.weight',
                'conv2_2.weight', 'conv3_1.weight', 'conv3_2.weight',
                'conv3_3.weight', 'conv4_1.weight', 'conv4_2.weight',
                'conv4_3.weight'
        ]:
            print(pname, 'lr:1 de:1')
            if 'conv1-4.weight' not in net_parameters_id:
                net_parameters_id['conv1-4.weight'] = []
            net_parameters_id['conv1-4.weight'].append(p)
        elif pname in [
                'conv1_1.bias', 'conv1_2.bias', 'conv2_1.bias', 'conv2_2.bias',
                'conv3_1.bias', 'conv3_2.bias', 'conv3_3.bias', 'conv4_1.bias',
                'conv4_2.bias', 'conv4_3.bias'
        ]:
            print(pname, 'lr:2 de:0')
            if 'conv1-4.bias' not in net_parameters_id:
                net_parameters_id['conv1-4.bias'] = []
            net_parameters_id['conv1-4.bias'].append(p)
        elif pname in ['conv5_1.weight', 'conv5_2.weight', 'conv5_3.weight']:
            print(pname, 'lr:100 de:1')
            if 'conv5.weight' not in net_parameters_id:
                net_parameters_id['conv5.weight'] = []
            net_parameters_id['conv5.weight'].append(p)
        elif pname in ['conv5_1.bias', 'conv5_2.bias', 'conv5_3.bias']:
            print(pname, 'lr:200 de:0')
            if 'conv5.bias' not in net_parameters_id:
                net_parameters_id['conv5.bias'] = []
            net_parameters_id['conv5.bias'].append(p)
        elif pname in [
                'conv1_1_down.weight', 'conv1_2_down.weight',
                'conv2_1_down.weight', 'conv2_2_down.weight',
                'conv3_1_down.weight', 'conv3_2_down.weight',
                'conv3_3_down.weight', 'conv4_1_down.weight',
                'conv4_2_down.weight', 'conv4_3_down.weight',
                'conv5_1_down.weight', 'conv5_2_down.weight',
                'conv5_3_down.weight'
        ]:
            print(pname, 'lr:0.1 de:1')
            if 'conv_down_1-5.weight' not in net_parameters_id:
                net_parameters_id['conv_down_1-5.weight'] = []
            net_parameters_id['conv_down_1-5.weight'].append(p)
        elif pname in [
                'conv1_1_down.bias', 'conv1_2_down.bias', 'conv2_1_down.bias',
                'conv2_2_down.bias', 'conv3_1_down.bias', 'conv3_2_down.bias',
                'conv3_3_down.bias', 'conv4_1_down.bias', 'conv4_2_down.bias',
                'conv4_3_down.bias', 'conv5_1_down.bias', 'conv5_2_down.bias',
                'conv5_3_down.bias'
        ]:
            print(pname, 'lr:0.2 de:0')
            if 'conv_down_1-5.bias' not in net_parameters_id:
                net_parameters_id['conv_down_1-5.bias'] = []
            net_parameters_id['conv_down_1-5.bias'].append(p)
        elif pname in [
                'score_dsn1.weight', 'score_dsn2.weight', 'score_dsn3.weight',
                'score_dsn4.weight', 'score_dsn5.weight'
        ]:
            print(pname, 'lr:0.01 de:1')
            if 'score_dsn_1-5.weight' not in net_parameters_id:
                net_parameters_id['score_dsn_1-5.weight'] = []
            net_parameters_id['score_dsn_1-5.weight'].append(p)
        elif pname in [
                'score_dsn1.bias', 'score_dsn2.bias', 'score_dsn3.bias',
                'score_dsn4.bias', 'score_dsn5.bias'
        ]:
            print(pname, 'lr:0.02 de:0')
            if 'score_dsn_1-5.bias' not in net_parameters_id:
                net_parameters_id['score_dsn_1-5.bias'] = []
            net_parameters_id['score_dsn_1-5.bias'].append(p)
        elif pname in ['score_final.weight']:
            print(pname, 'lr:0.001 de:1')
            if 'score_final.weight' not in net_parameters_id:
                net_parameters_id['score_final.weight'] = []
            net_parameters_id['score_final.weight'].append(p)
        elif pname in ['score_final.bias']:
            print(pname, 'lr:0.002 de:0')
            if 'score_final.bias' not in net_parameters_id:
                net_parameters_id['score_final.bias'] = []
            net_parameters_id['score_final.bias'].append(p)

    optimizer = torch.optim.SGD([
        {
            'params': net_parameters_id['conv1-4.weight'],
            'lr': args.lr * 1,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv1-4.bias'],
            'lr': args.lr * 2,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['conv5.weight'],
            'lr': args.lr * 100,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv5.bias'],
            'lr': args.lr * 200,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['conv_down_1-5.weight'],
            'lr': args.lr * 0.1,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['conv_down_1-5.bias'],
            'lr': args.lr * 0.2,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['score_dsn_1-5.weight'],
            'lr': args.lr * 0.01,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['score_dsn_1-5.bias'],
            'lr': args.lr * 0.02,
            'weight_decay': 0.
        },
        {
            'params': net_parameters_id['score_final.weight'],
            'lr': args.lr * 0.001,
            'weight_decay': args.weight_decay
        },
        {
            'params': net_parameters_id['score_final.bias'],
            'lr': args.lr * 0.002,
            'weight_decay': 0.
        },
    ],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.stepsize,
                                    gamma=args.gamma)

    # log
    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    for epoch in range(args.start_epoch, args.maxepoch):

        tr_avg_loss, tr_detail_loss = train(
            train_loader,
            model,
            optimizer,
            epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))

        # with torch.no_grad():
        #     # test(model, test_loader, epoch=epoch,
        #     #     save_dir = join(TMP_DIR, 'epoch-%d-testing-record-view' % epoch))

        #     # multiscale_test(model, test_loader, epoch=epoch,
        #     #     save_dir = join(TMP_DIR, 'epoch-%d-testing-record' % epoch))

        log.flush()  # write log

        # Save checkpoint
        save_file = os.path.join(TMP_DIR, 'checkpoint.pth')
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            filename=save_file)

        scheduler.step()  # will adjust learning rate
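
Since this example always overwrites the same checkpoint.pth, resuming later means reading the identical keys back. A sketch of that round trip (illustrative, not the repository's helper):

import os
import torch

checkpoint = torch.load(os.path.join(TMP_DIR, 'checkpoint.pth'))
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1  # continue from the next epoch
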
Example #10
def main():
    parser = argparse.ArgumentParser()

    # Settings
    parser.add_argument('-d',
                        '--dataset',
                        choices=dataset_attributes.keys(),
                        required=True)
    parser.add_argument('-s',
                        '--shift_type',
                        choices=shift_types,
                        required=True)
    # Confounders
    parser.add_argument('-t', '--target_name')
    parser.add_argument('-c', '--confounder_names', nargs='+')
    # Resume?
    parser.add_argument('--resume', default=False, action='store_true')
    # Label shifts
    parser.add_argument('--minority_fraction', type=float)
    parser.add_argument('--imbalance_ratio', type=float)
    # Data
    parser.add_argument('--fraction', type=float, default=1.0)
    parser.add_argument('--root_dir', default=None)
    parser.add_argument('--subsample_to_minority',
                        action='store_true',
                        default=False)
    parser.add_argument('--reweight_groups',
                        action='store_true',
                        default=False)
    parser.add_argument('--augment_data', action='store_true', default=False)
    parser.add_argument('--val_fraction', type=float, default=0.1)
    # Objective
    parser.add_argument('--robust', default=False, action='store_true')
    parser.add_argument('--alpha', type=float, default=0.2)
    parser.add_argument('--generalization_adjustment', default="0.0")
    parser.add_argument('--automatic_adjustment',
                        default=False,
                        action='store_true')
    parser.add_argument('--robust_step_size', default=0.01, type=float)
    parser.add_argument('--use_normalized_loss',
                        default=False,
                        action='store_true')
    parser.add_argument('--btl', default=False, action='store_true')
    parser.add_argument('--hinge', default=False, action='store_true')

    # Model
    parser.add_argument('--model',
                        choices=model_attributes.keys(),
                        default='resnet50')
    parser.add_argument('--train_from_scratch',
                        action='store_true',
                        default=False)
    parser.add_argument('--resnet_width', type=int, default=None)

    # Optimization
    parser.add_argument('--n_epochs', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--scheduler', action='store_true', default=False)
    parser.add_argument('--weight_decay', type=float, default=5e-5)
    parser.add_argument('--gamma', type=float, default=0.1)
    parser.add_argument('--minimum_variational_weight', type=float, default=0)
    # Misc
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--show_progress', default=False, action='store_true')
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int, default=10)
    parser.add_argument('--save_best', action='store_true', default=False)
    parser.add_argument('--save_last', action='store_true', default=True)
    parser.add_argument('--student_width', type=int)
    parser.add_argument('--teacher_dir', type=str)
    parser.add_argument('--teacher_width', type=int)
    parser.add_argument('--gpu', type=str)
    parser.add_argument('--temp', type=str)

    args = parser.parse_args()
    gpu = args.gpu
    temp = args.temp
    check_args(args)
    teacher_dir = args.teacher_dir
    student_width = args.student_width
    teacher_width = args.teacher_width
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    def DistillationLoss(temperature):
        cross_entropy = torch.nn.CrossEntropyLoss()

        def loss(student_logits, teacher_logits, target):
            last_dim = len(student_logits.shape) - 1
            p_t = nn.functional.softmax(teacher_logits / temperature,
                                        dim=last_dim)
            log_p_s = nn.functional.log_softmax(student_logits / temperature,
                                                dim=last_dim)
            return cross_entropy(student_logits, target) - (p_t * log_p_s).sum(
                dim=last_dim).mean() * temperature**2

        return loss

    # BERT-specific configs copied over from run_glue.py
    if args.model == 'bert':
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    logger = Logger(os.path.join(args.log_dir, 'log.txt'), mode)
    # Record args
    log_args(args, logger)

    set_seed(args.seed)
    print("starting prep")
    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == 'confounder':
        train_data, val_data, test_data = prepare_data(args, train=True)
    elif args.shift_type == 'label_shift_step':
        train_data, val_data = prepare_data(args, train=True)
    print("done prep")
    loader_kwargs = {
        'batch_size': args.batch_size,
        'num_workers': 16,
        'pin_memory': True
    }
    train_loader = train_data.get_loader(train=True,
                                         reweight_groups=args.reweight_groups,
                                         **loader_kwargs)
    val_loader = val_data.get_loader(train=False,
                                     reweight_groups=None,
                                     **loader_kwargs)
    if test_data is not None:
        test_loader = test_data.get_loader(train=False,
                                           reweight_groups=None,
                                           **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes

    log_data(data, logger)
    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ['CelebA', 'CUB']  # Only supports binary

        def hinge_loss(yhat, y):
            # The torch loss takes in three arguments so we need to split yhat
            # It also expects classes in {+1.0, -1.0} whereas by default we give them in {0, 1}
            # Furthermore, if y = 1 it expects the first input to be higher instead of the second,
            # so we need to swap yhat[:, 0] and yhat[:, 1]...
            torch_loss = torch.nn.MarginRankingLoss(margin=1.0,
                                                    reduction='none')
            y = (y.float() * 2.0) - 1.0
            return torch_loss(yhat[:, 1], yhat[:, 0], y)

        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction='none')

    if resume:
        df = pd.read_csv(os.path.join(args.log_dir, 'test.csv'))
        epoch_offset = df.loc[len(df) - 1, 'epoch'] + 1
        logger.write(f'starting from epoch {epoch_offset}')
    else:
        epoch_offset = 0

    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'),
                                      train_data.n_groups,
                                      mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'),
                                    train_data.n_groups,
                                    mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'),
                                     train_data.n_groups,
                                     mode=mode)
    strain_csv_logger = CSVBatchLogger(os.path.join(args.log_dir,
                                                    'strain.csv'),
                                       train_data.n_groups,
                                       mode=mode)
    sval_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'sval.csv'),
                                     train_data.n_groups,
                                     mode=mode)
    stest_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'stest.csv'),
                                      train_data.n_groups,
                                      mode=mode)

    teacher = resnet10vw(teacher_width, num_classes=n_classes)
    teacher_old = torch.load(teacher_dir + "/10_model.pth")
    for k, m in teacher_old.named_modules():
        m._non_persistent_buffers_set = set()  # PyTorch 1.6.0 compatibility
    teacher.load_state_dict(teacher_old.state_dict())
    teacher = teacher.to('cuda')

    distill_criterion = DistillationLoss(float(temp))
    student = resnet10vw(int(student_width), num_classes=n_classes).to('cuda')

    #student.to(device)
    train(teacher,
          student,
          criterion,
          distill_criterion,
          data,
          logger,
          train_csv_logger,
          val_csv_logger,
          test_csv_logger,
          strain_csv_logger,
          sval_csv_logger,
          stest_csv_logger,
          args,
          epoch_offset=epoch_offset)
    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
    strain_csv_logger.close()
    sval_csv_logger.close()
    stest_csv_logger.close()
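
DistillationLoss above combines hard-label cross-entropy with a softened cross-entropy against the teacher, scaled by temperature squared. A toy usage sketch (shapes and the temperature value are assumptions):

import torch

criterion_kd = DistillationLoss(temperature=4.0)  # as defined above
student_logits = torch.randn(8, 10, requires_grad=True)
teacher_logits = torch.randn(8, 10)
target = torch.randint(0, 10, (8,))
loss = criterion_kd(student_logits, teacher_logits, target)
loss.backward()  # gradients flow only through the student logits
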
Example #11
def main():
    args.cuda = True
    # dataset
    train_dataset = BSDSLoader(root=args.dataset, dataSplit="train")
    test_dataset = BSDSLoader(root=args.dataset, dataSplit="test")
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=8,
                              drop_last=True,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             num_workers=8,
                             drop_last=True,
                             shuffle=False)
    with open(join(args.dataset, 'test.lst'), 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list),
                                                             len(test_loader))

    # model
    model = HED()

    model.apply(weights_init)

    pretrained_dict = torch.load(args.model_path)
    pretrained_dict = convert_vgg(pretrained_dict)

    model_dict = model.state_dict()
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

    model.cuda()

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    #tune lr
    tuned_lrs = tune_lrs(model, args.lr, args.weight_decay)

    optimizer = torch.optim.SGD(tuned_lrs,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.stepsize,
                                    gamma=args.gamma)

    # log
    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            validate(model,
                     test_loader,
                     epoch=epoch,
                     test_list=test_list,
                     save_dir=join(TMP_DIR, 'initial-testing-record'))

        tr_avg_loss, tr_detail_loss = train(
            train_loader,
            model,
            optimizer,
            epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        validate(model,
                 test_loader,
                 epoch=epoch,
                 test_list=test_list,
                 save_dir=join(TMP_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log
        # Save checkpoint
        save_file = join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            filename=save_file)
        scheduler.step()  # will adjust learning rate
        # save train/val loss/accuracy, save every epoch in case of early stop
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
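# A minimal sketch of the save_checkpoint helper the loop above relies on: it
# simply serializes the given state dict with torch.save. The real helper may
# also track a 'latest' or best checkpoint; that detail is an assumption here.
import torch

def save_checkpoint(state, filename='checkpoint.pth'):
    # state is expected to hold 'epoch', 'state_dict' and 'optimizer' entries
    torch.save(state, filename)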
Example #12
0
def main(args):
    if args.wandb:
        wandb.init(project=f"{args.project_name}_{args.dataset}")
        wandb.config.update(args)

    # BERT-specific configs copied over from run_glue.py
    if (args.model.startswith("bert") and args.use_bert_params): 
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = "a"
    else:
        resume = False
        mode = "w"

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    logger = Logger(os.path.join(args.log_dir, "log.txt"), mode)
    # Record args
    log_args(args, logger)

    set_seed(args.seed)

    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == "confounder":
        train_data, val_data, test_data = prepare_data(
            args,
            train=True,
        )

    elif args.shift_type == "label_shift_step":
        raise NotImplementedError
        train_data, val_data = prepare_data(args, train=True)  # unreachable until implemented

    #########################################################################
    ###################### Prepare data for our method ######################
    #########################################################################

    # Should probably not be upweighting if folds are specified.
    assert not args.fold or not args.up_weight

    # Fold passed. Use it as train and valid.
    if args.fold:
        train_data, val_data = folds.get_fold(
            train_data,
            args.fold,
            cross_validation_ratio=(1 / args.num_folds_per_sweep),
            num_valid_per_point=args.num_sweeps,
            seed=args.seed,
        )

    if args.up_weight != 0:
        assert args.aug_col is not None
        # Get points that should be upsampled
        metadata_df = pd.read_csv(args.metadata_path)
        if args.dataset == "jigsaw":
            train_col = metadata_df[metadata_df["split"] == "train"]
        else:
            train_col = metadata_df[metadata_df["split"] == 0]
        aug_indices = np.where(train_col[args.aug_col] == 1)[0]
        print("len", len(train_col), len(aug_indices))
        if args.up_weight == -1:
            up_weight_factor = int(
                (len(train_col) - len(aug_indices)) / len(aug_indices)) - 1
        else:
            up_weight_factor = args.up_weight

        print(f"Up-weight factor: {up_weight_factor}")
        upsampled_points = Subset(train_data,
                                  list(aug_indices) * up_weight_factor)
        # Convert to DRODataset
        train_data = dro_dataset.DRODataset(
            ConcatDataset([train_data, upsampled_points]),
            process_item_fn=None,
            n_groups=train_data.n_groups,
            n_classes=train_data.n_classes,
            group_str_fn=train_data.group_str,
        )
    elif args.aug_col is not None:
        print("\n"*2 + "WARNING: aug_col is not being used." + "\n"*2)

    #########################################################################
    #########################################################################
    #########################################################################

    loader_kwargs = {
        "batch_size": args.batch_size,
        "num_workers": 4,
        "pin_memory": True,
    }
    train_loader = dro_dataset.get_loader(train_data,
                                          train=True,
                                          reweight_groups=args.reweight_groups,
                                          **loader_kwargs)

    val_loader = dro_dataset.get_loader(val_data,
                                        train=False,
                                        reweight_groups=None,
                                        **loader_kwargs)

    if test_data is not None:
        test_loader = dro_dataset.get_loader(test_data,
                                             train=False,
                                             reweight_groups=None,
                                             **loader_kwargs)

    data = {}
    data["train_loader"] = train_loader
    data["val_loader"] = val_loader
    data["test_loader"] = test_loader
    data["train_data"] = train_data
    data["val_data"] = val_data
    data["test_data"] = test_data

    n_classes = train_data.n_classes

    log_data(data, logger)

    ## Initialize model
    model = get_model(
        model=args.model,
        pretrained=not args.train_from_scratch,
        resume=resume,
        n_classes=train_data.n_classes,
        dataset=args.dataset,
        log_dir=args.log_dir,
    )
    if args.wandb:
        wandb.watch(model)

    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ["CelebA", "CUB"]  # Only supports binary
        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction="none")

    if resume:
        raise NotImplementedError  # Check this implementation.
        df = pd.read_csv(os.path.join(args.log_dir, "test.csv"))
        epoch_offset = df.loc[len(df) - 1, "epoch"] + 1
        logger.write(f"starting from epoch {epoch_offset}")
    else:
        epoch_offset = 0

    
    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, "train.csv"),
                                      train_data.n_groups,
                                      mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, "val.csv"),
                                    val_data.n_groups,
                                    mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, "test.csv"),
                                     test_data.n_groups,
                                     mode=mode)
    train(
        model,
        criterion,
        data,
        logger,
        train_csv_logger,
        val_csv_logger,
        test_csv_logger,
        args,
        epoch_offset=epoch_offset,
        csv_name=args.fold,
        wandb=wandb if args.wandb else None,
    )

    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
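# A minimal sketch of the per-sample hinge_loss selected when args.hinge is
# set. The example asserts a binary dataset, so this assumes two logits per
# sample and returns one loss per example (mirroring reduction="none"); the
# original formulation is not shown, so treat this body as an assumption.
import torch

def hinge_loss(logits, target):
    # Margin between the correct-class logit and the other class's logit
    idx = torch.arange(len(target))
    margin = logits[idx, target] - logits[idx, 1 - target]
    return torch.relu(1.0 - margin)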
Example #13
0
def main():
    args.cuda = True
    # dataset
    train_dataset = BSDSLoader(root=args.dataset, split="train")
    test_dataset = BSDSLoader(root=args.dataset, split="test")
    train_loader = DataLoader(
        train_dataset, batch_size=args.batch_size,
        num_workers=4, drop_last=True, shuffle=True)
    # Test one image at a time so test_list lines up with the loader, and keep
    # every sample (no drop_last); the length assert below needs batch_size=1.
    test_loader = DataLoader(
        test_dataset, batch_size=1,
        num_workers=4, drop_last=False, shuffle=False)
    with open(join(args.dataset, 'test.lst'), 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list), len(test_loader))

    # default hyperparameters
    if args.use_cfg:
        if args.pretrained and not args.small:
            args.stepsize = 2
            args.lr = 0.001 if args.harmonic else 0.0002
        elif args.small:
            args.stepsize = 6
            args.lr = 0.005 if args.harmonic else 0.001
        else:
            args.stepsize = 4
            args.lr = 0.0005 if args.harmonic else 0.0002
        args.maxepoch = args.stepsize + 1
        
    # model
    model = HEDSmall(harmonic=args.harmonic) if args.small else HED(harmonic=args.harmonic)
    model.cuda()
    model.apply(weights_init)
    if args.pretrained and not args.small:
        if args.harmonic:    
            load_harm_vgg16pretrain(model)
        else:
            load_vgg16pretrain(model)
    
    #tune lr
    net_parameters_id = {}
    
    if args.pretrained and not args.small:
        for pname, p in model.named_parameters():
            if pname in ['conv1_1.weight','conv1_2.weight',
                         'conv2_1.weight','conv2_2.weight',
                         'conv3_1.weight','conv3_2.weight','conv3_3.weight',
                         'conv4_1.weight','conv4_2.weight','conv4_3.weight',
                         'conv5_1.weight','conv5_2.weight','conv5_3.weight']:
                print(pname, 'lr:1 de:1')
                net_parameters_id.setdefault('conv1-5.weight', []).append(p)
            elif pname in ['conv1_1.bias','conv1_2.bias',
                           'conv2_1.bias','conv2_2.bias',
                           'conv3_1.bias','conv3_2.bias','conv3_3.bias',
                           'conv4_1.bias','conv4_2.bias','conv4_3.bias',
                           'conv5_1.bias','conv5_2.bias','conv5_3.bias']:
                print(pname, 'lr:2 de:0')
                net_parameters_id.setdefault('conv1-5.bias', []).append(p)
            elif pname in ['score_dsn1.weight','score_dsn2.weight','score_dsn3.weight',
                           'score_dsn4.weight','score_dsn5.weight']:
                print(pname, 'lr:0.01 de:1')
                net_parameters_id.setdefault('score_dsn_1-5.weight', []).append(p)
            elif pname in ['score_dsn1.bias','score_dsn2.bias','score_dsn3.bias',
                           'score_dsn4.bias','score_dsn5.bias']:
                print(pname, 'lr:0.02 de:0')
                net_parameters_id.setdefault('score_dsn_1-5.bias', []).append(p)
            elif pname in ['score_final.weight']:
                print(pname, 'lr:0.001 de:1')
                net_parameters_id.setdefault('score_final.weight', []).append(p)
            elif pname in ['score_final.bias']:
                print(pname, 'lr:0.002 de:0')
                net_parameters_id.setdefault('score_final.bias', []).append(p)
        param_groups = [
                {'params': net_parameters_id['conv1-5.weight']      , 'lr': args.lr*1    , 'weight_decay': args.weight_decay},
                {'params': net_parameters_id['conv1-5.bias']        , 'lr': args.lr*2    , 'weight_decay': 0.},
                {'params': net_parameters_id['score_dsn_1-5.weight'], 'lr': args.lr*0.01 , 'weight_decay': args.weight_decay},
                {'params': net_parameters_id['score_dsn_1-5.bias']  , 'lr': args.lr*0.02 , 'weight_decay': 0.},
                {'params': net_parameters_id['score_final.weight']  , 'lr': args.lr*0.001, 'weight_decay': args.weight_decay},
                {'params': net_parameters_id['score_final.bias']    , 'lr': args.lr*0.002, 'weight_decay': 0.}
            ]
    else:
        net_parameters_id = {'weights': [], 'biases': []}
        for pname, p in model.named_parameters():
            if 'weight' in pname:
                net_parameters_id['weights'].append(p)
            elif 'bias' in pname:
                net_parameters_id['biases'].append(p)
        param_groups = [
                {'params': net_parameters_id['weights'], 'weight_decay': args.weight_decay},
                {'params': net_parameters_id['biases'], 'weight_decay': 0.}
            ]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    

    if args.resume:
        if isfile(args.resume): 
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'"
                  .format(args.resume))
            optimizer.load_state_dict(checkpoint['optimizer'])
            args.start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # log
    log = Logger(join(OUT_DIR, 'log.txt'))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            test(model, test_loader, epoch=epoch, test_list=test_list,
                 save_dir = join(OUT_DIR, 'initial-testing-record'))

        tr_avg_loss, tr_detail_loss = train(
            train_loader, model, optimizer, epoch,
            save_dir = join(OUT_DIR, 'epoch-%d-training-record' % epoch))
        test(model, test_loader, epoch=epoch, test_list=test_list,
            save_dir = join(OUT_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush() # write log
        # Save checkpoint
        save_file = os.path.join(OUT_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
                         }, filename=save_file)
        scheduler.step() # will adjust learning rate
        # save train/val loss/accuracy, save every epoch in case of early stop
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
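# A minimal sketch of the weights_init callback passed to model.apply above:
# it gives conv layers small Gaussian weights and zero biases. The original
# may use different constants or cover more layer types; this version is an
# assumption for illustration.
import torch.nn as nn

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.normal_(m.weight, mean=0.0, std=0.01)
        if m.bias is not None:
            nn.init.zeros_(m.bias)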
Example #14
0
def main():
    print("Loading and checking args...")
    args = parse_args()
    check_args(args)
    # BERT-specific configs copied over from run_glue.py
    if args.model.startswith('bert'):
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    #Write for logging; assumes no existing logs.
    mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    logger = Logger(os.path.join(args.log_dir, 'log.txt'), mode)
    # Record args
    log_args(args, logger)
    set_seed(args.seed)

    # Data
    print("Preparing data")
    train_data, val_data, test_data = prepare_data(args, train=True)

    print("Setting up loader")
    loader_kwargs = {
        'batch_size': args.batch_size,
        'num_workers': 4,
        'pin_memory': True
    }
    train_loader = train_data.get_loader(train=True,
                                         reweight_groups=args.reweight_groups,
                                         **loader_kwargs)
    val_loader = val_data.get_loader(train=False,
                                     reweight_groups=None,
                                     **loader_kwargs)
    test_loader = test_data.get_loader(train=False,
                                       reweight_groups=None,
                                       **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes

    log_data(data, logger)

    ## Initialize model
    if args.model == 'resnet50':
        model = torchvision.models.resnet50(pretrained=True)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
        if args.mc_dropout:
            model = add_dropout(model, 'fc')
    elif args.model == 'densenet121':
        model = torchvision.models.densenet121(pretrained=True)
        d = model.classifier.in_features
        model.classifier = nn.Linear(d, n_classes)
        if args.mc_dropout:
            model = add_dropout(model, 'classifier')
    elif args.model == 'bert-base-uncased':
        print("Loading bert")
        model = BertForSequenceClassification.from_pretrained(
            args.model, num_labels=n_classes)
    else:
        raise ValueError('Model not recognized.')

    logger.flush()
    criterion = torch.nn.CrossEntropyLoss(reduction='none')
    print("Getting loggers")
    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'),
                                      train_data.n_groups,
                                      mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'),
                                    val_data.n_groups,
                                    mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'),
                                     test_data.n_groups,
                                     mode=mode)

    print("Starting to train...")
    train(model,
          criterion,
          data,
          logger,
          train_csv_logger,
          val_csv_logger,
          test_csv_logger,
          args,
          epoch_offset=0,
          train=True)

    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()

    if args.save_preds:
        save_preds(model, data, args)
        return
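# A minimal sketch of the save_preds helper called when args.save_preds is set:
# it runs the trained model over the test loader and writes softmax scores to
# disk. The batch layout (inputs first in each tuple) and the output filename
# are assumptions, not the original implementation.
import os
import torch
import torch.nn.functional as F

def save_preds(model, data, args):
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in data['test_loader']:
            x = batch[0].cuda()
            preds.append(F.softmax(model(x), dim=-1).cpu())
    torch.save(torch.cat(preds), os.path.join(args.log_dir, 'test_preds.pt'))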