def main(args):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    down_layers, up_layers = create_unet(args.cfg_file)
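    # create_unet parses the config file into downsampling/upsampling layer lists; GeneralUnet assembles them into one network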
    print(down_layers, up_layers)
    unet = GeneralUnet(down_layers, up_layers)
    unet = unet.to(device)

    input_transform = transforms.Compose([transforms.Resize((args.resize, args.resize)),
                                          transforms.ToTensor()])

    label_transform = transforms.Compose([transforms.Resize((args.resize, args.resize)),
                                          transforms.ToTensor()])

    train_dataset = ReconstructionDataset.ReconstructionDataset(
        args.train_csv, args.train_input_dir, args.train_gt_dir,
        input_transforms=input_transform, label_transforms=label_transform)
    val_dataset = ReconstructionDataset.ReconstructionDataset(
        args.val_csv, args.val_input_dir, args.val_gt_dir,
        input_transforms=input_transform, label_transforms=label_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=6,
        pin_memory=True
    )

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=6,
        pin_memory=True
    )

    # optimizer = torch.optim.Adam(unet.parameters(), lr=0.5, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    optimizer = torch.optim.SGD(unet.parameters(), lr=0.05, momentum=0.9, weight_decay=0)
    criterion = torch.nn.MSELoss().to(device)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,15,20,25,30,35])

    best_loss = float('inf')

    for epoch in range(1, args.epochs + 1):
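        # train, advance the LR schedule, validate, and checkpoint; track the best validation loss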
        train(unet, optimizer, criterion, device, train_loader, epoch, args.log_interval)
        scheduler.step()
        test_loss = test(unet, device, val_loader, epoch, args.log_interval)

        is_best = test_loss < best_loss
        best_loss = min(test_loss, best_loss)

        # save the model every epoch, flagging the best one so far
        save_checkpoint({
            'epoch': epoch,
            'model_state_dict': unet.state_dict(),
            'optimizer': optimizer.state_dict(),
            'loss': test_loss
            }, is_best)
Example #2
def train_rnn_model(train_loader, val_loader, num_features, num_epochs,
                    use_cuda, path_output):
    """
    Train the VariableRNN model using the train and validation loaders.
    Input: train_loader, val_loader, num_features, num_epochs, use_cuda, path_output
    Output: best model plus per-epoch losses/accuracies and the final validation results
    """
    device = torch.device(
        "cuda" if torch.cuda.is_available() and use_cuda else "cpu")
    torch.manual_seed(1)
    if device.type == "cuda":
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    model = VariableRNN(num_features)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    model.to(device)
    criterion.to(device)

    best_val_acc = 0.0

    train_losses, train_accuracies = [], []
    valid_losses, valid_accuracies = [], []

    for epoch in range(num_epochs):
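        # one training epoch followed by validation; the checkpoint below keeps whichever model has the best validation accuracy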

        train_loss, train_accuracy = train(model, device, train_loader,
                                           criterion, optimizer, epoch)
        valid_loss, valid_accuracy, valid_results = evaluate(
            model, device, val_loader, criterion)

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        train_accuracies.append(train_accuracy)
        valid_accuracies.append(valid_accuracy)

        is_best = valid_accuracy > best_val_acc

        if is_best:
            best_val_acc = valid_accuracy
            torch.save(
                model,
                os.path.join(path_output, "VariableRNN.pth"),
                _use_new_zipfile_serialization=False,
            )

    best_model = torch.load(os.path.join(path_output, "VariableRNN.pth"))
    return (
        best_model,
        train_losses,
        valid_losses,
        train_accuracies,
        valid_accuracies,
        valid_results,
    )
Example #3
def main(args):

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model = Model(args.cfg_file).to(device)
    # model = models.resnet18().to(device)

    data_transforms = transforms.Compose(
        [transforms.Resize((args.resize, args.resize)),
        transforms.ToTensor()]
        )

    train_dataset = ClassificationDataset.ClassificationDataset(args.train_csv,
        args.root_dir, transform=data_transforms)
    val_dataset = ClassificationDataset.ClassificationDataset(args.val_csv,
        args.root_dir, transform=data_transforms)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=6,
        pin_memory=True
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=6,
        pin_memory=True
    )

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(args.epochs):
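        # one training epoch followed by cross-entropy evaluation on the validation set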
        train(model, optimizer, criterion, device, train_loader, epoch, args.log_interval)
        test_loss = test(model, device, val_loader, epoch, args.log_interval, loss_fn='ce')
Example #4
def pytorch_model():
    depth = [3, 5, 9]
    hidden_nodes = [5, 10, 25, 50, 100]
    init_params = [['tanh', 'xavier'], ['relu', 'he']]
    num_epochs = 10

    train_dataset = BankNoteDataset('./bank-note/train.csv', 4)
    test_dataset = BankNoteDataset('./bank-note/test.csv', 4)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=12,
                                               shuffle=True,
                                               num_workers=6,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=12,
                                              shuffle=True,
                                              num_workers=6,
                                              pin_memory=True)

    for (activation, initialization) in init_params:
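        # grid search: every (activation, initialization) pair is trained at every depth and hidden-layer width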
        for d in depth:
            for nodes in hidden_nodes:
                print('Depth:', d, ', Nodes:', nodes, ', Activation:',
                      activation, ', Initialization:', initialization)
                model = torch_nn(4, 2, nodes, d, activation, initialization)
                optimizer = torch.optim.Adam(model.parameters())
                criterion = torch.nn.CrossEntropyLoss()
                train_loss = 0
                test_loss = 0
                for epoch in range(1, num_epochs + 1):
                    train_loss += train(model, optimizer, criterion,
                                        train_loader, epoch)
                    test_loss += test(model, test_loader, epoch)

                print('Train Loss: {:.6f}'.format(train_loss))
                print('Test Loss: {:.6f}'.format(test_loss))
Example #5
    hparams = {
        'name': 'knrm',
        'batch_size': 100,
        'title_size': 20,
        'his_size': 50,
        'npratio': 4,
        'embedding_dim': 300,
        'kernel_num': 11,
        'metrics': 'group_auc,ndcg@5,ndcg@10,mean_mrr',
        'attrs': ['title'],
    }

    hparams = load_hparams(hparams)
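    # the merged hparams are expected to also provide 'device', 'mode', and 'save_path', which are used below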
    device = torch.device(hparams['device'])

    vocab, loader_train, loader_test, loader_validate = prepare(hparams,
                                                                validate=True)
    knrmModel = KNRMModel(vocab=vocab, hparams=hparams).to(device)

    if hparams['mode'] == 'test':
        knrmModel.load_state_dict(torch.load(hparams['save_path']))
        print("testing...")
        evaluate(knrmModel, hparams, loader_test)

    elif hparams['mode'] == 'train':
        train(knrmModel,
              hparams,
              loader_train,
              loader_test,
              loader_validate,
              tb=True)
Example #6
def main():
    global args
    parser = arg_parser()
    args = parser.parse_args()
    cudnn.benchmark = True

    num_classes, train_list_name, val_list_name, test_list_name, filename_seperator, image_tmpl, filter_video, label_file = get_dataset_config(
        args.dataset, args.use_lmdb)

    args.num_classes = num_classes

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5

    model, arch_name = build_model(args)
    mean = model.mean(args.modality)
    std = model.std(args.modality)

    # overwrite mean and std if they are presented in command
    if args.mean is not None:
        if args.modality == 'rgb':
            if len(args.mean) != 3:
                raise ValueError(
                    "When training with rgb, dim of mean must be three.")
        elif args.modality == 'flow':
            if len(args.mean) != 1:
                raise ValueError(
                    "When training with flow, dim of mean must be one.")
        mean = args.mean

    if args.std is not None:
        if args.modality == 'rgb':
            if len(args.std) != 3:
                raise ValueError(
                    "When training with rgb, dim of std must be three.")
        elif args.modality == 'flow':
            if len(args.std) != 1:
                raise ValueError(
                    "When training with flow, dim of std must be one.")
        std = args.std

    model = model.cuda()
    model.eval()

    if args.threed_data:
        dummy_data = (args.input_channels, args.groups, args.input_size, args.input_size)
    else:
        dummy_data = (args.input_channels * args.groups, args.input_size, args.input_size)

    model_summary = torchsummary.summary(model, input_size=dummy_data)
    torch.cuda.empty_cache()

    if args.show_model:
        print(model)
        print(model_summary)
        return 0

    model = torch.nn.DataParallel(model).cuda()

    if args.pretrained is not None:
        print("=> using pre-trained model '{}'".format(arch_name))
        checkpoint = torch.load(args.pretrained, map_location='cpu')
        if args.transfer:
            new_dict = {}
            for k, v in checkpoint['state_dict'].items():
                # TODO: a better approach:
                if k.replace("module.", "").startswith("fc"):
                    continue
                new_dict[k] = v
        else:
            new_dict = checkpoint['state_dict']
        model.load_state_dict(new_dict, strict=False)
    else:
        print("=> creating model '{}'".format(arch_name))

    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda()
    val_criterion = nn.CrossEntropyLoss().cuda()

    # Data loading code
    video_data_cls = VideoDataSetLMDB if args.use_lmdb else VideoDataSet
    val_list = os.path.join(args.datadir, val_list_name)
    val_augmentor = get_augmentor(False,
                                  args.input_size,
                                  mean,
                                  std,
                                  args.disable_scaleup,
                                  threed_data=args.threed_data,
                                  version=args.augmentor_ver,
                                  scale_range=args.scale_range)
    val_dataset = video_data_cls(args.datadir,
                                 val_list,
                                 args.groups,
                                 args.frames_per_group,
                                 num_clips=args.num_clips,
                                 modality=args.modality,
                                 image_tmpl=image_tmpl,
                                 dense_sampling=args.dense_sampling,
                                 transform=val_augmentor,
                                 is_train=False,
                                 test_mode=False,
                                 seperator=filename_seperator,
                                 filter_video=filter_video)

    val_loader = build_dataflow(val_dataset,
                                is_train=False,
                                batch_size=args.batch_size,
                                workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    if args.evaluate:
        logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
        flops, params = extract_total_flops_params(model_summary)
        print(model_summary)
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)
        print(
            'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
            .format(args.input_size, val_losses, val_top1, val_top5,
                    val_speed * 1000.0, flops, params),
            flush=True)
        print(
            'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
            .format(args.input_size, val_losses, val_top1, val_top5,
                    val_speed * 1000.0, flops, params),
            flush=True,
            file=logfile)
        return

    train_list = os.path.join(args.datadir, train_list_name)

    train_augmentor = get_augmentor(True,
                                    args.input_size,
                                    mean,
                                    std,
                                    threed_data=args.threed_data,
                                    version=args.augmentor_ver,
                                    scale_range=args.scale_range)
    train_dataset = video_data_cls(args.datadir,
                                   train_list,
                                   args.groups,
                                   args.frames_per_group,
                                   num_clips=args.num_clips,
                                   modality=args.modality,
                                   image_tmpl=image_tmpl,
                                   dense_sampling=args.dense_sampling,
                                   transform=train_augmentor,
                                   is_train=True,
                                   test_mode=False,
                                   seperator=filename_seperator,
                                   filter_video=filter_video)

    train_loader = build_dataflow(train_dataset,
                                  is_train=True,
                                  batch_size=args.batch_size,
                                  workers=args.workers)

    sgd_polices = model.parameters()
    optimizer = torch.optim.SGD(sgd_polices,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, args.lr_steps[0], gamma=0.1)
    elif args.lr_scheduler == 'multisteps':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             args.lr_steps,
                                             gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   args.epochs,
                                                   eta_min=0)
    elif args.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   verbose=True)

    best_top1 = 0.0
    tensorboard_logger.configure(os.path.join(log_folder))
    # optionally resume from a checkpoint
    if args.resume:
        logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            try:
                scheduler.load_state_dict(checkpoint['scheduler'])
            except Exception:
                # the checkpoint may not contain a compatible scheduler state
                pass
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError("Checkpoint is not found: {}".format(args.resume))
    else:
        if os.path.exists(os.path.join(log_folder, 'log.log')):
            shutil.copyfile(
                os.path.join(log_folder, 'log.log'),
                os.path.join(log_folder,
                             'log.log.{}'.format(int(time.time()))))
        logfile = open(os.path.join(log_folder, 'log.log'), 'w')

    command = " ".join(sys.argv)
    print(command, flush=True)
    print(args, flush=True)
    print(model, flush=True)
    print(model_summary, flush=True)

    print(command, file=logfile, flush=True)
    print(args, file=logfile, flush=True)

    if args.resume == '':
        print(model, file=logfile, flush=True)
        print(model_summary, flush=True, file=logfile)

    for epoch in range(args.start_epoch, args.epochs):
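        # one epoch: train, validate, step the LR scheduler, checkpoint, and log to tensorboard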
        try:
            # get_lr gets the lrs for every param group of the current epoch; assume the lr for all groups is identical
            lr = scheduler.optimizer.param_groups[0]['lr']
        except Exception:
            lr = None
        # set current learning rate
        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1,
                  display=args.print_freq,
                  label_smoothing=args.label_smoothing, clip_gradient=args.clip_gradient)
        print(
            'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, train_losses, train_top1,
                    train_top5, train_speed * 1000.0,
                    speed_data_loader * 1000.0),
            file=logfile,
            flush=True)
        print(
            'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, train_losses, train_top1,
                    train_top5, train_speed * 1000.0,
                    speed_data_loader * 1000.0),
            flush=True)

        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)
        print(
            'Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                    val_speed * 1000.0),
            file=logfile,
            flush=True)
        print(
            'Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                    val_speed * 1000.0),
            flush=True)

        # step the LR schedule after validation; the plateau scheduler needs the validation loss
        if args.lr_scheduler == 'plateau':
            scheduler.step(val_losses)
        else:
            scheduler.step(epoch + 1)

        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_top1
        best_top1 = max(val_top1, best_top1)

        save_dict = {
            'epoch': epoch + 1,
            'arch': arch_name,
            'state_dict': model.state_dict(),
            'best_top1': best_top1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }

        save_checkpoint(save_dict, is_best, filepath=log_folder)

        if lr is not None:
            tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
        tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
        tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
        tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
        tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
        tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)

    logfile.close()
Example #7
        test_loss, test_acc = evaluation(args, 1, model, loader_test, dataset='test')
    print('\nvalidation acc : {:.5f}'.format(np.mean(val_acc)))
    print('test acc : {:.5f}'.format(np.mean(test_acc)))

elif args.command == 'train':
    # Step 3: Train and validation
    print("\nTraining...")
    print('Total epoch : ', args.n_epochs)
    best_acc = 0.0
    best_loss = np.inf
    wait = 0

    for ep in range(args.epoch_start, args.n_epochs):
        if 'cycle' in args.alg:
            ## training
            tr_loss_orig, tr_acc, tr_loss_reverse = train(args, ep, model, model_scheduler, loader_train)
            tr_loss = np.mean(tr_loss_orig) + np.mean(tr_loss_reverse)
            ## validation
            val_loss_orig, val_acc, val_loss_reverse = evaluation(args, ep, model, loader_val, dataset='val')
            val_loss = np.mean(val_loss_orig) + np.mean(val_loss_reverse)
        else:
            ## training
            tr_loss, tr_acc = train(args, ep, model, model_scheduler, loader_train)
            ## validation
            val_loss, val_acc = evaluation(args, ep, model, loader_val, dataset='val')

        print('\nepoch:{}, tr_loss:{:.5f}, tr_acc:{:.5f}, val_loss:{:.5f}, val_acc:{:.5f}'
              .format(ep+1, np.mean(tr_loss), np.mean(tr_acc), np.mean(val_loss), np.mean(val_acc)))

        # Model Save and Stop Criterion
        # save the model when val_acc comes out better than the best so far
Example #8
def main_worker(gpu, ngpus_per_node, args):
    cudnn.benchmark = args.cudnn_benchmark
    args.gpu = gpu

    num_classes, train_list_name, val_list_name, test_list_name, filename_seperator, image_tmpl, filter_video, label_file = get_dataset_config(
        args.dataset, args.use_lmdb)
    args.num_classes = num_classes

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5

    model, arch_name = build_model(args)
    mean = model.mean(args.modality)
    std = model.std(args.modality)

    # overwrite mean and std if they are presented in command
    if args.mean is not None:
        if args.modality == 'rgb':
            if len(args.mean) != 3:
                raise ValueError(
                    "When training with rgb, dim of mean must be three.")
        elif args.modality == 'flow':
            if len(args.mean) != 1:
                raise ValueError(
                    "When training with flow, dim of mean must be one.")
        mean = args.mean

    if args.std is not None:
        if args.modality == 'rgb':
            if len(args.std) != 3:
                raise ValueError(
                    "When training with rgb, dim of std must be three.")
        elif args.modality == 'flow':
            if len(args.std) != 1:
                raise ValueError(
                    "When training with flow, dim of std must be one.")
        std = args.std

    model = model.cuda(args.gpu)
    model.eval()

    if args.threed_data:
        dummy_data = (args.input_channels, args.groups, args.input_size,
                      args.input_size)
    else:
        dummy_data = (args.input_channels * args.groups, args.input_size,
                      args.input_size)

    if args.rank == 0:
        model_summary = torchsummary.summary(model, input_size=dummy_data)
        torch.cuda.empty_cache()

    if args.show_model and args.rank == 0:
        print(model)
        print(model_summary)
        return 0

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # the batch size should be divided by number of nodes as well
            args.batch_size = int(args.batch_size / args.world_size)
            args.workers = int(args.workers / ngpus_per_node)

            if args.sync_bn:
                process_group = torch.distributed.new_group(
                    list(range(args.world_size)))
                model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                    model, process_group)

            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        # assign rank to 0
        model = torch.nn.DataParallel(model).cuda()
        args.rank = 0

    if args.pretrained is not None:
        if args.rank == 0:
            print("=> using pre-trained model '{}'".format(arch_name))
        if args.gpu is None:
            checkpoint = torch.load(args.pretrained, map_location='cpu')
        else:
            checkpoint = torch.load(args.pretrained,
                                    map_location='cuda:{}'.format(args.gpu))
        if args.transfer:
            new_dict = {}
            for k, v in checkpoint['state_dict'].items():
                # TODO: a better approach:
                if k.replace("module.", "").startswith("fc"):
                    continue
                new_dict[k] = v
        else:
            new_dict = checkpoint['state_dict']
        model.load_state_dict(new_dict, strict=False)
        del checkpoint  # dereference seems crucial
        torch.cuda.empty_cache()
    else:
        if args.rank == 0:
            print("=> creating model '{}'".format(arch_name))

    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    val_criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # Data loading code
    val_list = os.path.join(args.datadir, val_list_name)

    val_augmentor = get_augmentor(
        False,
        args.input_size,
        scale_range=args.scale_range,
        mean=mean,
        std=std,
        disable_scaleup=args.disable_scaleup,
        threed_data=args.threed_data,
        is_flow=True if args.modality == 'flow' else False,
        version=args.augmentor_ver)

    video_data_cls = VideoDataSetLMDB if args.use_lmdb else VideoDataSet
    val_dataset = video_data_cls(args.datadir,
                                 val_list,
                                 args.groups,
                                 args.frames_per_group,
                                 num_clips=args.num_clips,
                                 modality=args.modality,
                                 image_tmpl=image_tmpl,
                                 dense_sampling=args.dense_sampling,
                                 transform=val_augmentor,
                                 is_train=False,
                                 test_mode=False,
                                 seperator=filename_seperator,
                                 filter_video=filter_video)

    val_loader = build_dataflow(val_dataset,
                                is_train=False,
                                batch_size=args.batch_size,
                                workers=args.workers,
                                is_distributed=args.distributed)

    log_folder = os.path.join(args.logdir, arch_name)
    if args.rank == 0:
        if not os.path.exists(log_folder):
            os.makedirs(log_folder)

    if args.evaluate:
        val_top1, val_top5, val_losses, val_speed = validate(val_loader,
                                                             model,
                                                             val_criterion,
                                                             gpu_id=args.gpu)
        if args.rank == 0:
            logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
            flops, params = extract_total_flops_params(model_summary)
            print(
                'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
                .format(args.input_size, val_losses, val_top1, val_top5,
                        val_speed * 1000.0, flops, params),
                flush=True)
            print(
                'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
                .format(args.input_size, val_losses, val_top1, val_top5,
                        val_speed * 1000.0, flops, params),
                flush=True,
                file=logfile)
        return

    train_list = os.path.join(args.datadir, train_list_name)

    train_augmentor = get_augmentor(
        True,
        args.input_size,
        scale_range=args.scale_range,
        mean=mean,
        std=std,
        disable_scaleup=args.disable_scaleup,
        threed_data=args.threed_data,
        is_flow=True if args.modality == 'flow' else False,
        version=args.augmentor_ver)

    train_dataset = video_data_cls(args.datadir,
                                   train_list,
                                   args.groups,
                                   args.frames_per_group,
                                   num_clips=args.num_clips,
                                   modality=args.modality,
                                   image_tmpl=image_tmpl,
                                   dense_sampling=args.dense_sampling,
                                   transform=train_augmentor,
                                   is_train=True,
                                   test_mode=False,
                                   seperator=filename_seperator,
                                   filter_video=filter_video)

    train_loader = build_dataflow(train_dataset,
                                  is_train=True,
                                  batch_size=args.batch_size,
                                  workers=args.workers,
                                  is_distributed=args.distributed)

    sgd_polices = model.parameters()
    optimizer = torch.optim.SGD(sgd_polices,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, args.lr_steps[0], gamma=0.1)
    elif args.lr_scheduler == 'multisteps':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             args.lr_steps,
                                             gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   args.epochs,
                                                   eta_min=0)
    elif args.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   verbose=True)

    best_top1 = 0.0
    # optionally resume from a checkpoint
    if args.resume:
        if args.rank == 0:
            logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            if args.rank == 0:
                print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume, map_location='cpu')
            else:
                checkpoint = torch.load(args.resume,
                                        map_location='cuda:{}'.format(
                                            args.gpu))
            args.start_epoch = checkpoint['epoch']
            # TODO: handle distributed version
            best_top1 = checkpoint['best_top1']
            if args.gpu is not None:
                if not isinstance(best_top1, float):
                    best_top1 = best_top1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            try:
                scheduler.load_state_dict(checkpoint['scheduler'])
            except Exception:
                # the checkpoint may not contain a compatible scheduler state
                pass
            if args.rank == 0:
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            del checkpoint  # dereference seems crucial
            torch.cuda.empty_cache()
        else:
            raise ValueError("Checkpoint is not found: {}".format(args.resume))
    else:
        if os.path.exists(os.path.join(log_folder,
                                       'log.log')) and args.rank == 0:
            shutil.copyfile(
                os.path.join(log_folder, 'log.log'),
                os.path.join(log_folder,
                             'log.log.{}'.format(int(time.time()))))
        if args.rank == 0:
            logfile = open(os.path.join(log_folder, 'log.log'), 'w')

    if args.rank == 0:
        command = " ".join(sys.argv)
        tensorboard_logger.configure(os.path.join(log_folder))
        print(command, flush=True)
        print(args, flush=True)
        print(model, flush=True)
        print(command, file=logfile, flush=True)
        print(model_summary, flush=True)
        print(args, file=logfile, flush=True)

    if args.resume == '' and args.rank == 0:
        print(model, file=logfile, flush=True)
        print(model_summary, flush=True, file=logfile)

    for epoch in range(args.start_epoch, args.epochs):
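        # every rank trains and validates; only rank 0 writes the log file, tensorboard values, and checkpoints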
        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1,
                  display=args.print_freq, label_smoothing=args.label_smoothing,
                  clip_gradient=args.clip_gradient, gpu_id=args.gpu, rank=args.rank)
        if args.distributed:
            dist.barrier()

        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(val_loader,
                                                             model,
                                                             val_criterion,
                                                             gpu_id=args.gpu)

        # update current learning rate
        if args.lr_scheduler == 'plateau':
            scheduler.step(val_losses)
        else:
            scheduler.step(epoch + 1)

        if args.distributed:
            dist.barrier()

        # only logging at rank 0
        if args.rank == 0:
            print(
                'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, train_losses, train_top1,
                        train_top5, train_speed * 1000.0,
                        speed_data_loader * 1000.0),
                file=logfile,
                flush=True)
            print(
                'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, train_losses, train_top1,
                        train_top5, train_speed * 1000.0,
                        speed_data_loader * 1000.0),
                flush=True)
            print(
                'Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                        val_speed * 1000.0),
                file=logfile,
                flush=True)
            print(
                'Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                        val_speed * 1000.0),
                flush=True)

            # remember best prec@1 and save checkpoint
            is_best = val_top1 > best_top1
            best_top1 = max(val_top1, best_top1)

            save_dict = {
                'epoch': epoch + 1,
                'arch': arch_name,
                'state_dict': model.state_dict(),
                'best_top1': best_top1,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }

            save_checkpoint(save_dict, is_best, filepath=log_folder)
            try:
                # get_lr get all lrs for every layer of current epoch, assume the lr for all layers are identical
                lr = scheduler.optimizer.param_groups[0]['lr']
            except Exception as e:
                lr = None
            if lr is not None:
                tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
            tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
            tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
            tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
            tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
            tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)

        if args.distributed:
            dist.barrier()

    if args.rank == 0:
        logfile.close()
Example #9
def main():
    parser = argparse.ArgumentParser(description="Generate Chinese characters (汉字) via a generative adversarial network.")

    # Dataset
    parser.add_argument("--size", type=int, default=32, help="Font size.")
    parser.add_argument("--from_unicode", type=int, help="Starting point of the unicode.")
    parser.add_argument("--to_unicode", type=int, help="Ending point of the unicode.")
    parser.add_argument("--font", type=str, required=True, help="Path to the font file.")
    parser.add_argument("--num_workers", type=int, default=4, help="Number of data loading workers.")
    # Optimization
    parser.add_argument("--epochs", type=int, default=100, help="Number of epochs.")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size.")
    parser.add_argument("--gpu_ids", type=str, default='', help="GPUs for running this script.")
    parser.add_argument("--rand_dim", type=int, default=128, help="Dimension of the random vector.")
    parser.add_argument("--num_fakes", type=int, default=16,
                        help="Use num_fakes generated images to train the discriminator.")
    parser.add_argument("--flip_rate", type=float, default=0.8, help="Label flipping rate.")
    parser.add_argument("--g_lr", type=float, default=0.01, help="Learning rate for generator.")
    parser.add_argument("--d_lr", type=float, default=0.01, help="Learning rate for discriminator.")
    parser.add_argument("--factor", type=float, default=0.2, help="Factor by which the learning rate will be reduced.")
    parser.add_argument("--patience", type=int, default=10,
                        help="Number of epochs with no improvement after which learning rate will be reduced.")
    parser.add_argument("--threshold", type=float, default=0.1,
                        help="Threshold for measuring the new optimum, to only focus on significant changes. ")
    # Misc
    parser.add_argument("--log_dir", type=str, default="../run/", help="Where to save the log?")
    parser.add_argument("--log_name", type=str, required=True, help="Name of the log folder.")
    parser.add_argument("--show_freq", type=int, default=64, help="How frequently to show generated images?")
    parser.add_argument("--seed", type=int, default=0, help="Random seed.")

    args = parser.parse_args()
    assert args.show_freq > 0
    assert 0.0 <= args.flip_rate <= 1.0

    # Check before run.
    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)
    log_dir = os.path.join(args.log_dir, args.log_name)

    # Setting up logger
    log_file = datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")
    sys.stdout = Logger(os.path.join(log_dir, log_file))
    print(args)

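    # gpu_ids is a plain digit string (e.g. "01"): each character is checked to be an
    # integer device index, then the characters are joined with commas for CUDA_VISIBLE_DEVICES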
    for s in args.gpu_ids:
        try:
            int(s)
        except ValueError as e:
            print("Invalid gpu id:{}".format(s))
            raise ValueError

    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(args.gpu_ids)

    if args.gpu_ids:
        if torch.cuda.is_available():
            use_gpu = True
            torch.cuda.manual_seed_all(args.seed)
        else:
            use_gpu = False
    else:
        use_gpu = False

    torch.manual_seed(args.seed)

    dataloader, size = build_dataloader(args.batch_size, args.num_workers, use_gpu, args.font, args.size,
                                        args.from_unicode, args.to_unicode)
    model = GAN(args.num_fakes, args.rand_dim, size, use_gpu)
    criterion = BCELoss()
    d_optimizer = torch.optim.SGD(model.discriminator.parameters(), lr=args.d_lr, momentum=0.9)
    g_optimizer = torch.optim.SGD(model.generator.parameters(), lr=args.g_lr, momentum=0.9)
    d_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(d_optimizer, mode="min", factor=args.factor,
                                                             patience=args.patience, verbose=True,
                                                             threshold=args.threshold)
    g_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(g_optimizer, mode="min", factor=args.factor,
                                                             patience=args.patience, verbose=True,
                                                             threshold=args.threshold)

    optimizer = d_optimizer, g_optimizer
    scheduler = d_scheduler, g_scheduler

    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model)

    print("Start training...")
    start = datetime.now()
    with SummaryWriter(log_dir) as writer:
        for epoch in range(args.epochs):
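            # log the current learning rate of every optimizer param group before training this epoch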
            for i, param_group in enumerate(d_optimizer.param_groups):
                d_learning_rate = float(param_group["lr"])
                writer.add_scalar("d_lr_group_{0}".format(i), d_learning_rate, global_step=epoch)
            for i, param_group in enumerate(g_optimizer.param_groups):
                g_learning_rate = float(param_group["lr"])
                writer.add_scalar("g_lr_group_{0}".format(i), g_learning_rate, global_step=epoch)
            train(model, dataloader, criterion, optimizer, use_gpu, writer, epoch, scheduler, args.num_fakes,
                  args.flip_rate, args.show_freq)

    torch.save(model, os.path.join(log_dir, "latest.pth"))

    elapsed_time = str(datetime.now() - start)
    print("Finish training. Total elapsed time %s." % elapsed_time)
Example #10
    # print('Pool Size: {}'.format(pool_size))
    print('Epochs: {}'.format(epochs))
    print('Seed: {}'.format(seed))
    print('=' * 30 + '\n')
    for epoch in range(epochs):
        print('=' * 30)
        print('Starting epoch {} of {}'.format(epoch, epochs))
        X_train, y_train = shuffle(X_train, y_train)

        loss_train = 0.0
        loss_test = 0.0
        acc_train = 0.0
        acc_test = 0.0

        for i, (image, label) in enumerate(zip(X_train, y_train)):
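            # accumulate loss and accuracy one training example at a time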
            loss, acc = train(image, label, model, alpha)
            loss_train += loss
            acc_train += acc

        for i, (image, label) in enumerate(zip(X_test, y_test)):
            _, loss, acc = forward(image, label, model)
            loss_test += loss
            acc_test += acc

        loss_train_seq.append(loss_train / train_examples)
        loss_test_seq.append(loss_test / test_examples)
        acc_train_seq.append(acc_train / train_examples)
        acc_test_seq.append(acc_test / test_examples)

        print('Test Loss: {}\nTest Accuracy: {} %'.format(
            loss_test_seq[-1], acc_test_seq[-1] * 100))
Example #11
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='CIFAR10 Example')
    parser.add_argument('--root',
                        type=str,
                        metavar='S',
                        help='Path to the root.')
    parser.add_argument('--init-num-labelled',
                        type=int,
                        default=None,
                        metavar='N',
                        help='Initial number of labelled examples.')
    parser.add_argument('--batch-size',
                        type=int,
                        default=100,
                        metavar='N',
                        help='total batch size for training (default: 100)')
    parser.add_argument('--init-epochs',
                        type=int,
                        metavar='N',
                        help='number of epochs to train for active learning.')
    parser.add_argument('--train-on-updated',
                        default=False,
                        type=str2bool,
                        metavar='BOOL',
                        help='Train on updated data? (default: False)')
    parser.add_argument('--active-learning',
                        default=False,
                        type=str2bool,
                        metavar='BOOL',
                        help='Run proposed active learning? (default: False)')
    parser.add_argument(
        '--skip',
        type=int,
        default=0,
        metavar='N',
        help=
        'Skip the first N epochs when computing the accumulated prediction changes.'
    )

    parser.add_argument('--test-batch-size',
                        type=int,
                        default=500,
                        metavar='N',
                        help='input batch size for testing (default: 500)')
    parser.add_argument('--epochs',
                        type=int,
                        metavar='N',
                        help='number of epochs to train.')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.0,
                        metavar='M',
                        help='SGD momentum (default: 0.0)')
    parser.add_argument('--seed',
                        type=int,
                        metavar='S',
                        help='Seed for random number generator.')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=1,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--num-workers',
                        type=int,
                        default=1,
                        metavar='N',
                        help='Number of workers for dataloader (default: 1)')

    parser.add_argument('--num-to-sample',
                        type=int,
                        metavar='N',
                        help='Number of unlabelled examples to be sampled')
    parser.add_argument(
        '--validate',
        default=False,
        type=str2bool,
        metavar='BOOL',
        help='Use validation set instead of test set? (default: False)')
    parser.add_argument('--output',
                        default='default_output.csv',
                        type=str,
                        metavar='S',
                        help='File name for the output.')

    args = parser.parse_args()
    torch.manual_seed(args.seed)  # set seed for pytorch
    use_cuda = torch.cuda.is_available()

    args.num_classes = 10

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': args.num_workers,
        'pin_memory': True
    } if use_cuda else {}

    ##################### Active learning sampling using prediction change (fluctuation) ###############################
    if args.active_learning:
        train_dataset = cifar10.CIFAR10(root=args.root,
                                        dataset='train',
                                        init_n_labeled=args.init_num_labelled,
                                        seed=args.seed,
                                        download=True,
                                        transform=transforms.Compose(
                                            [transforms.ToTensor()]),
                                        target_transform=None,
                                        indices_name=None)  #initialise indices

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            sampler=SubsetRandomSampler(train_dataset.l_indices),
            **kwargs)

        test_loader = torch.utils.data.DataLoader(
            cifar10.CIFAR10(args.root,
                            'test',
                            seed=args.seed,
                            transform=transforms.Compose(
                                [transforms.ToTensor()])),
            batch_size=args.test_batch_size,
            shuffle=False,
            **kwargs)

        # train on the initial labelled set
        global_step = 0
        model = resnet18(pretrained=False,
                         progress=False,
                         num_classes=args.num_classes).to(device)

        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)
        method = PredictionChange(u_indices=train_dataset.u_indices,
                                  model=model,
                                  dataset=train_dataset,
                                  data_name='cifar10')

        logs = {
            "train_losses": [],
            "train_acces": [],
            "test_acces": [],
            "pred_changes": []
        }
        logger = Logger(logs)

        for epoch in range(1, args.init_epochs + 1):
            start = time.time()
            global_step, train_loss, train_acc = utils.train(
                args, model, device, train_loader, optimizer, epoch,
                global_step)
            print('Training one epoch took: {:.4f} seconds.\n'.format(
                time.time() - start))
            test_acc, _ = utils.test(model, device, test_loader)

            print('Computing prediction changes...')
            pred_change = method.compute_pred_changes(model)

            logger.append(train_losses=train_loss,
                          train_acces=train_acc,
                          test_acces=test_acc,
                          pred_changes=pred_change)

        # save the logs
        train_dataset.save_logs(logger.logs)


############################### Training on updated indices #########################################
    if args.train_on_updated:
        import os
        # create Dataset object and load initial indices.
        train_dataset = cifar10.CIFAR10(
            root=args.root,
            dataset='train',
            init_n_labeled=args.init_num_labelled,
            seed=args.seed,
            download=False,
            transform=transforms.Compose([transforms.ToTensor()]),
            target_transform=None,
            indices_name="init_indices.npz")  #load initial indices from file

        logs_path = os.path.join(train_dataset.init_folder, 'logs.npz')
        print("Updating indices using log file: {}...".format(logs_path))
        start = time.time()
        # sampling using proposed prediction change method
        method = PredictionChange(u_indices=train_dataset.u_indices,
                                  dataset=train_dataset,
                                  data_name='CIFAR-10')

        sample = method.select_batch_from_logs(N=args.num_to_sample,
                                               skip=args.skip,
                                               path=logs_path,
                                               key="pred_changes")
        # update and save updated indices
        filename_updated_indices = "updated_indices_N_{}_skip_{}".format(
            args.num_to_sample, args.skip)
        method.update_indices(dataset=train_dataset,
                              indices=sample,
                              filename=filename_updated_indices)
        print('Active learning sampling took: {:.4f} seconds.\n'.format(
            time.time() - start))

        print("Training on updated labelled training set...")
        train_dataset = cifar10.CIFAR10(
            root=args.root,
            dataset='train',
            init_n_labeled=args.init_num_labelled,
            seed=args.seed,
            download=False,
            transform=transforms.Compose([transforms.ToTensor()]),
            target_transform=None,
            indices_name=filename_updated_indices +
            ".npz")  #load updated indices from file

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            sampler=SubsetRandomSampler(train_dataset.l_indices),
            **kwargs)

        if args.validate:
            test_or_validate = 'validation'
        else:
            test_or_validate = 'test'
        test_loader = torch.utils.data.DataLoader(
            cifar10.CIFAR10(args.root,
                            test_or_validate,
                            seed=args.seed,
                            transform=transforms.Compose(
                                [transforms.ToTensor()])),
            batch_size=args.test_batch_size,
            shuffle=False,
            **kwargs)

        model = resnet18(pretrained=False,
                         progress=False,
                         num_classes=args.num_classes).to(device)

        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)
        global_step = 0

        for epoch in range(1, args.epochs + 1):
            start = time.time()
            ###
            global_step, _, _ = utils.train(args, model, device, train_loader,
                                            optimizer, epoch, global_step)
            print('\nTraining one epoch took: {:.4f} seconds.\n'.format(
                time.time() - start))
            ###
            test_acc, _ = utils.test(model, device, test_loader)

        with open(args.output, 'a') as write_file:
            writer = csv.writer(write_file)
            writer.writerow([args.seed, test_acc])
Example #12
#from model import model
from utils import utils
from data_load import Data_loading
from evaluation import evaluation
from visualization import visualization
import numpy as np

if __name__ == "__main__":
    """
  utils input arguments: (option for data set, option for using different model, option for doing different tasks,
  option for choosing random walk strategy,option of whether to add structure)
  """
    utils = utils(6, 3, 1, 2, 1)
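    # the five positional arguments above select the data set, model, task, random-walk strategy, and whether to add structure (see the docstring)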

    utils.config_train_test()
    utils.config_model()
    utils.init_walk_prob()
    utils.train()
    evl = evaluation(utils, 1)
    #evl.evaluate(utils)
    #vis = visualization(utils,evl)
    #vis.get_2d_rep()
    #vis.plot_2d()
Example #13
0
def main():
    parser = argparse.ArgumentParser(description="Standalone Center Loss.")

    # Dataset
    parser.add_argument("--dataset", type=str, default="fashion-mnist", choices=["mnist", "fashion-mnist", "cifar-10"])
    parser.add_argument("--num_workers", type=int, default=4, help="Number of data loading workers.")
    # Optimization
    parser.add_argument("--epochs", type=int, default=100, help="Number of epochs.")
    parser.add_argument("--batch_size", type=int, default=128, help="Batch size.")
    parser.add_argument("--gpu_ids", type=str, default='', help="GPUs for running this script.")
    parser.add_argument("--lr", type=float, default=0.01, help="Learning rate for gradient descent.")
    parser.add_argument("--weight_intra", type=float, default=1.0, help="Weight for intra loss.")
    parser.add_argument('--weight_inter', type=float, default=0.1, help="Weight for inter loss.")
    parser.add_argument("--factor", type=float, default=0.2, help="Factor by which the learning rate will be reduced.")
    parser.add_argument("--patience", type=int, default=10,
                        help="Number of epochs with no improvement after which learning rate will be reduced.")
    parser.add_argument("--threshold", type=float, default=0.1,
                        help="Threshold for measuring the new optimum, to only focus on significant changes. ")
    # Model
    parser.add_argument("--model", type=str, default="resnet", choices=["resnet"])
    parser.add_argument("--feat_dim", type=int, default=128, help="Dimension of the feature.")
    # Misc
    parser.add_argument("--log_dir", type=str, default="./run/", help="Where to save the log?")
    parser.add_argument("--log_name", type=str, required=True, help="Name of the log folder.")
    parser.add_argument("--seed", type=int, default=0, help="Random seed.")
    parser.add_argument("--eval_freq", type=int, default=1, help="How frequently to evaluate the model?")
    parser.add_argument("--vis", action="store_true", help="Whether to visualize the features?")

    args = parser.parse_args()

    # Check before run: create the log directory (including the per-experiment subfolder).
    log_dir = os.path.join(args.log_dir, args.log_name)
    os.makedirs(log_dir, exist_ok=True)

    # Setting up logger
    log_file = datetime.now().strftime("%Y-%m-%d-%H-%M-%S_{}.log".format(args.dataset))
    sys.stdout = Logger(os.path.join(log_dir, log_file))
    print(args)

    # Strip commas so both "01" and "0,1" are accepted, then validate each id.
    gpu_ids = list(args.gpu_ids.replace(',', ''))
    for s in gpu_ids:
        try:
            int(s)
        except ValueError:
            raise ValueError("Invalid gpu id: {}".format(s))

    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpu_ids)

    if args.gpu_ids:
        if torch.cuda.is_available():
            use_gpu = True
            cudnn.benchmark = True
            torch.cuda.manual_seed_all(args.seed)
        else:
            use_gpu = False
    else:
        use_gpu = False

    torch.manual_seed(args.seed)

    trainloader, testloader, input_shape, classes = load_dataset(args.dataset, args.batch_size, use_gpu,
                                                                 args.num_workers)
    model = build_model(args.model, input_shape, args.feat_dim, len(classes))

    criterion = StandaloneCenterLoss(len(classes), feat_dim=args.feat_dim, use_gpu=use_gpu)
    optimizer = torch.optim.SGD(list(model.parameters()) + list(criterion.parameters()), lr=args.lr, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=args.factor,
                                                           patience=args.patience, verbose=True,
                                                           threshold=args.threshold)
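    # mode="max" means the plateau scheduler expects a metric that should
    # increase (presumably validation accuracy); scheduler.step() is assumed
    # to be called inside eval() below, since the scheduler is passed there.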

    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model)

    print("Start training...")
    start = datetime.now()
    with SummaryWriter(log_dir) as writer:
        for epoch in range(args.epochs):
            train(model, trainloader, criterion, args.weight_intra, args.weight_inter, optimizer, use_gpu, writer,
                  epoch, args.epochs, args.vis, args.feat_dim, classes)

            if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
                eval(model, testloader, criterion, scheduler, use_gpu, writer, epoch, args.epochs, args.vis,
                     args.feat_dim, classes)

    elapsed_time = str(datetime.now() - start)
    print("Finish training. Total elapsed time %s." % elapsed_time)
Example #14
0
def main(args):
    if args.seed is not None:
        paddle.fluid.Program.random_seed = args.seed
        np.random.seed(args.seed)

    if args.gpu < 0:
        device = "cpu"
    else:
        device = "cuda:{args.gpu}"

    # Load dataset
    # data = load_data(device, args)
    # g, labels, num_classes, train_nid, val_nid, test_nid = data
    labels = np.load("./data/lables.npy")
    num_classes = np.load("./data/num_classes.npy")
    train_nid = np.load("./data/train_nid.npy")
    val_nid = np.load("./data/val_nid.npy")
    test_nid = np.load("./data/test_nid.npy")
    evaluator = get_evaluator(args.dataset)

    # Preprocess neighbor-averaged features over sampled relation subgraphs
    rel_subsets = []
    with paddle.no_grad():
        feats = []
        for i in range(args.R + 1):
            # Download the dataset from the OGB website and build the training
            # set following the official tutorial, or look up data88697 on AiStudio.
            feature = np.load(f'../data/data88697/feat{i}.npy')
            feats.append(paddle.to_tensor(feature))
        # feats = preprocess_features(g, rel_subsets, args, device)
        print("Done preprocessing")
    # labels = labels.to(device)
    # Release the graph since we are not going to use it later
    g = None

    # Set up logging
    logging.basicConfig(format='[%(levelname)s] %(message)s',
                        level=logging.INFO)
    logging.info(str(args))

    _, num_feats, in_feats = feats[0].shape
    logging.info("new input size: {} {}".format(num_feats, in_feats))

    # Create model
    num_hops = args.R + 1  # include self feature hop 0
    model = nn.Sequential(
        WeightedAggregator(num_feats, in_feats, num_hops),
        SIGN(in_feats, args.num_hidden, num_classes, num_hops, args.ff_layer,
             args.dropout, args.input_dropout))

    if len(labels.shape) == 1:
        # single label multi-class
        loss_fcn = nn.NLLLoss()
    else:
        # multi-label multi-class
        loss_fcn = nn.KLDivLoss(reduction='batchmean')
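    # Both criteria expect log-probabilities as input, so the SIGN head above
    # is assumed to end with a log_softmax (not verified here).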

    print('!' * 100)
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      weight_decay=args.weight_decay)
    # optimizer = paddle.optimizer.Adam(parameters=model.parameters())
    # Start training
    best_epoch = 0
    best_val = 0
    f = open('log.txt', 'w+')
    for epoch in range(1, args.num_epochs + 1):
        start = time.time()
        print(epoch)
        train(model, feats, labels, train_nid, loss_fcn, optimizer,
              args.batch_size)
        if epoch % args.eval_every == 0:
            with paddle.no_grad():
                train_res, val_res, test_res = test(model, feats, labels,
                                                    train_nid, val_nid,
                                                    test_nid, evaluator,
                                                    args.eval_batch_size)
            end = time.time()
            val_acc = val_res[0]
            log = "Epoch {}, Times(s): {:.4f}".format(epoch, end - start)
            log += ", Accuracy: Train {:.4f}, Val {:.4f}".format(
                train_res[0], val_res[0])
            log += f", best_acc:{best_val}"
            logging.info(log)
            print(log, file=f, flush=True)
            if val_acc > best_val:
                best_val = val_acc
                best_epoch = epoch
    f.close()
    logging.info("Best Epoch {}, Val {:.4f}".format(best_epoch, best_val))
Example #15
0
e = 8.
epsilon = e / 255.
max_iter = int(min(e + 4, 1.25 * e))
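# max_iter follows the step-count heuristic min(eps + 4, 1.25 * eps) from
# Kurakin et al.; note that the PGD adversary below is constructed with
# nb_iter=7 rather than max_iter.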

def normalize(img, mean=mean, std=std):
    img_n = img - mean
    img_n = img_n / std
    return img_n

adversary = PGDAttack(lambda x: net(x), eps=epsilon, nb_iter=7, ord=np.inf, eps_iter=epsilon/4.)

writer = SummaryWriter(comment=tensorboard_comment)

for epoch in range(start_epoch+1, nb_epoch+1):
    if epoch >= args.epoch_adv:
        train_acc, train_loss = train(epoch, net, train_loader, optimizer, criterion_da, args, adv_training=True, epsilon=args.eps_train/255., alpha=args.alpha_train/255., num_iter=args.num_iter)
    else:
        train_acc, train_loss = train(epoch, net, train_loader, optimizer, criterion_class, args, adv_training=False)
    net.eval()
    val_acc, val_loss = test(net, val_loader, criterion_class, args)

    # adv_acc, adv_loss, _, _ = adv_test(net, val_loader, criterion_class, adversary, epsilon, args, store_imgs=False)
    # writer.add_scalar('adv_acc', adv_acc, epoch)

    writer.add_scalar('train_acc', train_acc, epoch)
    writer.add_scalar('train_loss', train_loss, epoch)
    writer.add_scalar('val_acc', val_acc, epoch)
    writer.add_scalar('val_loss', val_loss, epoch)
    save_model(val_acc, net, optimizer, epoch, os.path.join("model", "checkpoints"), filename)
    if args.sgd:
        scheduler.step()
Example #16
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data",
                        type=str,
                        required=True,
                        help="Path to the dataset directory.")
    parser.add_argument("--degree",
                        type=int,
                        default=5,
                        help="Degree of the bezier curves.")
    parser.add_argument("--log_dir",
                        type=str,
                        default="runs",
                        help="Path to save the tf event.")
    parser.add_argument("--log_name",
                        type=str,
                        required=True,
                        help="Name of the experiment.")
    parser.add_argument("--beta",
                        type=float,
                        default=30,
                        help="Loss balancing factor.")
    parser.add_argument("--weight_dir",
                        type=str,
                        default="weights",
                        help="Folder to save the model weights.")
    parser.add_argument("--pretrained_weight",
                        type=str,
                        required=True,
                        help="Path to the pretrained weight.")
    parser.add_argument("--gpu_ids",
                        type=str,
                        default='',
                        help="Specify the GPU ids.")
    parser.add_argument("--seed", type=int, default=0, help="Random seed.")
    parser.add_argument("--batch_size",
                        type=int,
                        default=256,
                        help="Batch size.")
    parser.add_argument("--num_workers",
                        type=int,
                        default=12,
                        help="Number of workers.")
    parser.add_argument("--epochs",
                        type=int,
                        default=100,
                        help="Number of epochs.")
    parser.add_argument("--input_size",
                        type=int,
                        nargs=2,
                        required=True,
                        help="Size of the input image (w, h).")
    parser.add_argument("--max_lane",
                        type=int,
                        default=4,
                        help="Maximum number of lanes.")
    parser.add_argument("--num_points",
                        type=int,
                        default=72,
                        help="Number of points for computing the loss.")
    parser.add_argument("--feat_dim",
                        type=int,
                        default=384,
                        help="The output feature dimension of the backbone.")
    parser.add_argument("--lr",
                        type=float,
                        default=1e-3,
                        help="Learning rate.")
    parser.add_argument("--momentum",
                        type=float,
                        default=0.9,
                        help="Momentum rate.")
    parser.add_argument(
        "--factor",
        type=float,
        default=0.5,
        help="Factor by which the learning rate will be reduced.")
    parser.add_argument(
        "--patience",
        type=int,
        default=15,
        help=
        "Number of epochs with no improvement after which learning rate will be reduced."
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=1e-2,
        help=
        "Threshold for measuring the new optimum, to only focus on significant changes. "
    )
    parser.add_argument("--eval_freq",
                        type=int,
                        default=1,
                        help="Evaluate frequency.")

    args = parser.parse_args()
    print(args)

    # Strip commas so both "01" and "0,1" are accepted, then validate each id.
    gpu_ids = list(args.gpu_ids.replace(',', ''))
    for s in gpu_ids:
        try:
            int(s)
        except ValueError:
            raise ValueError("Invalid gpu id: {}".format(s))

    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu_ids)

    if args.gpu_ids:
        if torch.cuda.is_available():
            use_gpu = True
            cudnn.benchmark = True
            torch.cuda.manual_seed(args.seed)
        else:
            use_gpu = False
    else:
        use_gpu = False

    logtime = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log_dir = os.path.join(args.log_dir, args.log_name, logtime)
    train_log = os.path.join(log_dir, "train")
    val_log = os.path.join(log_dir, "val")
    mkdir(train_log)
    mkdir(val_log)
    weight_dir = os.path.join(args.weight_dir, args.log_name, logtime)
    mkdir(weight_dir)

    train_loader, val_loader, test_loader, num_fc_nodes = build_dataloader(
        args.data, args.batch_size, tuple(args.input_size), args.degree,
        args.num_points, args.max_lane, use_gpu, args.num_workers)

    model = CustomResnet(args.feat_dim, args.pretrained_weight, args.max_lane,
                         num_fc_nodes)

    optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode="min",
                                  factor=args.factor,
                                  patience=args.patience,
                                  threshold=args.threshold,
                                  verbose=True)

    dsd_loss = DSDRandomLoss(args.degree, args.max_lane, args.num_points)
    xent_loss = torch.nn.CrossEntropyLoss()
    criterion = {"xent": xent_loss, "dsd": dsd_loss}

    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model)

    with SummaryWriter(log_dir=train_log) as tr_writer:
        with SummaryWriter(log_dir=val_log) as val_writer:
            js = {
                "best_epoch": 0,
                "loss": 1e+12,
                "cls_loss": 1e+12,
                "dsd_loss": 1e+12,
                "acc": 0.0,
                "seed": args.seed
            }
            for e in range(args.epochs):
                for i, param_group in enumerate(optimizer.param_groups):
                    learning_rate = float(param_group['lr'])
                    tr_writer.add_scalar("lr of group {}".format(i),
                                         learning_rate,
                                         global_step=e)

                train(model, train_loader, optimizer, criterion, args.beta,
                      tr_writer, e, args.degree, use_gpu)

                if e % args.eval_freq == 0 or e == args.epochs - 1:
                    val_loss, val_cls_loss, val_dsd_loss, val_acc = evaluate(
                        model, val_loader, criterion, args.beta, scheduler,
                        val_writer, e, args.degree, weight_dir, use_gpu)
                    if val_loss < js["loss"]:
                        js["best_epoch"] = e
                        js["loss"] = val_loss
                        js["cls_loss"] = val_cls_loss
                        js["dsd_loss"] = val_dsd_loss
                        js["acc"] = val_acc

            with open(os.path.join(log_dir, "best_result.json"), 'w') as f:
                json.dump(js, f)