Example #1
def main():

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_ch, 10, args.layers, args.auxiliary,
                    genotype).cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.wd)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batchsz,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batchsz,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc: %f', valid_acc)

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc: %f', train_acc)

        utils.save(model, os.path.join(args.save, 'trained.pt'))
        print('saved to: trained.pt')
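Note that this loop calls scheduler.step() at the top of each epoch, which was the convention in older PyTorch; releases since 1.1 expect optimizer.step() to run first, and get_lr() has been superseded by get_last_lr(). A minimal sketch of the adjusted loop, with a stand-in model in place of the Network built above:

import torch
import torch.nn as nn

model = nn.Linear(8, 2)  # stand-in for the Network built above
optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9, weight_decay=3e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

for epoch in range(50):
    print('epoch %d lr %e' % (epoch, scheduler.get_last_lr()[0]))
    # ... one training epoch here: forward, backward, optimizer.step() per batch ...
    scheduler.step()  # step the schedule only after the epoch's optimizer steps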
Example #2
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    in_channels, num_classes, dataset_in_torch, stride_for_aux = utils.dataset_fields(
        args, train=False)  # new
    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, in_channels, stride_for_aux,
                    num_classes, args.layers, args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_data, valid_data = utils.dataset_split_and_transform(
        dataset_in_torch, args, train=False)  # new
    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #3
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled = True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  genotype = eval("genotypes.%s" % args.arch)
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
  model = model.cuda()

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay
      )

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
  valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

  for epoch in range(args.epochs):
    scheduler.step()
    logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
    model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

    start_time = time.time()

    train_acc, train_obj = train(train_queue, model, criterion, optimizer)
    logging.info('train_acc %f', train_acc)

    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    end_time = time.time()
    duration = end_time - start_time
    print('Epoch time: %ds.' % duration)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #4
    def _init_model(self):
        genotype = eval('genotypes.%s' % self.args.arch)
        model = Network(self.args.init_channels, self.num_classes,
                        self.args.layers, self.args.auxiliary, genotype,
                        self.args.parse_method)
        flops, params = profile(model,
                                inputs=(torch.randn(1, 3, 32, 32), ),
                                verbose=False)
        self.logger.info('flops = %fM', flops / 1e6)
        self.logger.info('param size = %fM', params / 1e6)

        # Try move model to multi gpus
        if torch.cuda.device_count() > 1 and self.args.multi_gpus:
            self.logger.info('use: %d gpus', torch.cuda.device_count())
            model = nn.DataParallel(model)
        else:
            self.logger.info('gpu device = %d' % self.device_id)
            torch.cuda.set_device(self.device_id)
        self.model = model.to(self.device)

        criterion = nn.CrossEntropyLoss()
        self.criterion = criterion.to(self.device)
        self.optimizer = torch.optim.SGD(model.parameters(),
                                         self.args.learning_rate,
                                         momentum=self.args.momentum,
                                         weight_decay=self.args.weight_decay)

        self.best_acc_top1 = 0
        # optionally resume from a checkpoint
        if self.args.resume:
            if os.path.isfile(self.args.resume):
                print("=> loading checkpoint {}".format(self.args.resume))
                checkpoint = torch.load(self.args.resume,
                                        map_location=self.device)
                self.dur_time = checkpoint['dur_time']
                self.args.start_epoch = checkpoint['epoch']
                self.best_acc_top1 = checkpoint['best_acc_top1']
                self.args.drop_path_prob = checkpoint['drop_path_prob']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    self.args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(
                    self.args.resume))

        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            float(self.args.epochs),
            eta_min=0,
            last_epoch=-1
            if self.args.start_epoch == 0 else self.args.start_epoch)
        # reload the scheduler if possible
        if self.args.resume and os.path.isfile(self.args.resume):
            checkpoint = torch.load(self.args.resume)
            self.scheduler.load_state_dict(checkpoint['scheduler'])
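The resume branch above reads epoch, dur_time, best_acc_top1, drop_path_prob, model, optimizer, and scheduler state back out of a single checkpoint file. A minimal sketch of the matching save side, with stand-in objects and a hypothetical filename; the key names mirror what _init_model() reads on resume:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # stand-in for the Network
optimizer = torch.optim.SGD(model.parameters(), lr=0.025)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

torch.save({
    'epoch': 1,
    'dur_time': 0.0,
    'best_acc_top1': 0.0,
    'drop_path_prob': 0.2,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'scheduler': scheduler.state_dict(),
}, 'checkpoint.pth.tar')  # hypothetical path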
Example #5
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled = True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  genotype = eval("genotypes.%s" % args.arch)
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype,
                  output_height=args.img_cropped_height, output_width=args.img_cropped_width)
  model = model.cuda()

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  criterion = nn.MSELoss()
  criterion = criterion.cuda()
  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay
      )

  train_transform, valid_transform = utils._data_trainsforms_denosining_dataset(args)
  train_data = DENOISE_DATASET(root=args.data, train_folder=args.train_data,
                               label_folder=args.label_data, train=True,
                               transform=train_transform, target_transform=train_transform)
  # valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

  for epoch in range(args.epochs):
    scheduler.step()
    logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
    model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

    train_obj = train(train_queue, model, criterion, optimizer)
    logging.info('train_obj %f', train_obj)

    valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_obj %f', valid_obj)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
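With no separate validation set, this example splits a single dataset by index and serves both loaders from it via SubsetRandomSampler. A condensed, self-contained sketch of that pattern; the 0.8 ratio is an assumption standing in for args.train_portion:

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))  # stand-in data
indices = list(range(len(dataset)))
split = int(np.floor(0.8 * len(dataset)))  # assumed train_portion = 0.8

train_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(indices[:split]), pin_memory=True)
valid_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(indices[split:]), pin_memory=True)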
Example #6
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled = True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  # Use the normal and reduction cells learned during train_search; genotypes.DARTS selects the learned DARTS_V2.
  # DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])
  genotype = eval("genotypes.%s" % args.arch)
  # The Network used here is NetworkCIFAR from model.py
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
  model = model.cuda()

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay
      )

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
  valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

  for epoch in range(args.epochs):
    scheduler.step()
    logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
    model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

    train_acc, train_obj = train(train_queue, model, criterion, optimizer)
    logging.info('train_acc %f', train_acc)

    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
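Most of these scripts fetch the architecture with eval("genotypes.%s" % args.arch), which executes whatever string is passed in. A safer, equivalent lookup is getattr; a small sketch, with a SimpleNamespace standing in for the real genotypes module:

from types import SimpleNamespace

genotypes = SimpleNamespace(DARTS_V2='...')  # stand-in for the genotypes module
arch = 'DARTS_V2'

genotype = getattr(genotypes, arch)  # same effect as eval("genotypes.%s" % arch), without eval
print(genotype)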
Example #7
def main():
    np.random.seed(args.seed)

    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model.cuda()

    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model,
                                         device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    utils.load(model, args.model_path)
    print("If the model is running on GPU:", next(model.parameters()).is_cuda)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    _, test_transform = utils._data_transforms_cifar10(args)
    test_data = dset.CIFAR10(root=args.data,
                             train=False,
                             download=True,
                             transform=test_transform)

    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
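This evaluation-only script runs infer without a torch.no_grad() guard, so autograd still records the forward pass; infer presumably switches to eval mode internally. A minimal sketch of the usual evaluation guard, with a stand-in model:

import torch
import torch.nn as nn

model = nn.Linear(8, 2)  # stand-in for the loaded Network
x = torch.randn(4, 8)

model.eval()             # disable dropout / use running batch-norm statistics
with torch.no_grad():    # skip gradient bookkeeping during inference
    logits = model(x)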
Example #8
    def build_model(self) -> nn.Module:
        genotype = self.get_genotype_from_hps()

        model = Network(
            self.hparams["init_channels"],
            10,  # num_classes
            self.hparams["layers"],
            self.hparams["auxiliary"],
            genotype,
        )
        print("param size = {} MB".format(utils.count_parameters_in_MB(model)))
        size = 0
        for p in model.parameters():
            size += p.nelement()
        print("param count: {}".format(size))

        # If loading backbone weights, do not call reset_parameters() or
        # call before loading the backbone weights.
        reset_parameters(model)
        return model
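utils.count_parameters_in_MB reports millions of parameters and, in the original DARTS utility, skips the auxiliary head, while the nelement() loop above counts every parameter, so the two numbers can differ. A sketch of the former convention, for reference:

import numpy as np
import torch.nn as nn

def count_parameters_in_MB(model):
    # Millions of parameters, excluding the auxiliary head, roughly matching
    # the convention of the DARTS utils.
    return sum(np.prod(v.size()) for name, v in model.named_parameters()
               if 'auxiliary' not in name) / 1e6

model = nn.Linear(100, 10)  # stand-in model
print('param size = %fMB' % count_parameters_in_MB(model))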
Example #9
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)

    if args.parallel:
        model = nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    start_epoch = 0
    if args.resume:
        checkpoint = torch.load(
            os.path.join(args.resume_dir, 'checkpoint.pth.tar'))
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.cuda()
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        print('model was loaded. Start training from epoch {}.'.format(
            start_epoch))

    for epoch in range(start_epoch, args.epochs):
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        with open(os.path.join(args.save, 'learning_rate.txt'), mode='a') as f:
            f.write(str(scheduler.get_lr()[0]) + '\n')

        if args.parallel:
            model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        else:
            model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)
        with open(os.path.join(args.save, "train_acc.txt"), mode='a') as f:
            f.write(str(train_acc) + '\n')
        with open(os.path.join(args.save, "train_loss.txt"), mode='a') as f:
            f.write(str(train_obj) + '\n')
        scheduler.step()

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        with open(os.path.join(args.save, "test_acc.txt"), mode='a') as f:
            f.write(str(valid_acc) + '\n')
        with open(os.path.join(args.save, "test_loss.txt"), mode='a') as f:
            f.write(str(valid_obj) + '\n')

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        torch.save(scheduler.state_dict(),
                   os.path.join(args.save, 'scheduler.pth.tar'),
                   pickle_module=dill)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict()
            },
            False,
            save=args.save)
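The pickle_module=dill argument above works around scheduler objects that hold lambdas and fail standard pickling; saving scheduler.state_dict() rather than the scheduler object sidesteps that entirely, since the state dict is plain tensors and numbers. A small sketch:

import torch
import torch.nn as nn

optimizer = torch.optim.SGD(nn.Linear(2, 2).parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# state_dict() contains only plain values, so the default pickler suffices.
torch.save(scheduler.state_dict(), 'scheduler.pth.tar')
scheduler.load_state_dict(torch.load('scheduler.pth.tar'))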
Example #10
def main():
    writerTf = SummaryWriter(comment='writerTf')
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))

    # torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model,
                                         device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        writerTf.add_scalar('train_acc', train_acc, epoch)
        writerTf.add_scalar('train_obj', train_obj, epoch)
        writerTf.add_scalar('valid_acc', valid_acc, epoch)
        writerTf.add_scalar('valid_obj', valid_obj, epoch)
    writerTf.close()
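The TensorBoard writer above logs one scalar per metric per epoch and is closed after the loop. A self-contained sketch of that logging pattern with placeholder values, assuming the tensorboard package is installed:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(comment='writerTf')
for epoch in range(3):
    writer.add_scalar('train_acc', 90.0 + epoch, epoch)  # placeholder metric
    writer.add_scalar('valid_acc', 88.0 + epoch, epoch)  # placeholder metric
writer.close()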
Example #11
def main():
    # Setup
    args = cmd_argument_parser()
    create_logger(args.save)

    # Set the gpu device to be used
    # NOTE: Only operates on a single GPU
    if torch.cuda.is_available():
        torch.cuda.set_device(int(args.gpu))
    else:
        logging.info('no gpu device available')
        sys.exit(1)

    # Ensure seeds are set
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Hardware specific tuning
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # Get the specific architecture to train
    genotype = genomes[args.arch]

    # Create the fixed network
    # Note: This differs from the Network used in model_search.py
    # TODO: Update the Network class
    model = Network(C=args.init_channels,
                    num_classes=CIFAR_CLASSES,
                    layers=args.layers,
                    auxiliary=args.auxiliary,
                    genotype=genotype)
    model = model.cuda()

    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # The loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # Optimizer used to adjust the models parameters as well as an optimizer
    # of the learning rate
    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    lr_scheduler = CosineAnnealingLR(optimizer=optimizer,
                                     T_max=float(args.epochs))

    # Get the transforms for both the train and validation data
    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Get the data from torchvision's datasets
    train_data = CIFAR10(root=args.data,
                         train=True,
                         download=True,
                         transform=train_transform)
    valid_data = CIFAR10(root=args.data,
                         train=False,
                         download=True,
                         transform=valid_transform)

    # Create Dataloaders for both
    train_queue = DataLoader(train_data,
                             batch_size=args.batch_size,
                             shuffle=True,
                             pin_memory=True,
                             num_workers=0)

    valid_queue = DataLoader(valid_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=0)

    for epoch in range(args.epochs):
        logging.info(f'epoch = {epoch}')
        logging.info(f'lr = {lr_scheduler.get_last_lr()}')

        # More likely to drop a path as epochs progress
        model.drop_path_prob = args.drop_path_prob * (epoch / args.epochs)

        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     args)
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion, args)

        logging.info(f'train_acc = {train_acc}')
        logging.info(f'valid_acc = {valid_acc}')

        # Save the model for each epoch
        utils.save(model, os.path.join(args.save, 'weights.pt'))

        lr_scheduler.step()
Example #12
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    #print(genotype)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    #stat(model, (1, 16, 16))

    model = model.cuda()
    # input_size=(1,2,128)
    '''
  Input = torch.randn(1, 1, 2, 128)
  Input = Input.type(torch.cuda.FloatTensor)
  macs, params = profile(model, inputs=(Input,), custom_ops={Network: Network.forward})
  macs, params = clever_format([macs, params], "%.3f")
  print(macs, params)
  summary(model,(1,16,16))
'''

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    '''
  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  if args.set=='cifar100':
      train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
      valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
  else:
      train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
      valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  #train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
  #valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
  '''

    x = loadmat("/data/wx/PC-DARTS/data/datashujudoppler=100.mat")
    x = x.get('train_data')
    x = np.reshape(x, [-1, 1, 2, 128])
    data_num = 22000
    y1 = np.zeros([data_num, 1])
    y2 = np.ones([data_num, 1])
    y3 = np.ones([data_num, 1]) * 2
    y4 = np.ones([data_num, 1]) * 3
    y5 = np.ones([data_num, 1]) * 4
    y6 = np.ones([data_num, 1]) * 5
    y7 = np.ones([data_num, 1]) * 6
    y8 = np.ones([data_num, 1]) * 7
    y = np.vstack((y1, y2, y3, y4, y5, y6, y7, y8))
    y = np.array(y)
    X_train, X_val, Y_train, Y_val = train_test_split(x,
                                                      y,
                                                      test_size=0.3,
                                                      random_state=30)

    X_train = torch.from_numpy(X_train)
    Y_train = torch.from_numpy(Y_train)
    X_train = X_train.type(torch.FloatTensor)
    Y_train = Y_train.type(torch.LongTensor)
    # Y_train=np.reshape(Y_train,(16800,4))
    Y_train = Y_train.squeeze()
    print(Y_train.dtype)
    print(Y_train)
    print(X_train.shape, Y_train.shape)
    train_Queue = torch.utils.data.TensorDataset(X_train, Y_train)
    print(train_Queue)
    X_val = torch.from_numpy(X_val)
    Y_val = torch.from_numpy(Y_val)
    X_val = X_val.type(torch.FloatTensor)
    Y_val = Y_val.type(torch.LongTensor)
    # Y_train = one_hot_embedding(Y_train, 4)
    Y_val = Y_val.squeeze()
    print(Y_val.dtype, Y_val)
    valid_Queue = torch.utils.data.TensorDataset(X_val, Y_val)

    train_queue = torch.utils.data.DataLoader(train_Queue,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_Queue,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    writer = SummaryWriter('./logs/balei')
    '''
  Input = torch.randn(1, 1, 2, 128)
  Input=Input.type(torch.cuda.FloatTensor)
  macs, params = profile(model, inputs=(Input,),custom_ops={Network:Network.forward})
  macs, params = clever_format([macs, params], "%.3f")
  print(macs, params)
  '''
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)
        writer.add_scalar('Train/acc', train_acc, epoch)
        TAcc.write(str(train_acc) + ",")
        writer.add_scalar('Train/loss', train_obj, epoch)
        Tloss.write(str(train_obj) + ",")

        valid_acc, valid_obj = infer(valid_queue, model, criterion)

        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'weights_best_acc.pt'))
        logging.info('valid_acc %f, best_acc %f', valid_acc, best_acc)
        writer.add_scalar('Valid/acc', valid_acc, epoch)
        VAcc.write(str(valid_acc) + ",")
        writer.add_scalar('Valid/loss', valid_obj, epoch)
        Vloss.write(str(valid_obj) + ",")
        utils.save(model, os.path.join(args.save, 'weights.pt'))
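Stripped of the debugging prints, the data path in this example is: load arrays, split with scikit-learn, wrap in TensorDataset, and serve through DataLoader. A condensed sketch with random stand-in arrays in place of the .mat file, assuming scikit-learn is available:

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

x = np.random.randn(800, 1, 2, 128).astype(np.float32)  # stand-in signals
y = np.repeat(np.arange(8), 100)                         # stand-in labels, 8 classes

X_train, X_val, Y_train, Y_val = train_test_split(x, y, test_size=0.3, random_state=30)
train_set = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(Y_train).long())
valid_set = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(Y_val).long())

train_queue = DataLoader(train_set, batch_size=96, shuffle=True, pin_memory=True)
valid_queue = DataLoader(valid_set, batch_size=96, shuffle=False, pin_memory=True)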
Example #13
def main():
	if not torch.cuda.is_available():
		logging.info('no gpu device available')
		sys.exit(1)
	
	np.random.seed(args.seed)
	# torch.cuda.set_device(args.gpu)
	device = torch.device("cuda")
	cudnn.benchmark = True
	torch.manual_seed(args.seed)
	cudnn.enabled = True
	torch.cuda.manual_seed(args.seed)
	logging.info('gpu device = %d' % args.gpu)
	logging.info("args = %s", args)
	
	# read data
	train_transform, valid_transform = utils._data_transforms_cifar10(args)
	if args.dataset == 'cifar10':
		args.data = '/home/work/dataset/cifar'
		train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
		valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
		classes = 10
	elif args.dataset == 'cifar100':
		args.data = '/home/work/dataset/cifar100'
		train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
		valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
		classes = 100
	train_queue = torch.utils.data.DataLoader(
		train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)
	valid_queue = torch.utils.data.DataLoader(
		valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
	
	# model
	genotype = eval("genotypes.%s" % args.arch)
	model = Network(args.init_channels, classes, args.layers, args.auxiliary, genotype)
	model = model.cuda()
	model.drop_path_prob = args.drop_path_prob
	
	flops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32).cuda(),), verbose=False)
	logging.info('flops = %fM', flops / 1e6)
	logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
	
	criterion = nn.CrossEntropyLoss()
	criterion = criterion.cuda()
	optimizer = torch.optim.SGD(
		model.parameters(),
		args.learning_rate,
		momentum=args.momentum,
		weight_decay=args.weight_decay
	)
	
	scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
	best_val_acc = 0.
	
	if args.resume:
		# state = torch.load('/home/work/lixudong/code_work/sgas/cnn/full_train_s3_1-20200608/weights.pt')
		# state = torch.load('/home/work/lixudong/code_work/sgas/cnn/full_train_s2_factor1-20200609/weights.pt', map_location='cpu')
		# state = torch.load('/home/work/lixudong/code_work/sgas/cnn/full_train_s3_factor1-20200609/weights.pt', map_location='cpu')
		# state = torch.load('/home/work/lixudong/code_work/sgas/cnn/full_train_s3_0-20200608/weights.pt', map_location='cpu')
		# state = torch.load('/home/work/lixudong/code_work/sgas/cnn/full_train_s2_0-20200608/weights.pt', map_location='cpu')
		state = torch.load('/home/work/lixudong/code_work/sgas/cnn/full_train_s3_2-20200608/weights.pt', map_location='cpu')
		model.load_state_dict(state)
		model = model.to(device)
		for i in range(args.start_epoch):
			scheduler.step()
		best_val_acc = 97.19  # values for the other checkpoints above: 97.34, 97.32, 94.92, 94.6, 97.2
		
	for epoch in range(args.start_epoch, args.epochs):
		scheduler.step()
		logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
		model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
		train_acc, train_obj = train(train_queue, model, criterion, optimizer)
		logging.info('train_acc %f', train_acc)
		
		with torch.no_grad():
			valid_acc, valid_obj = infer(valid_queue, model, criterion)
			if valid_acc > best_val_acc:
				best_val_acc = valid_acc
				utils.save(model, os.path.join(args.save, 'best_weights.pt'))
			# logging.info('valid_acc %f\tbest_val_acc %f', valid_acc, best_val_acc)
			logging.info('val_acc: {:.6}, best_val_acc: \033[31m{:.6}\033[0m'.format(valid_acc, best_val_acc))
		
		state = {
			'epoch': epoch,
			'model_state': model.state_dict(),
			'optimizer': optimizer.state_dict(),
			'best_val_acc': best_val_acc
		}
		torch.save(state, os.path.join(args.save, 'weights.pt.tar'))
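The resume branch above fast-forwards the scheduler by calling step() in a loop; passing last_epoch at construction (or restoring a saved state_dict) achieves the same without replaying steps. A sketch of the last_epoch route; note that a freshly built optimizer needs an initial_lr hint in each param group, or the scheduler raises a KeyError:

import torch
import torch.nn as nn

optimizer = torch.optim.SGD(nn.Linear(2, 2).parameters(), lr=0.025)
start_epoch = 30  # epoch the checkpoint was saved at

# Provide the initial_lr the scheduler expects when resuming.
for group in optimizer.param_groups:
    group.setdefault('initial_lr', group['lr'])

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=100, last_epoch=start_epoch - 1)
print(scheduler.get_last_lr())  # lr already positioned at start_epoch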
Example #14
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpus)
    logging.info("args = %s", args)
    num_gpus = torch.cuda.device_count()
    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    if num_gpus > 1:
        model = nn.DataParallel(model)
        model = model.cuda()
    else:
        model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data,
                                   train=False,
                                   download=True,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)
    #train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    #valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        if num_gpus > 1:
            model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        else:
            model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('valid_acc %f, best_acc %f', valid_acc, best_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
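With nn.DataParallel, per-epoch attributes such as drop_path_prob must be set on the wrapped model.module, hence the num_gpus > 1 branch above. A small helper can hide that distinction; a sketch:

import torch.nn as nn

def unwrap(model):
    # Return the underlying network whether or not it is DataParallel-wrapped.
    return model.module if isinstance(model, nn.DataParallel) else model

# usage inside the epoch loop:
#   unwrap(model).drop_path_prob = args.drop_path_prob * epoch / args.epochs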
Example #15
def main():


    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)


    if not args.arch:
        geno_s = pathlib.Path(geno_path).read_text()
    else:
        geno_s = "genotypes.%s" % args.arch
    genotype = eval(geno_s)
    model = Network(args.init_ch, 10, args.layers, args.auxiliary, genotype).cuda()

    logging.info(f"seed = {args.seed}")
    logging.info(f"geno_s = {geno_s}")
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.wd
    )

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz, shuffle=True, pin_memory=True, num_workers=0 if 'pydevd' in sys.modules else 2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batchsz, shuffle=False, pin_memory=True, num_workers=0 if 'pydevd' in sys.modules else 2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    lines = ['epoch\ttrain_acc\tval_acc']
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc: %f', valid_acc)

        lines.append(f'{epoch}\t{train_acc}\t{valid_acc}')

    timebudget.report()
    utils.save(model, os.path.join(args.save, 'trained.pt'))
    print('saved to: trained.pt')
    pathlib.Path(os.path.join(args.exp_path, 'eval.tsv')).write_text('\n'.join(lines))
Example #16
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=False,
                                   download=True,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

    # train_queue = torch.utils.data.DataLoader(
    #     train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = 45000
    # int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    train_valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        scheduler.step()
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(train_valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            test_acc, _ = infer(valid_queue, model, criterion)
            logging.info('Test_acc: %f', test_acc)
        logging.info('Valid_acc: %f', valid_acc)
        logging.info('Best_acc: %f', best_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model.module, os.path.join(args.save, 'weights.pt'))
Example #17
def main():
    wandb.init(
        project="automl-gradient-based-nas",
        name="hw" + str(args.arch),
        config=args,
        entity="automl"
    )
    wandb.config.update(args)  # adds all of the arguments as config variables

    global is_multi_gpu
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    gpus = [int(i) for i in args.gpu.split(',')]
    logging.info('gpus = %s' % gpus)

    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    if len(gpus) > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
        is_multi_gpu = True

    model.cuda()

    weight_params = model.module.parameters() if is_multi_gpu else model.parameters()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    wandb.run.summary["param_size"] = utils.count_parameters_in_MB(model)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(
        weight_params,  # model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_accuracy = 0

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)
        wandb.log({"evaluation_train_acc": train_acc, 'epoch': epoch})

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        wandb.log({"evaluation_valid_acc": valid_acc, 'epoch': epoch})

        if valid_acc > best_accuracy:
            wandb.run.summary["best_valid_accuracy"] = valid_acc
            wandb.run.summary["epoch_of_best_accuracy"] = epoch
            best_accuracy = valid_acc
            utils.save(model, os.path.join(wandb.run.dir, 'weights-best.pt'))

        utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
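This variant mirrors every logged metric to Weights & Biases and records the best accuracy in the run summary. A minimal sketch of that pattern with placeholder values; it assumes the wandb package, and mode='offline' avoids needing a configured account:

import wandb

run = wandb.init(project='automl-gradient-based-nas', mode='offline')
for epoch in range(3):
    wandb.log({'evaluation_valid_acc': 90.0 + epoch, 'epoch': epoch})  # placeholder metric
run.summary['best_valid_accuracy'] = 92.0
run.finish()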
Example #18
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    device = torch.device("cuda:{}".format(args.gpu))
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.arch is not None:
        genotype = eval("genotypes.%s" % args.arch)

    if args.dir is not None:
        with open(os.path.join(args.dir, "genotype.pickle"), 'rb') as f:
            genotype = pickle.load(f)
        print("Unpickling genotype.pickle")

    logging.info(genotype)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar100(args)
    train_data = dset.CIFAR100(root=args.data,
                               train=True,
                               download=True,
                               transform=train_transform)
    valid_data = dset.CIFAR100(root=args.data,
                               train=False,
                               download=True,
                               transform=valid_transform)
    logging.info("[INFO] len(train_data): {}, len(valid_data): {}".format(
        len(train_data), len(valid_data)))

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    test_error = []

    best_acc = 0.0
    for epoch in range(args.epochs):
        logging.info('[INFO] epoch %d lr %e', epoch + 1, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('[INFO] train_acc %f', train_acc)
        writer.add_scalar("train_acc", train_acc, epoch + 1)
        writer.add_scalar("train_obj", train_obj, epoch + 1)
        scheduler.step()

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))

        logging.info('[INFO] valid_acc %f', valid_acc)
        writer.add_scalar("valid_acc", valid_acc, epoch + 1)
        writer.add_scalar("valid_obj", valid_obj, epoch + 1)
        writer.add_scalar("test_error", 100 - valid_acc, epoch + 1)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        test_error.append(100 - valid_acc)
    logging.info('[INFO] best_acc %f', best_acc)

    with open("{}/test_error.pickle".format(args.save), 'wb') as f:
        pickle.dump(test_error, f)
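Example #18 accepts a genotype either by name from genotypes.py or unpickled from a search directory. A sketch of the pickle round-trip, with a plain tuple standing in for the real Genotype namedtuple; unpickling a namedtuple requires its defining module to be importable:

import pickle

genotype = ('sep_conv_3x3', 0)  # stand-in; the real object is a Genotype namedtuple

with open('genotype.pickle', 'wb') as f:
    pickle.dump(genotype, f)

with open('genotype.pickle', 'rb') as f:
    genotype = pickle.load(f)
print(genotype)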
Example #19
def main():

    np.random.seed(args.seed)
    random.seed(args.seed)

    if torch.cuda.is_available():
        device = torch.device('cuda:{}'.format(args.gpu))
        cudnn.benchmark = False
        torch.manual_seed(args.seed)
        cudnn.enabled = True
        cudnn.deterministic = True
        torch.cuda.manual_seed(args.seed)
        logging.info('gpu device = %d' % args.gpu)
    else:
        device = torch.device('cpu')
        logging.info('No gpu device available')
        torch.manual_seed(args.seed)

    logging.info("args = %s", args)
    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.to(device)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    total_params = sum(x.data.nelement() for x in model.parameters())
    logging.info('Model total parameters: {}'.format(total_params))

    criterion = nn.CrossEntropyLoss()
    # follow the selected device; a bare .cuda() would crash on CPU-only runs
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(
        args.cutout, args.cutout_length)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=0)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size // 2,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=0)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.epochs):

        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     args.gpu)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion, args.gpu)
        logging.info('valid_acc %f', valid_acc)

        scheduler.step()

        utils.save(model, os.path.join(args.save, 'weights.pt'))
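All of these scripts anneal model.drop_path_prob over training, and the network's cells consume it through a drop-path helper. A sketch of that operation as it commonly appears in DARTS-style repos (the exact helper used here may differ):

import torch

def drop_path(x, drop_prob):
    # Zeroes out whole samples on a residual branch and rescales the
    # survivors so the expected activation stays unchanged.
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        mask = torch.bernoulli(
            torch.full((x.size(0), 1, 1, 1), keep_prob, device=x.device))
        x = x.div(keep_prob) * mask
    return x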
Beispiel #20
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval(args.arch)
    print(genotype)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()
    total_params = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
    print(total_params)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    train_transform, valid_transform = utils._data_transforms_cifar10()
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=32,
    )

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=32)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    max_acc = 0
    for epoch in range(args.epochs):
        model.drop_path_prob = args.drop_path_prob
        # model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)
        scheduler.step()
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        logging.info('max acc:{}'.format(max_acc))
        if valid_acc > max_acc:
            logging.info('Update Max Acc')
            max_acc = valid_acc
            utils.save(model, os.path.join(local_save, 'weights.pt'))
            dump_results((genotype, valid_acc / 100))
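utils.count_parameters_in_MB, used by every example to report model size, is usually a one-liner. A sketch consistent with the common DARTS utility, which excludes the auxiliary tower so the number reflects the network kept at inference time:

import numpy as np

def count_parameters_in_MB(model):
    # Parameter count in millions, skipping the auxiliary head.
    return sum(np.prod(v.size()) for name, v in model.named_parameters()
               if 'auxiliary' not in name) / 1e6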
Beispiel #21
def main(args):
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        genotype = eval("genotypes.%s" % args.arch)
        model = Network(
            C=args.init_channels,
            num_classes=args.class_num,
            layers=args.layers,
            auxiliary=args.auxiliary,
            genotype=genotype)

        logger.info("param size = {:.6f}MB".format(
            count_parameters_in_MB(model.parameters())))

        device_num = fluid.dygraph.parallel.Env().nranks
        step_per_epoch = int(args.trainset_num /
                             (args.batch_size * device_num))
        learning_rate = fluid.dygraph.CosineDecay(args.learning_rate,
                                                  step_per_epoch, args.epochs)
        clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=args.grad_clip)
        optimizer = fluid.optimizer.MomentumOptimizer(
            learning_rate,
            momentum=args.momentum,
            regularization=fluid.regularizer.L2Decay(args.weight_decay),
            parameter_list=model.parameters(),
            grad_clip=clip)

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            model = fluid.dygraph.parallel.DataParallel(model, strategy)

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=64,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=args.use_multiprocess)
        valid_loader = fluid.io.DataLoader.from_generator(
            capacity=64,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=args.use_multiprocess)

        train_reader = reader.train_valid(
            batch_size=args.batch_size,
            is_train=True,
            is_shuffle=True,
            args=args)
        valid_reader = reader.train_valid(
            batch_size=args.batch_size,
            is_train=False,
            is_shuffle=False,
            args=args)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        train_loader.set_batch_generator(train_reader, places=place)
        valid_loader.set_batch_generator(valid_reader, places=place)

        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel and
            fluid.dygraph.parallel.Env().local_rank == 0)
        best_acc = 0
        for epoch in range(args.epochs):
            drop_path_prob = args.drop_path_prob * epoch / args.epochs
            logger.info('Epoch {}, lr {:.6f}'.format(
                epoch, optimizer.current_step_lr()))
            train_top1 = train(model, train_loader, optimizer, epoch,
                               drop_path_prob, args)
            logger.info("Epoch {}, train_acc {:.6f}".format(epoch, train_top1))
            valid_top1 = valid(model, valid_loader, epoch, args)
            if valid_top1 > best_acc:
                best_acc = valid_top1
                if save_parameters:
                    fluid.save_dygraph(model.state_dict(),
                                       args.model_save_dir + "/best_model")
            logger.info("Epoch {}, valid_acc {:.6f}, best_valid_acc {:.6f}".
                        format(epoch, valid_top1, best_acc))
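The Paddle example above attaches GradientClipByGlobalNorm directly to the optimizer; the PyTorch snippets instead rely on their train() helpers to clip per batch. A sketch of that step, with a grad_clip default mirroring the args.grad_clip used elsewhere (the helper name is an assumption):

import torch

def train_step(model, criterion, optimizer, inputs, targets, grad_clip=5.0):
    # One SGD step with global-norm gradient clipping.
    optimizer.zero_grad()
    logits = model(inputs)
    if isinstance(logits, tuple):  # DARTS nets return (logits, logits_aux)
        logits = logits[0]
    loss = criterion(logits, targets)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer.step()
    return loss.item()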
Beispiel #22
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, NTU_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    trainset = dataset_ntuxview.MyDataset(
        '/media/lab540/79eff75a-f78c-42f2-8902-9358e88bf654/lab540/Neura_auto_search/datasets/ntu112/ntu_cv/train.txt',
        transform=transform.ToTensor())
    train_queue = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=2)

    validset = dataset_ntuxview.MyDataset(
        '/media/lab540/79eff75a-f78c-42f2-8902-9358e88bf654/lab540/Neura_auto_search/datasets/ntu112/ntu_cv/test.txt',
        transform=transform.ToTensor())
    valid_queue = torch.utils.data.DataLoader(validset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        start_time = time.time()  # needed for the per-epoch timing below
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('valid_acc %f, best_acc %f', valid_acc, best_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
Beispiel #23
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(
        args.data_seed)  # cutout and load_corrupted_data use np.random
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = False
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.deterministic = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.arch == 'resnet':
        model = ResNet18(CIFAR_CLASSES).cuda()
        args.auxiliary = False
    elif args.arch == 'resnet50':
        model = ResNet50(CIFAR_CLASSES).cuda()
        args.auxiliary = False
    elif args.arch == 'resnet34':
        model = ResNet34(CIFAR_CLASSES).cuda()
        args.auxiliary = False
    else:
        genotype = eval("genotypes.%s" % args.arch)
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                        args.auxiliary, genotype)
        model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, test_transform = utils._data_transforms_cifar10(args)

    # Load dataset
    if args.dataset == 'cifar10':
        noisy_train_data = CIFAR10(root=args.data,
                                   train=True,
                                   gold=False,
                                   gold_fraction=0.0,
                                   corruption_prob=args.corruption_prob,
                                   corruption_type=args.corruption_type,
                                   transform=train_transform,
                                   download=True,
                                   seed=args.data_seed)
        gold_train_data = CIFAR10(root=args.data,
                                  train=True,
                                  gold=True,
                                  gold_fraction=1.0,
                                  corruption_prob=args.corruption_prob,
                                  corruption_type=args.corruption_type,
                                  transform=train_transform,
                                  download=True,
                                  seed=args.data_seed)
        test_data = dset.CIFAR10(root=args.data,
                                 train=False,
                                 download=True,
                                 transform=test_transform)
    elif args.dataset == 'cifar100':
        noisy_train_data = CIFAR100(root=args.data,
                                    train=True,
                                    gold=False,
                                    gold_fraction=0.0,
                                    corruption_prob=args.corruption_prob,
                                    corruption_type=args.corruption_type,
                                    transform=train_transform,
                                    download=True,
                                    seed=args.data_seed)
        gold_train_data = CIFAR100(root=args.data,
                                   train=True,
                                   gold=True,
                                   gold_fraction=1.0,
                                   corruption_prob=args.corruption_prob,
                                   corruption_type=args.corruption_type,
                                   transform=train_transform,
                                   download=True,
                                   seed=args.data_seed)
        test_data = dset.CIFAR100(root=args.data,
                                  train=False,
                                  download=True,
                                  transform=test_transform)

    num_train = len(gold_train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    if args.gold_fraction == 1.0:
        train_data = gold_train_data
    else:
        train_data = noisy_train_data
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=0)

    if args.clean_valid:
        valid_data = gold_train_data
    else:
        valid_data = noisy_train_data

    valid_queue = torch.utils.data.DataLoader(
        valid_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=0)

    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss(alpha=args.alpha).cuda()
    elif args.loss_func == 'forward_gold':
        corruption_matrix = train_data.corruption_matrix
        criterion = utils.ForwardGoldLoss(corruption_matrix=corruption_matrix)
    else:
        assert False, "Invalid loss function '{}' given. Must be in {'cce', 'rll'}".format(
            args.loss_func)

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer_valid(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        test_acc, test_obj = infer(test_queue, model, criterion)
        logging.info('test_acc %f', test_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
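utils.ForwardGoldLoss consumes the corruption matrix exposed by the gold training data. A minimal sketch of forward loss correction in the style of Patrini et al., assuming T is row-stochastic with T[i][j] = P(observed label j | true label i); the repo's actual implementation may differ:

import torch
import torch.nn as nn
import torch.nn.functional as F

class ForwardCorrectedLoss(nn.Module):
    # Multiplies predicted class probabilities by the corruption matrix T
    # before the negative log-likelihood, so the model is scored against
    # the noisy label distribution it actually observes.
    def __init__(self, corruption_matrix):
        super().__init__()
        self.register_buffer(
            'T', torch.as_tensor(corruption_matrix, dtype=torch.float32))

    def forward(self, logits, noisy_targets):
        probs = F.softmax(logits, dim=1)
        noisy_probs = probs @ self.T  # (batch, classes) x (classes, classes)
        return F.nll_loss(torch.log(noisy_probs.clamp_min(1e-12)),
                          noisy_targets)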
Beispiel #24
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    random.seed(args.seed)

    if args.arch == 'resnet':
        model = ResNet18(CIFAR_CLASSES).cuda()
        args.auxiliary = False
    elif args.arch == 'resnet50':
        model = ResNet50(CIFAR_CLASSES).cuda()
        args.auxiliary = False
    elif args.arch == 'resnet34':
        model = ResNet34(CIFAR_CLASSES).cuda()
        args.auxiliary = False
    else:
        genotype = eval("genotypes.%s" % args.arch)
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                        args.auxiliary, genotype)
        model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, test_transform = utils._data_transforms_cifar10(args)

    train_data = CIFAR10(root=args.data,
                         train=True,
                         gold=False,
                         gold_fraction=0.0,
                         corruption_prob=args.corruption_prob,
                         corruption_type=args.corruption_type,
                         transform=train_transform,
                         download=True,
                         seed=args.seed)
    gold_train_data = CIFAR10(root=args.data,
                              train=True,
                              gold=True,
                              gold_fraction=1.0,
                              corruption_prob=args.corruption_prob,
                              corruption_type=args.corruption_type,
                              transform=train_transform,
                              download=True,
                              seed=args.seed)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    clean_train_queue = torch.utils.data.DataLoader(
        gold_train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    noisy_train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    # clean_train_list = []
    # for input, target in clean_train_queue:
    #   input = Variable(input).cuda()
    #   target = Variable(target).cuda(async=True)
    #   clean_train_list.append((input, target))
    #
    # noisy_train_list = []
    # for input, target in noisy_train_queue:
    #   input = Variable(input).cuda()
    #   target = Variable(target).cuda(async=True)
    #   noisy_train_list.append((input, target))

    clean_valid_queue = torch.utils.data.DataLoader(
        gold_train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=2)

    noisy_valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss(alpha=args.alpha).cuda()
    elif args.loss_func == 'forward_gold':
        corruption_matrix = train_data.corruption_matrix
        criterion = utils.ForwardGoldLoss(corruption_matrix=corruption_matrix)
    else:
        assert False, "Invalid loss function '{}' given. Must be in {'cce', 'rll'}".format(
            args.loss_func)

    global gain
    for epoch in range(args.epochs):
        if args.random_weight:
            logging.info('Epoch %d, Randomly assign weights', epoch)
            model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
            clean_obj, noisy_obj = infer_random_weight(clean_train_queue,
                                                       noisy_train_queue,
                                                       model, criterion)
            logging.info('clean loss %f, noisy loss %f', clean_obj, noisy_obj)
            gain = np.random.randint(1, args.grad_clip, size=1)[0]
        else:
            scheduler.step()
            logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
            model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

            train_acc, train_obj, another_obj = train(clean_train_queue,
                                                      noisy_train_queue, model,
                                                      criterion, optimizer)
            if args.clean_train:
                logging.info('train_acc %f, clean_loss %f, noisy_loss %f',
                             train_acc, train_obj, another_obj)
            else:
                logging.info('train_acc %f, clean_loss %f, noisy_loss %f',
                             train_acc, another_obj, train_obj)

            utils.save(model, os.path.join(args.save, 'weights.pt'))

        clean_valid_acc, clean_valid_obj = infer_valid(clean_valid_queue,
                                                       model, criterion)
        logging.info('clean_valid_acc %f, clean_valid_loss %f',
                     clean_valid_acc, clean_valid_obj)

        noisy_valid_acc, noisy_valid_obj = infer_valid(noisy_valid_queue,
                                                       model, criterion)
        logging.info('noisy_valid_acc %f, noisy_valid_loss %f',
                     noisy_valid_acc, noisy_valid_obj)
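Every snippet calls train()/infer() helpers defined elsewhere in its repo. A self-contained sketch of the usual epoch loop with top-1 accuracy, assuming the model follows the DARTS convention of returning (logits, logits_aux); auxiliary-loss weighting is omitted for brevity:

import torch

def run_epoch(queue, model, criterion, optimizer=None):
    # optimizer=None runs an evaluation pass; otherwise one training epoch.
    training = optimizer is not None
    model.train() if training else model.eval()
    total_loss, correct, seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for inputs, targets in queue:
            inputs, targets = inputs.cuda(), targets.cuda()
            logits = model(inputs)
            if isinstance(logits, tuple):  # keep the main head
                logits = logits[0]
            loss = criterion(logits, targets)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * targets.size(0)
            correct += (logits.argmax(dim=1) == targets).sum().item()
            seen += targets.size(0)
    return 100.0 * correct / seen, total_loss / seen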
Beispiel #25
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    """
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)
  
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
  """

    data_dir = '../data/kmnist/'
    data_augmentations = transforms.ToTensor()

    # Load the Data here
    train_dataset = K49(data_dir, True, data_augmentations)
    #test_dataset = K49(data_dir, False, data_augmentations)

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
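The accuracy numbers reported by train() and infer() come from a top-k helper in utils; a sketch consistent with the common DARTS implementation:

def accuracy(output, target, topk=(1,)):
    # Top-k accuracies (in percent) for a batch of logits.
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res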
Beispiel #26
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.to('cuda')

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to('cuda')

    # not apply weight decay to BN layers
    if args.bn_no_decay:
        logging.info('BN layers are excluded from weight decay')
        bn_params, other_params = utils.split_bn_params(model)
        logging.debug('bn: %s', [p.dtype for p in bn_params])
        logging.debug('other: %s', [p.dtype for p in other_params])
        param_group = [{'params': bn_params, 'weight_decay': 0},
                       {'params': other_params}]
    else:
        param_group = model.parameters()

    optimizer = torch.optim.SGD(
        param_group,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )
    logging.info('optimizer: %s', optimizer)

    train_transform, valid_transform = utils.data_transforms_cifar10(args)
    train_data = datasets.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = datasets.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.num_workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    init_epoch = 0
    best_acc = 0

    if args.recover:
        states = torch.load(args.recover)
        model.load_state_dict(states['state'])
        init_epoch = states['epoch'] + 1
        best_acc = states['best_acc']
        logging.info('checkpoint loaded')
        scheduler.step(init_epoch)
        logging.info('scheduler is set to epoch %d. learning rate is %s', init_epoch, scheduler.get_lr())

    for epoch in range(init_epoch, args.epochs):
        logging.info('epoch %d lr %s', epoch, scheduler.get_lr())
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %.4f', train_acc)

        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

        logging.info('epoch %03d overall train_acc=%.4f valid_acc=%.4f', epoch, train_acc, valid_acc)

        scheduler.step()

        # gpu info
        utils.gpu_usage(args.debug)

        if valid_acc > best_acc:
            best_acc = valid_acc

        logging.info('best acc: %.4f', best_acc)

        utils.save_checkpoint(state={'state': model.state_dict(),
                                     'epoch': epoch,
                                     'best_acc': best_acc},
                              is_best=False, save=args.save)
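utils.split_bn_params is project-specific; one plausible implementation that separates BatchNorm parameters so they can be excluded from weight decay (a sketch, not necessarily the repo's exact code):

import torch

def split_bn_params(model):
    # Parameters owned directly by a BatchNorm module (weight/bias) go into
    # bn_params; everything else goes into other_params.
    bn_params, other_params = [], []
    for module in model.modules():
        bucket = bn_params if isinstance(
            module, torch.nn.modules.batchnorm._BatchNorm) else other_params
        bucket.extend(module.parameters(recurse=False))
    return bn_params, other_params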
Beispiel #27
def main(args):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_data = BindingDataset(args.annofile, args.seqfile)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(0.7 * num_train))

    # Use the 70/30 split computed above; without these samplers the split
    # would be dead code and validation would run on the training data.
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
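utils.save, called at the end of every epoch loop here, is typically a thin wrapper over the state dict, paired with a matching load; a sketch of the usual pair:

import torch

def save(model, model_path):
    torch.save(model.state_dict(), model_path)

def load(model, model_path):
    model.load_state_dict(torch.load(model_path))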
Beispiel #28
def run(net,
        init_ch=32,
        layers=20,
        auxiliary=True,
        lr=0.025,
        momentum=0.9,
        wd=3e-4,
        cutout=True,
        cutout_length=16,
        data='../data',
        batch_size=96,
        epochs=600,
        drop_path_prob=0.2,
        auxiliary_weight=0.4):
    save = '/checkpoint/linnanwang/nasnet/' + hashlib.md5(
        json.dumps(net).encode()).hexdigest()
    utils.create_exp_dir(save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(0)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(0)
    logging.info('gpu device = %d' % 0)
    # logging.info("args = %s", args)

    genotype = net
    model = Network(init_ch, 10, layers, auxiliary, genotype).cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr,
                                momentum=momentum,
                                weight_decay=wd)
    model, optimizer = apex.amp.initialize(model, optimizer, opt_level="O3")

    train_transform, valid_transform = utils._data_transforms_cifar10(
        cutout, cutout_length)
    train_data = dset.CIFAR10(root=data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(epochs))

    best_acc = 0.0

    for epoch in range(epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = drop_path_prob * epoch / epochs

        train_acc, train_obj = train(train_queue,
                                     model,
                                     criterion,
                                     optimizer,
                                     auxiliary=auxiliary,
                                     auxiliary_weight=auxiliary_weight)
        logging.info('train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc: %f', valid_acc)

        if valid_acc > best_acc and epoch >= 50:
            print('this model is the best')
            torch.save(model.state_dict(), os.path.join(save, 'model.pt'))
            print('saved to: model.pt')  # only report a save that actually happened
        if valid_acc > best_acc:
            best_acc = valid_acc
        print('current best acc is', best_acc)

        if epoch == 100:
            break

        # utils.save(model, os.path.join(args.save, 'trained.pt'))

    return best_acc
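apex.amp.initialize(..., opt_level="O3") runs the model in pure FP16. On recent PyTorch the same loop is usually written with the built-in torch.cuda.amp, which keeps master weights in FP32. A sketch of the equivalent batch step under that assumption:

import torch

scaler = torch.cuda.amp.GradScaler()

def amp_step(model, criterion, optimizer, inputs, targets):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        logits = model(inputs)
        if isinstance(logits, tuple):  # main head of a DARTS network
            logits = logits[0]
        loss = criterion(logits, targets)
    scaler.scale(loss).backward()  # scale the loss to avoid FP16 underflow
    scaler.step(optimizer)         # unscales gradients, then steps
    scaler.update()
    return loss.item()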
Beispiel #29
def main():
  # make sure a GPU is available
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)  # select the current device
  cudnn.benchmark = True  # let cuDNN benchmark and pick the fastest kernels
  torch.manual_seed(args.seed)  # seed the CPU RNG
  cudnn.enabled = True  # enable cuDNN, the GPU-accelerated library of NN primitives
  torch.cuda.manual_seed(args.seed)  # seed the current GPU
  # log run information
  logging.info('gpu device = %d' % args.gpu)
  # gpu device = 0
  logging.info("args = %s", args)
  '''
  args = Namespace(arch='DARTS', auxiliary=False, auxiliary_weight=0.4, batch_size=96,
  cutout=False, cutout_length=16, data='../data', drop_path_prob=0.2, epochs=600, gpu=0,
  grad_clip=5, init_channels=36, layers=20, learning_rate=0.025, model_path='saved_models',
  momentum=0.9, report_freq=50, save='eval-EXP-20190618-170816', seed=0, weight_decay=0.0003)
  '''

  # eval() evaluates the string expression and returns the genotype object
  genotype = eval("genotypes.%s" % args.arch)

  # from model import NetworkCIFAR as Network
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
  # Network(initial channels=36, CIFAR_CLASSES=10, layers=20, auxiliary-tower flag, genotype)
  model = model.cuda()

  # log the size of the model parameters
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
  # param size = 3.349342MB

  criterion = nn.CrossEntropyLoss()  # loss function
  criterion = criterion.cuda()

  # optimizer
  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay
      )
  # preprocessing pipelines for the training and validation sets
  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  # load the datasets
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
  valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  '''
  Files already downloaded and verified
  Files already downloaded and verified
  '''

  # wrap the datasets in DataLoaders
  '''
  pin_memory: if True, tensors are copied into CUDA pinned memory before being returned
  num_workers: number of subprocesses used to load data (0 = main process); it must be
  >= 0, and in the author's runs a value of 2 raised errors, hence 1 here
  '''
  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=1)

  valid_queue = torch.utils.data.DataLoader(
      valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=1)

  # learning-rate schedule: cosine annealing
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

  # epochs defaults to 600
  for epoch in range(args.epochs):

    scheduler.step()  # advance the learning-rate schedule (not the weights)
    logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
    # epoch 0 lr 2.500000e-02

    # drop path: the probability is annealed linearly with the epoch, up to drop_path_prob (0.2)
    model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
    # one training epoch; train() takes the training loader, the network,
    # the loss function, and the optimizer
    train_acc, train_obj = train(train_queue, model, criterion, optimizer)
    logging.info('train_acc %f', train_acc)  # training accuracy for this epoch

    # accuracy on the validation set
    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)
    # save the model weights
    utils.save(model, os.path.join(args.save, 'weights.pt'))
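utils._data_transforms_cifar10 builds the standard CIFAR-10 pipeline with optional Cutout; a sketch matching the usual DARTS version:

import numpy as np
import torch
import torchvision.transforms as transforms

CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

class Cutout(object):
    # Zeroes out one random square patch of the normalized image tensor.
    def __init__(self, length):
        self.length = length

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)
        y, x = np.random.randint(h), np.random.randint(w)
        y1, y2 = np.clip(y - self.length // 2, 0, h), np.clip(y + self.length // 2, 0, h)
        x1, x2 = np.clip(x - self.length // 2, 0, w), np.clip(x + self.length // 2, 0, w)
        mask[y1:y2, x1:x2] = 0.
        return img * torch.from_numpy(mask)  # broadcasts over channels

def _data_transforms_cifar10(args):
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ])
    if args.cutout:
        train_transform.transforms.append(Cutout(args.cutout_length))
    valid_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ])
    return train_transform, valid_transform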
Beispiel #30
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)

    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    start_epoch = 0
    if args.resume:
        MT = torch.load(os.path.join(args.save, 'weight_optimizers.pt'))
        model.load_state_dict(MT['net'])
        optimizer.load_state_dict(MT['optimizer'])
        start_epoch = MT['epoch'] + 1  # the stored epoch already finished

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data_dir,
                                   train=False,
                                   download=True,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data_dir,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(start_epoch, args.epochs):

        # With DataParallel the real network lives in model.module; setting the
        # attribute on the wrapper itself has no effect on the forward pass.
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            state = {
                'net': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }
            torch.save(state,
                       os.path.join(args.save, 'best_weight_optimizers.pt'))
        logging.info('Valid_acc: %f, best_acc: %f', valid_acc, best_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Estimated remaining time: %d h.' % (duration * (args.epochs - epoch) / 3600))
        if epoch % 50 == 0:
            state = {
                'net': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }
            torch.save(state, os.path.join(args.save, 'weight_optimizers.pt'))
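One thing the resume path above does not restore is the LR scheduler, so after a restart the cosine schedule begins again from the initial learning rate. A sketch of checkpointing that also round-trips the scheduler state (the field names are an assumption):

import torch

def save_checkpoint(model, optimizer, scheduler, epoch, path):
    torch.save({
        'net': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        'epoch': epoch,
    }, path)

def load_checkpoint(model, optimizer, scheduler, path):
    ckpt = torch.load(path, map_location='cpu')
    model.load_state_dict(ckpt['net'])
    optimizer.load_state_dict(ckpt['optimizer'])
    scheduler.load_state_dict(ckpt['scheduler'])
    return ckpt['epoch'] + 1  # epoch to resume from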