Example #1
def main():
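    """DARTS-style architecture search on a bathymetry regression dataset
    (MSELoss, 4 input channels, 1 output channel)."""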
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, 1, args.layers, criterion, input_channels=4)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # dataset = utils.BathymetryDataset(args, "guyane/guyane.csv")
    # dataset.add(args, "saint_louis/saint_louis.csv")

    dataset = utils.BathymetryDataset(args, "../mixed_train.csv", to_filter=False)
    dataset.add(args, "../mixed_validation.csv", to_balance=False)

    trains, vals = dataset.get_subset_indices(args.train_portion)

    train_queue = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(trains),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(vals),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    loggers = {
        "train": {"loss": [], "step": []},
        "val": {"loss": [], "step": []},
        "infer": {"loss": [], "step": []},
    }

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]

        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        _ = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, loggers)

        # validation
        infer_loss = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], infer_loss, None, model.clock)

        utils.plot_loss_acc(loggers, args.save)

        model.update_history()

        utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reduce'))

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    np.save(os.path.join(args.save, 'normal_weight.npy'),
            F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy())

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
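
For orientation, the train routine called in the loop above is not shown here. The sketch below is a minimal, hypothetical first-order DARTS-style search step; it assumes the same imports as the example and the Architect.step, utils.log_loss, and model.clock helpers used above, so the actual implementation in the repository may differ.

def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, loggers):
    # Hypothetical sketch; the real train() is defined elsewhere in the repository.
    valid_iter = iter(valid_queue)
    for step, (x, target) in enumerate(train_queue):
        model.train()
        x, target = x.cuda(), target.cuda(non_blocking=True)

        # Architecture (alpha) update on a batch drawn from the held-out split.
        try:
            x_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            x_search, target_search = next(valid_iter)
        x_search, target_search = x_search.cuda(), target_search.cuda(non_blocking=True)
        architect.step(x, target, x_search, target_search, lr, optimizer, unrolled=False)

        # Weight update on the training split.
        optimizer.zero_grad()
        loss = criterion(model(x), target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        utils.log_loss(loggers["train"], loss.item(), None, model.clock)
    return loss.item()
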
Example #2
def main():
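  """DARTS architecture search on CIFAR-10 with optional noise injection,
  random architecture generation, and alpha reweighting controls."""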
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled = True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()

  """Noise Darts"""
  if args.noise_darts:
    SearchControllerConf['noise_darts']['noise_type'] = args.noise_type
    SearchControllerConf['noise_darts']['T_max'] = args.max_step
  else:
    SearchControllerConf['noise_darts'] = None

  """Random Darts"""
  if args.random_search:
    SearchControllerConf['random_search']['num_identity'] = args.num_identity
    SearchControllerConf['random_search']['num_arch'] = args.num_arch
    SearchControllerConf['random_search']['flops_threshold'] = args.flops_threshold
  else:
    SearchControllerConf['random_search'] = None

  """Reweight Darts"""
  SearchControllerConf['reweight'] = args.reweight

  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
  model = model.cuda()
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  if args.random_search:
    genotype_list = model.random_generate()
    logging.info('genotype list = %s', genotype_list)
    logging.info('generate done!')
    sys.exit(0)

  model_optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  ## single level
  arch_optimizer = torch.optim.Adam(model.arch_parameters(),
        lr=args.arch_learning_rate, betas=(0.9, 0.999), weight_decay=args.arch_weight_decay)

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, args)
  for epoch in range(args.epochs):
    scheduler.step()
    lr = scheduler.get_last_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()

    logging.info('genotype = %s', genotype)

    logging.info(F.softmax(model.alphas_normal, dim=-1))
    logging.info(F.softmax(model.alphas_reduce, dim=-1))
    model.update_history()

    # train and search the model
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, model_optimizer, lr, epoch)
    logging.info('train_acc %f', train_acc)

    # validate the model
    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
    utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normal'))
    utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reduce'))
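
The infer routine used for validation above is likewise defined elsewhere. Below is a minimal sketch that assumes it follows the standard DARTS evaluation loop, using hypothetical utils.AvgrageMeter and utils.accuracy helpers; the repository's actual version may differ.

def infer(valid_queue, model, criterion):
  # Hypothetical sketch of the evaluation loop referenced by the examples.
  objs = utils.AvgrageMeter()
  top1 = utils.AvgrageMeter()
  model.eval()
  with torch.no_grad():
    for step, (x, target) in enumerate(valid_queue):
      x, target = x.cuda(), target.cuda(non_blocking=True)
      logits = model(x)
      loss = criterion(logits, target)
      prec1, = utils.accuracy(logits, target, topk=(1,))
      n = x.size(0)
      objs.update(loss.item(), n)
      top1.update(prec1.item(), n)
  return top1.avg, objs.avg
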
Example #3
def main():
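    """Architecture search on CIFAR-10 with sigmoid-activated architecture
    weights, an auxiliary separation loss, and checkpoint/resume support."""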
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    run_start = time.time()
    start_epoch = 0
    dur_time = 0

    criterion_train = (ConvSeparateLoss(weight=args.aux_loss_weight)
                       if args.sep_loss == 'l2'
                       else TriSeparateLoss(weight=args.aux_loss_weight))
    criterion_val = nn.CrossEntropyLoss()

    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion_train,
                    steps=4,
                    multiplier=4,
                    stem_multiplier=3,
                    parse_method=args.parse_method,
                    op_threshold=args.op_threshold)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    model_optimizer = torch.optim.SGD(model.parameters(),
                                      args.learning_rate,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay)

    arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                      lr=args.arch_learning_rate,
                                      betas=(0.9, 0.999),
                                      weight_decay=args.arch_weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    architect = Architect(model, args)

    # resume from checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            dur_time = checkpoint['dur_time']
            model_optimizer.load_state_dict(checkpoint['model_optimizer'])
            architect.optimizer.load_state_dict(
                checkpoint['arch_optimizer'])
            model.restore(checkpoint['network_states'])
            logging.info('=> loaded checkpoint \'{}\'(epoch {})'.format(
                args.resume, start_epoch))
        else:
            logging.info('=> no checkpoint found at \'{}\''.format(
                args.resume))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer,
        float(args.epochs),
        eta_min=args.learning_rate_min,
        last_epoch=-1 if start_epoch == 0 else start_epoch)
    if args.resume and os.path.isfile(args.resume):
        scheduler.load_state_dict(checkpoint['scheduler'])

    for epoch in range(start_epoch, args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        logging.info(torch.sigmoid(model.alphas_normal))
        logging.info(torch.sigmoid(model.alphas_reduce))
        model.update_history()

        # train and search the model
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion_train,
                                     model_optimizer, arch_optimizer)
        logging.info('train_acc %f', train_acc)

        # validate the model
        valid_acc, valid_obj = infer(valid_queue, model, criterion_val)
        logging.info('valid_acc %f', valid_acc)

        # save checkpoint
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'dur_time': dur_time + time.time() - run_start,
                'scheduler': scheduler.state_dict(),
                'model_optimizer': model_optimizer.state_dict(),
                'arch_optimizer': architect.optimizer.state_dict(),
                'network_states': model.states(),
            },
            is_best=False,
            save=args.save)
        logging.info('save checkpoint (epoch %d) in %s  dur_time: %s', epoch,
                     args.save,
                     utils.calc_time(dur_time + time.time() - run_start))

        # save operation weights as fig
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))

    # save last operations
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            torch.sigmoid(model.alphas_normal).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            torch.sigmoid(model.alphas_reduce).data.cpu().numpy())
    logging.info('save last weights done')
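
Once the search finishes, the saved normal_weight.npy and reduce_weight.npy arrays can be inspected offline. The snippet below is a small, self-contained sketch; the run directory name is hypothetical (it stands in for args.save), and it assumes the row-per-edge layout produced by the code above.

import os

import numpy as np

save_dir = 'search-EXP'  # hypothetical run directory (args.save in the examples above)
normal = np.load(os.path.join(save_dir, 'normal_weight.npy'))
reduce_ = np.load(os.path.join(save_dir, 'reduce_weight.npy'))

# Each row holds the weight of every candidate operation on one edge of the cell;
# the argmax per row is the operation the search currently favours.
print('normal cell:', normal.argmax(axis=-1))
print('reduce cell:', reduce_.argmax(axis=-1))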