Example #1
    def _construct_model_from_theta(self, theta):
        model_clone = Network(self.model._C, self.model._num_classes,
                              self.model._layers,
                              self.model._criterion).cuda()

        for x, y in zip(model_clone.arch_parameters(),
                        self.model.arch_parameters()):
            x.data.copy_(y.data)
        model_dict = self.model.state_dict()

        params, offset = {}, 0
        for k, v in self.model.named_parameters():
            v_length = np.prod(v.size())
            params[k] = theta[offset:offset + v_length].view(v.size())
            offset += v_length

        assert offset == len(theta)
        model_dict.update(params)
        model_clone.load_state_dict(model_dict)
        return model_clone.cuda()
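
    # Hedged sketch, not part of the original example: in DARTS-style
    # architects, _construct_model_from_theta is typically called from a
    # helper that builds the unrolled weights w' = w - eta * dL_train(w).
    # The names self.model._loss, self.network_momentum and
    # self.network_weight_decay below are assumptions for illustration only.
    def _compute_unrolled_model(self, input, target, eta, network_optimizer):
        loss = self.model._loss(input, target)
        # flatten all weights into a single vector theta
        theta = torch.cat(
            [v.view(-1) for v in self.model.parameters()]).detach()
        try:
            # momentum buffer of the weight optimizer, if one exists yet
            moment = torch.cat([
                network_optimizer.state[v]['momentum_buffer'].view(-1)
                for v in self.model.parameters()
            ]).mul_(self.network_momentum)
        except KeyError:
            moment = torch.zeros_like(theta)
        dtheta = torch.cat([
            g.view(-1)
            for g in torch.autograd.grad(loss, self.model.parameters())
        ]) + self.network_weight_decay * theta
        # one virtual SGD step, then rebuild a model around the new weights
        return self._construct_model_from_theta(theta - eta * (moment + dtheta))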
Example #2
def main():
    # check that a GPU is available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # init
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # criterion, model, and optimizer for model training
    criterion = nn.CrossEntropyLoss()  # TODO add latency loss
    criterion = criterion.cuda()
    model = Network(channels, steps, strides, CLASSES, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # prepare datasets
    #train_transform, valid_transform = utils._data_transforms_cifar10(args)
    #train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    train_transform, valid_transform = utils._data_transforms_imagenet(args)
    train_data = dset.ImageNet(root=args.data,
                               split='train',
                               download=True,
                               transform=train_transform)
    valid_data = dset.ImageNet(root=args.data,
                               split='val',
                               download=True,
                               transform=valid_transform)

    num_train = len(train_data)
    #indices = list(range(num_train))
    #split = int(np.floor(args.train_portion * num_train))

    # create dataloader
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        #sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data,
        batch_size=args.batch_size,
        #sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    # learning rate scheduler with cosine annealing
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # architect
    architect = Architect(model, args)

    # training
    for epoch in range(args.epochs):
        # lr update
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # get genotype for logging
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        for alpha in model.arch_parameters():
            print(F.softmax(alpha, dim=-1).data)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
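
# Hedged sketch, not part of the original example: a typical DARTS-style
# train() as referenced by the loop above. It alternates one architecture
# update on a validation batch with one weight update per training batch.
# It assumes the same globals as the example (args, utils, logging, nn) and
# an Architect exposing step(..., unrolled=...).
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # a batch from the validation split drives the architecture update
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)
        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)

        # weight update on the training batch
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)

    return top1.avg, objs.avg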
Example #3
    def search(self, train_x, train_y, valid_x, valid_y, metadata):

        np.random.seed(self.seed)
        cudnn.benchmark = True
        torch.manual_seed(self.seed)
        cudnn.enabled = True
        torch.cuda.manual_seed(self.seed)
        is_multi_gpu = False

        helper_function()
        n_classes = metadata['n_classes']

        # check that CUDA is available
        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        cudnn.benchmark = True
        cudnn.enabled = True

        # loading criterion
        criterion = nn.CrossEntropyLoss()
        criterion = criterion.cuda()

        train_pack = list(zip(train_x, train_y))
        valid_pack = list(zip(valid_x, valid_y))

        data_channel = np.array(train_x).shape[1]

        train_loader = torch.utils.data.DataLoader(train_pack,
                                                   int(self.batch_size),
                                                   pin_memory=True,
                                                   num_workers=4)
        valid_loader = torch.utils.data.DataLoader(valid_pack,
                                                   int(self.batch_size),
                                                   pin_memory=True,
                                                   num_workers=4)

        model = Network(self.init_channels, data_channel, n_classes,
                        self.layers, criterion)
        model = model.cuda()

        # the submission server does not support multi-GPU, so is_multi_gpu stays False
        if is_multi_gpu:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            model = nn.DataParallel(model)

        arch_parameters = model.module.arch_parameters(
        ) if is_multi_gpu else model.arch_parameters()
        arch_params = list(map(id, arch_parameters))

        parameters = model.module.parameters(
        ) if is_multi_gpu else model.parameters()
        weight_params = filter(lambda p: id(p) not in arch_params, parameters)

        optimizer = torch.optim.SGD(weight_params,
                                    self.learning_rate,
                                    momentum=self.momentum,
                                    weight_decay=self.weight_decay)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(self.epochs), eta_min=self.learning_rate_min)

        architect = Architect(is_multi_gpu, model, criterion, self.momentum,
                              self.weight_decay, self.arch_learning_rate,
                              self.arch_weight_decay)

        best_accuracy = 0
        best_accuracy_different_cnn_counts = dict()

        for epoch in range(self.epochs):
            lr = scheduler.get_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)

            # training
            objs = utils.AvgrageMeter()
            top1 = utils.AvgrageMeter()
            top5 = utils.AvgrageMeter()

            train_batch = time.time()

            for step, (input, target) in enumerate(train_loader):

                # logging.info("epoch %d, step %d START" % (epoch, step))
                model.train()
                n = input.size(0)

                input = input.cuda()
                target = target.cuda()

                # get a random minibatch from the search queue with replacement
                input_search, target_search = next(iter(valid_loader))
                input_search = input_search.cuda()
                target_search = target_search.cuda()

                # Update architecture alpha by Adam-SGD
                # logging.info("step %d. update architecture by Adam. START" % step)
                # if args.optimization == "DARTS":
                #     architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
                # else:
                architect.step_milenas_2ndorder(input, target, input_search,
                                                target_search, lr, optimizer,
                                                1, 1)

                # logging.info("step %d. update architecture by Adam. FINISH" % step)
                # Update weights w by SGD, ignoring the gradients accumulated during the architecture step

                # logging.info("step %d. update weight by SGD. START" % step)
                optimizer.zero_grad()
                logits = model(input)
                loss = criterion(logits, target)

                loss.backward()
                parameters = model.module.arch_parameters(
                ) if is_multi_gpu else model.arch_parameters()
                nn.utils.clip_grad_norm_(parameters, self.grad_clip)
                optimizer.step()

                # logging.info("step %d. update weight by SGD. FINISH\n" % step)

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                # torch.cuda.empty_cache()

                if step % self.report_freq == 0:
                    average_batch_t = (time.time() - train_batch) / (step + 1)
                    print("Epoch: {}, Step: {}, Top1: {}, Top5: {}, T: {}".
                          format(
                              epoch, step, top1.avg, top5.avg,
                              show_time(average_batch_t *
                                        (len(train_loader) - step))))

            model.eval()

            # validation
            with torch.no_grad():
                objs = utils.AvgrageMeter()
                top1 = utils.AvgrageMeter()
                top5 = utils.AvgrageMeter()

                for step, (input, target) in enumerate(valid_loader):
                    input = input.cuda()
                    target = target.cuda()

                    logits = model(input)
                    loss = criterion(logits, target)

                    prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                    n = input.size(0)
                    objs.update(loss.item(), n)
                    top1.update(prec1.item(), n)
                    top5.update(prec5.item(), n)

                    if step % self.report_freq == 0:
                        print("Epoch: {}, Step: {}, Top1: {}, Top5: {}".format(
                            epoch, step, top1.avg, top5.avg))

            scheduler.step()

            # save the structure
            genotype, normal_cnn_count, reduce_cnn_count = model.module.genotype(
            ) if is_multi_gpu else model.genotype()
            print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))
            # print(F.softmax(model.module.alphas_normal if is_multi_gpu else model.alphas_normal, dim=-1))
            # print(F.softmax(model.module.alphas_reduce if is_multi_gpu else model.alphas_reduce, dim=-1))
            # logging.info('genotype = %s', genotype)

        return model
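
# Hedged sketch, not part of the original example: minimal versions of the
# utils.AvgrageMeter and utils.accuracy helpers used above, as they typically
# appear in DARTS-derived repositories.
class AvgrageMeter(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt


def accuracy(output, target, topk=(1,)):
    # top-k accuracy in percent for each k in topk
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res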
Example #4
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(0)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()

  """Noise Darts"""
  if args.noise_darts:
    SearchControllerConf['noise_darts']['noise_type'] = args.noise_type
    SearchControllerConf['noise_darts']['T_max'] = args.max_step
  else:
    SearchControllerConf['noise_darts'] = None

  """Random Darts"""
  if args.random_search:
    SearchControllerConf['random_search']['num_identity'] = args.num_identity
    SearchControllerConf['random_search']['num_arch'] = args.num_arch
    SearchControllerConf['random_search']['flops_threshold'] = args.flops_threshold
  else:
    SearchControllerConf['random_search'] = None

  """Reweight Darts"""
  SearchControllerConf['reweight'] = args.reweight

  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
  model = model.cuda()
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  if args.random_search:
    genotype_list = model.random_generate()
    logging.info('genotype list = %s', genotype_list)
    logging.info('generate done!')
    sys.exit(0)

  model_optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  ## single level
  arch_optimizer = torch.optim.Adam(model.arch_parameters(),
        lr=args.arch_learning_rate, betas=(0.9, 0.999), weight_decay=args.arch_weight_decay)

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, args)
  for epoch in range(args.epochs):
    scheduler.step()
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()

    logging.info('genotype = %s', genotype)

    logging.info(F.softmax(model.alphas_normal, dim=-1))
    logging.info(F.softmax(model.alphas_reduce, dim=-1))
    model.update_history()

    # training and search the model
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, model_optimizer, lr, epoch)
    logging.info('train_acc %f', train_acc)

    # validation the model
    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
    utils.save_file(recoder = model.alphas_normal_history, path = os.path.join(args.save, 'normal'))
    utils.save_file(recoder = model.alphas_reduce_history, path = os.path.join(args.save, 'reduce'))
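
# Hedged sketch, not part of the original example: a typical infer() as called
# by the loop above, evaluating the search model on the held-out queue without
# gradients. It assumes the same globals as the example (args, utils, logging).
def infer(valid_queue, model, criterion):
  objs = utils.AvgrageMeter()
  top1 = utils.AvgrageMeter()
  top5 = utils.AvgrageMeter()
  model.eval()

  with torch.no_grad():
    for step, (input, target) in enumerate(valid_queue):
      input = input.cuda()
      target = target.cuda(non_blocking=True)

      logits = model(input)
      loss = criterion(logits, target)

      prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
      n = input.size(0)
      objs.update(loss.item(), n)
      top1.update(prec1.item(), n)
      top5.update(prec5.item(), n)

      if step % args.report_freq == 0:
        logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

  return top1.avg, objs.avg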
Example #5
class neural_architecture_search():
    def __init__(self, args):
        self.args = args

        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        if self.args.distributed:
            # Init distributed environment
            self.rank, self.world_size, self.device = init_dist(
                port=self.args.port)
            self.seed = self.rank * self.args.seed
        else:
            torch.cuda.set_device(self.args.gpu)
            self.device = torch.device("cuda")
            self.rank = 0
            self.seed = self.args.seed
            self.world_size = 1

        if self.args.fix_seedcudnn:
            random.seed(self.seed)
            torch.backends.cudnn.deterministic = True
            np.random.seed(self.seed)
            cudnn.benchmark = False
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
        else:
            np.random.seed(self.seed)
            cudnn.benchmark = True
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

        self.path = os.path.join(generate_date, self.args.save)
        if self.rank == 0:
            utils.create_exp_dir(generate_date,
                                 self.path,
                                 scripts_to_save=glob.glob('*.py'))
            logging.basicConfig(stream=sys.stdout,
                                level=logging.INFO,
                                format=log_format,
                                datefmt='%m/%d %I:%M:%S %p')
            fh = logging.FileHandler(os.path.join(self.path, 'log.txt'))
            fh.setFormatter(logging.Formatter(log_format))
            logging.getLogger().addHandler(fh)
            logging.info("self.args = %s", self.args)
            self.logger = tensorboardX.SummaryWriter(
                './runs/' + generate_date + '/nas_{}'.format(self.args.remark))
        else:
            self.logger = None

        # set default resource_lambda for different methods
        if self.args.resource_efficient:
            if self.args.method == 'policy_gradient':
                if self.args.log_penalty:
                    default_resource_lambda = 1e-4
                else:
                    default_resource_lambda = 1e-5
            if self.args.method == 'reparametrization':
                if self.args.log_penalty:
                    default_resource_lambda = 1e-2
                else:
                    default_resource_lambda = 1e-5
            if self.args.method == 'discrete':
                if self.args.log_penalty:
                    default_resource_lambda = 1e-2
                else:
                    default_resource_lambda = 1e-4
            if self.args.resource_lambda == default_lambda:
                self.args.resource_lambda = default_resource_lambda

        #initialize loss function
        self.criterion = nn.CrossEntropyLoss().to(self.device)

        #initialize model
        self.init_model()

        #calculate model param size
        if self.rank == 0:
            logging.info("param size = %fMB",
                         utils.count_parameters_in_MB(self.model))
            self.model._logger = self.logger
            self.model._logging = logging

        #initialize optimizer
        self.init_optimizer()

        #initialize dataset loader
        self.init_loaddata()

        self.update_theta = True
        self.update_alpha = True

    def init_model(self):

        self.model = Network(self.args.init_channels, CIFAR_CLASSES,
                             self.args.layers, self.criterion, self.args,
                             self.rank, self.world_size)
        self.model.to(self.device)
        if self.args.distributed:
            broadcast_params(self.model)
        for v in self.model.parameters():
            if v.requires_grad:
                if v.grad is None:
                    v.grad = torch.zeros_like(v)
        self.model.normal_log_alpha.grad = torch.zeros_like(
            self.model.normal_log_alpha)
        self.model.reduce_log_alpha.grad = torch.zeros_like(
            self.model.reduce_log_alpha)

    def init_optimizer(self):

        if args.distributed:
            self.optimizer = torch.optim.SGD(
                [
                    param for name, param in self.model.named_parameters() if
                    name != 'normal_log_alpha' and name != 'reduce_log_alpha'
                ],
                self.args.learning_rate,
                momentum=self.args.momentum,
                weight_decay=self.args.weight_decay)
            self.arch_optimizer = torch.optim.Adam(
                [
                    param for name, param in self.model.named_parameters()
                    if name == 'normal_log_alpha' or name == 'reduce_log_alpha'
                ],
                lr=self.args.arch_learning_rate,
                betas=(0.5, 0.999),
                weight_decay=self.args.arch_weight_decay)
        else:
            self.optimizer = torch.optim.SGD(self.model.parameters(),
                                             self.args.learning_rate,
                                             momentum=self.args.momentum,
                                             weight_decay=args.weight_decay)

            self.arch_optimizer = torch.optim.SGD(
                self.model.arch_parameters(), lr=self.args.arch_learning_rate)

    def init_loaddata(self):

        train_transform, valid_transform = utils._data_transforms_cifar10(
            self.args)
        train_data = dset.CIFAR10(root=self.args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=self.args.data,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

        if self.args.seed:

            # note: PyTorch DataLoader passes the worker id to worker_init_fn
            def worker_init_fn(worker_id):
                seed = self.seed
                np.random.seed(seed)
                random.seed(seed)
                torch.manual_seed(seed)
                return
        else:
            worker_init_fn = None

        if self.args.distributed:
            train_sampler = DistributedSampler(train_data)
            valid_sampler = DistributedSampler(valid_data)

            self.train_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=self.args.batch_size // self.world_size,
                shuffle=False,
                num_workers=0,
                pin_memory=False,
                sampler=train_sampler)
            self.valid_queue = torch.utils.data.DataLoader(
                valid_data,
                batch_size=self.args.batch_size // self.world_size,
                shuffle=False,
                num_workers=0,
                pin_memory=False,
                sampler=valid_sampler)

        else:
            self.train_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=self.args.batch_size,
                shuffle=True,
                pin_memory=False,
                num_workers=2)

            self.valid_queue = torch.utils.data.DataLoader(
                valid_data,
                batch_size=self.args.batch_size,
                shuffle=False,
                pin_memory=False,
                num_workers=2)

    def main(self):
        # lr scheduler: cosine annealing
        # temp scheduler: linear annealing (self-defined in utils)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            float(self.args.epochs),
            eta_min=self.args.learning_rate_min)

        self.temp_scheduler = utils.Temp_Scheduler(self.args.epochs,
                                                   self.model._temp,
                                                   self.args.temp,
                                                   temp_min=self.args.temp_min)

        for epoch in range(self.args.epochs):
            if self.args.random_sample_pretrain:
                if epoch < self.args.random_sample_pretrain_epoch:
                    self.args.random_sample = True
                else:
                    self.args.random_sample = False

            self.scheduler.step()
            if self.args.temp_annealing:
                self.model._temp = self.temp_scheduler.step()
            self.lr = self.scheduler.get_lr()[0]

            if self.rank == 0:
                logging.info('epoch %d lr %e temp %e', epoch, self.lr,
                             self.model._temp)
                self.logger.add_scalar('epoch_temp', self.model._temp, epoch)
                logging.info(self.model.normal_log_alpha)
                logging.info(self.model.reduce_log_alpha)
                logging.info(
                    self.model._get_weights(self.model.normal_log_alpha[0]))
                logging.info(
                    self.model._get_weights(self.model.reduce_log_alpha[0]))

            genotype_edge_all = self.model.genotype_edge_all()

            if self.rank == 0:
                logging.info('genotype_edge_all = %s', genotype_edge_all)
                # create genotypes.txt file
                txt_name = self.args.remark + '_genotype_edge_all_epoch' + str(
                    epoch)
                utils.txt('genotype', self.args.save, txt_name,
                          str(genotype_edge_all), generate_date)

            self.model.train()
            train_acc, loss, error_loss, loss_alpha = self.train(
                epoch, logging)
            if self.rank == 0:
                logging.info('train_acc %f', train_acc)
                self.logger.add_scalar("epoch_train_acc", train_acc, epoch)
                self.logger.add_scalar("epoch_train_error_loss", error_loss,
                                       epoch)
                if self.args.dsnas:
                    self.logger.add_scalar("epoch_train_alpha_loss",
                                           loss_alpha, epoch)

            # validation
            self.model.eval()
            valid_acc, valid_obj = self.infer(epoch)
            if self.args.gen_max_child:
                self.args.gen_max_child_flag = True
                valid_acc_max_child, valid_obj_max_child = self.infer(epoch)
                self.args.gen_max_child_flag = False

            if self.rank == 0:
                logging.info('valid_acc %f', valid_acc)
                self.logger.add_scalar("epoch_valid_acc", valid_acc, epoch)
                if self.args.gen_max_child:
                    logging.info('valid_acc_argmax_alpha %f',
                                 valid_acc_max_child)
                    self.logger.add_scalar("epoch_valid_acc_argmax_alpha",
                                           valid_acc_max_child, epoch)

                utils.save(self.model, os.path.join(self.path, 'weights.pt'))

        if self.rank == 0:
            logging.info(self.model.normal_log_alpha)
            logging.info(self.model.reduce_log_alpha)
            genotype_edge_all = self.model.genotype_edge_all()
            logging.info('genotype_edge_all = %s', genotype_edge_all)

    def train(self, epoch, logging):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        grad = utils.AvgrageMeter()

        normal_resource_gradient = 0
        reduce_resource_gradient = 0
        normal_loss_gradient = 0
        reduce_loss_gradient = 0
        normal_total_gradient = 0
        reduce_total_gradient = 0

        loss_alpha = None

        count = 0
        for step, (input, target) in enumerate(self.train_queue):
            if self.args.alternate_update:
                if step % 2 == 0:
                    self.update_theta = True
                    self.update_alpha = False
                else:
                    self.update_theta = False
                    self.update_alpha = True

            n = input.size(0)
            input = input.to(self.device)
            target = target.to(self.device, non_blocking=True)
            if self.args.snas:
                logits, logits_aux, penalty, op_normal, op_reduce = self.model(
                    input)
                error_loss = self.criterion(logits, target)
                if self.args.auxiliary:
                    loss_aux = self.criterion(logits_aux, target)
                    error_loss += self.args.auxiliary_weight * loss_aux

            if self.args.dsnas:
                logits, error_loss, loss_alpha, penalty = self.model(
                    input, target, self.criterion)

            num_normal = self.model.num_normal
            num_reduce = self.model.num_reduce
            normal_arch_entropy = self.model._arch_entropy(
                self.model.normal_log_alpha)
            reduce_arch_entropy = self.model._arch_entropy(
                self.model.reduce_log_alpha)

            if self.args.resource_efficient:
                if self.args.method == 'policy_gradient':
                    resource_penalty = (penalty[2]) / 6 + self.args.ratio * (
                        penalty[7]) / 2
                    log_resource_penalty = (
                        penalty[35]) / 6 + self.args.ratio * (penalty[36]) / 2
                elif self.args.method == 'reparametrization':
                    resource_penalty = (penalty[26]) / 6 + self.args.ratio * (
                        penalty[25]) / 2
                    log_resource_penalty = (
                        penalty[37]) / 6 + self.args.ratio * (penalty[38]) / 2
                elif self.args.method == 'discrete':
                    resource_penalty = (penalty[28]) / 6 + self.args.ratio * (
                        penalty[27]) / 2
                    log_resource_penalty = (
                        penalty[39]) / 6 + self.args.ratio * (penalty[40]) / 2
                elif self.args.method == 'none':
                    # TODO
                    resource_penalty = torch.zeros(1).cuda()
                    log_resource_penalty = torch.zeros(1).cuda()
                else:
                    logging.info(
                        "invalid --method, please choose one of "
                        "'policy_gradient', 'discrete', 'reparametrization', 'none'")
                    sys.exit(1)
            else:
                resource_penalty = torch.zeros(1).cuda()
                log_resource_penalty = torch.zeros(1).cuda()

            if self.args.log_penalty:
                resource_loss = self.model._resource_lambda * log_resource_penalty
            else:
                resource_loss = self.model._resource_lambda * resource_penalty

            if self.args.loss:
                if self.args.snas:
                    loss = resource_loss.clone() + error_loss.clone()
                elif self.args.dsnas:
                    loss = resource_loss.clone()
                else:
                    loss = resource_loss.clone() + -child_coef * (
                        torch.log(normal_one_hot_prob) +
                        torch.log(reduce_one_hot_prob)).sum()
            else:
                if self.args.snas or self.args.dsnas:
                    loss = error_loss.clone()

            if self.args.distributed:
                loss.div_(self.world_size)
                error_loss.div_(self.world_size)
                resource_loss.div_(self.world_size)
                if self.args.dsnas:
                    loss_alpha.div_(self.world_size)

            # logging gradient
            count += 1
            if self.args.resource_efficient:
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()
                resource_loss.backward(retain_graph=True)
                if not self.args.random_sample:
                    normal_resource_gradient += self.model.normal_log_alpha.grad
                    reduce_resource_gradient += self.model.reduce_log_alpha.grad
            if self.args.snas:
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()
                error_loss.backward(retain_graph=True)
                if not self.args.random_sample:
                    normal_loss_gradient += self.model.normal_log_alpha.grad
                    reduce_loss_gradient += self.model.reduce_log_alpha.grad
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()

            if self.args.snas or not self.args.random_sample and not self.args.dsnas:
                loss.backward()
            if not self.args.random_sample:
                normal_total_gradient += self.model.normal_log_alpha.grad
                reduce_total_gradient += self.model.reduce_log_alpha.grad

            if self.args.distributed:
                reduce_tensorgradients(self.model.parameters(), sync=True)
                nn.utils.clip_grad_norm_([
                    param for name, param in self.model.named_parameters() if
                    name != 'normal_log_alpha' and name != 'reduce_log_alpha'
                ], self.args.grad_clip)
                arch_grad_norm = nn.utils.clip_grad_norm_([
                    param for name, param in self.model.named_parameters()
                    if name == 'normal_log_alpha' or name == 'reduce_log_alpha'
                ], 10.)
            else:
                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         self.args.grad_clip)
                arch_grad_norm = nn.utils.clip_grad_norm_(
                    self.model.arch_parameters(), 10.)

            grad.update(arch_grad_norm)
            if not self.args.fix_weight and self.update_theta:
                self.optimizer.step()
            self.optimizer.zero_grad()
            if not self.args.random_sample and self.update_alpha:
                self.arch_optimizer.step()
            self.arch_optimizer.zero_grad()

            if self.rank == 0:
                self.logger.add_scalar(
                    "iter_train_loss", error_loss,
                    step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar(
                    "normal_arch_entropy", normal_arch_entropy,
                    step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar(
                    "reduce_arch_entropy", reduce_arch_entropy,
                    step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar(
                    "total_arch_entropy",
                    normal_arch_entropy + reduce_arch_entropy,
                    step + len(self.train_queue.dataset) * epoch)
                if self.args.dsnas:
                    # per-edge rewards for the normal and reduce cells
                    # (14 edges each)
                    for i in range(14):
                        self.logger.add_scalar(
                            "reward_normal_edge_%d" % i,
                            self.model.normal_edge_reward[i],
                            step + len(self.train_queue.dataset) * epoch)
                        self.logger.add_scalar(
                            "reward_reduce_edge_%d" % i,
                            self.model.reduce_edge_reward[i],
                            step + len(self.train_queue.dataset) * epoch)
                # resource statistics logged as tag: penalty[idx] / divisor
                resource_stats = [
                    # policy size
                    ("iter_normal_size_policy", 2, num_normal),
                    ("iter_reduce_size_policy", 7, num_reduce),
                    # baseline: discrete_probability
                    ("iter_normal_size_baseline", 3, num_normal),
                    ("iter_normal_flops_baseline", 5, num_normal),
                    ("iter_normal_mac_baseline", 6, num_normal),
                    ("iter_reduce_size_baseline", 8, num_reduce),
                    ("iter_reduce_flops_baseline", 9, num_reduce),
                    ("iter_reduce_mac_baseline", 10, num_reduce),
                    # R - median(R)
                    ("iter_normal_size-avg", 60, num_normal),
                    ("iter_normal_flops-avg", 61, num_normal),
                    ("iter_normal_mac-avg", 62, num_normal),
                    ("iter_reduce_size-avg", 63, num_reduce),
                    ("iter_reduce_flops-avg", 64, num_reduce),
                    ("iter_reduce_mac-avg", 65, num_reduce),
                    # lnR - ln(median)
                    ("iter_normal_ln_size-ln_avg", 66, num_normal),
                    ("iter_normal_ln_flops-ln_avg", 67, num_normal),
                    ("iter_normal_ln_mac-ln_avg", 68, num_normal),
                    ("iter_reduce_ln_size-ln_avg", 69, num_reduce),
                    ("iter_reduce_ln_flops-ln_avg", 70, num_reduce),
                    ("iter_reduce_ln_mac-ln_avg", 71, num_reduce),
                ]
                for tag, idx, denom in resource_stats:
                    self.logger.add_scalar(
                        tag, penalty[idx] / denom,
                        step + len(self.train_queue.dataset) * epoch)
                '''
                self.logger.add_scalar("iter_normal_size_normalized", penalty[17] / 6, step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_normal_flops_normalized", penalty[18] / 6, step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_normal_mac_normalized", penalty[19] / 6, step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_reduce_size_normalized", penalty[20] / 2, step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_reduce_flops_normalized", penalty[21] / 2, step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_reduce_mac_normalized", penalty[22] / 2, step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_normal_penalty_normalized", penalty[23] / 6,
                                  step + len(self.train_queue.dataset) * epoch)
                self.logger.add_scalar("iter_reduce_penalty_normalized", penalty[24] / 2,
                                  step + len(self.train_queue.dataset) * epoch)
                '''
                # Monte_Carlo(R_i), log(|R_i|), log(P)R_i and log(P)log(R_i)
                resource_stats_mc = [
                    # Monte_Carlo(R_i)
                    ("iter_normal_size_mc", 29, num_normal),
                    ("iter_normal_flops_mc", 30, num_normal),
                    ("iter_normal_mac_mc", 31, num_normal),
                    ("iter_reduce_size_mc", 32, num_reduce),
                    ("iter_reduce_flops_mc", 33, num_reduce),
                    ("iter_reduce_mac_mc", 34, num_reduce),
                    # log(|R_i|)
                    ("iter_normal_log_size", 41, num_normal),
                    ("iter_normal_log_flops", 42, num_normal),
                    ("iter_normal_log_mac", 43, num_normal),
                    ("iter_reduce_log_size", 44, num_reduce),
                    ("iter_reduce_log_flops", 45, num_reduce),
                    ("iter_reduce_log_mac", 46, num_reduce),
                    # log(P)R_i
                    ("iter_normal_logP_size", 47, num_normal),
                    ("iter_normal_logP_flops", 48, num_normal),
                    ("iter_normal_logP_mac", 49, num_normal),
                    ("iter_reduce_logP_size", 50, num_reduce),
                    ("iter_reduce_logP_flops", 51, num_reduce),
                    ("iter_reduce_logP_mac", 52, num_reduce),
                    # log(P)log(R_i)
                    ("iter_normal_logP_log_size", 53, num_normal),
                    ("iter_normal_logP_log_flops", 54, num_normal),
                    ("iter_normal_logP_log_mac", 55, num_normal),
                    ("iter_reduce_logP_log_size", 56, num_reduce),
                    ("iter_reduce_logP_log_flops", 57, num_reduce),
                    ("iter_reduce_logP_log_mac", 58, num_reduce),
                ]
                for tag, idx, denom in resource_stats_mc:
                    self.logger.add_scalar(
                        tag, penalty[idx] / denom,
                        step + len(self.train_queue.dataset) * epoch)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

            if self.args.distributed:
                loss = loss.detach()
                dist.all_reduce(error_loss)
                dist.all_reduce(prec1)
                dist.all_reduce(prec5)
                prec1.div_(self.world_size)
                prec5.div_(self.world_size)
                #dist_util.all_reduce([loss, prec1, prec5], 'mean')
            objs.update(error_loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % self.args.report_freq == 0 and self.rank == 0:
                logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                             top5.avg)
                self.logger.add_scalar(
                    "iter_train_top1_acc", top1.avg,
                    step + len(self.train_queue.dataset) * epoch)

        if self.rank == 0:
            logging.info('-------resource gradient--------')
            logging.info(normal_resource_gradient / count)
            logging.info(reduce_resource_gradient / count)
            logging.info('-------loss gradient--------')
            logging.info(normal_loss_gradient / count)
            logging.info(reduce_loss_gradient / count)
            logging.info('-------total gradient--------')
            logging.info(normal_total_gradient / count)
            logging.info(reduce_total_gradient / count)

        return top1.avg, loss, error_loss, loss_alpha

    def infer(self, epoch):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()

        self.model.eval()
        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.to(self.device)
                target = target.to(self.device)
                if self.args.snas:
                    logits, logits_aux, resource_loss, op_normal, op_reduce = self.model(
                        input)
                    loss = self.criterion(logits, target)
                elif self.args.dsnas:
                    logits, error_loss, loss_alpha, resource_loss = self.model(
                        input, target, self.criterion)
                    loss = error_loss

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

                if self.args.distributed:
                    loss.div_(self.world_size)
                    loss = loss.detach()
                    dist.all_reduce(loss)
                    dist.all_reduce(prec1)
                    dist.all_reduce(prec5)
                    prec1.div_(self.world_size)
                    prec5.div_(self.world_size)
                objs.update(loss.item(), input.size(0))
                top1.update(prec1.item(), input.size(0))
                top5.update(prec5.item(), input.size(0))

                if step % self.args.report_freq == 0 and self.rank == 0:
                    logging.info('valid %03d %e %f %f', step, objs.avg,
                                 top1.avg, top5.avg)
                    self.logger.add_scalar(
                        "iter_valid_loss", loss,
                        step + len(self.valid_queue.dataset) * epoch)
                    self.logger.add_scalar(
                        "iter_valid_top1_acc", top1.avg,
                        step + len(self.valid_queue.dataset) * epoch)

        return top1.avg, objs.avg
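
# Hedged sketch, not part of the original example: the utils.save / utils.load
# helpers referenced throughout, as they commonly appear in DARTS-style code.
def save(model, model_path):
    # persist only the state dict, matching utils.save(model, path) usage above
    torch.save(model.state_dict(), model_path)


def load(model, model_path):
    model.load_state_dict(torch.load(model_path))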
Example #6
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  # torch.cuda.set_device(args.gpu)
  gpus = [int(i) for i in args.gpu.split(',')]
  if len(gpus) == 1:
    torch.cuda.set_device(int(args.gpu))
  # cudnn.benchmark = True
  torch.manual_seed(args.seed)
  # cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %s' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
  model = model.cuda()
  if len(gpus)>1:
    print("True")
    model = nn.parallel.DataParallel(model, device_ids=gpus, output_device=gpus[0])
    model = model.module

  arch_params = list(map(id, model.arch_parameters()))
  weight_params = filter(lambda p: id(p) not in arch_params,
                         model.parameters())

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      # model.parameters(),
      weight_params,
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)
  #optimizer = nn.DataParallel(optimizer, device_ids=gpus)

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, criterion, args)

  for epoch in range(args.epochs):
    scheduler.step()
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    # training
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
    logging.info('train_acc %f', train_acc)

    # validation
    with torch.no_grad():
      valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
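
# Hedged sketch, not part of the original example: a typical
# utils._data_transforms_cifar10 as used by several examples above
# (normalization plus random crop/flip, with optional Cutout). The args.cutout
# and args.cutout_length flags are assumptions based on common DARTS configs.
import numpy as np
import torch
import torchvision.transforms as transforms


class Cutout(object):

  def __init__(self, length):
    self.length = length

  def __call__(self, img):
    # zero out a random length x length square of the (already normalized) image
    h, w = img.size(1), img.size(2)
    mask = np.ones((h, w), np.float32)
    y = np.random.randint(h)
    x = np.random.randint(w)
    y1 = np.clip(y - self.length // 2, 0, h)
    y2 = np.clip(y + self.length // 2, 0, h)
    x1 = np.clip(x - self.length // 2, 0, w)
    x2 = np.clip(x + self.length // 2, 0, w)
    mask[y1:y2, x1:x2] = 0.
    mask = torch.from_numpy(mask).expand_as(img)
    return img * mask


def _data_transforms_cifar10(args):
  CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
  CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

  train_transform = transforms.Compose([
      transforms.RandomCrop(32, padding=4),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
  ])
  if args.cutout:
    train_transform.transforms.append(Cutout(args.cutout_length))

  valid_transform = transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
  ])
  return train_transform, valid_transform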
Example #7
def main():
    if is_wandb_used:
        wandb.init(project="automl-gradient-based-nas",
                   name="r" + str(args.run_id) + "-e" + str(args.epochs) +
                   "-lr" + str(args.learning_rate) + "-l(" +
                   str(args.lambda_train_regularizer) + "," +
                   str(args.lambda_valid_regularizer) + ")",
                   config=args,
                   entity="automl")

    global is_multi_gpu

    gpus = [int(i) for i in args.gpu.split(',')]
    logging.info('gpus = %s' % gpus)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)

    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # default: args.init_channels = 16, CIFAR_CLASSES = 10, args.layers = 8
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)

    if len(gpus) > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
        is_multi_gpu = True

    model.cuda()
    if args.model_path != "saved_models":
        utils.load(model, args.model_path)

    arch_parameters = model.module.arch_parameters(
    ) if is_multi_gpu else model.arch_parameters()
    arch_params = list(map(id, arch_parameters))

    parameters = model.module.parameters(
    ) if is_multi_gpu else model.parameters()
    weight_params = filter(lambda p: id(p) not in arch_params, parameters)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        weight_params,  # model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # will cost time to download the data
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # split index

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size * len(gpus),
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size * len(gpus),
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    best_accuracy = 0
    best_accuracy_different_cnn_counts = dict()

    if is_wandb_used:
        table = wandb.Table(columns=["Epoch", "Searched Architecture"])

    for epoch in range(args.epochs):
        scheduler.step()

        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc, train_obj, train_loss = train(epoch, train_queue,
                                                 valid_queue, model, architect,
                                                 criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)
        if is_wandb_used:
            wandb.log({"searching_train_acc": train_acc, "epoch": epoch})
            wandb.log({"searching_train_loss": train_loss, "epoch": epoch})

        # validation
        with torch.no_grad():
            valid_acc, valid_obj, valid_loss = infer(valid_queue, model,
                                                     criterion)
        logging.info('valid_acc %f', valid_acc)
        if is_wandb_used:
            wandb.log({"searching_valid_acc": valid_acc, "epoch": epoch})
            wandb.log({"searching_valid_loss": valid_loss, "epoch": epoch})
            wandb.log({
                "search_train_valid_acc_gap": train_acc - valid_acc,
                "epoch": epoch
            })
            wandb.log({
                "search_train_valid_loss_gap": train_loss - valid_loss,
                "epoch": epoch
            })

        # save the structure
        genotype, normal_cnn_count, reduce_cnn_count = model.module.genotype(
        ) if is_multi_gpu else model.genotype()
        cnn_count = normal_cnn_count + reduce_cnn_count
        model_size = model.module.get_current_model_size(
        ) if is_multi_gpu else model.get_current_model_size()
        if is_wandb_used:
            wandb.log({"cnn_count": cnn_count, "epoch": epoch})
            wandb.log({"model_size": model_size, "epoch": epoch})

        # early stopping
        if args.early_stopping == 1:
            if normal_cnn_count == 6 and reduce_cnn_count == 0:
                break

        print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))
        print(
            F.softmax(model.module.alphas_normal
                      if is_multi_gpu else model.alphas_normal,
                      dim=-1))
        print(
            F.softmax(model.module.alphas_reduce
                      if is_multi_gpu else model.alphas_reduce,
                      dim=-1))
        logging.info('genotype = %s', genotype)
        if is_wandb_used:
            wandb.log({"genotype": str(genotype)}, step=epoch - 1)
            table.add_data(str(epoch), str(genotype))
            wandb.log({"Searched Architecture": table})

            # save the cnn architecture according to the CNN count
            cnn_count = normal_cnn_count * 10 + reduce_cnn_count
            wandb.log({
                "searching_cnn_count(%s)" % cnn_count: valid_acc,
                "epoch": epoch
            })
            if cnn_count not in best_accuracy_different_cnn_counts.keys():
                best_accuracy_different_cnn_counts[cnn_count] = valid_acc
                summary_key_cnn_structure = "best_acc_for_cnn_structure(n:%d,r:%d)" % (
                    normal_cnn_count, reduce_cnn_count)
                wandb.run.summary[summary_key_cnn_structure] = valid_acc

                summary_key_best_cnn_structure = "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" % (
                    normal_cnn_count, reduce_cnn_count)
                wandb.run.summary[summary_key_best_cnn_structure] = epoch
            else:
                if valid_acc > best_accuracy_different_cnn_counts[cnn_count]:
                    best_accuracy_different_cnn_counts[cnn_count] = valid_acc
                    summary_key_cnn_structure = "best_acc_for_cnn_structure(n:%d,r:%d)" % (
                        normal_cnn_count, reduce_cnn_count)
                    wandb.run.summary[summary_key_cnn_structure] = valid_acc

                    summary_key_best_cnn_structure = "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" % (
                        normal_cnn_count, reduce_cnn_count)
                    wandb.run.summary[summary_key_best_cnn_structure] = epoch

            if valid_acc > best_accuracy:
                best_accuracy = valid_acc
                wandb.run.summary["best_valid_accuracy"] = valid_acc
                wandb.run.summary["epoch_of_best_accuracy"] = epoch
                utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
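Note that two different cnn_count encodings appear above: the raw sum normal + reduce is logged directly, while normal * 10 + reduce packs both counts into a single wandb key. A quick illustration with made-up values:

normal_cnn_count, reduce_cnn_count = 4, 2
print(normal_cnn_count + reduce_cnn_count)        # 6  -> logged as "cnn_count"
key = normal_cnn_count * 10 + reduce_cnn_count    # 42 -> encodes (n:4, r:2)
print("searching_cnn_count(%s)" % key)            # searching_cnn_count(42)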
Example #8
0
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()

  model = Network(args.init_channels, CIFAR_CLASSES, args.op_search_layers, criterion)

  start_epoch=0

  model = model.cuda()
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    
  arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                    lr=args.arch_learning_rate, betas=(0.9, 0.999), weight_decay=args.arch_weight_decay)

  architect = Architect(model, args)


  train_transform, valid_transform = utils._data_transforms_cifar10(args)

  if args.set=='cifar100':
      train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
      val_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
  else:
      train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
      val_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)


  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  # train_queue_A and train_queue_B: disjoint halves of the training set, used for bilevel
  #   optimization of the operations (see the note after the loaders below)
  # train_queue_Full: the full training set, used for one-level optimization of the topology
  # valid_queue: the held-out test data

  train_queue_A = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
    pin_memory=True, num_workers=2)

  train_queue_B = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
    pin_memory=True, num_workers=2)

  train_queue_Full = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size,
    pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
    val_data, batch_size=args.batch_size,
    pin_memory=True, num_workers=2)
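  # Note (not part of the original script): how these queues are presumably consumed below --
  #   op phases : train_op draws weight batches from train_queue_A and architecture batches
  #               from train_queue_B (the usual DARTS-style bilevel alternation)
  #   tp phases : train_tp uses train_queue_A for the weights and train_queue_Full for the
  #               one-level topology update via arch_optimizer
  #   valid_queue only measures test accuracy and never updates parameters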

  for epoch in range(start_epoch, args.epochs):
    if epoch == Op_Pretrain_Start:
      model.phase = 'op_pretrain'
      logging.info("Begin operation pretrain!")
    elif epoch == Op_Search_Start:
      model.phase = 'op_search'
      logging.info("Begin operation search!")
    elif epoch == Tp_Pretrain_Start:
      model.__init__(args.init_channels,
                     CIFAR_CLASSES, args.op_search_layers, criterion, init_arch=False)
      model.phase = 'tp_pretrain'
      optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,  # twice as much data is now used to update the weights
        momentum=args.momentum,
        weight_decay=args.weight_decay)
      scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
      model.prune_model()
      arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                        lr=args.arch_learning_rate, betas=(0.9, 0.999),
                                        weight_decay=args.arch_weight_decay)
      model = model.cuda()
      architect = None  # use one-step to optimize topology
      logging.info("Prune model finish!")
      logging.info("Load Prune Architecture finish!")
      logging.info("Begin topology pretrain!")
    elif epoch == Tp_Search_Start:
      model.phase = 'tp_search'
      logging.info("Begin topology search!")
    else:
      pass

    if 'pretrain' in model.phase or 'op' in model.phase:
      model.T = 1.0
    else:
      model.T = 10 * pow(Tp_Anneal_Rate, epoch - Tp_Search_Start)

    scheduler.step(epoch)

    lr = scheduler.get_lr()[0]
    logging.info('epoch:%d phase:%s lr:%e', epoch, model.phase, lr)

    print_genotype(model)

    # training
    if 'op' in model.phase:
      train_acc, train_obj = train_op(train_queue_A, train_queue_B, model, architect, criterion, optimizer, lr)
    else:
      train_acc, train_obj = train_tp(train_queue_A, train_queue_Full, model, criterion, optimizer, arch_optimizer)


    logging.info('train_acc %f', train_acc)

    # validation
    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights_%s.pth'%model.phase))
    model.save_arch(os.path.join(args.save, 'arch_%s.pth'%model.phase))

  print_genotype(model)
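In the tp_search phase above, the temperature T decays geometrically from 10. Assuming Tp_Anneal_Rate is a constant below 1 (its definition is not shown on this page), the schedule behaves as follows:

Tp_Search_Start = 50   # illustrative values; the real constants are defined elsewhere in the project
Tp_Anneal_Rate = 0.9

for epoch in range(Tp_Search_Start, Tp_Search_Start + 5):
    T = 10 * pow(Tp_Anneal_Rate, epoch - Tp_Search_Start)
    print(epoch, round(T, 3))   # 10.0, 9.0, 8.1, 7.29, 6.561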
Example #9
0
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    gpus = [int(i) for i in args.gpu.split(',')]  # convert the gpu ids from argparse into an int list
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))

    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    # loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # initialize the model: build a supernet and move it to the GPU
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()

    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(
        lambda p: id(p) not in arch_params,  # currently unused; the optimizer below takes model.parameters()
        model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),  # parameters updated by the optimizer
        # weight_params,
        args.learning_rate,  # learning rate
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    # dset is shorthand for torchvision.datasets
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    print("Using multiple workers for the dataloader raises an error!")
    # split the dataset into training and validation halves and wrap them in loaders
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=0)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=0)

    # Architect (defined in architect.py) holds the architecture parameters and their update
    # step; it is passed into train() below (see the first-order sketch after this example)
    architect = Architect(model, criterion, args)
    model = nn.parallel.DataParallel(model)
    '''  
  if len(gpus)>1:
    print("True")
    print(gpus)
  model = nn.parallel.DataParallel(model)
  '''

    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        genotype = model.module.genotype()  # method of the searchable Network in model_search.py
        logging.info('genotype = %s', genotype)  # log the cell structure found at this epoch
        print(F.softmax(model.module.alphas_normal, dim=-1))
        print(F.softmax(model.module.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model.module, criterion)
        logging.info('valid_acc %f', valid_acc)
        scheduler.step()

        utils.save(model.module, os.path.join(args.save, 'weights.pt'))
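The Architect class used above is not reproduced on this page. For orientation only, a first-order (non-unrolled) variant of its job amounts to one optimizer step on the validation loss with respect to the alphas; the sketch below is an assumption about that behaviour, not the repo's implementation, and it assumes the model's forward returns plain logits:

import torch

class FirstOrderArchitect:
    """Hypothetical first-order architecture updater (no second-order unrolling)."""
    def __init__(self, model, criterion, arch_lr=3e-4, arch_weight_decay=1e-3):
        self.model = model
        self.criterion = criterion
        self.optimizer = torch.optim.Adam(model.arch_parameters(),
                                          lr=arch_lr, betas=(0.5, 0.999),
                                          weight_decay=arch_weight_decay)

    def step(self, input_valid, target_valid):
        # one Adam step on the validation loss w.r.t. the alphas only
        self.optimizer.zero_grad()
        loss = self.criterion(self.model(input_valid), target_valid)
        loss.backward()
        self.optimizer.step()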
Example #10
0
def main():

    args = get_args()
    # get log
    args.save = '{}/search-{}'.format(args.save,
                                      time.strftime("%Y%m%d-%H%M%S"))
    tools.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logger = logging.getLogger('Train Search')
    logger.addHandler(fh)

    # monitor
    pymonitor = ProgressMonitor(logger)
    tbmonitor = TensorBoardMonitor(logger, args.save)
    monitors = [pymonitor, tbmonitor]

    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)
    # set random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda:0' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.seed)
        cudnn.enabled = True
        cudnn.benchmark = True
    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    with open(os.path.join(args.save, "args.yaml"),
              "w") as yaml_file:  # dump experiment config
        yaml.dump(args, yaml_file)

    if args.cifar100:
        CIFAR_CLASSES = 100
        data_folder = 'cifar-100-python'
    else:
        CIFAR_CLASSES = 10
        data_folder = 'cifar-10-batches-py'

    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = tools._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = tools._data_transforms_cifar10(args)

    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=False,
                                   download=False,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=False,
                                  download=False,
                                  transform=valid_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)

    valLoader = torch.utils.data.DataLoader(valid_data,
                                            batch_size=args.batch_size,
                                            pin_memory=True,
                                            num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(args.device)
    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:
        add_layers = args.add_layers
    else:
        add_layers = [0, 6, 12]
    if len(args.dropout_rate) == 3:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.1, 0.4, 0.7]
    eps_no_archs = [10, 10, 10]
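    # Stage schedule implied by the lists above (assuming len(PRIMITIVES) == 8 as in standard DARTS):
    #   stage 0: 8 candidate ops per edge -> keep 5 (drop 3), layers + 0,  dropout 0.1
    #   stage 1: 5 candidate ops per edge -> keep 3 (drop 2), layers + 6,  dropout 0.4
    #   stage 2: 3 candidate ops per edge -> keep 1 (drop 2), layers + 12, dropout 0.7
    # The first eps_no_archs[sp] (= 10) epochs of each stage train only the weights (train_arch=False).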
    state_epochs = 0
    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        steps=args.nodes,
                        multiplier=args.multiplier,
                        stem_multiplier=args.stem_multiplier,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))

        model = model.to(args.device)
        logger.info("stage:{} param size:{}MB".format(
            sp, tools.count_parameters_in_MB(model)))

        optimizer = torch.optim.SGD(model.weight_parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0.5, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs):
            lr = scheduler.get_lr()[0]
            logger.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                model.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.update_p()
                train_acc, train_obj = train(state_epochs + epoch,
                                             train_queue,
                                             valid_queue,
                                             model,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             args,
                                             monitors,
                                             logger,
                                             train_arch=False)
            else:
                model.p = float(drop_rate[sp]) * np.exp(
                    -(epoch - eps_no_arch) * scale_factor)
                model.update_p()
                train_acc, train_obj = train(state_epochs + epoch,
                                             train_queue,
                                             valid_queue,
                                             model,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             args,
                                             monitors,
                                             logger,
                                             train_arch=True)

            # validation
            valid_acc, valid_obj = infer(state_epochs + epoch, valLoader,
                                         model, criterion, args, monitors,
                                         logger)

            if epoch >= eps_no_arch:
                # record the genotype parsed from this epoch's architecture weights
                arch_param = model.arch_parameters()
                normal_prob = F.softmax(arch_param[0],
                                        dim=-1).data.cpu().numpy()
                reduce_prob = F.softmax(arch_param[1],
                                        dim=-1).data.cpu().numpy()
                logger.info('Genotypev: {}'.format(
                    parse_genotype(switches_normal.copy(),
                                   switches_reduce.copy(), normal_prob.copy(),
                                   reduce_prob.copy())))

            scheduler.step()

        tools.save(model,
                   os.path.join(args.save, 'state{}_weights.pt'.format(sp)))
        state_epochs += args.epochs

        # Save switches info for skip-connect (s-c) refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        arch_param = model.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=-1).data.cpu().numpy()
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()

        logger.info('------Stage %d end!------' % sp)
        logger.info("normal: \n{}".format(normal_prob))
        logger.info("reduce: \n{}".format(reduce_prob))
        logger.info('Genotypev: {}'.format(
            parse_genotype(switches_normal.copy(), switches_reduce.copy(),
                           normal_prob.copy(), reduce_prob.copy())))

        # prune the search space using the latest arch weights, the previous switches,
        # the number of ops to drop, and the current stage
        switches_normal = update_switches(normal_prob.copy(),
                                          switches_normal, num_to_drop[sp], sp,
                                          len(num_to_keep))
        switches_reduce = update_switches(reduce_prob.copy(),
                                          switches_reduce, num_to_drop[sp], sp,
                                          len(num_to_keep))

        logger.info('------Dropping %d paths------' % num_to_drop[sp])
        logger.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal, logger)
        logger.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce, logger)

        if sp == len(num_to_keep) - 1:
            # arch_param = model.arch_parameters()
            # normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            # reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])
            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according to the ranking of arch parameters
            for i in range(14):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logger.info(genotype)
            ## restrict skipconnect (normal cell only)
            logger.info('Restricting skipconnect...')
            # generating genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal,
                                                      normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logger.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logger.info(genotype)
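check_sk_number and the pruning helpers used above come from the project's utilities and are not shown here. Assuming PRIMITIVES is the standard DARTS operation list, a counting helper in the same spirit could look like this:

PRIMITIVES = ['none', 'max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
              'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5']

def count_skip_connect(switches):
    """Count edges whose skip_connect candidate is still switched on."""
    sk_idx = PRIMITIVES.index('skip_connect')
    return sum(1 for edge in switches if edge[sk_idx])

switches_normal = [[True] * len(PRIMITIVES) for _ in range(14)]
print(count_skip_connect(switches_normal))  # 14 before any pruning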
Example #11
0
class neural_architecture_search():
    def __init__(self, args):
        self.args = args

        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        torch.cuda.set_device(self.args.gpu)
        self.device = torch.device("cuda")
        self.rank = 0
        self.seed = self.args.seed
        self.world_size = 1

        if self.args.fix_cudnn:
            random.seed(self.seed)
            torch.backends.cudnn.deterministic = True
            np.random.seed(self.seed)
            cudnn.benchmark = False
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
        else:
            np.random.seed(self.seed)
            cudnn.benchmark = True
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

        self.path = os.path.join(generate_date, self.args.save)
        if self.rank == 0:
            utils.create_exp_dir(generate_date,
                                 self.path,
                                 scripts_to_save=glob.glob('*.py'))
            logging.basicConfig(stream=sys.stdout,
                                level=logging.INFO,
                                format=log_format,
                                datefmt='%m/%d %I:%M:%S %p')
            fh = logging.FileHandler(os.path.join(self.path, 'log.txt'))
            fh.setFormatter(logging.Formatter(log_format))
            logging.getLogger().addHandler(fh)
            logging.info("self.args = %s", self.args)
            self.logger = tensorboardX.SummaryWriter('./runs/' +
                                                     generate_date + '/' +
                                                     self.args.save_log)
        else:
            self.logger = None

        #initialize loss function
        self.criterion = nn.CrossEntropyLoss().to(self.device)

        #initialize model
        self.init_model()
        if self.args.resume:
            self.reload_model()

        #calculate model param size
        if self.rank == 0:
            logging.info("param size = %fMB",
                         utils.count_parameters_in_MB(self.model))
            self.model._logger = self.logger
            self.model._logging = logging

        #initialize optimizer
        self.init_optimizer()

        #initialize dataset loader
        self.init_loaddata()

        self.update_theta = True
        self.update_alpha = True

    def init_model(self):

        self.model = Network(self.args.init_channels, CIFAR_CLASSES,
                             self.args.layers, self.criterion, self.args,
                             self.rank, self.world_size, self.args.steps,
                             self.args.multiplier)
        self.model.to(self.device)
        for v in self.model.parameters():
            if v.requires_grad:
                if v.grad is None:
                    v.grad = torch.zeros_like(v)
        self.model.normal_log_alpha.grad = torch.zeros_like(
            self.model.normal_log_alpha)
        self.model.reduce_log_alpha.grad = torch.zeros_like(
            self.model.reduce_log_alpha)

    def reload_model(self):
        self.model.load_state_dict(torch.load(self.args.resume_path +
                                              '/weights.pt'),
                                   strict=True)

    def init_optimizer(self):

        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         self.args.learning_rate,
                                         momentum=self.args.momentum,
                                         weight_decay=args.weight_decay)

        self.arch_optimizer = torch.optim.Adam(
            self.model.arch_parameters(),
            lr=self.args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=self.args.arch_weight_decay)

    def init_loaddata(self):

        train_transform, valid_transform = utils._data_transforms_cifar10(
            self.args)
        train_data = dset.CIFAR10(root=self.args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=self.args.data,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

        if self.args.seed:

            def worker_init_fn(worker_id):
                # seed every dataloader worker deterministically (DataLoader passes the worker id)
                seed = self.seed
                np.random.seed(seed)
                random.seed(seed)
                torch.manual_seed(seed)
        else:
            worker_init_fn = None

        num_train = len(train_data)
        indices = list(range(num_train))

        self.train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=self.args.batch_size,
            shuffle=True,
            pin_memory=False,
            num_workers=2,
            worker_init_fn=worker_init_fn)

        self.valid_queue = torch.utils.data.DataLoader(
            valid_data,
            batch_size=self.args.batch_size,
            shuffle=False,
            pin_memory=False,
            num_workers=2,
            worker_init_fn=worker_init_fn)

    def main(self):
        # lr scheduler: cosine annealing
        # temp scheduler: linear annealing (self-defined in utils; a sketch follows this class)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            float(self.args.epochs),
            eta_min=self.args.learning_rate_min)

        self.temp_scheduler = utils.Temp_Scheduler(self.args.epochs,
                                                   self.model._temp,
                                                   self.args.temp,
                                                   temp_min=self.args.temp_min)

        for epoch in range(self.args.epochs):
            if self.args.child_reward_stat:
                self.update_theta = False
                self.update_alpha = False

            if self.args.current_reward:
                self.model.normal_reward_mean = torch.zeros_like(
                    self.model.normal_reward_mean)
                self.model.reduce_reward_mean = torch.zeros_like(
                    self.model.reduce_reward_mean)
                self.model.count = 0

            if epoch < self.args.resume_epoch:
                continue
            self.scheduler.step()
            if self.args.temp_annealing:
                self.model._temp = self.temp_scheduler.step()
            self.lr = self.scheduler.get_lr()[0]

            if self.rank == 0:
                logging.info('epoch %d lr %e temp %e', epoch, self.lr,
                             self.model._temp)
                self.logger.add_scalar('epoch_temp', self.model._temp, epoch)
                logging.info(self.model.normal_log_alpha)
                logging.info(self.model.reduce_log_alpha)
                logging.info(F.softmax(self.model.normal_log_alpha, dim=-1))
                logging.info(F.softmax(self.model.reduce_log_alpha, dim=-1))

            genotype_edge_all = self.model.genotype_edge_all()

            if self.rank == 0:
                logging.info('genotype_edge_all = %s', genotype_edge_all)
                # create genotypes.txt file
                txt_name = remark + '_genotype_edge_all_epoch' + str(epoch)
                utils.txt('genotype', self.args.save, txt_name,
                          str(genotype_edge_all), generate_date)

            self.model.train()
            train_acc, loss, error_loss, loss_alpha = self.train(
                epoch, logging)
            if self.rank == 0:
                logging.info('train_acc %f', train_acc)
                self.logger.add_scalar("epoch_train_acc", train_acc, epoch)
                self.logger.add_scalar("epoch_train_error_loss", error_loss,
                                       epoch)
                if self.args.dsnas:
                    self.logger.add_scalar("epoch_train_alpha_loss",
                                           loss_alpha, epoch)

                if self.args.dsnas and not self.args.child_reward_stat:
                    if self.args.current_reward:
                        logging.info('reward mean stat')
                        logging.info(self.model.normal_reward_mean)
                        logging.info(self.model.reduce_reward_mean)
                        logging.info('count')
                        logging.info(self.model.count)
                    else:
                        logging.info('reward mean stat')
                        logging.info(self.model.normal_reward_mean)
                        logging.info(self.model.reduce_reward_mean)
                        if self.model.normal_reward_mean.size(0) > 1:
                            logging.info('reward mean total stat')
                            logging.info(self.model.normal_reward_mean.sum(0))
                            logging.info(self.model.reduce_reward_mean.sum(0))

                if self.args.child_reward_stat:
                    logging.info('reward mean stat')
                    logging.info(self.model.normal_reward_mean.sum(0))
                    logging.info(self.model.reduce_reward_mean.sum(0))
                    logging.info('reward var stat')
                    logging.info(
                        self.model.normal_reward_mean_square.sum(0) -
                        self.model.normal_reward_mean.sum(0)**2)
                    logging.info(
                        self.model.reduce_reward_mean_square.sum(0) -
                        self.model.reduce_reward_mean.sum(0)**2)

            # validation
            self.model.eval()
            valid_acc, valid_obj = self.infer(epoch)
            if self.args.gen_max_child:
                self.args.gen_max_child_flag = True
                valid_acc_max_child, valid_obj_max_child = self.infer(epoch)
                self.args.gen_max_child_flag = False

            if self.rank == 0:
                logging.info('valid_acc %f', valid_acc)
                self.logger.add_scalar("epoch_valid_acc", valid_acc, epoch)
                if self.args.gen_max_child:
                    logging.info('valid_acc_argmax_alpha %f',
                                 valid_acc_max_child)
                    self.logger.add_scalar("epoch_valid_acc_argmax_alpha",
                                           valid_acc_max_child, epoch)

                utils.save(self.model, os.path.join(self.path, 'weights.pt'))

        if self.rank == 0:
            logging.info(self.model.normal_log_alpha)
            logging.info(self.model.reduce_log_alpha)
            genotype_edge_all = self.model.genotype_edge_all()
            logging.info('genotype_edge_all = %s', genotype_edge_all)

    def train(self, epoch, logging):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        grad = utils.AvgrageMeter()

        normal_loss_gradient = 0
        reduce_loss_gradient = 0
        normal_total_gradient = 0
        reduce_total_gradient = 0

        loss_alpha = None

        train_correct_count = 0
        train_correct_cost = 0
        train_correct_entropy = 0
        train_correct_loss = 0
        train_wrong_count = 0
        train_wrong_cost = 0
        train_wrong_entropy = 0
        train_wrong_loss = 0

        count = 0
        for step, (input, target) in enumerate(self.train_queue):

            n = input.size(0)
            input = input.to(self.device)
            target = target.to(self.device, non_blocking=True)
            if self.args.snas:
                logits, logits_aux = self.model(input)
                error_loss = self.criterion(logits, target)
                if self.args.auxiliary:
                    loss_aux = self.criterion(logits_aux, target)
                    error_loss += self.args.auxiliary_weight * loss_aux

            if self.args.dsnas:
                logits, error_loss, loss_alpha = self.model(
                    input,
                    target,
                    self.criterion,
                    update_theta=self.update_theta,
                    update_alpha=self.update_alpha)

            for i in range(logits.size(0)):
                index = logits[i].topk(5, 0, True, True)[1]
                if index[0].item() == target[i].item():
                    train_correct_cost += (
                        -logits[i, target[i].item()] +
                        (F.softmax(logits[i], dim=-1) * logits[i]).sum())
                    train_correct_count += 1
                    discrete_prob = F.softmax(logits[i], dim=-1)
                    train_correct_entropy += -(
                        discrete_prob * torch.log(discrete_prob)).sum(-1)
                    train_correct_loss += -torch.log(discrete_prob)[
                        target[i].item()]
                else:
                    train_wrong_cost += (
                        -logits[i, target[i].item()] +
                        (F.softmax(logits[i], dim=-1) * logits[i]).sum())
                    train_wrong_count += 1
                    discrete_prob = F.softmax(logits[i], dim=-1)
                    train_wrong_entropy += -(discrete_prob *
                                             torch.log(discrete_prob)).sum(-1)
                    train_wrong_loss += -torch.log(discrete_prob)[
                        target[i].item()]

            num_normal = self.model.num_normal
            num_reduce = self.model.num_reduce

            if self.args.snas or self.args.dsnas:
                loss = error_loss.clone()

            #self.update_lr()

            # logging gradient
            count += 1
            if self.args.snas:
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()
                error_loss.backward(retain_graph=True)
                if not self.args.random_sample:
                    normal_loss_gradient += self.model.normal_log_alpha.grad
                    reduce_loss_gradient += self.model.reduce_log_alpha.grad
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()

            if self.args.snas and (not self.args.random_sample
                                   and not self.args.dsnas):
                loss.backward()

            if not self.args.random_sample:
                normal_total_gradient += self.model.normal_log_alpha.grad
                reduce_total_gradient += self.model.reduce_log_alpha.grad

            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.args.grad_clip)
            arch_grad_norm = nn.utils.clip_grad_norm_(
                self.model.arch_parameters(), 10.)

            grad.update(arch_grad_norm)
            if not self.args.fix_weight and self.update_theta:
                self.optimizer.step()
            self.optimizer.zero_grad()

            if not self.args.random_sample and self.update_alpha:
                self.arch_optimizer.step()
            self.arch_optimizer.zero_grad()

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

            objs.update(error_loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % self.args.report_freq == 0 and self.rank == 0:
                logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                             top5.avg)
                self.logger.add_scalar(
                    "iter_train_top1_acc", top1.avg,
                    step + len(self.train_queue.dataset) * epoch)

        if self.rank == 0:
            logging.info('-------loss gradient--------')
            logging.info(normal_loss_gradient / count)
            logging.info(reduce_loss_gradient / count)
            logging.info('-------total gradient--------')
            logging.info(normal_total_gradient / count)
            logging.info(reduce_total_gradient / count)

        logging.info('correct loss ')
        logging.info((train_correct_loss / train_correct_count).item())
        logging.info('correct entropy ')
        logging.info((train_correct_entropy / train_correct_count).item())
        logging.info('correct cost ')
        logging.info((train_correct_cost / train_correct_count).item())
        logging.info('correct count ')
        logging.info(train_correct_count)

        logging.info('wrong loss ')
        logging.info((train_wrong_loss / train_wrong_count).item())
        logging.info('wrong entropy ')
        logging.info((train_wrong_entropy / train_wrong_count).item())
        logging.info('wrong cost ')
        logging.info((train_wrong_cost / train_wrong_count).item())
        logging.info('wrong count ')
        logging.info(train_wrong_count)

        logging.info('total loss ')
        logging.info(((train_correct_loss + train_wrong_loss) /
                      (train_correct_count + train_wrong_count)).item())
        logging.info('total entropy ')
        logging.info(((train_correct_entropy + train_wrong_entropy) /
                      (train_correct_count + train_wrong_count)).item())
        logging.info('total cost ')
        logging.info(((train_correct_cost + train_wrong_cost) /
                      (train_correct_count + train_wrong_count)).item())
        logging.info('total count ')
        logging.info(train_correct_count + train_wrong_count)

        return top1.avg, loss, error_loss, loss_alpha

    def infer(self, epoch):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()

        self.model.eval()
        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.to(self.device)
                target = target.to(self.device)
                if self.args.snas:
                    logits, logits_aux = self.model(input)
                    loss = self.criterion(logits, target)
                elif self.args.dsnas:
                    logits, error_loss, loss_alpha = self.model(
                        input, target, self.criterion)
                    loss = error_loss

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

                objs.update(loss.item(), input.size(0))
                top1.update(prec1.item(), input.size(0))
                top5.update(prec5.item(), input.size(0))

                if step % self.args.report_freq == 0 and self.rank == 0:
                    logging.info('valid %03d %e %f %f', step, objs.avg,
                                 top1.avg, top5.avg)
                    self.logger.add_scalar(
                        "iter_valid_loss", loss,
                        step + len(self.valid_queue.dataset) * epoch)
                    self.logger.add_scalar(
                        "iter_valid_top1_acc", top1.avg,
                        step + len(self.valid_queue.dataset) * epoch)

        return top1.avg, objs.avg
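utils.Temp_Scheduler above is project-specific and not shown. Given the "linear annealing" comment and the way it is constructed and stepped, a hypothetical linearly decaying stand-in could look like this (a sketch only, not the project's implementation):

class LinearTempScheduler:
    """Hypothetical stand-in for utils.Temp_Scheduler: anneal the temperature linearly to temp_min."""
    def __init__(self, total_epochs, curr_temp, base_temp, temp_min=0.33):
        self.total_epochs = total_epochs
        self.curr_temp = curr_temp
        self.base_temp = base_temp
        self.temp_min = temp_min
        self.epoch = 0

    def step(self):
        # interpolate from base_temp down to temp_min over total_epochs calls
        frac = min(self.epoch / max(self.total_epochs - 1, 1), 1.0)
        self.curr_temp = self.base_temp + frac * (self.temp_min - self.base_temp)
        self.epoch += 1
        return self.curr_temp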
Example #12
0
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    run_start = time.time()
    start_epoch = 0
    dur_time = 0

    criterion_train = ConvSeparateLoss(
        weight=args.aux_loss_weight
    ) if args.sep_loss == 'l2' else TriSeparateLoss(
        weight=args.aux_loss_weight)
    criterion_val = nn.CrossEntropyLoss()

    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion_train,
                    steps=4,
                    multiplier=4,
                    stem_multiplier=3,
                    parse_method=args.parse_method,
                    op_threshold=args.op_threshold)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    model_optimizer = torch.optim.SGD(model.parameters(),
                                      args.learning_rate,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay)

    arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                      lr=args.arch_learning_rate,
                                      betas=(0.9, 0.999),
                                      weight_decay=args.arch_weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    architect = Architect(model, args)

    # resume from checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            dur_time = checkpoint['dur_time']
            model_optimizer.load_state_dict(checkpoint['model_optimizer'])
            architect.arch_optimizer.load_state_dict(
                checkpoint['arch_optimizer'])
            model.restore(checkpoint['network_states'])
            logging.info('=> loaded checkpoint \'{}\'(epoch {})'.format(
                args.resume, start_epoch))
        else:
            logging.info('=> no checkpoint found at \'{}\''.format(
                args.resume))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer,
        float(args.epochs),
        eta_min=args.learning_rate_min,
        last_epoch=-1 if start_epoch == 0 else start_epoch)
    if args.resume and os.path.isfile(args.resume):
        scheduler.load_state_dict(checkpoint['scheduler'])

    for epoch in range(start_epoch, args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        logging.info(torch.sigmoid(model.alphas_normal))
        logging.info(torch.sigmoid(model.alphas_reduce))
        model.update_history()

        # training and search the model
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion_train,
                                     model_optimizer, arch_optimizer)
        logging.info('train_acc %f', train_acc)

        # validation the model
        valid_acc, valid_obj = infer(valid_queue, model, criterion_val)
        logging.info('valid_acc %f', valid_acc)

        # save checkpoint
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'dur_time': dur_time + time.time() - run_start,
                'scheduler': scheduler.state_dict(),
                'model_optimizer': model_optimizer.state_dict(),
                'arch_optimizer': architect.optimizer.state_dict(),
                'network_states': model.states(),
            },
            is_best=False,
            save=args.save)
        logging.info('save checkpoint (epoch %d) in %s  dur_time: %s', epoch,
                     args.save,
                     utils.calc_time(dur_time + time.time() - run_start))

        # save operation weights as fig
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))

    # save last operations
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            torch.sigmoid(model.alphas_normal).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            torch.sigmoid(model.alphas_reduce).data.cpu().numpy())
    logging.info('save last weights done')
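The parsing behind parse_method / op_threshold happens inside the Network class and is not shown on this page. Assuming an operation is kept when its sigmoid weight exceeds op_threshold, a toy illustration of that rule would be:

import torch

op_threshold = 0.85                      # illustrative value
alphas = torch.tensor([0.2, 1.9, -0.4])  # raw (pre-sigmoid) weights for one edge
keep = torch.sigmoid(alphas) > op_threshold
print(torch.sigmoid(alphas), keep)       # only the op with raw weight 1.9 survives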
Example #13
0
File: search.py  Project: yuanchunyu/bnas
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)
    test_data = dset.CIFAR10(root=args.data,
                             train=False,
                             download=True,
                             transform=valid_transform)
    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    bin_op = bin_utils_search.BinOp(model, args)
    best_acc = 0.
    best_genotypes = []
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        genotype_img = model.genotype(args.gamma)
        logging.info('genotype = %s', genotype)
        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     bin_op, epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, bin_op)
        logging.info('valid_acc %f', valid_acc)
        if best_acc < valid_acc:
            best_acc = valid_acc
            if len(best_genotypes) > 0:
                best_genotypes[0] = genotype
                best_genotypes[1] = genotype_img
            else:
                best_genotypes.append(genotype)
                best_genotypes.append(genotype_img)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'arch_param': model.arch_parameters(),
                'val_acc': valid_acc,
                'optimizer': optimizer.state_dict(),
            }, False, args.save)

    with open('./genotypes.py', 'a') as f:
        f.write(args.geno_name + ' = ' + str(best_genotypes[0]) + '\n')
        f.write(args.geno_name + '_img' + ' = ' + str(best_genotypes[1]) +
                '\n')

def train_search(gpu, args):
  print('START TRAIN')
  # Setting random seed
  print("Setting random seed", args.seed)
  np.random.seed(args.seed)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)

  torch.cuda.set_device(gpu)

  num_gpu = len([int(i) for i in args.gpu.split(',')])
  rank = args.nr * num_gpu + gpu
  dist.init_process_group(backend='nccl', init_method='env://', world_size=args.world_size, rank=rank)

  # loss function 
  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda(gpu) 
  
  # Initialize the model: build a supernet and move it to the GPU
  model = Network(args.init_channels, args.CIFAR_CLASSES, args.layers, criterion)
  model = model.cuda(gpu)
  arch_params = list(map(id, model.arch_parameters()))
  weight_params = filter(lambda p: id(p) not in arch_params,  # not used yet; the optimizer below takes model.parameters()
                         model.parameters())

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      model.parameters(),  # parameters updated by this optimizer
      # weight_params,
      args.learning_rate,  # learning rate
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)


  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
  # dset is shorthand for torchvision.datasets
  '''
  #  FIXME: with DistributedDataParallel it does not seem possible to split the dataset
  #  by passing explicit indices.
  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))
  print("Using multiple workers for the dataloader raises an error!")
  # Split the dataset into training and validation sets and wrap them in loaders.
  '''
  train_sampler = torch.utils.data.distributed.DistributedSampler(train_data,
      num_replicas=args.world_size,
      rank=rank)
  train_queue = torch.utils.data.DataLoader(
      dataset=train_data,
      batch_size=args.batch_size,
      shuffle=False,
      sampler=train_sampler,
      pin_memory=True,
      num_workers=0)
  valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  # valid_sampler = torch.utils.data.distributed.DistributedSampler(
  #     valid_data,
  #     num_replicas= args.world_size,
  #     rank= rank
  # )
  valid_queue = torch.utils.data.DataLoader(
      dataset=valid_data, batch_size=args.batch_size,
      pin_memory=True, num_workers=0)
  '''
  # FIXME: 
  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=0)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=0)
  '''
  # The Architect creates the architecture parameters and implements their update step
  architect = Architect(model, criterion, args)  # see architect.py; passed into train()
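  # (Illustrative, not part of this file) In a standard DARTS loop the per-batch
  # architecture update inside train() looks roughly like
  #   architect.step(input_train, target_train, input_valid, target_valid, lr, optimizer)
  # i.e. the alphas are updated on a validation batch while the SGD optimizer above
  # updates the network weights on a training batch; the exact signature is defined by
  # the local architect.py.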
  
  model = nn.parallel.DistributedDataParallel(model,device_ids=[gpu]) 


  for epoch in range(args.epochs):
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)
    genotype = model.module.genotype()  # provided by the searchable Network class in model_search.py
    logging.info('genotype = %s', genotype)  # log the cell structure for the current epoch
    print(F.softmax(model.module.alphas_normal, dim=-1))
    print(F.softmax(model.module.alphas_reduce, dim=-1))

    # training
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
             args, gpu)
    logging.info('train_acc %f', train_acc)

    # validation
    with torch.no_grad():
      valid_acc, valid_obj = infer(valid_queue, model.module, criterion,
             args, gpu)
    logging.info('valid_acc %f', valid_acc)
    scheduler.step()

    if gpu == 0:
      utils.save(model.module, os.path.join(args.save, 'weights.pt'))
예제 #15
0
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, args.eta_min, args.reg_flops,
                    args.mu)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer_alpha = torch.optim.SGD(
        model.arch_parameters(),
        args.learning_rate_alpha,
        momentum=args.momentum,
        weight_decay=args.weight_decay_alpha)
    optimizer_omega = torch.optim.SGD(
        model.parameters(),
        args.learning_rate_omega,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

    epoch = 0
    flops_lambda = 0
    flops_lambda_delta = args.lambda0
    finished = False
    t = 0
    while not finished:
        epoch_start = time.time()
        lr = args.learning_rate_omega
        model.drop_path_prob = 0
        logging.info('epoch %d lr %e flops_weight %e', epoch, lr, flops_lambda)
        train_acc, train_obj = train(train_queue, model, criterion, optimizer_alpha, optimizer_omega, flops_lambda)
        logging.info('train_acc %f', train_acc)
        epoch_duration = time.time() - epoch_start
        logging.info('epoch time: %ds.', epoch_duration)
        pruning_epoch = prune_op(model, args)
        current_flops = model.current_flops() + args.base_flops
        logging.info('current model flops %e', current_flops)
        if pruning_epoch >= args.pruning_n0:
            flops_lambda_delta = args.lambda0
            flops_lambda = flops_lambda / args.c0
        else:
            flops_lambda_delta = flops_lambda_delta * args.c0
            flops_lambda = flops_lambda + flops_lambda_delta
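        # (Note) flops_lambda weights the FLOPs penalty used in train(): while an epoch
        # prunes fewer than pruning_n0 ops, the penalty is increased by a step that itself
        # grows geometrically (x c0); once an epoch prunes pruning_n0 or more ops, the
        # penalty is divided by c0 and the step is reset to lambda0.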
        if current_flops < args.min_flops:
            finished = True
        if pruning_epoch == 0:
            t = t + 1
        else:
            if t > args.stable_round:
                genotype = model.genotype()
                logging.info('genotype = %s', genotype)
            t = 0
        epoch += 1
예제 #16
0
# Horovod: scale learning rate by the number of GPUs.
optimizer = optim.SGD(model.parameters(),
                      lr=args.base_lr * hvd.size(),
                      momentum=args.momentum,
                      weight_decay=args.wd)  #, nesterov=True)
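# (Note) This is the linear-scaling heuristic: with hvd.size() workers each consuming
# args.batch_size samples per step, the effective global batch grows by hvd.size(), so
# the base learning rate is scaled up by the same factor.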

# Horovod: (optional) compression algorithm.
compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none

# Horovod: wrap optimizer with DistributedOptimizer.
optimizer = hvd.DistributedOptimizer(optimizer,
                                     named_parameters=model.named_parameters(),
                                     compression=compression)

arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                  lr=args.arch_learning_rate,
                                  betas=(0.5, 0.999),
                                  weight_decay=args.arch_weight_decay)

# Restore from a previous checkpoint, if initial_epoch is specified.
# Horovod: restore on the first worker which will broadcast weights to other workers.
if resume_from_epoch > 0 and hvd.rank() == 0:
    filepath = args.checkpoint_format.format(exp=args.save,
                                             epoch=resume_from_epoch)
    checkpoint = torch.load(filepath)
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
예제 #17
0
def nas(args: Namespace, task: Task, preprocess_func: Compose) -> Module:
    ''' Network Architecture Search method

    Given task and preprocess function, this method returns a model output by NAS.

    The implementation of DARTS is available at https://github.com/alphadl/darts.pytorch1.1
    '''
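    # (Illustrative only) A typical call site, assuming `task` exposes n_classes/name and
    # `preprocess_func` is a torchvision Compose, might be:
    #   model = nas(args, task, preprocess_func)
    #   model = model.cuda()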

    # TODO: Replace model with the output by NAS

    args.save = 'search-{}-{}'.format(args.save,
                                      time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CLASSES = task.n_classes

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    #gpus = [int(args.gpu)]
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))

    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model,
                                         device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params,
                           model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    if task.name == 'cifar100':
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=preprocess_func)
        #train_transform, valid_transform = utils._data_transforms_cifar10(args)
        #train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)

    elif task.name == 'cifar10':
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=preprocess_func)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.module, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(args, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(args, valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    # return a neural network model (torch.nn.Module)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    model = NetworkClassification(36, task.n_classes, 20, False, genotype)

    return model
예제 #18
0
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = {}'.format(args.gpus))
    logging.info("args = %s", args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)

    model = model.to(device)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                      lr=args.arch_learning_rate,
                                      betas=(0.5, 0.999),
                                      weight_decay=args.arch_weight_decay)
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, arch_optimizer, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     device)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, device)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
예제 #19
0
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)
    #  prepare dataset
    train_transform, valid_transform = utils.data_transforms(args.dataset,args.cutout,args.cutout_length)
    if args.dataset == "CIFAR100":
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    elif args.dataset == "CIFAR10":
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    elif args.dataset == 'mit67':
        dset_cls = dset.ImageFolder
        data_path = '%s/MIT67/train' % args.tmp_data_dir  # 'data/MIT67/train'
        val_path = '%s/MIT67/test' % args.tmp_data_dir  # 'data/MIT67/val'
        train_data = dset_cls(root=data_path, transform=train_transform)
        valid_data = dset_cls(root=val_path, transform=valid_transform)
    elif args.dataset == 'sport8':
        dset_cls = dset.ImageFolder
        data_path = '%s/Sport8/train' % args.tmp_data_dir  # 'data/Sport8/train'
        val_path = '%s/Sport8/test' % args.tmp_data_dir  # 'data/Sport8/val'
        train_data = dset_cls(root=data_path, transform=train_transform)
        valid_data = dset_cls(root=val_path, transform=valid_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    random.shuffle(indices)
    
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)
    
    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:
        add_layers = args.add_layers
    else:
        add_layers = [0, 3, 6]
    if len(args.dropout_rate) == 3:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]
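    # Progressive search: run len(num_to_keep) stages; stage sp drops num_to_drop[sp] of
    # the lowest-weighted candidate ops on every edge (keeping num_to_keep[sp]), while
    # optionally widening/deepening the supernet via add_width/add_layers and annealing
    # the operation-level dropout drop_rate[sp]; architecture parameters stay frozen for
    # the first eps_no_archs[sp] epochs of each stage.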
    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]), CLASSES, args.layers + int(add_layers[sp]), criterion, switches_normal=switches_normal, switches_reduce=switches_reduce, p=float(drop_rate[sp]), largemode=args.dataset in utils.LARGE_DATASETS)
        
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)       
        optimizer = torch.optim.SGD(
                network_params,
                args.learning_rate,
                momentum=args.momentum,
                weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.arch_parameters(),
                    lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, float(args.epochs), eta_min=args.learning_rate_min)
        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                model.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=False)
            else:
                model.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor) 
                model.update_p()                
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for s-c refinement. 
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = model.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()        
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)
        
        if sp == len(num_to_keep) - 1:
            arch_param = model.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0] == True:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0] == True:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])                
            # Generate Architecture
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
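            # The loop above keeps, for every intermediate node, its two strongest
            # incoming edges: node 0 keeps both of its edges (indices 0 and 1), and the
            # nodes with 3, 4 and 5 candidate edges (14 edges in total) each keep the
            # two with the largest retained op weight.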
            for i in range(14):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
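            # The loop below caps the number of skip-connect ops in the normal cell:
            # max_sk sweeps from len(PRIMITIVES) down to 0, and whenever the current
            # genotype contains more skip-connects than max_sk, the weakest one is
            # removed and the resulting genotype is parsed and logged, yielding a series
            # of genotypes with progressively fewer skip-connects.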
            for sks in range(0, len(PRIMITIVES)+1):
                max_sk = len(PRIMITIVES) - sks
                num_sk = check_sk_number(switches_normal)
                if num_sk < max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
    with open(args.save + "/best_genotype.txt", "w") as f:
        f.write(str(genotype))
예제 #20
0
from option.default_option import TrainOptions
import os
import tqdm
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.datasets as dset
# `utils` and `Network` are assumed to come from the project-local utils.py / model_search.py, as in the other examples

os.environ["CUDA_VISIBLE_DEVICES"] = '0'
device = torch.device('cuda')
opt = TrainOptions()
CIFAR_CLASSES = 10
criterion = nn.CrossEntropyLoss().cuda()
model = Network(opt.init_channels, CIFAR_CLASSES, opt.layers, criterion)
model.cuda()
optimizer_model = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9, weight_decay=3e-4)
optimizer_arch = torch.optim.Adam(model.arch_parameters(), lr=3e-4, betas=(0.5, 0.999), weight_decay=1e-3)

train_transform, valid_transform = utils._data_transforms_cifar10(opt)
train_data = dset.CIFAR10(root='../', train=True, download=True, transform=train_transform)

num_train = len(train_data)
indices = list(range(num_train))

#### DATALOADER: this part needs to be modified
train_queue = torch.utils.data.DataLoader(
  train_data, batch_size=opt.batch_size,
  sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:5000]),
  pin_memory=True, num_workers=2)

valid_queue = torch.utils.data.DataLoader(
  train_data, batch_size=opt.batch_size,