Example #1
    def __init__(self):
        super(Helper, self).__init__()

        self.args._save = copy(self.args.save)
        self.args.save = '{}/{}/{}/{}_{}-{}'.format(self.args.save,
                                                    self.args.space,
                                                    self.args.dataset,
                                                    self.args.drop_path_prob,
                                                    self.args.weight_decay,
                                                    self.args.job_id)

        utils.create_exp_dir(self.args.save)

        config_filename = os.path.join(self.args._save, 'config.yaml')
        if not os.path.exists(config_filename):
            with open(config_filename, 'w') as f:
                yaml.dump(self.args_to_log, f, default_flow_style=False)

        if self.args.dataset != 'cifar100':
            self.args.n_classes = 10
        else:
            self.args.n_classes = 100

        # set cutout to False if the drop_prob is 0
        if self.args.drop_path_prob == 0:
            self.args.cutout = False
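
Note: every example in this listing relies on a project-specific utils.create_exp_dir helper. As orientation only, here is a minimal sketch of what such a helper typically looks like; the exact behaviour (including the scripts_to_save handling) differs between the projects shown here and is an assumption, not any one project's implementation.

import os
import shutil

def create_exp_dir(path, scripts_to_save=None):
    # Create the experiment directory if it does not already exist.
    os.makedirs(path, exist_ok=True)
    # Optionally snapshot the given scripts so the exact code of the run is preserved.
    if scripts_to_save is not None:
        script_dir = os.path.join(path, 'scripts')
        os.makedirs(script_dir, exist_ok=True)
        for script in scripts_to_save:
            shutil.copyfile(script, os.path.join(script_dir, os.path.basename(script)))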
Example #2
def run_bohb(exp_name, log_dir='EXP', iterations=20):
    
    run_dir = 'bohb-{}-{}'.format(log_dir, exp_name)
    if not os.path.exists(run_dir):
        utils.create_exp_dir(run_dir, scripts_to_save=glob.glob('*.py'))

    # log_format = '%(asctime)s %(message)s'
    # logging.basicConfig(stream=sys.stdout, level=logging.INFO,
    #     format=log_format, datefmt='%m/%d %I:%M:%S %p')
    # fh = logging.FileHandler(os.path.join(run_dir, 'log.txt'))
    # fh.setFormatter(logging.Formatter(log_format))
    # logging.getLogger().addHandler(fh)

    result_logger = hpres.json_result_logger(directory=run_dir, overwrite=True)

    # Start a nameserver
    NS = hpns.NameServer(run_id=exp_name, host='127.0.0.1', port=0)
    ns_host, ns_port = NS.start()

    # Start a local worker
    worker = TorchWorker(run_id=exp_name, host='127.0.0.1', nameserver=ns_host, nameserver_port=ns_port,
                        timeout=120, run_dir=run_dir)
    worker.run(background=True)

    # Initialise optimiser
    bohb = BOHB(configspace=worker.get_configspace(),
                run_id=exp_name,
                host='127.0.0.1',
                nameserver=ns_host,
                nameserver_port=ns_port,
                result_logger=result_logger,
                min_budget=2, max_budget=5,
                )
    print('Worker running')
    res = bohb.run(n_iterations=iterations)
    # Store the results
    with open(os.path.join(run_dir, 'result.pkl'), 'wb') as file:
        pickle.dump(res, file)
    
    # Shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # get all runs
    all_runs = res.get_all_runs()

    # get id to configuration mapping as dictionary
    id2conf = res.get_id2config_mapping()

    # get the best/incumbent run
    best_run = res.get_incumbent_id()
    best_config = id2conf[best_run]['config']
    
    print(f"Best run id:{best_run}, \n Config:{best_config}")

    # Store all run info
    with open(os.path.join(run_dir, 'summary.txt'), 'w') as file:
        file.write(f"{all_runs}")
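
The result.pkl written above can be inspected offline later. A minimal sketch, assuming the same hpbandster Result object that the example pickles (the run_dir value is hypothetical):

import os
import pickle

run_dir = 'bohb-EXP-my_experiment'  # hypothetical run directory
with open(os.path.join(run_dir, 'result.pkl'), 'rb') as f:
    res = pickle.load(f)

# Same accessors as used in the example above.
id2conf = res.get_id2config_mapping()
best_run = res.get_incumbent_id()
print('Incumbent:', best_run, id2conf[best_run]['config'])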
Example #3
    def __init__(self, config_file):
        self.args = utils.config_parser(config_file)
        utils.print_args(self.args)

        self.args._save = copy(self.args.save)
        self.args.save = '{}/{}'.format(self.args.save, self.args.dataset)

        utils.create_exp_dir(self.args.save)

        if self.args.dataset != 'cifar100':
            self.args.n_classes = 10
        else:
            self.args.n_classes = 100
Example #4
    def __init__(self):
        super(Helper, self).__init__()

        self.args._save = copy(self.args.save)
        self.args.save = '{}/{}/{}/{}_{}-{}'.format(
            self.args.save, self.args.space, self.args.dataset,
            self.args.search_dp, self.args.search_wd, self.args.job_id)

        utils.create_exp_dir(self.args.save)

        config_filename = os.path.join(self.args._save, 'config.yaml')
        if not os.path.exists(config_filename):
            with open(config_filename, 'w') as f:
                yaml.dump(self.args_to_log, f, default_flow_style=False)

        if self.args.dataset != 'cifar100':
            self.args.n_classes = 10
        else:
            self.args.n_classes = 100
Example #5
def main(genome,
         epochs,
         search_space='micro',
         save='Design_1',
         expr_root='search',
         seed=0,
         gpu=0,
         init_channels=24,
         layers=11,
         auxiliary=False,
         cutout=False,
         drop_path_prob=0.0,
         data_path="../data",
         dataset="CIFAR10"):

    # ---- train logger ----------------- #
    save_pth = os.path.join(expr_root, '{}'.format(save))
    utils.create_exp_dir(save_pth)
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    # fh = logging.FileHandler(os.path.join(save_pth, 'log.txt'))
    # fh.setFormatter(logging.Formatter(log_format))
    # logging.getLogger().addHandler(fh)

    # ---- parameter values setting ----- #
    if dataset == "CIFAR10":
        CLASSES = 10
    elif dataset == "CIFAR100":
        CLASSES = 100
    elif dataset == "Sport8":
        CLASSES = 8
    elif dataset == "MIT67":
        CLASSES = 67
    elif dataset == "flowers102":
        CLASSES = 102
    learning_rate = 0.025
    momentum = 0.9
    weight_decay = 3e-4
    data_root = data_path
    batch_size = 128
    cutout_length = 16
    auxiliary_weight = 0.4
    grad_clip = 5
    report_freq = 50
    train_params = {
        'auxiliary': auxiliary,
        'auxiliary_weight': auxiliary_weight,
        'grad_clip': grad_clip,
        'report_freq': report_freq,
    }

    if search_space == 'micro':
        genotype = micro_encoding.decode(genome)
        if dataset == "CIFAR10" or dataset == "CIFAR100":
            model = NetworkCIFAR(init_channels, CLASSES, layers, auxiliary,
                                 genotype)
        else:
            model = NetworkImageNet(init_channels, CLASSES, layers, auxiliary,
                                    genotype)
    elif search_space == 'macro':
        genotype = macro_encoding.decode(genome)
        channels = [(3, init_channels), (init_channels, 2 * init_channels),
                    (2 * init_channels, 4 * init_channels)]
        model = EvoNetwork(genotype,
                           channels,
                           CLASSES, (32, 32),
                           decoder='residual')
    else:
        raise NameError('Unknown search space type')

    # logging.info("Genome = %s", genome)
    logging.info("Architecture = %s", genotype)

    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)

    n_params = (np.sum(
        np.prod(v.size())
        for v in filter(lambda p: p.requires_grad, model.parameters())) / 1e6)
    model = model.to(device)

    logging.info("param size = %fMB", n_params)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(parameters,
                                learning_rate,
                                momentum=momentum,
                                weight_decay=weight_decay)
    if dataset == "CIFAR10" or dataset == "CIFAR100":
        MEAN = [0.49139968, 0.48215827, 0.44653124]
        STD = [0.24703233, 0.24348505, 0.26158768]

        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ])

        if cutout:
            train_transform.transforms.append(utils.Cutout(cutout_length))

        train_transform.transforms.append(transforms.Normalize(MEAN, STD))

        valid_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(MEAN, STD),
        ])
    if dataset == "CIFAR10":
        train_data = my_cifar10.CIFAR10(root=data_root,
                                        train=True,
                                        download=True,
                                        transform=train_transform)
        valid_data = my_cifar10.CIFAR10(root=data_root,
                                        train=True,
                                        download=True,
                                        transform=valid_transform)  # validation split is carved out of the training set (see index split below)
    elif dataset == "CIFAR100":
        train_data = dset.CIFAR100(root=data_root,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=data_root,
                                   train=True,
                                   download=True,
                                   transform=valid_transform)
    else:
        MEAN = [0.485, 0.456, 0.406]
        STD = [0.229, 0.224, 0.225]
        transf_train = [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.4,
                                   contrast=0.4,
                                   saturation=0.4,
                                   hue=0.2)
        ]
        transf_val = [
            transforms.Resize(256),
            transforms.CenterCrop(224),
        ]
        normalize = [transforms.ToTensor(), transforms.Normalize(MEAN, STD)]

        train_transform = transforms.Compose(transf_train + normalize)
        valid_transform = transforms.Compose(transf_val + normalize)
        if cutout:
            train_transform.transforms.append(utils.Cutout(cutout_length))

        train_data = dset.ImageFolder(root=data_path + "/" + dataset +
                                      "/train",
                                      transform=train_transform)
        valid_data = dset.ImageFolder(root=data_path + "/" + dataset + "/test",
                                      transform=valid_transform)

    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    random.shuffle(indices)
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=train_sampler,
        pin_memory=True,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=valid_sampler,
        pin_memory=True,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(epochs))

    for epoch in range(epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.droprate = drop_path_prob * epoch / epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     train_params)
        logging.info('train_acc %f', train_acc)

    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    # calculate for flops
    model = add_flops_counting_methods(model)
    model.eval()
    model.start_flops_count()
    random_data = torch.randn(1, 3, 32, 32)  # CIFAR-sized dummy input; adjust for other datasets
    model(torch.autograd.Variable(random_data).to(device))
    n_flops = np.round(model.compute_average_flops_cost() / 1e6, 4)
    logging.info('flops = %f', n_flops)

    # save to file
    # os.remove(os.path.join(save_pth, 'log.txt'))
    with open(os.path.join(save_pth, 'log.txt'), "w") as file:
        file.write("Genome = {}\n".format(genome))
        file.write("Architecture = {}\n".format(genotype))
        file.write("param size = {}MB\n".format(n_params))
        file.write("flops = {}MB\n".format(n_flops))
        file.write("valid_acc = {}\n".format(valid_acc))
    # logging.info("Architecture = %s", genotype))
    with open(os.path.join(save_pth, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
    return {
        'valid_acc': valid_acc,
        'params': n_params,
        'flops': n_flops,
    }
Example #6
def main(genome,
         epochs,
         search_space='micro',
         save='Design_1',
         expr_root='search',
         seed=0,
         gpu=0,
         init_channels=24,
         layers=11,
         auxiliary=False,
         cutout=False,
         drop_path_prob=0.0,
         train_dataset="",
         val_dataset=""):

    # ---- train logger ----------------- #
    save_pth = os.path.join(expr_root, '{}'.format(save))
    utils.create_exp_dir(save_pth)
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    # fh = logging.FileHandler(os.path.join(save_pth, 'log.txt'))
    # fh.setFormatter(logging.Formatter(log_format))
    # logging.getLogger().addHandler(fh)

    # ---- parameter values setting ----- #
    NUM_CLASSES = 4
    CIFAR_CLASSES = NUM_CLASSES
    DATA_SHAPE = (128, 128)
    INPUT_CHANNELS = 3
    learning_rate = 0.025
    momentum = 0.9
    weight_decay = 3e-4
    data_root = '../data'
    batch_size = 16
    cutout_length = 16
    auxiliary_weight = 0.4
    grad_clip = 5
    report_freq = 50
    train_params = {
        'auxiliary': auxiliary,
        'auxiliary_weight': auxiliary_weight,
        'grad_clip': grad_clip,
        'report_freq': report_freq,
    }

    if search_space == 'micro':
        genotype = micro_encoding.decode(genome)
        model = Network(init_channels, CIFAR_CLASSES, layers, auxiliary,
                        genotype)
    elif search_space == 'macro':
        genotype = macro_encoding.decode(genome)
        channels = [(INPUT_CHANNELS, init_channels),
                    (init_channels, 2 * init_channels),
                    (2 * init_channels, 4 * init_channels)]
        model = EvoNetwork(genotype,
                           channels,
                           CIFAR_CLASSES,
                           DATA_SHAPE,
                           decoder='residual')
    else:
        raise NameError('Unknown search space type')

    # logging.info("Genome = %s", genome)
    logging.info("Architecture = %s", genotype)

    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)

    n_params = (np.sum(
        np.prod(v.size())
        for v in filter(lambda p: p.requires_grad, model.parameters())) / 1e6)
    model = model.to(device)

    logging.info("param size = %fMB", n_params)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(parameters,
                                learning_rate,
                                momentum=momentum,
                                weight_decay=weight_decay)

    # TODO: change - the CIFAR statistics are overridden below with the custom dataset's mean/std
    CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
    DATASET_MEAN = [0.4785047, 0.45649716, 0.42604172]
    CIFAR_MEAN = DATASET_MEAN
    DATASET_STD = [0.31962952, 0.3112294, 0.31206125]
    CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
    CIFAR_STD = DATASET_STD
    #     # data agumentation
    #     train_transform = transforms.Compose([
    #         transforms.RandomCrop(32, padding=4),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor()
    #     ])

    #     if cutout:
    #         train_transform.transforms.append(utils.Cutout(cutout_length))

    #     train_transform.transforms.append(transforms.Normalize(CIFAR_MEAN, CIFAR_STD))

    #     valid_transform = transforms.Compose([
    #         transforms.ToTensor(),
    #         transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    #     ])

    #     train_data = my_cifar10.CIFAR10(root=data_root, train=True, download=True, transform=train_transform)
    #     valid_data = my_cifar10.CIFAR10(root=data_root, train=False, download=True, transform=valid_transform)

    #     # num_train = len(train_data)
    #     # indices = list(range(num_train))
    #     # split = int(np.floor(train_portion * num_train))
    train_data = train_dataset
    valid_data = val_dataset
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        valid_data,
        batch_size=batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(epochs))

    for epoch in range(epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.droprate = drop_path_prob * epoch / epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     train_params)
        logging.info('train_acc %f', train_acc)

    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    # calculate for flops
    model = add_flops_counting_methods(model)
    model.eval()
    model.start_flops_count()
    random_data = torch.randn(1, INPUT_CHANNELS, *DATA_SHAPE)
    model(torch.autograd.Variable(random_data).to(device))
    n_flops = np.round(model.compute_average_flops_cost() / 1e6, 4)
    logging.info('flops = %f', n_flops)

    # save to file
    # os.remove(os.path.join(save_pth, 'log.txt'))
    with open(os.path.join(save_pth, 'log.txt'), "w") as file:
        file.write("Genome = {}\n".format(genome))
        file.write("Architecture = {}\n".format(genotype))
        file.write("param size = {}MB\n".format(n_params))
        file.write("flops = {}MB\n".format(n_flops))
        file.write("valid_acc = {}\n".format(valid_acc))

    # logging.info("Architecture = %s", genotype))

    return {
        'valid_acc': valid_acc,
        'params': n_params,
        'flops': n_flops,
    }
Example #7
      
        #print(step)
        #if (step + 1) % 10 == 0:
        #break
    #print("Finished in {} seconds".format((time.time() - valid_start) ))

    logging.info("[{} Generation] {}/{} finished with validation loss: {}, prec1: {}, prec5: {}".format(gen, i+1, len(population.get_population()), 
                                                      population.get_population()[i].objs.avg, 
                                                      population.get_population()[i].top1.avg, 
                                                      population.get_population()[i].top5.avg))
    #break

DIR = "search-{}-{}".format(time.strftime("%Y%m%d-%H%M%S"), args.dataset)
if args.dir is not None:
  if not os.path.exists(args.dir):
    utils.create_exp_dir(args.dir)
  DIR = os.path.join(args.dir, DIR)
else:
  DIR = os.path.join(os.getcwd(), DIR)
utils.create_exp_dir(DIR)
utils.create_exp_dir(os.path.join(DIR, "weights"))
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(DIR, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

# Initializing the summary writer
writer = SummaryWriter(os.path.join(DIR, 'runs'))

torch.manual_seed(args.seed)
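
The SummaryWriter created above is normally fed per-generation metrics from the search loop; a minimal usage sketch (the tag name and values are assumptions, not taken from the original code):

# Hypothetical logging of the validation loss per generation.
for gen, val_loss in enumerate([2.3, 1.9, 1.7]):
    writer.add_scalar('search/valid_loss', val_loss, gen)
writer.close()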
Example #8
def main():
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    print(args)

    seed = random.randint(1, 100000000)
    print(seed)

    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True

    n_channels = 3
    n_bins = 2.**args.n_bits

    # Define model and loss criteria
    model = SearchNetwork(n_channels,
                          args.n_flow,
                          args.n_block,
                          n_bins,
                          affine=args.affine,
                          conv_lu=not args.no_lu)
    model = nn.DataParallel(model, [args.gpu])
    model.load_state_dict(
        torch.load("architecture.pt", map_location="cuda:{}".format(args.gpu)))
    model = model.module
    genotype = model.sample_architecture()

    with open(args.save + '/genotype.pkl', 'wb') as fp:
        pickle.dump(genotype, fp)

    model_single = EnsembleNetwork(n_channels,
                                   args.n_flow,
                                   args.n_block,
                                   n_bins,
                                   genotype,
                                   affine=args.affine,
                                   conv_lu=not args.no_lu)
    model = model_single
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)

    dataset = iter(sample_cifar10(args.batch, args.img_size))

    # Sample generated images
    z_sample = []
    z_shapes = calc_z_shapes(n_channels, args.img_size, args.n_flow,
                             args.n_block)
    for z in z_shapes:
        z_new = torch.randn(args.n_sample, *z) * args.temp
        z_sample.append(z_new.to(device))

    with tqdm(range(args.iter)) as pbar:
        for i in pbar:
            # Training procedure
            model.train()

            # Get a random minibatch from the search queue with replacement
            input, _ = next(dataset)
            input = Variable(input,
                             requires_grad=False).cuda(non_blocking=True)

            log_p, logdet, _ = model(input + torch.rand_like(input) / n_bins)

            logdet = logdet.mean()
            loss, _, _ = likelihood_loss(log_p, logdet, args.img_size, n_bins)

            # Optimize model
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description("Loss: {}".format(loss.item()))

            # Save generated samples
            if i % 100 == 0:
                with torch.no_grad():
                    tvutils.save_image(
                        model_single.reverse(z_sample).cpu().data,
                        "{}/samples/{}.png".format(args.save,
                                                   str(i + 1).zfill(6)),
                        normalize=False,
                        nrow=10,
                    )

            # Save checkpoint
            if i % 1000 == 0:
                utils.save(model, os.path.join(args.save, 'latest_weights.pt'))
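
The genotype pickled at the start of this example can be restored later to rebuild the same network; a minimal sketch under that assumption, reusing the arguments from the example above:

import os
import pickle

with open(os.path.join(args.save, 'genotype.pkl'), 'rb') as fp:
    genotype = pickle.load(fp)

# Rebuild the ensemble network with the recovered genotype.
model = EnsembleNetwork(3, args.n_flow, args.n_block, 2.**args.n_bits, genotype,
                        affine=args.affine, conv_lu=not args.no_lu)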
Example #9
def main():
    if args.load_path:
        args.save = Path(args.load_path) / 'eval-{}-{}'.format(
            args.save, time.strftime("%Y%m%d-%H%M%S"))
    else:
        args.save = Path('logs') / 'eval-{}-{}'.format(
            args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.save / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    model = eval(args.model)
    if args.gpu:
        model = model.cuda()

    if args.load_path:
        utils.load(model, os.path.join(args.load_path, 'weights.pt'))
        print("loaded")

    direct_model = model
    if args.gpu:
        model = torch.nn.DataParallel(model)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.num_workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.num_workers)

    if args.eval:
        direct_model.drop_path_prob = 0
        valid_acc, valid_obj = infer(valid_queue, model, args.gpu)
        logging.info('valid_acc %f', valid_acc)
        return

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step(epoch)
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        direct_model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, optimizer, args.gpu)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, args.gpu)
        logging.info('valid_acc %f', valid_acc)

        if epoch >= args.epochs - 50 or epoch % args.save_frequency == 0:
            utils.save(model.module,
                       os.path.join(args.save, f'weights_{epoch}.pt'))
Example #10
parser.add_argument('--search_space', choices=['1', '2', '3'], default='1')
parser.add_argument(
    '--warm_start_epochs',
    type=int,
    default=0,
    help='Warm start one-shot model before starting architecture updates.')
parser.add_argument('--s3_bucket',
                    type=str,
                    default='megadarts',
                    help='s3 bucket for saving to remote')
args = parser.parse_args()

args.save = 'experiments/pc_darts/search_space_{}/search-{}-{}-{}-{}-{}'.format(
    args.search_space, args.save, time.strftime("%Y%m%d-%H%M%S"), args.seed,
    args.learning_rate, args.search_space)
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Dump the config of the run
with open(os.path.join(args.save, 'config.json'), 'w') as fp:
    json.dump(args.__dict__, fp)

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
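
The config.json dumped above makes it easy to restore the exact settings of a run later; a minimal sketch, assuming the same flat dictionary layout that json.dump produced:

import argparse
import json
import os

with open(os.path.join(args.save, 'config.json')) as fp:
    restored_args = argparse.Namespace(**json.load(fp))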
Example #11
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    parser = argparse.ArgumentParser("imagenet")
    parser.add_argument('--data',
                        type=Path,
                        default=DATA_DIRECTORY / 'imagenet',
                        help='location of the data corpus')
    parser.add_argument('--batchsz', type=int, default=128, help='batch size')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='init learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--wd', type=float, default=3e-5, help='weight decay')
    parser.add_argument('--report_freq',
                        type=float,
                        default=100,
                        help='report frequency')
    parser.add_argument('--gpu', type=str, help='gpu device id')
    parser.add_argument('--epochs',
                        type=int,
                        default=250,
                        help='num of training epochs')
    parser.add_argument('--init_ch',
                        type=int,
                        default=48,
                        help='num of init channels')
    parser.add_argument('--layers',
                        type=int,
                        default=14,
                        help='total number of layers')
    parser.add_argument('--checkpoint_path',
                        type=Path,
                        help='path to checkpoint for restart')
    parser.add_argument('--auxiliary',
                        action='store_true',
                        default=False,
                        help='use auxiliary tower')
    parser.add_argument('--auxiliary_weight',
                        type=float,
                        default=0.4,
                        help='weight for auxiliary loss')
    parser.add_argument('--drop_path_prob',
                        type=float,
                        default=0,
                        help='drop path probability')
    parser.add_argument('--exp_path',
                        type=Path,
                        default=Path('exp_imagenet'),
                        help='experiment name')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--arch',
                        type=str,
                        default='',
                        help='which architecture to use')
    parser.add_argument('--arch_path',
                        type=str,
                        default='',
                        help='which architecture of json to use')
    parser.add_argument('--grad_clip',
                        type=float,
                        default=5.,
                        help='gradient clipping')
    parser.add_argument('--label_smooth',
                        type=float,
                        default=0.1,
                        help='label smoothing')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.97,
                        help='learning rate decay')
    parser.add_argument('--decay_period',
                        type=int,
                        default=1,
                        help='epochs between two learning rate decays')
    args = parser.parse_args()

    my_dataset = MyDataset.ImageNet
    args.save = args.exp_path / f'ImageNet-{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    genotype = eval(f'genotypes.{args.arch}'
                    ) if not args.arch_path else utils.load_genotype(
                        args.arch_path)
    trainer = Trainer(args, genotype, my_dataset)
    _, _, _ = trainer.train()

    args.seed = 0
    test_model = trainer.model.module if isinstance(
        trainer.model, DataParallel) else trainer.model
    tester = Tester(test_args=args, my_dataset=my_dataset, model=test_model)
    valid_acc_top1, valid_acc_top5, valid_obj = tester.infer()
    logging.info('valid_acc_top1 %f', valid_acc_top1)
    logging.info('valid_acc_top5 %f', valid_acc_top5)
    logging.info('valid_err_top1 %f', 100 - valid_acc_top1)
    logging.info('valid_err_top5 %f', 100 - valid_acc_top5)
Example #12
    def initialize_run(self):
        """
        TODO This is the same as NAO one.
        :return:
        """
        args = self.args
        utils = project_utils
        if not self.args.continue_train:
            self.sub_directory_path = 'WeightSharingNasBenchNetRandom-{}_SEED_{}'.format(
                self.args.save, self.args.seed)
            self.exp_dir = os.path.join(self.args.main_path,
                                        self.sub_directory_path)
            utils.create_exp_dir(self.exp_dir)

        if self.args.visualize:
            self.viz_dir_path = utils.create_viz_dir(self.exp_dir)

        if self.args.tensorboard:
            self.tb_dir = self.exp_dir
            tboard_dir = os.path.join(self.args.tboard_dir,
                                      self.sub_directory_path)
            self.writer = SummaryWriter(tboard_dir)

        if self.args.debug:
            torch.autograd.set_detect_anomaly(True)

        self.nasbench = self.search_space.nasbench

        # Set logger.
        self.logger = utils.get_logger(
            "train_search",
            file_handler=utils.get_file_handler(
                os.path.join(self.exp_dir, 'log.txt')),
            level=logging.INFO if not args.debug else logging.DEBUG)
        logging.info(f"setting random seed as {args.seed}")
        utils.torch_random_seed(args.seed)
        logging.info('gpu number = %d' % args.gpus)
        logging.info("args = %s", args)

        criterion = nn.CrossEntropyLoss().cuda()
        eval_criterion = nn.CrossEntropyLoss().cuda()
        self.eval_loss = eval_criterion

        train_transform, valid_transform = utils._data_transforms_cifar10(
            args.cutout_length if args.cutout else None)
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=valid_transform)
        test_data = dset.CIFAR10(root=args.data,
                                 train=False,
                                 download=True,
                                 transform=valid_transform)

        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(args.enas_search_config.ratio * num_train))

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[:split]),
            pin_memory=True,
            num_workers=2)

        valid_queue = torch.utils.data.DataLoader(
            valid_data,
            batch_size=args.enas_search_config.child_eval_batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[split:num_train]),
            pin_memory=True,
            num_workers=2)

        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=args.evaluate_batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=8)

        repeat_valid_loader = RepeatedDataLoader(valid_queue)
        return train_queue, valid_queue, test_queue, repeat_valid_loader, criterion, eval_criterion
Example #13
def main():
  '''
  data: directory of the dataset   batch_size: should not be set too large
  learning_rate, learning_rate_min, momentum, weight_decay: the four optimizer settings
  report_freq: how often progress is reported
  epochs: default 50
  init_channels: number of initial feature channels; the channel count doubles as the network gets deeper
  layers: number of cells the network skeleton is built from during cell search
  cutout, cutout_length: whether to use cutout, and its length (TODO)
  drop_path_prob: parameter that reduces compute time and memory usage during the search
  save: name of the save directory    seed: random seed
  grad_clip: gradient clipping to avoid exploding gradients   train_portion: fraction of the training data used for training;
      the rest acts as "validation" data (although it does not come from the official validation set)
  unrolled: one-step unrolled validation loss (TODO)
  arch_learning_rate / arch_weight_decay: learning rate and weight decay for the architecture parameters
  '''
  parser = argparse.ArgumentParser("cifar")
  parser.add_argument('--data', type=str, default='../data', 
                      help='location of the data corpus')
  parser.add_argument('--batch_size', type=int, default=64, 
                      help='batch size')
  parser.add_argument('--learning_rate', type=float, default=0.025, 
                      help='init learning rate')
  parser.add_argument('--learning_rate_min', type=float, default=0.001, 
                      help='min learning rate')
  parser.add_argument('--momentum', type=float, default=0.9,
                      help='momentum')
  parser.add_argument('--weight_decay', type=float, default=3e-4, 
                      help='weight decay')
  parser.add_argument('--report_freq', type=float, default=50, 
                      help='report frequency')
  parser.add_argument('--gpu', type=str, default='0,1', 
                      help='gpu device id, split with ","')
  parser.add_argument('--epochs', type=int, default=50,
                      help='num of training epochs')
  parser.add_argument('--init_channels', type=int, default=16, 
                      help='num of init channels')
  parser.add_argument('--layers', type=int, default=8, 
                      help='total number of layers')
  parser.add_argument('--model_path', type=str, default='saved_models', 
                      help='path to save the model')
  parser.add_argument('--cutout', action='store_true', default=False, 
                      help='use cutout')
  parser.add_argument('--cutout_length', type=int, default=16, 
                      help='cutout length')
  parser.add_argument('--drop_path_prob', type=float, default=0.3, 
                      help='drop path probability')
  parser.add_argument('--save', type=str, default='EXP', 
                      help='experiment name')
  parser.add_argument('--seed', type=int, default=12450, 
                      help='random seed')
  parser.add_argument('--grad_clip', type=float, default=5, 
                      help='gradient clipping')
  parser.add_argument('--train_portion', type=float, default=0.5, 
                      help='portion of training data')
  parser.add_argument('--unrolled', action='store_true', default=False, 
                      help='use one-step unrolled validation loss')
  parser.add_argument('--arch_learning_rate', type=float, default=3e-4, 
                      help='learning rate for arch encoding')
  parser.add_argument('--arch_weight_decay', type=float, default=1e-3, 
                      help='weight decay for arch encoding')
  # arguments for distributed data parallel
  parser.add_argument('-n', '--nodes', default=1, type=int, metavar='N',
                      help='number of nodes (default: 1)')
  parser.add_argument('-nr','--nr',default=0, type=int,
                      help='ranking within the nodes')
  args = parser.parse_args()

  args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S")) # create the search directory name
  utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) # copy every .py script into the search directory
  # glob.glob() finds file paths matching a given pattern
  # logging setup
  log_format = '%(asctime)s %(message)s' # %(asctime)s: current time, %(message)s: the logged message
  logging.basicConfig(stream=sys.stdout, level=logging.INFO,
      format=log_format, datefmt='%m/%d %I:%M:%S %p')
  fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
  fh.setFormatter(logging.Formatter(log_format))
  logging.getLogger().addHandler(fh)


  args.CIFAR_CLASSES = 10
  # Setting GPU device
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)
  gpus = [int(i) for i in args.gpu.split(',')] # convert the argparse string into a list of ints
  num_gpu = len(gpus)
  if num_gpu == 1:
    torch.cuda.set_device(int(args.gpu))

  logging.info('gpu device = %s' % args.gpu)
  logging.info("args = %s", args)

  args.world_size = num_gpu * args.nodes 
  os.environ['MASTER_ADDR'] = 'localhost'
  os.environ['MASTER_PORT'] = '23456'
  print('OMG')
  mp.spawn(train_search, nprocs=num_gpu, args=(args,))
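
mp.spawn passes the local process index as the first argument, so the spawned train_search target usually initialises the process group itself. A minimal sketch of such a target (this body is an assumption, not the original implementation):

import torch
import torch.distributed as dist

def train_search(gpu, args):
    # 'gpu' is the local process index supplied by mp.spawn.
    rank = args.nr * torch.cuda.device_count() + gpu
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.world_size, rank=rank)
    torch.cuda.set_device(gpu)
    # ... build the model, wrap it in DistributedDataParallel, and run the search ...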
Example #14
def main(macro_genome, micro_genome, epochs, search_space='micro',
         save='Design_1', expr_root='search', seed=0, gpu=0, init_channels=24,
         layers=11, auxiliary=False, cutout=False, drop_path_prob=0.0, batch_size=128):

    # ---- train logger ----------------- #
    save_pth = os.path.join(expr_root, '{}'.format(save))
    utils.create_exp_dir(save_pth)
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')

    # ---- parameter values setting ----- #
    CIFAR_CLASSES = config_dict()['n_classes']
    INPUT_CHANNELS = config_dict()['n_channels']
    learning_rate = 0.025
    momentum = 0.9
    weight_decay = 3e-4
    data_root = '../data'
    cutout_length = 16
    auxiliary_weight = 0.4
    grad_clip = 5
    report_freq = 50
    train_params = {
        'auxiliary': auxiliary,
        'auxiliary_weight': auxiliary_weight,
        'grad_clip': grad_clip,
        'report_freq': report_freq,
    }

    if search_space == 'micro' or search_space == 'micro_garbage':
        genome = micro_genome
        genotype = micro_encoding.decode(genome)
        model = Network(init_channels, CIFAR_CLASSES, config_dict()['n_channels'], layers, auxiliary, genotype)
    elif search_space == 'macro' or search_space == 'macro_garbage':
        genome = macro_genome
        genotype = macro_encoding.decode(genome)
        channels = [(INPUT_CHANNELS, init_channels),
                    (init_channels, 2*init_channels),
                    (2*init_channels, 4*init_channels)]
        model = EvoNetwork(genotype, channels, CIFAR_CLASSES, (config_dict()['INPUT_HEIGHT'], config_dict()['INPUT_WIDTH']), decoder='residual')
    elif search_space == 'micromacro':
        genome = [macro_genome, micro_genome]
        macro_genotype = macro_encoding.decode(macro_genome)
        micro_genotype = micro_encoding.decode(micro_genome)
        genotype = [macro_genotype, micro_genotype]
        set_config('micro_creator', make_micro_creator(micro_genotype, convert=False))
        channels = [(INPUT_CHANNELS, init_channels),
                    (init_channels, 2 * init_channels),
                    (2 * init_channels, 4 * init_channels)]
        model = EvoNetwork(macro_genotype, channels, CIFAR_CLASSES,
                           (config_dict()['INPUT_HEIGHT'], config_dict()['INPUT_WIDTH']), decoder='residual')

    else:
        raise NameError('Unknown search space type')

    # logging.info("Genome = %s", genome)
    logging.info("Architecture = %s", genotype)

    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)

    n_params = (np.sum(np.prod(v.size()) for v in filter(lambda p: p.requires_grad, model.parameters())) / 1e6)
    model = model.to(device)

    logging.info("param size = %fMB", n_params)

    if config_dict()['problem'] == 'classification':
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.MSELoss()
    criterion = criterion.cuda()


    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(
        parameters,
        learning_rate,
        momentum=momentum,
        weight_decay=weight_decay
    )

    CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
    CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

    if cutout:
        train_transform.transforms.append(utils.Cutout(cutout_length))

    train_transform.transforms.append(transforms.Normalize(CIFAR_MEAN, CIFAR_STD))

    valid_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ])

    train_data = my_cifar10.CIFAR10(root=data_root, train=True, download=False, transform=train_transform)
    valid_data = my_cifar10.CIFAR10(root=data_root, train=False, download=False, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=1)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=1)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, int(epochs))

    for epoch in range(epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.droprate = drop_path_prob * epoch / epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer, train_params)
        logging.info(f'train_{config_dict()["performance_measure"]} %f', train_acc)

    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info(f'valid_{config_dict()["performance_measure"]} %f', valid_acc)

    # calculate for flops
    model = add_flops_counting_methods(model)
    model.eval()
    model.start_flops_count()
    random_data = torch.randn(1, INPUT_CHANNELS, config_dict()['INPUT_HEIGHT'], config_dict()['INPUT_WIDTH'])
    model(torch.autograd.Variable(random_data).to(device))
    n_flops = np.round(model.compute_average_flops_cost() / 1e6, 4)
    logging.info('flops = %f', n_flops)

    # save to file
    # os.remove(os.path.join(save_pth, 'log.txt'))
    with open(os.path.join(save_pth, 'log.txt'), "w") as file:
        file.write("Genome = {}\n".format(genome))
        file.write("Architecture = {}\n".format(genotype))
        file.write("param size = {}MB\n".format(n_params))
        file.write("flops = {}MB\n".format(n_flops))
        file.write("valid_acc = {}\n".format(valid_acc))

    # logging.info("Architecture = %s", genotype))

    return {
        'valid_acc': valid_acc,
        'params': n_params,
        'flops': n_flops,
    }
Example #15
def main():
    if args.load_checkpoint:
        args.save = Path(args.load_checkpoint) / 'eval-imagenet-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    else:
        args.save = Path('logs') / 'eval-imagenet-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.save / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    model = eval(args.model)

    # flops, params = profile(model, input_size=(1, 3, 224, 224))
    # print("flops" + str(flops) + " params" + str(params))
    if args.load_checkpoint:
        dictionary = torch.load(args.load_checkpoint)
        start_epoch = dictionary['epoch'] if args.start_epoch == -1 else args.start_epoch
        model.load_state_dict(dictionary['state_dict'])
    else:
        start_epoch = 0 if args.start_epoch == -1 else args.start_epoch

    direct_model = model

    if args.gpu:
        model = nn.DataParallel(model)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    # if args.load_checkpoint:
    #   optimizer.load_state_dict(dictionary['optimizer'])
    #   del dictionary

    traindir = os.path.join(args.data, 'train')
    validdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_data = dset.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
                hue=0.2),
            transforms.ToTensor(),
            normalize,
        ]))
    valid_data = dset.ImageFolder(
        validdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.num_workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers)

    if args.eval:
        direct_model.drop_path_prob = 0
        valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, args.gpu)
        logging.info('valid_acc_top1 %f', valid_acc_top1)
        logging.info('valid_acc_top5 %f', valid_acc_top5)
        return

    if args.period is not None:
        periods = args.period.split(',')
        periods = [int(p) for p in periods]
        totals = []
        total = 0
        for p in periods:
            total += p
            totals.append(total)
        scheduler = CosineAnnealingLR(optimizer, periods[0])
    else:
        periods = None
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)

    best_acc_top1 = 0
    for epoch in range(start_epoch, args.epochs):
        if args.period is None:
            scheduler.step(epoch)
        else:
            assert len(periods) > 0
            index = bisect.bisect_left(totals, epoch)
            scheduler.T_max = periods[index]
            if index == 0:
                e = epoch
            else:
                e = epoch - totals[index - 1]
            scheduler.step(e % periods[index])
            logging.info("schedule epoch:" + str(e % periods[index]))
            logging.info("schedule period:" + str(periods[index]))
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        direct_model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, optimizer, args.gpu)
        logging.info('train_acc %f', train_acc)

        valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, args.gpu)
        logging.info('valid_acc_top1 %f', valid_acc_top1)
        logging.info('valid_acc_top5 %f', valid_acc_top5)

        is_best = False
        if valid_acc_top1 > best_acc_top1:
            best_acc_top1 = valid_acc_top1
            is_best = True

        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.module.state_dict(),
            'best_acc_top1': best_acc_top1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save)
Example #16
def main():
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    print(args)

    # Basic Setup
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    torch.cuda.set_device(2)
    cudnn.benchmark = True
    cudnn.enabled = True

    n_channels = 3
    n_bins = 2.**args.n_bits
    approx_samples = 4

    # Define model
    model_single = Network(n_channels,
                           args.n_flow,
                           args.n_block,
                           n_bins,
                           affine=args.affine,
                           conv_lu=not args.no_lu)
    model = nn.DataParallel(model_single, device_ids=[2, 3])
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)
    dataset = iter(sample_cifar10(args.batch, args.img_size))

    # Sample generated images
    z_sample = []
    z_shapes = calc_z_shapes(n_channels, args.img_size, args.n_flow,
                             args.n_block)
    for z in z_shapes:
        z_new = torch.randn(args.n_sample, *z) * args.temp
        z_sample.append(z_new.to(device))

    with tqdm(range(args.iter)) as pbar:
        for i in pbar:
            # Training procedure
            model.train()

            # Get a random minibatch from the search queue with replacement
            input, _ = next(dataset)
            input = Variable(input,
                             requires_grad=False).cuda(non_blocking=True)
            input = input.repeat(approx_samples, 1, 1, 1)

            log_p, logdet, _ = model(input + torch.rand_like(input) / n_bins)

            loss, _, _ = likelihood_loss(log_p, logdet, args.img_size, n_bins)

            loss_variance = likelihood_loss_variance(log_p, logdet,
                                                     args.img_size, n_bins,
                                                     approx_samples)

            loss = loss + loss_variance

            # Optimize model
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description("Loss: {}".format(loss.item()))

            # Save generated samples
            if i % 100 == 0:
                with torch.no_grad():
                    tvutils.save_image(
                        model_single.reverse(z_sample).cpu().data,
                        "{}/samples/{}.png".format(args.save,
                                                   str(i + 1).zfill(6)),
                        normalize=False,
                        nrow=10,
                    )

            # Save checkpoint
            if i % 1000 == 0:
                model_single.genotype()
                torch.save(
                    model.state_dict(),
                    "{}/checkpoint/model_{}.pt".format(args.save,
                                                       str(i + 1).zfill(6)))

            # Save latest weights
            utils.save(model, os.path.join(args.save, 'latest_weights.pt'))
Example #17
                    default=False,
                    help="trans the embedding or not!")
parser.add_argument('--first_order',
                    action='store_true',
                    default=False,
                    help="use first order or not!")
args = parser.parse_args()
print("args ofm:", args.ofm)
print("embedding_num:", args.embedding_num)
save_name = 'experiments/{}/search-{}-{}-{}-{}-{}-{}-{}-{}'.format(
    args.dataset, time.strftime("%Y%m%d-%H%M%S"), args.mode, args.save,
    args.embedding_dim, args.opt, args.lr, args.arch_lr, args.seed)
if args.unrolled:
    save_name += '-unrolled'
save_name += '-' + str(np.random.randint(10000))
utils.create_exp_dir(save_name, scripts_to_save=glob.glob('*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(save_name, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)


def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
    np.random.seed(args.seed)
Example #18
args.world_size = torch.distributed.get_world_size()

# Set up primitives.
from darts.genotypes import set_primitives
set_primitives(-1)
from util.datasets import imagenet_lmdb_dataset
from darts import genotypes
from util import utils
from darts.model import NetworkImageNet as Network
from darts.compute_flops import find_max_channels

# Set up logging.
assert args.root_dir
args.save = args.root_dir + '/eval_imagenet-{}'.format(args.save)
if args.local_rank == 0:
    utils.create_exp_dir(args.save)
logging = utils.Logger(args.local_rank, args.save)
writer = utils.Writer(args.local_rank, args.save)

CLASSES = 1000


class CrossEntropyLabelSmooth(nn.Module):
    """Smoothed xentropy loss."""
    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
Example #19
parser.add_argument('--name', type=str, default="runs", help='name for log')
parser.add_argument('--train_portion',
                    type=float,
                    default=0.9,
                    help='portion of training data')
parser.add_argument('-j',
                    '--workers',
                    default=1,
                    type=int,
                    metavar='N',
                    help='number of data loading workers (default: 1)')
args = parser.parse_args()

args.save = 'eval-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save,
                     scripts_to_save=glob.glob('*.py'),
                     exec_script=args.exec_script)

# Logging configuration
utils.setup_logger(args)

# tensorboard_logger configuration
configure('{}/{}'.format(args.save, args.name))

CIFAR_CLASSES = 10

os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)


def main():
    root = logging.getLogger()