Example #1
import os
import sys
import time

import numpy as np
import torch
import torch.distributed as dist
from tensorboardX import SummaryWriter

from utils.config import SPOSConfig
from model.network import get_shufflenas_oneshot
from utils.imagenet_dataloader import get_imagenet_iter_torch
from utils import utils

config = SPOSConfig()
device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    start = time.time()
    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(config.local_rank % len(config.gpus))
    dist.init_process_group(backend='nccl', init_method='env://')
    config.world_size = dist.get_world_size()
    config.total_batch = config.world_size * config.batch_size

    np.random.seed(config.seed)
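    # The excerpt ends here, mid-setup. A plausible continuation (an
    # assumption, not part of the original source) would seed Torch to
    # match the NumPy seed above:
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True  # autotune conv kernels for fixed input shapes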
Example #2
    def initialize_run(self):
        """
        TODO This is the same as NAO one.
        :return:
        """
        args = self.args
        utils = project_utils  # use the project-level utils inside this method
        if not self.args.continue_train:
            self.sub_directory_path = 'WeightSharingNasBenchNetRandom-{}_SEED_{}'.format(
                self.args.save, self.args.seed)
            self.exp_dir = os.path.join(self.args.main_path,
                                        self.sub_directory_path)
            utils.create_exp_dir(self.exp_dir)

        if self.args.visualize:
            self.viz_dir_path = utils.create_viz_dir(self.exp_dir)

        if self.args.tensorboard:
            self.tb_dir = self.exp_dir
            tboard_dir = os.path.join(self.args.tboard_dir,
                                      self.sub_directory_path)
            self.writer = SummaryWriter(tboard_dir)

        if self.args.debug:
            torch.autograd.set_detect_anomaly(True)

        self.nasbench = self.search_space.nasbench

        # Set logger.
        self.logger = utils.get_logger(
            "train_search",
            file_handler=utils.get_file_handler(
                os.path.join(self.exp_dir, 'log.txt')),
            level=logging.INFO if not args.debug else logging.DEBUG)
        logging.info(f"setting random seed as {args.seed}")
        utils.torch_random_seed(args.seed)
        logging.info('gpu number = %d' % args.gpus)
        logging.info("args = %s", args)

        criterion = nn.CrossEntropyLoss().cuda()
        eval_criterion = nn.CrossEntropyLoss().cuda()
        self.eval_loss = eval_criterion

        train_transform, valid_transform = utils._data_transforms_cifar10(
            args.cutout_length if args.cutout else None)
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=valid_transform)
        test_data = dset.CIFAR10(root=args.data,
                                 train=False,
                                 download=True,
                                 transform=valid_transform)

        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(args.enas_search_config.ratio * num_train))

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[:split]),
            pin_memory=True,
            num_workers=2)

        valid_queue = torch.utils.data.DataLoader(
            valid_data,
            batch_size=args.enas_search_config.child_eval_batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[split:num_train]),
            pin_memory=True,
            num_workers=2)

        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=args.evaluate_batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=8)

        repeat_valid_loader = RepeatedDataLoader(valid_queue)
        return train_queue, valid_queue, test_queue, repeat_valid_loader, criterion, eval_criterion
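
RepeatedDataLoader is referenced but not defined in this excerpt. A minimal sketch of what such a wrapper typically looks like, assuming it simply restarts the underlying iterator whenever an epoch is exhausted (the method name next_batch is illustrative):

class RepeatedDataLoader:
    """Cycle a DataLoader so that fetching a batch never raises StopIteration."""

    def __init__(self, data_loader):
        self.data_loader = data_loader
        self.data_iter = iter(self.data_loader)

    def next_batch(self):
        try:
            batch = next(self.data_iter)
        except StopIteration:
            # epoch exhausted: start over with a fresh iterator
            self.data_iter = iter(self.data_loader)
            batch = next(self.data_iter)
        return batch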
Example #3
parser.add_argument('--compress_rate',
                    type=str,
                    default=None,
                    help='compress rate of each conv')

args = parser.parse_args()

CLASSES = 10
print_freq = (256 * 50) // args.batch_size

if not os.path.isdir(args.job_dir):
    os.makedirs(args.job_dir)  # makedirs also creates missing parent directories

utils.record_config(args)
logger = utils.get_logger(os.path.join(args.job_dir, 'logger.log'))


def load_vgg_model(model, oristate_dict, random_rule):
    logger.info('random rule: ' + random_rule)

    state_dict = model.state_dict()
    last_select_index = None  # filter indices selected in the previous conv layer

    cnt = 0
    for name, module in model.named_modules():

        if isinstance(module, nn.Conv2d):

            cnt += 1
            oriweight = oristate_dict[name + '.weight']
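
The example is cut off inside the per-layer loop. For context, a minimal sketch of how this style of pruned-weight inheritance usually proceeds once oriweight is in hand; the helper name load_conv_weight and the L1-norm selection rule are illustrative assumptions (the original selects filters according to random_rule):

def load_conv_weight(state_dict, oristate_dict, name, last_select_index):
    # copy weights for one conv layer of the pruned model from the original model
    oriweight = oristate_dict[name + '.weight']
    curweight = state_dict[name + '.weight']
    orifilter_num = oriweight.size(0)
    curfilter_num = curweight.size(0)

    if orifilter_num != curfilter_num:
        # rank original filters by L1 norm and keep the top-k (assumed rule)
        norms = oriweight.abs().sum(dim=(1, 2, 3))
        select_index = norms.argsort(descending=True)[:curfilter_num]
        select_index = select_index.sort()[0].tolist()
    else:
        select_index = list(range(orifilter_num))

    for i, oi in enumerate(select_index):
        if last_select_index is not None:
            # the previous layer was pruned, so pick the matching input channels
            for j, oj in enumerate(last_select_index):
                curweight[i][j] = oriweight[oi][oj]
        else:
            curweight[i] = oriweight[oi]

    return select_index  # becomes last_select_index for the next conv layer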
Example #4
import os

import numpy as np
import tensorboardX
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn

# args, device, utils, Network, train, and test are assumed to be defined
# elsewhere in this project; the excerpt shows only main().


def main():
    seed = args.seed

    np.random.seed(seed)

    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    timestamp = str(utils.get_unix_timestamp())
    path = os.path.join(args.save, timestamp)
    os.makedirs(path, exist_ok=True)  # ensure the checkpoint directory exists
    logger = utils.get_logger(args.save, timestamp, file_type='txt')
    tb_logger = tensorboardX.SummaryWriter('../runs/{}'.format(timestamp))

    logger.info("time = %s, args = %s", str(utils.get_unix_timestamp()), args)

    train_data, test_data, input_shape = utils.get_data(
        args.data, args.observ_window, args.downsampling, args.multi_slice)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)
    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    model = Network(input_shape, args.num_drones)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]  # get_lr() is deprecated in recent PyTorch
        logger.info('time = %s, epoch %d lr %e',
                    str(utils.get_unix_timestamp()), epoch, lr)
        print('time = {}, epoch {} lr {}'.format(
            str(utils.get_unix_timestamp()), epoch, lr))
        model.train()
        train_loss, train_acc = train(train_queue, model, criterion, optimizer,
                                      logger)
        logger.info('time = %s, train_loss %f train_acc %f',
                    str(utils.get_unix_timestamp()), train_loss, train_acc)
        print('time = {}, train_loss {} train_acc {}'.format(
            str(utils.get_unix_timestamp()), train_loss, train_acc))
        tb_logger.add_scalar("epoch_train_loss", train_loss, epoch)
        tb_logger.add_scalar("epoch_train_acc", train_acc, epoch)

        scheduler.step()

        model.eval()
        test_loss, test_acc = test(test_queue, model, criterion, logger)
        logger.info('time = %s, test_loss %f test_acc %f',
                    str(utils.get_unix_timestamp()), test_loss, test_acc)
        print('time = {}, test_loss {} test_acc {}'.format(
            str(utils.get_unix_timestamp()), test_loss, test_acc))
        tb_logger.add_scalar("epoch_test_loss", test_loss, epoch)
        tb_logger.add_scalar("epoch_test_acc", test_acc, epoch)

        utils.save(model, os.path.join(path, 'weights.pt'))
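
utils.save here is most likely the common DARTS-style checkpoint helper; a minimal sketch under that assumption, with the matching load shown for completeness:

def save(model, model_path):
    # persist only the parameters, not the module object
    torch.save(model.state_dict(), model_path)

def load(model, model_path):
    # restore parameters written by save()
    model.load_state_dict(torch.load(model_path))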