Example 1
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype_path = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
            genotype = eval(geno_raw)
    else:
        genoname = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            genotype = eval("genotypes.BATH")
    model = Network(args.init_channels, 1, args.layers, args.auxiliary, genotype, input_channels=4)
    model = model.cuda()
    print(os.path.join(utils.get_dir(), args.model_path))
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.MSELoss()
    criterion = criterion.cuda()

    test_data_tne = utils.BathymetryDataset(args, "../29TNE.csv", root_dir="dataset/bathymetry/29TNE/dataset_29TNE",
                                            to_trim="/tmp/pbs.6233542.admin01/tmp_portugal/", to_filter=False)

    test_queue_tne = torch.utils.data.DataLoader(
        test_data_tne, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_obj, targets, preds = infer(test_queue_tne, model, criterion, args.depth_normalization)
    logging.info('test_obj tne %f', test_obj)

    test_data_tne.write_results(targets, preds, os.path.join(args.save, 'tne_results.csv'))

    test_data_smd = utils.BathymetryDataset(args, "../29SMD.csv", root_dir="dataset/bathymetry/29SMD/dataset_29SMD",
                                            to_trim="/tmp/pbs.6233565.admin01/tmp_portugal/", to_filter=False)

    test_queue_smd = torch.utils.data.DataLoader(
        test_data_smd, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    test_obj, targets, preds = infer(test_queue_smd, model, criterion, args.depth_normalization)
    logging.info('test_obj smd %f', test_obj)

    test_data_smd.write_results(targets, preds, os.path.join(args.save, 'smd_results.csv'))
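
The genotype files above are loaded with a bare eval(). A minimal sketch of a more defensive reader, assuming genotype.txt holds the repr() of a genotypes.Genotype namedtuple (as in standard DARTS code), restricts the evaluation namespace so the file cannot run arbitrary code:

import genotypes

def load_genotype(path):
    # Evaluate the serialized genotype with only the Genotype constructor
    # in scope; any other name in the file raises NameError.
    with open(path, "r") as f:
        raw = f.read()
    return eval(raw, {"__builtins__": {}}, {"Genotype": genotypes.Genotype})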
Example 2
parser.add_argument('--auxiliary',
                    action='store_true',
                    default=False,
                    help='use auxiliary tower')
parser.add_argument('--drop_path_prob',
                    type=float,
                    default=0,
                    help='drop path probability')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch',
                    type=str,
                    default='DARTS',
                    help='which architecture to use')
args = parser.parse_args()

args.save = os.path.join(utils.get_dir(),
                         os.path.split(args.model_path)[0], "test")
utils.create_exp_dir(args.save)

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')

fh = logging.FileHandler(os.path.join(args.save, 'testlog.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CLASSES = 1000
Example 3
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.rho, args.ewma)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    datapath = os.path.join(utils.get_dir(), args.data)
    train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    model.initialize_Z_and_U()

    loggers = {"train": {"loss": [], "acc": [], "step": []},
               "val": {"loss": [], "acc": [], "step": []},
               "infer": {"loss": [], "acc": [], "step": []},
               "ath": {"threshold": [], "step": []},
               "zuth": {"threshold": [], "step": []},
               "astep": [],
               "zustep": []}

    if args.constant_alpha_threshold < 0:
        alpha_threshold = args.init_alpha_threshold
    else:
        alpha_threshold = args.constant_alpha_threshold
    zu_threshold = args.init_zu_threshold
    alpha_counter = 0
    ewma = -1

    for epoch in range(args.epochs):
        valid_iter = iter(valid_queue)
        model.clear_U()

        scheduler.step()
        lr = scheduler.get_last_lr()[0]

        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(torch.clamp(model.alphas_normal, min=0.1, max=1.0))
        print(torch.clamp(model.alphas_reduce, min=0.1, max=1.0))

        # training
        train_acc, train_obj, alpha_threshold, zu_threshold, alpha_counter, ewma = train(train_queue, valid_iter, model,
                                                                                         architect, criterion,
                                                                                         optimizer, lr,
                                                                                         loggers, alpha_threshold,
                                                                                         zu_threshold, alpha_counter,
                                                                                         ewma,
                                                                                         args)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock)
        logging.info('valid_acc %f', valid_acc)

        utils.plot_loss_acc(loggers, args.save)

        # model.update_history()

        utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normalalpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reducealpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.FI_normal_history, path=os.path.join(args.save, 'normalFI'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.FI_reduce_history, path=os.path.join(args.save, 'reduceFI'),
                        steps=loggers["train"]["step"])

        scaled_FI_normal = scale(model.FI_normal_history, model.alphas_normal_history)
        scaled_FI_reduce = scale(model.FI_reduce_history, model.alphas_reduce_history)
        utils.save_file(recoder=scaled_FI_normal, path=os.path.join(args.save, 'normalFIscaled'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=scaled_FI_reduce, path=os.path.join(args.save, 'reduceFIscaled'),
                        steps=loggers["train"]["step"])

        utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save, "FI", loggers["ath"], loggers['astep'])
        utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history, args.save, "FI_ewma", loggers["ath"],
                      loggers['astep'])
        utils.plot_FI(model.FI_alpha_history_step, model.FI_alpha_history, args.save, "FI_alpha", loggers["zuth"],
                      loggers['zustep'])

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
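
This loop, like Examples 7, 10, 11, 12, and 14 below, calls scheduler.step() at the top of each epoch, the pre-1.1 PyTorch idiom; since PyTorch 1.1 the scheduler should be stepped after the epoch's optimizer steps, otherwise the first LR value is skipped and a warning is emitted. A self-contained sketch of the recommended ordering, with a toy model standing in for Network:

import torch

model = torch.nn.Linear(8, 2)                      # stand-in for Network
optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=10, eta_min=1e-3)

for epoch in range(10):
    print(epoch, scheduler.get_last_lr()[0])       # log the LR used this epoch
    for _ in range(5):                             # stand-in for one epoch of batches
        optimizer.zero_grad()
        loss = model(torch.randn(4, 8)).pow(2).mean()
        loss.backward()
        optimizer.step()
    scheduler.step()                               # step once, after training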
Example 4
parser.add_argument('--layers', type=int, default=8, help='total number of layers')
parser.add_argument('--model_path', type=str, default='EXP/model.pt', help='path of pretrained model')
parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.0, help='drop path probability')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch', type=str, default='BATH', help='which architecture to use')
parser.add_argument('--min_energy', type=float, default=0.1, help='minimum energy')
parser.add_argument('--max_energy', type=float, default=4.0, help='maximum energy')
parser.add_argument('--max_depth', type=float, default=40.0, help='maximum unnormalized depth')
parser.add_argument('--depth_normalization', type=float, default=0.1, help='depth normalization factor')

args = parser.parse_args()

args.save = os.path.join(utils.get_dir(), os.path.splitext(args.model_path)[0])  # strip the '.pt' suffix
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'testlog.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)


def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
Example 5
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.cuda.empty_cache()
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype_path = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genotype.txt')
    print(genotype_path)
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
            genotype = eval(geno_raw)
    else:
        genoname = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            genotype = eval("genotypes.ADMM")
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    datapath = os.path.join(utils.get_dir(), args.data)
    # The test dataset and loader are constructed per task below.

    if args.task == "CIFAR100cf":
        _, test_transform = utils._data_transforms_cifar100(args)

        test_data = utils.CIFAR100C2F(root=datapath, train=False, download=True, transform=test_transform)

        test_indices = test_data.filter_by_fine(args.test_filter)

        test_queue = torch.utils.data.DataLoader(
            torch.utils.data.Subset(test_data, test_indices), batch_size=args.batch_size,
            shuffle=False, pin_memory=True, num_workers=2)

        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter

    else:
        if args.task == "CIFAR100":
            _, test_transform = utils._data_transforms_cifar100(args)
            test_data = dset.CIFAR100(root=datapath, train=False, download=True, transform=test_transform)
        else:
            _, test_transform = utils._data_transforms_cifar10(args)
            test_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=test_transform)

        test_queue = torch.utils.data.DataLoader(
            test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
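
infer() is referenced but not defined in this excerpt. A plausible minimal version for this classification test script (an assumption; the repository's helper presumably tracks top-1/top-5 with utils.AvgrageMeter):

import torch

def infer(test_queue, model, criterion):
    # Evaluate in eval mode without gradients; returns (accuracy %, avg loss).
    model.eval()
    total_loss, correct, count = 0.0, 0, 0
    with torch.no_grad():
        for x, y in test_queue:
            x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)
            logits = model(x)
            if isinstance(logits, tuple):  # models with an auxiliary head
                logits = logits[0]
            total_loss += criterion(logits, y).item() * y.size(0)
            correct += (logits.argmax(dim=1) == y).sum().item()
            count += y.size(0)
    return 100.0 * correct / count, total_loss / count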
Example 6
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
parser.add_argument('--rho', type=float, default=1e-3, help='admm relative weight')
parser.add_argument('--admm_freq', type=int, default=10, help='admm update frequency (if not dynamically scheduled)')
parser.add_argument('--init_alpha_threshold', type=float, default=1.0, help='initial alpha threshold')
parser.add_argument('--init_zu_threshold', type=float, default=1.0, help='initial zu threshold')
parser.add_argument('--threshold_multiplier', type=float, default=1.1, help='threshold multiplier')
parser.add_argument('--threshold_divider', type=float, default=0.2, help='threshold divider')
parser.add_argument('--scheduled_zu', action='store_true', default=False, help='use dynamically scheduled z,u steps')
parser.add_argument('--constant_alpha_threshold', type=float, default=-1.0,
                    help='use constant threshold (-1 to use dynamic threshold)')
parser.add_argument('--ewma', type=float, default=1.0, help='weight for exp weighted moving average (1.0 for no ewma)')
args = parser.parse_args()

if len(args.save) == 0:
    args.save = os.path.join(utils.get_dir(),
                             'exp/admmsched-{}-{}'.format(os.getenv('SLURM_JOB_ID'), time.strftime("%Y%m%d-%H%M%S")))
else:
    args.save = os.path.join(utils.get_dir(), 'exp', args.save)
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10

Example 7
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    datapath = os.path.join(utils.get_dir(), args.data)
    train_data = dset.CIFAR10(root=datapath,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    loggers = {
        "train": {
            "loss": [],
            "acc": [],
            "step": []
        },
        "val": {
            "loss": [],
            "acc": [],
            "step": []
        },
        "infer": {
            "loss": [],
            "acc": [],
            "step": []
        },
        "ath": {
            "threshold": [],
            "step": []
        },
        "astep": [],
        "zustep": []
    }

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]

        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, loggers)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'Normalalpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'Reducealpha'),
                        steps=loggers["train"]["step"])

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    np.save(os.path.join(args.save, 'normal_weight.npy'),
            F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy())

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
    f.write(str(genotype))
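
The .npy files written above can be inspected offline. A small sketch that prints the strongest candidate operation per edge; PRIMITIVES is assumed to be the repository's list of operation names (as in the standard DARTS genotypes.py):

import numpy as np
from genotypes import PRIMITIVES  # assumed: candidate operation names

weights = np.load('normal_weight.npy')   # file saved by the script above
for edge, row in enumerate(weights):
    # Each row is the softmax over candidate operations for one edge.
    print(edge, PRIMITIVES[int(row.argmax())], float(row.max()))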
Example 8
                    help='CIFAR100cf fine classes to filter per coarse class in test')
parser.add_argument('--batch_size', type=int, default=96, help='batch size')
parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
parser.add_argument('--layers', type=int, default=20, help='total number of layers')
parser.add_argument('--model_path', type=str, default='EXP/model.pt', help='path of pretrained model')
parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use')
args = parser.parse_args()

args.save = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], "test")
utils.create_exp_dir(args.save)

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')

fh = logging.FileHandler(os.path.join(args.save, 'testlog.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

if args.task == "CIFAR100":
    CIFAR_CLASSES = 100
elif args.task == "CIFAR100cf":
    CIFAR_CLASSES = 20
else:
    CIFAR_CLASSES = 10
Example 9
parser.add_argument('--min_energy',
                    type=float,
                    default=0.1,
                    help='minimum energy')
parser.add_argument('--max_energy',
                    type=float,
                    default=4.0,
                    help='maximum energy')
parser.add_argument('--max_depth',
                    type=float,
                    default=40.0,
                    help='maximum unnormalized depth')

args = parser.parse_args()

if args.genotype_path is not None:
    args.save = os.path.join(
        utils.get_dir(), args.genotype_path,
        'batheval-{}-{}'.format(os.getenv('SLURM_JOB_ID'),
                                time.strftime("%Y%m%d-%H%M%S")))
else:
    args.save = os.path.join(
        utils.get_dir(),
        'exp/batheval-{}-{}'.format(os.getenv('SLURM_JOB_ID'),
                                    time.strftime("%Y%m%d-%H%M%S")))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log_eval.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)
Example 10
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype_path = os.path.join(utils.get_dir(), args.genotype_path,
                                 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
            genotype = eval(geno_raw)
    else:
        genotype = eval("genotypes.%s" % args.arch)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))

    model = Network(args.init_channels,
                    1,
                    args.layers,
                    args.auxiliary,
                    genotype,
                    input_channels=4)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # datapath = os.path.join(utils.get_dir(), args.data)
    # train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform)
    # valid_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=valid_transform)
    train_data = utils.BathymetryDataset(args,
                                         "../mixed_train.csv",
                                         to_filter=False)
    valid_data = utils.BathymetryDataset(args,
                                         "../mixed_validation.csv",
                                         to_filter=False)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)

    loggers = {
        "train": {
            "loss": [],
            "step": []
        },
        "val": {
            "loss": [],
            "step": []
        }
    }

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        _ = train(train_queue, model, criterion, optimizer, loggers["train"])

        infer_loss = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["val"], infer_loss, None, 1)

        utils.plot_loss_acc(loggers, args.save)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        if (epoch + 1) % 50 == 0:
            utils.save(
                model,
                os.path.join(args.save,
                             'checkpoint' + str(epoch) + 'weights.pt'))
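Example 11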
def main():
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu != -1:
        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed(args.seed)
        logging.info('gpu device = %d' % args.gpu)
    else:
        logging.info('using cpu')

    if args.dyno_schedule:
        args.threshold_divider = np.exp(-np.log(args.threshold_multiplier) *
                                        args.schedfreq)
        print(
            args.threshold_divider, -np.log(args.threshold_multiplier) /
            np.log(args.threshold_divider))
    if args.dyno_split:
        args.train_portion = 1 - 1 / (1 + args.schedfreq)

    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    if args.gpu != -1:
        criterion = criterion.cuda()
    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion,
                    args.rho,
                    args.crb,
                    args.epochs,
                    args.gpu,
                    ewma=args.ewma,
                    reg=args.reg)
    if args.gpu != -1:
        model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    datapath = os.path.join(utils.get_dir(), args.data)
    if args.task == "CIFAR100cf":
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
        train_data = utils.CIFAR100C2F(root=datapath,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
        num_train = len(train_data)
        indices = list(range(num_train))

        split = int(np.floor(args.train_portion * len(indices)))

        orig_num_train = len(indices[:split])
        orig_num_valid = len(indices[split:num_train])

        train_indices = train_data.filter_by_fine(args.train_filter,
                                                  indices[:split])
        valid_indices = train_data.filter_by_fine(args.valid_filter,
                                                  indices[split:num_train])

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=utils.FillingSubsetRandomSampler(train_indices,
                                                     orig_num_train,
                                                     reshuffle=True),
            pin_memory=True,
            num_workers=2)

        valid_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=utils.FillingSubsetRandomSampler(valid_indices,
                                                     orig_num_valid,
                                                     reshuffle=True),
            pin_memory=True,
            num_workers=2)
        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter
    elif args.task == "CIFAR100split":
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
        train_data = utils.CIFAR100C2F(root=datapath,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
        if not args.evensplit:
            train_indices, valid_indices = train_data.split(args.train_portion)
        else:
            num_train = len(train_data)
            indices = list(range(num_train))

            split = int(np.floor(args.train_portion * num_train))

            train_indices = indices[:split]
            valid_indices = indices[split:num_train]

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                train_indices),
            pin_memory=True,
            num_workers=2)

        valid_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                valid_indices),
            pin_memory=True,
            num_workers=2)
    else:
        if args.task == "CIFAR100":
            train_transform, valid_transform = utils._data_transforms_cifar100(
                args)
            train_data = dset.CIFAR100(root=datapath,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
        else:
            train_transform, valid_transform = utils._data_transforms_cifar10(
                args)
            train_data = dset.CIFAR10(root=datapath,
                                      train=True,
                                      download=True,
                                      transform=train_transform)
        num_train = len(train_data)
        indices = list(range(num_train))

        split = int(np.floor(args.train_portion * num_train))

        train_indices = indices[:split]
        valid_indices = indices[split:num_train]

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                train_indices),
            pin_memory=True,
            num_workers=4)

        valid_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                valid_indices),
            pin_memory=True,
            num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    loggers = {
        "train": {
            "loss": [],
            "acc": [],
            "step": []
        },
        "val": {
            "loss": [],
            "acc": [],
            "step": []
        },
        "infer": {
            "loss": [],
            "acc": [],
            "step": []
        },
        "ath": {
            "threshold": [],
            "step": []
        },
        "astep": [],
        "zustep": []
    }

    alpha_threshold = args.init_alpha_threshold
    alpha_counter = 0
    ewma = -1

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]

        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        if args.ckpt_interval > 0 and epoch > 0 and epoch % args.ckpt_interval == 0:
            logging.info('checkpointing genotype')
            # makedirs(..., exist_ok=True) also creates the 'genotypes'
            # parent directory, which os.mkdir would fail on.
            os.makedirs(os.path.join(args.save, 'genotypes', str(epoch)),
                        exist_ok=True)
            with open(
                    os.path.join(args.save, 'genotypes', str(epoch),
                                 'genotype.txt'), "w") as f:
                f.write(str(genotype))

        print(model.activate(model.alphas_normal))
        print(model.activate(model.alphas_reduce))

        # training
        train_acc, train_obj, alpha_threshold, alpha_counter, ewma = train(
            train_queue, valid_queue, model, architect, criterion, optimizer,
            loggers, alpha_threshold, alpha_counter, ewma, args)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock)
        logging.info('valid_acc %f', valid_acc)

        utils.plot_loss_acc(loggers, args.save)

        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'Normalalpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'Reducealpha'),
                        steps=loggers["train"]["step"])

        utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save,
                      "FI", loggers["ath"], loggers['astep'])
        utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history,
                      args.save, "FI_ewma", loggers["ath"], loggers['astep'])

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
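
The dyno_schedule branch above sets threshold_divider = exp(-log(threshold_multiplier) * schedfreq), i.e. divider = multiplier**(-schedfreq), so schedfreq threshold raises are cancelled by a single lowering. A quick numerical check with hypothetical values:

import numpy as np

multiplier, schedfreq = 1.1, 4.0                  # hypothetical values
divider = np.exp(-np.log(multiplier) * schedfreq)
print(multiplier ** schedfreq * divider)          # ~1.0: raises cancel one lowering
print(-np.log(multiplier) / np.log(divider))      # 1/schedfreq, the ratio printed above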
Example 12
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype_path = os.path.join(utils.get_dir(), args.genotype_path,
                                 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
            genotype = eval(geno_raw)
    else:
        genotype = eval("genotypes.%s" % args.arch)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    datapath = os.path.join(utils.get_dir(), args.data)

    if args.task == "CIFAR100cf":
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)

        train_data = utils.CIFAR100C2F(root=datapath,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
        valid_data = utils.CIFAR100C2F(root=datapath,
                                       train=False,
                                       download=True,
                                       transform=valid_transform)

        train_indices = train_data.filter_by_fine(args.eval_filter)
        valid_indices = valid_data.filter_by_fine(args.eval_filter)

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                train_indices),
            pin_memory=True,
            num_workers=2)

        valid_queue = torch.utils.data.DataLoader(torch.utils.data.Subset(
            valid_data, valid_indices),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=2)

        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter

    else:
        if args.task == "CIFAR100":
            train_transform, valid_transform = utils._data_transforms_cifar100(
                args)
            train_data = dset.CIFAR100(root=datapath,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
            valid_data = dset.CIFAR100(root=datapath,
                                       train=False,
                                       download=True,
                                       transform=valid_transform)
        else:
            train_transform, valid_transform = utils._data_transforms_cifar10(
                args)
            train_data = dset.CIFAR10(root=datapath,
                                      train=True,
                                      download=True,
                                      transform=train_transform)
            valid_data = dset.CIFAR10(root=datapath,
                                      train=False,
                                      download=True,
                                      transform=valid_transform)

        train_queue = torch.utils.data.DataLoader(train_data,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  pin_memory=True,
                                                  num_workers=2)

        valid_queue = torch.utils.data.DataLoader(valid_data,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    if args.test:
        torch.cuda.empty_cache()
        os.system(
            'python src/test.py --batch_size 8 --auxiliary --model_path %s --task %s --test_filter %s'
            % (os.path.join(args.save,
                            'weights.pt'), args.task, args.test_filter))
Example 13
parser.add_argument('--arch',
                    type=str,
                    default='DARTS',
                    help='which architecture to use')
parser.add_argument('--grad_clip',
                    type=float,
                    default=5,
                    help='gradient clipping')
args = parser.parse_args()

if len(args.save) == 0:
    args.save = 'eval-{}-{}'.format(os.getenv('SLURM_JOB_ID'),
                                    time.strftime("%Y%m%d-%H%M%S"))

if args.genotype_path is not None:
    if "exp" not in args.genotype_path:
        args.genotype_path = os.path.join('exp', args.genotype_path)
    args.save = os.path.join(utils.get_dir(), args.genotype_path, args.save)
else:
    # args.genotype_path is None in this branch, so only resolve args.save;
    # joining 'exp' with None would raise a TypeError.
    args.save = os.path.join(utils.get_dir(), args.save)
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

if args.task == "CIFAR100":
Example 14
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype_path = os.path.join(utils.get_dir(), args.genotype_path, 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
            genotype = eval(geno_raw)
    else:
        genotype = eval("genotypes.%s" % args.arch)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))

    model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype)
    if args.parallel:
        model = nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    traindir = os.path.join(utils.get_dir(), args.data, 'train')
    validdir = os.path.join(utils.get_dir(), args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_data = dset.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
                hue=0.2),
            transforms.ToTensor(),
            normalize,
        ]))
    valid_data = dset.ImageFolder(
        validdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)

    best_acc_top1 = 0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion_smooth, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc_top1 %f', valid_acc_top1)
        logging.info('valid_acc_top5 %f', valid_acc_top5)

        is_best = False
        if valid_acc_top1 > best_acc_top1:
            best_acc_top1 = valid_acc_top1
            is_best = True

        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc_top1': best_acc_top1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save)

    # Evaluate on the held-out test set once training has finished.
    if args.test:
        torch.cuda.empty_cache()
        os.system(
            'python src/test_imagenet.py --batch_size 8 --auxiliary --model_path %s ' %
            os.path.join(args.save, 'weights.pt'))
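
CrossEntropyLabelSmooth is used above but not defined in this excerpt. A common definition (an assumption; the repository's version may differ) smooths the one-hot targets by epsilon before taking the cross entropy:

import torch
import torch.nn as nn

class CrossEntropyLabelSmooth(nn.Module):
    def __init__(self, num_classes, epsilon):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        # Build one-hot targets, then mix in epsilon of uniform mass.
        one_hot = torch.zeros_like(log_probs).scatter_(
            1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        return (-smoothed * log_probs).mean(0).sum()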
Example 15
parser.add_argument('--arch_weight_decay',
                    type=float,
                    default=1e-3,
                    help='weight decay for arch encoding')
parser.add_argument('--rho',
                    type=float,
                    default=1e-3,
                    help='admm relative weight')
parser.add_argument('--admm_freq',
                    type=int,
                    default=10,
                    help='admm update frequency')
args = parser.parse_args()

args.save = os.path.join(
    utils.get_dir(), 'exp/admm-{}-{}'.format(os.getenv('SLURM_JOB_ID'),
                                             time.strftime("%Y%m%d-%H%M%S")))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10