Code example #1
def dist_main(size, genotype):
    """Train a small NetworkCIFAR model on CIFAR-10 across multiple GPUs.

    Args:
        size: Number of GPU devices to spread ``DataParallel`` over
            (devices 0..size-1 are used).
        genotype: JSON-encoded architecture genotype string, decoded
            and passed straight to ``NetworkCIFAR``.

    Returns:
        The best validation accuracy observed over the 50 training epochs.

    Side effects:
        Downloads CIFAR-10 into ``data/`` if absent and overwrites
        ``weights.pt`` with the latest model weights every epoch.
    """
    genotype = json.loads(genotype)

    # cuDNN autotuning: input sizes are fixed, so benchmarking kernels pays off.
    cudnn.benchmark = True
    cudnn.enabled = True

    model = NetworkCIFAR(
        C=8,  # reduced channel width (full evaluation would use 36) to keep this run cheap
        num_classes=10,
        layers=8,
        auxiliary=False,
        genotype=genotype,
    )
    model = torch.nn.parallel.DataParallel(model, device_ids=list(range(size))).cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=0.01,
        momentum=0.9,
        weight_decay=3e-4,
    )

    train_transform, valid_transform = utils._data_transforms_cifar10(cutout=False, cutout_length=16)
    train_data = dset.CIFAR10(root="data/", train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root="data/", train=False, download=True, transform=valid_transform)

    # NOTE(review): the training loader does not shuffle — this looks like a
    # leftover from a removed DistributedSampler setup; confirm intent.
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=64, shuffle=False, pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=64, shuffle=False, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 50)
    max_acc = 0

    for epoch in range(50):
        print('epoch {}'.format(epoch))
        # Drop-path regularization ramps linearly from 0 to ~0.2 over training.
        drop_path_prob = 0.2 * epoch / 50

        train_acc, train_obj = train(train_queue, model, criterion, optimizer, drop_path_prob)
        print('train_acc {}'.format(train_acc))

        valid_acc, valid_obj = infer(valid_queue, model, criterion, drop_path_prob)
        print('valid_acc {}'.format(valid_acc))

        if valid_acc > max_acc:
            max_acc = valid_acc

        # Step the LR scheduler AFTER this epoch's optimizer updates
        # (PyTorch >= 1.1 convention; stepping first skips the initial LR
        # and shifts the whole cosine schedule by one epoch).
        scheduler.step()

        utils.save(model, 'weights.pt')

    return max_acc
Code example #2
File: train_eval.py  Project: JamesTuna/ArchSearch
def main(genotype, index):
    """Train a full-size NetworkCIFAR model on CIFAR-10 on a single GPU.

    Args:
        genotype: JSON-encoded architecture genotype string, decoded
            and passed straight to ``NetworkCIFAR``.
        index: CUDA device index to run on (``torch.cuda.set_device``).

    Returns:
        The best validation accuracy observed over the 50 training epochs.

    Side effects:
        Downloads CIFAR-10 into ``data/`` if absent and overwrites
        ``weights.pt`` with the latest model weights every epoch.
    """
    genotype = json.loads(genotype)
    torch.cuda.set_device(index)

    # cuDNN autotuning: input sizes are fixed, so benchmarking kernels pays off.
    cudnn.benchmark = True
    cudnn.enabled = True

    model = NetworkCIFAR(
        C=36,
        num_classes=10,
        layers=20,
        auxiliary=False,
        genotype=genotype,
    )
    model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=0.025,
        momentum=0.9,
        weight_decay=3e-4,
    )

    train_transform, valid_transform = utils._data_transforms_cifar10(
        cutout=False, cutout_length=16)
    train_data = dset.CIFAR10(root="data/",
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root="data/",
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=32,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=32,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 50)
    max_acc = 0

    for epoch in range(50):
        print('epoch {} lr {}'.format(epoch, scheduler.get_lr()[0]))
        # Drop-path regularization ramps linearly from 0 to ~0.2 over training.
        drop_path_prob = 0.2 * epoch / 50

        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     drop_path_prob)
        print('train_acc {}'.format(train_acc))

        valid_acc, valid_obj = infer(valid_queue, model, criterion,
                                     drop_path_prob)
        print('valid_acc {}'.format(valid_acc))

        if valid_acc > max_acc:
            max_acc = valid_acc

        # Step the LR scheduler AFTER this epoch's optimizer updates
        # (PyTorch >= 1.1 convention; stepping first skips the initial LR
        # and shifts the whole cosine schedule by one epoch).
        scheduler.step()

        utils.save(model, 'weights.pt')

    return max_acc