Example #1
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, val_dat, test_dat = utils.get_data(
        config.dataset,
        config.data_path,
        cutout_length=0,
        validation=True,
        validation2=True,
        img_resize=config.img_resize)
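    # this get_data variant appears to return separate validation and test sets
    # (validation2=True) and to resize inputs via img_resize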

    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                device_ids=config.gpus)
    # comment out the following line when exporting the ONNX graph
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # balanced train/validation split; debug-print the dataset
    print(train_data)

    # split data to train/validation
    n_train = len(train_data) // int(config.data_train_proportion)
    n_val = len(val_dat)
    n_test = len(test_dat)
    split = n_train // 2
    indices1 = list(range(n_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(val_dat,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_dat,
                                              batch_size=config.batch_size,
                                              sampler=test_sampler,
                                              num_workers=config.workers,
                                              pin_memory=True)

    #load
    if config.load:
        model, config.epochs, w_optim, alpha_optim, net_crit = utils.load_checkpoint(
            model, config.epochs, w_optim, alpha_optim, net_crit,
            '/content/MyDarts/searchs/custom/checkpoint.pth.tar')
    # uncomment the following block to export an ONNX graph
    """
    dummy_input = Variable(torch.randn(1, 3, 64, 64))
    torch.onnx.export(model, dummy_input, "rsdarts.onnx", verbose=True)
    input_np = np.random.uniform(0, 1, (1, 3, 64, 64))
    input_var = Variable(torch.FloatTensor(input_np))
    from pytorch2keras.converter import pytorch_to_keras
    # we should specify shape of the input tensor
    output = model(input_var)
    k_model = pytorch_to_keras(model, input_var, (3, 64, 64,), verbose=True)

    max_error = 0
    error = check_error(output, k_model, input_np)
    if max_error < error:
        max_error = error

    print('Max error: {0}'.format(max_error))
    raise SystemExit  # stop after exporting the graph; the rest of main() is not needed
    """
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    #model  = torch.load('/content/pt.darts/searchs/custom/checkpoint.pth.tar')

    #print("Loaded!")
    # training loop
    best_top1 = 0.
    best_top_overall = -999
    config.epochs = 300  # BUG: config.epochs has some issue, so it is hard-coded here as a workaround
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        print("###################TRAINING#########################")
        # training
        # sample a random-search architecture for this epoch
        arch = sample_arch(model)
        #import pickle
        #arch = pickle.load( open( "best_arch.p", "rb" ) )
        train(train_loader, valid_loader, model, arch, w_optim, alpha_optim,
              lr, epoch)
        print("###################END TRAINING#########################")

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        print("###################VALID#########################")
        top1, top_overall, _, _ = validate(valid_loader,
                                           model,
                                           arch,
                                           epoch,
                                           cur_step,
                                           overall=True)
        print("###################END VALID#########################")

        # test
        print("###################TEST#########################")
        _, _, preds, targets = validate(test_loader,
                                        model,
                                        arch,
                                        epoch,
                                        cur_step,
                                        overall=True,
                                        debug=True)
        s = [preds, targets]
        import pickle
        pickle.dump(s, open("predictions_" + str(epoch + 1) + ".p", "wb"))
        #print("predictions: ",preds)
        #print("targets:",targets)
        print("###################END TEST#########################")

        # log
        # genotype
        #print("Model Alpha:",model.alpha_normal)
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        print("Genotype normal:", genotype.normal)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            best_arch = arch
            is_best = True
            import pickle
            pickle.dump(best_arch, open("best_arch.p", "wb"))
            print('best_arch:', best_arch)
            print("saved!")
        else:
            is_best = False
        # save best overall (macro avg of F1, precision and recall)
        if (best_top_overall < top_overall):
            best_top_overall = top_overall
            best_genotype_overall = genotype
            is_best_overall = True
        else:
            is_best_overall = False

        utils.save_checkpoint(model, epoch, w_optim, alpha_optim, net_crit,
                              config.path, is_best, is_best_overall)

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
    logger.info("Best Genotype Overall = {}".format(best_genotype_overall))
Example #2
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    dahai_train_dataset = utils.MyDataset(data_dir=TRAIN_DATA_PATH, )
    dahai_dev_dataset = utils.MyDataset(data_dir=DEV_DAHAI_DATA_PATH, )
    # zhikang_test_dataset = utils.MyDataset(window_size=WINDOW_SIZE,
    #                                     window_step=WINDOW_STEP_DEV,
    #                                     data_path=TEST_ZHIKANG_DATA_PATH,
    #                                     voice_embed_path=TEST_ZHIKANG_VOICE_EMBEDDING_PATH,
    #                                     w2i=w2i,
    #                                     sent_max_len=SENT_MAX_LEN,
    #                                     )

    train_data = utils.DataProvider(batch_size=config.batch_size,
                                    dataset=dahai_train_dataset,
                                    is_cuda=config.is_cuda)
    dev_data = utils.DataProvider(batch_size=config.batch_size,
                                  dataset=dahai_dev_dataset,
                                  is_cuda=config.is_cuda)
    # test_data = utils.DataProvider(batch_size=config.batch_size, dataset=zhikang_test_dataset, is_cuda=config.is_cuda)

    print("train data nums:", len(train_data.dataset), "dev data nums:",
          len(dev_data.dataset))

    net_crit = nn.CrossEntropyLoss(reduction="none").to(device)
    model = SearchCNNController(config.embedding_dim,
                                config.init_channels,
                                config.n_classes,
                                config.layers,
                                net_crit,
                                config=config,
                                n_nodes=config.n_nodes,
                                device_ids=config.gpus)
    model = model.to(device).float()

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # cosine annealing schedule for the weight learning rate
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_acc = 0.
    best_genotype = model.genotype()
    while True:
        epoch = train_data.epoch
        if epoch > config.epochs - 1:
            break
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_data, dev_data, epoch, model, architect, w_optim,
              alpha_optim, lr)

        # validation
        cur_step = train_data.iteration
        valid_acc = validate(dev_data, model, epoch, cur_step)

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_acc < valid_acc:
            best_acc = valid_acc
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_acc))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #3
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                device_ids=config.gpus)
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=False)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=False)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_top1 = -1.0
    best_epoch = 0
    ################################ restore from last time #############################################
    epoch_restore = config.epoch_restore
    if config.restore:
        utils.load_state_dict(model,
                              config.path,
                              extra='model',
                              parallel=(len(config.gpus) > 1))
        if not config.model_only:
            utils.load_state_dict(w_optim,
                                  config.path,
                                  extra='w_optim',
                                  parallel=False)
            utils.load_state_dict(alpha_optim,
                                  config.path,
                                  extra='alpha_optim',
                                  parallel=False)
            utils.load_state_dict(lr_scheduler,
                                  config.path,
                                  extra='lr_scheduler',
                                  parallel=False)
            utils.load_state_dict(epoch_restore,
                                  config.path,
                                  extra='epoch_restore',
                                  parallel=False)
    #####################################################################################################
    for epoch in range(epoch_restore, config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, valid_loader, model, architect, w_optim,
              alpha_optim, lr, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)
        # top1 = 0.0

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
            best_epoch = epoch + 1
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        ######################################## save all state ###################################################
        utils.save_state_dict(model,
                              config.path,
                              extra='model',
                              is_best=is_best,
                              parallel=(len(config.gpus) > 1),
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        utils.save_state_dict(lr_scheduler,
                              config.path,
                              extra='lr_scheduler',
                              is_best=is_best,
                              parallel=False,
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        utils.save_state_dict(alpha_optim,
                              config.path,
                              extra='alpha_optim',
                              is_best=is_best,
                              parallel=False,
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        utils.save_state_dict(w_optim,
                              config.path,
                              extra='w_optim',
                              is_best=is_best,
                              parallel=False,
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        ############################################################################################################
        print("")
    logger.info("Best Genotype at {} epch.".format(best_epoch))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #4
def main():
    logger.info("Logger is set - training start")

    torch.cuda.set_device(config.gpus[0])

    # seed setting
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta information
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    # set model
    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                n_nodes=config.nodes,
                                device_ids=config.gpus)
    model = model.to(device)

    # weight optim
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)

    # alpha optim
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data (train,validation)
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)

    arch = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop-----------------------------------------------------------------------------
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        #training
        train(train_loader, valid_loader, model, arch, w_optim, alpha_optim,
              lr, epoch)

        #validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)

        #log
        #genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # log alpha values per epoch to TensorBoard (assumes the standard 8-op primitive ordering)
        for i, tensor in enumerate(model.alpha_normal):
            for j, lsn in enumerate(F.softmax(tensor, dim=-1)):
                tb_writer.add_scalars(
                    'epoch_alpha_normal/%d ~~ %d' % ((j - 2), i), {
                        'max_pl3': lsn[0],
                        'avg_pl3': lsn[1],
                        'skip_cn': lsn[2],
                        'sep_conv3': lsn[3],
                        'sep_conv5': lsn[4],
                        'dil_conv3': lsn[5],
                        'dil_conv5': lsn[6],
                        'none': lsn[7]
                    }, epoch)
        for i, tensor in enumerate(model.alpha_reduce):
            for j, lsr in enumerate(F.softmax(tensor, dim=-1)):
                tb_writer.add_scalars(
                    'epoch_alpha_reduce/%d ~~ %d' % ((j - 2), i), {
                        'max_pl3': lsr[0],
                        'avg_pl3': lsr[1],
                        'skip_cn': lsr[2],
                        'sep_conv3': lsr[3],
                        'sep_conv5': lsr[4],
                        'dil_conv3': lsr[5],
                        'dil_conv5': lsr[6],
                        'none': lsr[7]
                    }, epoch)

        #save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype is = {}".format(best_genotype))
Example #5
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    net_crit = nn.CrossEntropyLoss().to(device)

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
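    # two search models are built from different seeds and trained jointly below,
    # each with its own weight/alpha optimizers and LR scheduler, sharing one Architect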
    model_1 = SearchCNNController(input_channels,
                                  config.init_channels,
                                  n_classes,
                                  config.layers,
                                  net_crit,
                                  device_ids=config.gpus)

    torch.manual_seed(config.seed + 1)
    torch.cuda.manual_seed_all(config.seed + 1)
    model_2 = SearchCNNController(input_channels,
                                  config.init_channels,
                                  n_classes,
                                  config.layers,
                                  net_crit,
                                  device_ids=config.gpus)

    torch.backends.cudnn.benchmark = True

    model_1 = model_1.to(device)
    model_2 = model_2.to(device)

    # weights optimizer
    w_optim_1 = torch.optim.SGD(model_1.weights(),
                                config.w_lr,
                                momentum=config.w_momentum,
                                weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim_1 = torch.optim.Adam(model_1.alphas(),
                                     config.alpha_lr,
                                     betas=(0.5, 0.999),
                                     weight_decay=config.alpha_weight_decay)

    # weights optimizer
    w_optim_2 = torch.optim.SGD(model_2.weights(),
                                config.w_lr,
                                momentum=config.w_momentum,
                                weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim_2 = torch.optim.Adam(model_2.alphas(),
                                     config.alpha_lr,
                                     betas=(0.5, 0.999),
                                     weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler_1 = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim_1, config.epochs, eta_min=config.w_lr_min)
    lr_scheduler_2 = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim_2, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model_1, model_2, config.w_momentum,
                          config.w_weight_decay)

    # training loop
    best_top1_1 = 0.
    best_top1_2 = 0.
    for epoch in range(config.epochs):
        lr_scheduler_1.step()
        lr_1 = lr_scheduler_1.get_lr()[0]
        lr_scheduler_2.step()
        lr_2 = lr_scheduler_2.get_lr()[0]

        model_1.print_alphas(logger)
        model_2.print_alphas(logger)

        # training
        train(train_loader, valid_loader, model_1, model_2, architect,
              w_optim_1, w_optim_2, alpha_optim_1, alpha_optim_2, lr_1, lr_2,
              epoch, config.lmbda)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1_1, top1_2 = validate(valid_loader, model_1, model_2, epoch,
                                  cur_step)

        # log
        # genotype
        genotype_1 = model_1.genotype()
        genotype_2 = model_2.genotype()
        logger.info("genotype_1 = {}".format(genotype_1))
        logger.info("genotype_2 = {}".format(genotype_2))

        # genotype as a image
        # plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch+1))
        # caption = "Epoch {}".format(epoch+1)
        # plot(genotype_1.normal, plot_path + "-normal", caption)
        # plot(genotype_1.reduce, plot_path + "-reduce", caption)
        # plot(genotype_2.normal, plot_path + "-normal", caption)
        # plot(genotype_2.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1_1 < top1_1:
            best_top1_1 = top1_1
            best_genotype_1 = genotype_1
            is_best_1 = True
        else:
            is_best_1 = False

        if best_top1_2 < top1_2:
            best_top1_2 = top1_2
            best_genotype_2 = genotype_2
            is_best_2 = True
        else:
            is_best_2 = False

        utils.save_checkpoint(model_1, config.path, 1, is_best_1)
        utils.save_checkpoint(model_2, config.path, 2, is_best_2)
        print("")

    logger.info("Final best Prec@1_1 = {:.4%}".format(best_top1_1))
    logger.info("Best Genotype_1 = {}".format(best_genotype_1))
    logger.info("Final best Prec@1_2 = {:.4%}".format(best_top1_2))
    logger.info("Best Genotype_2 = {}".format(best_genotype_2))
Example #6
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    if config.ops_set == 2:
        gt.PRIMITIVES = gt.PRIMITIVES2

    if config.ops_set == 3:
        gt.PRIMITIVES = gt.PRIMITIVES_NO_SKIP

    if config.smart_sample:
        gt.smart_sample = True

    """ Initialize the distributed environment. """
    if config.multi_avg_size > 0:
        init_processes(config.multi_avg_rank, config.multi_avg_size, backend='Gloo')

    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(input_channels, config.init_channels, n_classes, config.layers,
                                net_crit, n_nodes = config.n_nodes, device_ids=config.gpus, proxyless = config.proxyless)
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(), config.w_lr, momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(), config.alpha_lr, betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay, config.hv_type)

    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, valid_loader, model, architect, w_optim, alpha_optim, lr, epoch)

        # validation
        cur_step = (epoch+1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch+1))
        caption = "Epoch {}".format(epoch+1)
        #plot(genotype.normal, plot_path + "-normal", caption)
        #plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #7
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    # torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    # torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False
    )

    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(
        input_channels,
        config.init_channels,
        n_classes,
        config.layers,
        net_crit,
        device_ids=config.gpus,
        imagenet_mode=config.dataset.lower() in utils.LARGE_DATASETS,
    )
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(
        model.weights(),
        config.w_lr,
        momentum=config.w_momentum,
        weight_decay=config.w_weight_decay,
    )
    # alphas optimizer
    alpha_optim = torch.optim.Adam(
        model.alphas(),
        config.alpha_lr,
        betas=(0.5, 0.999),
        weight_decay=config.alpha_weight_decay,
    )

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    random.shuffle(indices)
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.batch_size,
        sampler=train_sampler,
        num_workers=config.workers,
        pin_memory=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.batch_size,
        sampler=valid_sampler,
        num_workers=config.workers,
        pin_memory=True,
    )
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min
    )
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_top1 = 0.0
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(
            train_loader, valid_loader, model, architect, w_optim, alpha_optim, lr, epoch
        )

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    # restrict skip connections: keep at most 2 skip-connect edges (op index 2) in the normal cell
    count = 0
    indices = []
    for i in range(4):
        _, primitive_indices = torch.topk(model.alpha_normal[i][:, :], 1)
        for j in range(2 + i):
            if primitive_indices[j].item() == 2:
                count = count + 1
                indices.append((i, j))

    while count > 2:
        alpha_min, indice_min = model.alpha_normal[indices[0][0]][indices[0][1], 2], 0
        for i in range(1, count):
            alpha_c = model.alpha_normal[indices[i][0]][indices[i][1], 2]
            if alpha_c < alpha_min:
                alpha_min, indice_min = alpha_c, i
        model.alpha_normal[indices[indice_min][0]][indices[indice_min][1], 2] = 0
        indices.pop(indice_min)
        print(indices)
        count = count - 1

    best_genotype = model.genotype()

    with open(config.path + "/best_genotype.txt", "w") as f:
        f.write(str(best_genotype))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #8
def main():
    logger.info("Logger is set - training start")
    fileRoot = r'/home/hlu/Data/VIPL'
    saveRoot = r'/home/hlu/Data/VIPL_STMap' + str(config.fold_num) + str(
        config.fold_index)
    n_classes = 1
    input_channels = 3
    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    toTensor = transforms.ToTensor()
    resize = transforms.Resize(size=(64, 300))
    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True  # speed up the network (cuDNN autotuner)

    if config.reData == 1:
        test_index, train_index = MyDataset.CrossValidation(
            fileRoot, fold_num=config.fold_num, fold_index=config.fold_index)
    train_data = MyDataset.Data_STMap(root_dir=(saveRoot + '_Train'),
                                      frames_num=300,
                                      transform=transforms.Compose(
                                          [resize, toTensor, normalize]))
    net_crit = nn.L1Loss().to(device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                device_ids=config.gpus)
    model._init_weight()
    model = model.to(device)
    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # w_optim = torch.optim.Adam(model.weights(), config.w_lr)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)
    # training loop
    best_losses = 100
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]
        model.print_alphas(logger)
        # training
        train(train_loader, valid_loader, model, architect, w_optim,
              alpha_optim, lr, epoch)
        # validation
        cur_step = (epoch + 1) * len(train_loader)
        losses = validate(valid_loader, model, epoch, cur_step)
        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # save
        if losses < best_losses:
            best_losses = losses
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Best Genotype = {}".format(best_genotype))
Example #9
async def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    net_crit = nn.CrossEntropyLoss().to(default_device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                device_ids=config.gpus)
    model = model.to(default_device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
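    # the loops below send train/valid batches round-robin to remote workers;
    # "workers" and "remote_*_data" appear to be PySyft-style objects defined elsewhere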

    for idx, (data, target) in enumerate(train_loader):
        wid = idx % len(workers)
        data = data.send(workers[wid])
        target = target.send(workers[wid])
        remote_train_data[wid].append((data, target))

    for idx, (data, target) in enumerate(valid_loader):
        wid = idx % len(workers)
        data = data.send(workers[wid])
        target = target.send(workers[wid])
        remote_valid_data[wid].append((data, target))

    print("finish sampler")

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        await train(train_loader, valid_loader, model, architect, w_optim,
                    alpha_optim, lr, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #10
def main(config, writer, logger):
    logger.info("Logger is set - training start")

    # set seed
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
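    # note: cudnn.benchmark=True lets cuDNN autotune algorithms, which can partly
    # undo the determinism requested by cudnn.deterministic=True above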

    # get data with meta info
    input_size, input_channels, n_classes, train_data, test_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=config.cutout_length, validation=True)

    net_crit = nn.CrossEntropyLoss().cuda()
    model = SearchCNNController(input_channels, config.init_channels, n_classes, config.n_layers, net_crit,
                                n_nodes=config.n_nodes, stem_multiplier=config.stem_multiplier,
                                bn_momentum=config.bn_momentum)
    model.cuda()

    # weights optimizer
    w_optim = optim.SGD(model.weights(), config.w_lr, momentum=config.w_momentum,
                        weight_decay=config.w_weight_decay)

    if not config.search_all_alpha:
        # alphas optimizer
        alpha_optim = optim.Adam(model.alphas(), config.alpha_lr, betas=(0.5, 0.999),
                                 weight_decay=config.alpha_weight_decay)
        # split data to train/validation
        n_train = len(train_data)
        indices = list(range(n_train))
        split = n_train // 2
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
        valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:])
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=config.batch_size,
                                                   sampler=train_sampler,
                                                   num_workers=config.workers,
                                                   pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=config.batch_size,
                                                   sampler=valid_sampler,
                                                   num_workers=config.workers,
                                                   pin_memory=True)
    else:
        alpha_optim = SubgraphSearchOptimizer(logger, config, model, w_optim)

        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=config.batch_size,
                                                   num_workers=config.workers,
                                                   pin_memory=True,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(test_data,
                                                   batch_size=config.batch_size,
                                                   num_workers=config.workers,
                                                   pin_memory=True,
                                                   shuffle=True)

    if config.w_lr_scheduler == "cosine":
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(w_optim, T_max=config.epochs, eta_min=config.w_lr_min)
    elif config.w_lr_scheduler == "plateau":
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(w_optim, mode="max", patience=config.w_lr_patience,
                                                            factor=config.w_lr_factor, verbose=True)
    else:
        raise NotImplementedError
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_top1 = 0.
    best_genotype = None
    final_result_reported = False
    for epoch in range(config.epochs):

        if config.cutoff_epochs is not None and epoch >= config.cutoff_epochs:
            logger.info("Cutoff epochs detected, exiting.")
            break

        lr = w_optim.param_groups[0]["lr"]
        logger.info("Current learning rate: {}".format(lr))

        if lr < config.w_lr_min:
            logger.info("Learning rate is less than {}, exiting.".format(config.w_lr_min))
            break

        if not config.search_all_alpha:
            model.print_alphas(logger)
            valid_loader_for_training = valid_loader
        else:
            # make dummy input
            valid_loader_for_training = itertools.cycle([(torch.tensor(1), torch.tensor(1))])

        # training
        train(config, writer, logger, train_loader, valid_loader_for_training,
              model, architect, w_optim, alpha_optim, lr, epoch, valid_loader)

        if config.w_lr_scheduler == "cosine":
            lr_scheduler.step()

        # validation
        if config.validate_epochs == 0 or (epoch + 1) % config.validate_epochs != 0:
            logger.info("Valid: Skipping validation for epoch {}".format(epoch + 1))
            continue

        cur_step = (epoch + 1) * len(train_loader)
        if config.search_all_alpha:
            top1 = validate_all(config, writer, logger, valid_loader, model, epoch, cur_step, alpha_optim)
            if best_top1 < top1:
                best_top1 = top1

            # checkpoint saving is not supported yet
        else:
            top1 = validate(config, writer, logger, valid_loader, model, epoch, cur_step)

            # log
            # genotype
            genotype = model.genotype()
            logger.info("genotype = {}".format(genotype))

            # genotype as a image
            plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch + 1))
            caption = "Epoch {}".format(epoch + 1)
            plot(genotype.normal, plot_path + "-normal", caption)

            # save
            if best_top1 < top1:
                best_top1 = top1
                best_genotype = genotype
                is_best = True
            else:
                is_best = False
            utils.save_checkpoint(model, config.path, is_best)

        if config.nni:
            nni_tools.report_result(top1, epoch + 1 == config.epochs)
            if epoch + 1 == config.epochs:
                final_result_reported = True

        if config.w_lr_scheduler == "plateau":
            lr_scheduler.step(top1)
        print("")

    if config.nni and not final_result_reported:
        try:
            nni_tools.report_result(top1, True)
        except Exception:
            logger.warning("Final result not reported and top1 not found")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    if best_genotype is not None:
        logger.info("Best Genotype = {}".format(best_genotype))
    print("")