Example #1
def train(model, early_stopping):
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # data_iter = data_loader.get_loader(batch_size=args.batch_size)
    data_iter = data_loader.get_train_loader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        model.train()

        run_loss = 0.0

        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer, epoch)

            run_loss += ret['loss'].item()

            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(
                epoch, (idx + 1) * 100.0 / len(data_iter),
                run_loss / (idx + 1.0)))

        test_data_iter = data_loader.get_test_loader(
            batch_size=args.batch_size)
        valid_loss = evaluate(model, test_data_iter)

        # early stop
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break
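
Example #1 expects an early_stopping callable with an early_stop flag; that helper is not shown on this page. Below is a minimal sketch of such a helper in the style of the common patience-based recipe; the class name, constructor arguments and checkpoint path are assumptions, not the repository's actual implementation.

# Minimal patience-based early-stopping helper (illustrative sketch, not the original code).
import torch


class EarlyStopping:
    def __init__(self, patience=7, checkpoint_path='checkpoint.pth'):
        self.patience = patience
        self.checkpoint_path = checkpoint_path
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False

    def __call__(self, valid_loss, model):
        if valid_loss < self.best_loss:
            # validation loss improved: checkpoint the model and reset the counter
            self.best_loss = valid_loss
            torch.save(model.state_dict(), self.checkpoint_path)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True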
Example #2
def train(model):
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    data_iter = data_loader.get_train_loader(batch_size=args.batch_size)
    val_iter = data_loader.get_val_loader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        model.train()

        if epoch % 100 == 0:
            print('Save checkpoint')
            torch.save(model, './result/models/model_{}_'.format(epoch)
                       + args.exp_name + '.pth')

        run_loss = 0.0

        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer, epoch)

            run_loss += ret['loss'].item()

            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(
                epoch, (idx + 1) * 100.0 / len(data_iter),
                run_loss / (idx + 1.0)))

        if epoch % 10 == 0:
            evaluate(model, val_iter)
Example #3
def run(config):
    kwargs = {}
    if config.use_gpu:
        # ensure reproducibility
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.manual_seed(0)
        np.random.seed(0)
        kwargs = {'num_workers': config.num_workers}

    # instantiate data loaders
    if config.is_train:
        data_loader = get_train_loader(config.data_dir,
                                       config.batch_size,
                                       is_shuffle=True,
                                       **kwargs)
    else:
        data_loader = get_test_loader(config.data_dir,
                                      config.batch_size,
                                      is_shuffle=False,
                                      **kwargs)
    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
def get_experiment_dataloaders(experiment_name: str,
                               data_dir: str = './data/'):

    first_level = get_num_levels(experiment_name)[0]
    config = get_experiment_config(experiment_name, first_level)
    config = config_dict_to_namespace(config)
    config.data_dir = os.path.join(data_dir, config.dataset)

    torch.manual_seed(config.random_seed)
    kwargs = {}
    if not config.disable_cuda and torch.cuda.is_available():
        use_gpu = True
        torch.cuda.manual_seed_all(config.random_seed)
        kwargs = {
            'num_workers': config.num_workers,
            'pin_memory': config.pin_memory
        }
    else:
        use_gpu = False

    data_dict = get_dataset(config.dataset, config.data_dir, 'test')
    kwargs.update(data_dict)
    config.num_classes = data_dict['num_classes']
    test_loader = get_test_loader(batch_size=config.batch_size, **kwargs)

    if 'cifar' in config.dataset:
        valid_loader = test_loader

    else:
        valid_loader = get_test_loader(batch_size=config.batch_size, **kwargs)

    if config.is_train:
        data_dict = get_dataset(config.dataset, config.data_dir, 'train')
        teachers = []
        kwargs.update(data_dict)
        train_loader = get_train_loader(batch_size=config.batch_size,
                                        padding=config.padding,
                                        padding_mode=config.padding_mode,
                                        random_seed=config.random_seed,
                                        shuffle=config.shuffle,
                                        model_num=len(config.model_names),
                                        teachers=teachers,
                                        cuda=use_gpu,
                                        **kwargs)
    else:
        train_loader = None

    return train_loader, valid_loader, test_loader
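
The get_train_loader / get_test_loader functions called throughout these examples are repository-specific and not shown here. Below is a minimal sketch of what such a pair often looks like, assuming a torchvision MNIST dataset and mirroring the positional call in Example #6; the dataset choice and transform are illustrative assumptions.

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


def get_train_loader(data_dir, batch_size, random_seed=0, shuffle=True, **kwargs):
    # kwargs typically carries num_workers / pin_memory when a GPU is used
    torch.manual_seed(random_seed)
    train_set = datasets.MNIST(data_dir, train=True, download=True,
                               transform=transforms.ToTensor())
    return DataLoader(train_set, batch_size=batch_size, shuffle=shuffle, **kwargs)


def get_test_loader(data_dir, batch_size, **kwargs):
    test_set = datasets.MNIST(data_dir, train=False, download=True,
                              transform=transforms.ToTensor())
    return DataLoader(test_set, batch_size=batch_size, shuffle=False, **kwargs)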
Example #5
def train(model, fine_tune, pseudo, num_epochs=100, data_sets=None):
    init_lr = 0.0001
    criterion = nn.BCELoss()

    if fine_tune:
        arch = model.name

        if arch.startswith('resnet') or arch.startswith("inception"):
            dense_layers = model.fc
        elif arch.startswith("densenet") or arch.startswith("vgg"):
            dense_layers = model.classifier
        else:
            raise Exception('unknown model')

        optimizer_ft = optim.SGD(dense_layers.parameters(),
                                 lr=init_lr,
                                 momentum=0.9)
        init_lr = 0.001
    else:
        optimizer_ft = optim.SGD(model.parameters(), lr=init_lr, momentum=0.9)

    max_num = 2
    if pseudo:
        pseudo_data, valid_data = data_sets
        data_loaders = {
            'train': data_loader.get_pseudo_train_loader(model, pseudo_data),
            'valid': data_loader.get_val_loader(model, valid_data)
        }
        max_num += 2
    else:
        train_data, valid_data = data_sets
        data_loaders = {
            'train': data_loader.get_train_loader(model, train_data),
            'valid': data_loader.get_val_loader(model, valid_data)
        }

    model = train_model(model,
                        criterion,
                        optimizer_ft,
                        lr_scheduler,
                        max_num=max_num,
                        init_lr=init_lr,
                        num_epochs=num_epochs,
                        data_loaders=data_loaders,
                        fine_tune=fine_tune,
                        pseudo=pseudo)
    return model
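
Example #5 passes a lr_scheduler callable into train_model without showing it. A minimal step-decay sketch in the style of the classic transfer-learning tutorials follows; the signature and decay schedule are assumptions, not the original helper.

def lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    # Decay the learning rate by a factor of 10 every lr_decay_epoch epochs.
    lr = init_lr * (0.1 ** (epoch // lr_decay_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer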
Example #6
def main(config):

    # ensure directories are setup
    prepare_dirs(config)

    # ensure reproducibility
    #torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        #torch.cuda.manual_seed_all(config.random_seed)
        kwargs = {
            'num_workers': config.num_workers,
            'pin_memory': config.pin_memory
        }
        #torch.backends.cudnn.deterministic = True

    # instantiate data loaders
    test_data_loader = get_test_loader(config.data_dir, config.batch_size,
                                       **kwargs)

    if config.is_train:
        train_data_loader = get_train_loader(config.data_dir,
                                             config.batch_size,
                                             config.random_seed,
                                             config.shuffle, **kwargs)
        data_loader = (train_data_loader, test_data_loader)
    else:
        data_loader = test_data_loader

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        save_config(config)
        trainer.train()

    # or load a pretrained model and test
    else:
        trainer.test()
Example #7
def main(config):

    # ensure reproducibility
    torch.manual_seed(config.random_seed)

    scores = []
    # instantiate data loaders
    count = 0
    times = []

    for i in range(1, 4):
        start = time.time()
        count = i
        train_data, test_data = load_dataset(config.data_dir, str(count))
        # instantiate data loaders
        data_loader = get_train_loader(train_data, config.batch_size,
                                       config.random_seed, config.shuffle)

        test_loader = get_test_loader(test_data, config.batch_size)

        # instantiate trainer
        trainer = Trainer(config, count, data_loader, test_loader)

        trainer.train()
        result = trainer.test()

        scores.append(result)
        elapsed = time.time() - start
        times.append(elapsed)

    scores = np.array(scores)
    times = np.array(times)
    print('aver time', times.mean())
    # print('avg\tacc\tf1\tprec\trec\tauc')
    print('acc:',
          scores.mean(axis=0)[0], '\nf1',
          scores.mean(axis=0)[1], '\nprec',
          scores.mean(axis=0)[2], '\nrec',
          scores.mean(axis=0)[3])
Example #8
def init_loaders(train_batch_size, test_batch_size):
    import json
    import h5py
    from gensim.models.keyedvectors import KeyedVectors

    with open('./data/datainfo-v1.1.json', 'r') as f:
        data = json.load(f)

    f = h5py.File('./data/resnet_features.hdf5', 'r')
    img_features = f['resnet152_features'][()]
    f.close()

    wordvectors_file_vec = './data/fasttext-sbwc.vec'
    # count = 1000
    wordvectors = KeyedVectors.load_word2vec_format(
        wordvectors_file_vec)  #, limit=count)

    train_loader = get_train_loader(wordvectors, data, img_features,
                                    train_batch_size)
    test_loader = get_test_loader(wordvectors, data, img_features,
                                  test_batch_size)

    return train_loader, test_loader
Example #9
def main(args):
    # for fast training.
    torch.backends.cudnn.benchmark = True

    setup_seed(args.seed)

    # create directories if not exist.
    create_folder(args.save_root_dir, args.version, args.model_save_path)
    create_folder(args.save_root_dir, args.version, args.sample_path)
    create_folder(args.save_root_dir, args.version, args.log_path)
    create_folder(args.save_root_dir, args.version, args.val_result_path)
    create_folder(args.save_root_dir, args.version, args.test_result_path)

    if args.mode == 'train':
        loaders = Munch(ref=get_train_loader(root=args.train_img_dir,
                                             img_size=args.image_size,
                                             resize_size=args.resize_size,
                                             batch_size=args.train_batch_size,
                                             shuffle=args.shuffle,
                                             num_workers=args.num_workers,
                                             drop_last=args.drop_last),
                        val=get_test_loader(root=args.val_img_dir,
                                            batch_size=args.val_batch_size,
                                            shuffle=True,
                                            num_workers=args.num_workers))
        trainer = Trainer(loaders, args)
        trainer.train()
    elif args.mode == 'test':
        loaders = Munch(tes=get_test_loader(root=args.test_img_dir,
                                            img_size=args.test_img_size,
                                            batch_size=args.val_batch_size,
                                            shuffle=True,
                                            num_workers=args.num_workers))
        tester = Tester(loaders, args)
        tester.test()
    else:
        raise NotImplementedError('Mode [{}] is not found'.format(args.mode))
Example #10
args.cuda = not args.no_cuda and torch.cuda.is_available()

args.span_range_height = args.span_range_width = args.span_range
args.grid_height = args.grid_width = args.grid_size
args.image_height = args.image_width = 28

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

model = mnist_model.get_model(args)
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
train_loader = data_loader.get_train_loader(args)
test_loader = data_loader.get_test_loader(args)


def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # print(data.shape)
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
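
Example #10 ends after the training step; a matching evaluation pass over test_loader, written against the current PyTorch API and reusing the model, test_loader and args defined above, might look like the sketch below (it is not part of the original file).

def test():
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum the batch losses, then average over the whole test set
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
    test_loss /= len(test_loader.dataset)
    print('Test set: average loss {:.4f}, accuracy {}/{}'.format(
        test_loss, correct, len(test_loader.dataset)))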
Example #11
            k: v
            for k, v in pretrained_state.items()
            if k in model_state and v.size() == model_state[k].size()
        }
        model_state.update(pretrained_state)
        net.load_state_dict(model_state)

    if cuda:
        net.cuda()

    criterion = net_sphere.AngleLoss()
    # train_dir = '/Users/josephrobinson/Downloads/'
    train_dir = args.data_dir + '/train/'
    val_dir = args.data_dir + '/val/'
    # 'train'
    train_loader = get_train_loader(train_dir, batch_size=args.batch_size)

    print('start: time={}'.format(dt()))
    # optimizer = optim.Adam(net.parameters(), lr=args.lr)
    best_acc = 0
    if not args.train:
        print('Begin train')
        for epoch in range(args.n_epochs):
            if epoch in [0, 2, 4, 6, 8]:
                if epoch != 0:
                    args.lr *= 0.1  # hardcoded for now (n_epochs = 3)
                params = [x for x in net.parameters() if x.requires_grad]
                optimizer = optim.SGD(params,
                                      lr=args.lr,
                                      momentum=0.9,
                                      weight_decay=5e-4)
Example #12
with open("data.pickle", "rb") as f:
    dataset = pickle.load(f)

# data cut off and shuffle
#data_in,data_out = bf.data_cutoff(dataset,output_size,cut_off=70)
#data_loader.update_dataset(dataset,data_in,data_out)
# split dataset
train_dataset, test_dataset = data_loader.spilt_train_test_dataset(dataset)
#train_dataset,test_dataset = data_loader.advanced_spilt_train_test_dataset(dataset,output_size)
# balance train part
#train_in,train_out = bf.balance_avg(train_dataset,output_size)
#print(f"Before balance:\n{bf.view_count(train_dataset,output_size)}")
#data_loader.update_dataset(train_dataset,train_in,train_out)
#print(f"After balance:\n{bf.view_count(train_dataset,output_size)}")
validate_loader = data_loader.get_validate_loader(test_dataset, 32)
train_loader = data_loader.get_train_loader(train_dataset, 32)


def validate_one_epoch(device, model, criterion, validate_loader):
    model.eval()
    num_validate = len(validate_loader.sampler.indices)
    if num_validate == 0:
        print("number of data is 0")
        return -1, -1
    val_loss = 0.
    num_correct = 0
    for b, (batch_input, batch_label) in enumerate(validate_loader):
        for i in range(len(batch_input)):
            # read data
            data_input, data_label = batch_input[i], batch_label[i]
            print(data_input)
def main_one(csnum):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # prepare neural network
    validate_size = 0.1
    num_bands = 100
    tmp = []
    for hs in range(48):
        tmp.append(hs)
    hs_indices = tmp
    #hs_indices = [0, 1, 3, 4, 5, 7, 8, 13, 31, 34, 37]  # 11 hs points in Brillouin zone out of 40

    cs_sizes = crystalsystem.crystalsystem_sizes()
    output_size = (cs_sizes[csnum - 1] - cs_sizes[csnum - 2] + 1
                   if csnum > 1 else 3)
    """
    model = torch.nn.Sequential(
        torch.nn.LeakyReLU(),
        torch.nn.Linear(len(hs_indices)*num_bands, 300),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(300, 100),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(100, output_size),
        torch.nn.LeakyReLU(),
    )
    """

    model = torch.nn.Sequential(
        #torch.nn.LeakyReLU(),
        torch.nn.Linear(len(hs_indices) * num_bands, 128),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(128, 64),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(64, output_size),
        torch.nn.LeakyReLU(),
    )

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=1,
                                                gamma=0.75)
    criterion = torch.nn.CrossEntropyLoss()

    with open("data.pickle", "rb") as f:
        dataset = pickle.load(f)

    # data cut off and shuffle
    #data_in,data_out = bf.data_cutoff(dataset,output_size,cut_off=0)
    #data_loader.update_dataset(dataset,data_in,data_out)
    # split dataset
    #train_dataset,test_dataset = data_loader.spilt_train_test_dataset(dataset)
    #train_dataset,test_dataset = data_loader.advanced_spilt_train_test_dataset(dataset,output_size)
    # balance train part
    train_in, train_out = bf.balance_avg(dataset, output_size)
    #print(f"Before balance:\n{bf.view_count(train_dataset,output_size)}")
    data_loader.update_dataset(dataset, train_in, train_out)
    #print(f"After balance:\n{bf.view_count(train_dataset,output_size)}")
    train_dataset, test_dataset = data_loader.advanced_spilt_train_test_dataset(
        dataset, output_size)
    validate_loader = data_loader.get_validate_loader(test_dataset, 32)
    train_loader = data_loader.get_train_loader(train_dataset, 32)

    # train
    ech, loss, ech_a, acc = function_training.validate_train_loop(
        device,
        model,
        optimizer,
        scheduler,
        criterion,
        validate_loader,
        train_loader,
        num_epoch=30,
        num_epoch_per_validate=1,
        state_dict_path=f"state_dicts/state_dict_cs2sg_{csnum}")

    plot_loss(ech, loss, ech_a, acc)
    plot_dist(dataset, output_size, title="Cut-off Raw sample")
    plot_dist(train_dataset, output_size, title="Train sample")
    plot_dist(test_dataset, output_size, title="Test sample")
    """
Example #14
def main(**kwargs):
    global args
    lowest_error1 = 100

    for arg, v in kwargs.items():
        args.__setattr__(arg, v)

    program_start_time = time.time()
    instanceName = "Classification_Accuracy"
    folder_path = os.path.dirname(
        os.path.abspath(__file__)) + os.sep + args.model

    timestamp = datetime.datetime.now()
    ts_str = timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    path = folder_path + os.sep + instanceName + os.sep + args.model_name + os.sep + ts_str

    tensorboard_folder = path + os.sep + "Graph"
    os.makedirs(path)
    args.savedir = path

    writer = SummaryWriter(tensorboard_folder)

    global logFile
    logFile = path + os.sep + "log.txt"
    args.filename = logFile
    global num_outputs

    print(args)
    global device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if args.data == "cifar100" or args.data == "CIFAR100":
        fig_title_str = " on CIFAR-100"

    elif args.data == "cifar10" or args.data == "CIFAR10":
        fig_title_str = " on CIFAR-10"
    elif args.data == "tiny_imagenet":
        fig_title_str = " on tiny_imagenet"
    else:
        LOG(
            "ERROR =============================dataset should be CIFAR10 or CIFAR100",
            logFile)
        raise NotImplementedError

    captionStrDict = {
        "fig_title": fig_title_str,
        "x_label": "epoch",
        'elastic_final_layer_label': "Final_Layer_Output_Classifier",
        "elastic_intermediate_layer_label": "Intermediate_Layer_Classifier_"
    }

    # save input parameters into log file

    LOG("program start time: " + ts_str + "\n", logFile)

    # if args.layers_weight_change == 1:
    #     LOG("weights for intermediate layers: 1/(34-Depth), giving different weights for different intermediate layers output, using the formula weigh = 1/(34-Depth)", logFile)
    # elif args.layers_weight_change == 0:
    #     LOG("weights for intermediate layers: 1, giving same weights for different intermediate layers output as  1", logFile)
    # else:
    #     print("Parameter --layers_weight_change, Error")
    #     sys.exit()

    if args.model == "Elastic_ResNet18" or args.model == "Elastic_ResNet34" or args.model == "Elastic_ResNet50" or args.model == "Elastic_ResNet101" or args.model == "Elastic_ResNet152":
        model = Elastic_ResNet(args, logFile)

    elif args.model == "Elastic_InceptionV3":
        args.target_size = (
            299, 299, 3
        )  # since pytorch inceptionv3 pretrained accepts image size (299, 299, 3) instead of (224, 224, 3)
        model = Elastic_InceptionV3(args, logFile)

    elif args.model == "Elastic_MobileNet":
        model = Elastic_MobileNet(args, logFile)

    elif args.model == "Elastic_VGG16":
        model = Elastic_VGG16_bn(args, logFile)

    elif args.model == "Elastic_SqueezeNet":
        model = Elastic_SqueezeNet(args, logFile)

    elif args.model == "Elastic_DenseNet121" or args.model == "Elastic_DenseNet169" or args.model == "Elastic_DenseNet201":
        model = Elastic_DenseNet(args, logFile)

    else:
        LOG(
            "--model parameter should be in ResNet, InceptionV3, MobileNet, VGG16, SqueezeNet, DenseNet",
            logFile)
        exit()

    num_outputs = model.num_outputs
    # num_outputs = 1

    LOG("num_outputs: " + str(num_outputs), logFile)
    LOG("successfully create model: " + args.model, logFile)

    args_str = str(args)
    LOG(args_str, logFile)

    model = model.to(device)
    if device == 'cuda':
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    # TUT thinkstation data folder path
    data_folder = "/media/yi/e7036176-287c-4b18-9609-9811b8e33769/tiny_imagenet/tiny-imagenet-200"

    # narvi data folder path
    # data_folder = "/home/zhouy/data/tiny-imagenet-200"

    # XPS 15 laptop data folder path
    # data_folder = "D:\Elastic\data"
    # args.batch_size = 1

    summary(model, (3, 224, 224))

    if args.data == "tiny_imagenet":
        train_loader, test_loader = tiny_image_data_loader(data_folder, args)
    else:
        train_loader = get_train_loader(args.data,
                                        data_dir=data_folder,
                                        batch_size=args.batch_size,
                                        augment=False,
                                        target_size=args.target_size,
                                        random_seed=20180614,
                                        valid_size=0.2,
                                        shuffle=True,
                                        show_sample=False,
                                        num_workers=4,
                                        pin_memory=True,
                                        debug=args.debug)

        test_loader = get_test_loader(args.data,
                                      data_dir=data_folder,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      target_size=args.target_size,
                                      num_workers=4,
                                      pin_memory=True,
                                      debug=args.debug)

    criterion = nn.CrossEntropyLoss().cuda()

    if args.data != "tiny_imagenet":
        pretrain_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                                    model.parameters()),
                                             args.pretrain_learning_rate,
                                             momentum=args.momentum,
                                             weight_decay=args.weight_decay)

        LOG("==> Pretraining for **1** epoches    \n", logFile)
        for pretrain_epoch in range(0, 1):
            accs, losses, lr = train(train_loader, model, criterion,
                                     pretrain_optimizer, pretrain_epoch)
            epoch_result = "    pretrain epoch: " + str(
                pretrain_epoch) + ", pretrain error: " + str(
                    accs) + ", pretrain loss: " + str(
                        losses) + ", pretrain learning rate: " + str(
                            lr) + ", pretrain total train sum loss: " + str(
                                sum(losses))
            LOG(epoch_result, logFile)

        summary(model, (3, 224, 224))

    LOG("==> Full training    \n", logFile)
    for param in model.parameters():
        param.requires_grad = True

    optimizers = []
    childs = []
    k = 0
    for child in model.parameters():
        childs.append(child)
        k += 1

    # childs_params = [childs[:9], childs[:15], childs[:21], childs[:27],
    #                     childs[:33], childs[:39], childs[:45], childs[:51],
    #                     childs[:57], childs[:63], childs[:69], childs[:75], childs]
    childs_params = [childs[:25], childs[:43], childs[:61], childs]

    for i in range(num_outputs):
        optimizer = torch.optim.SGD(childs_params[i],
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizers.append(optimizer)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.weight_decay)
    # summary(model, (3,224,224))

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           threshold=1e-4,
                                                           patience=10)

    # implement early stop by own
    EarlyStopping_epoch_count = 0

    epochs_train_accs = []
    epochs_train_top5_accs = []
    epochs_train_losses = []
    epochs_test_accs = []
    epochs_test_losses = []
    epochs_lr = []
    epochs_test_top5_accs = []

    for epoch in range(0, args.epochs):

        epoch_str = "==================================== epoch %d ==============================" % epoch
        LOG(epoch_str, logFile)
        # Train for one epoch
        accs, losses, lr, accs_top5 = train(train_loader, model, criterion,
                                            optimizers, epoch)
        epochs_train_accs.append(accs)
        epochs_train_losses.append(losses)
        epochs_lr.append(lr)
        epochs_train_top5_accs.append(accs_top5)

        writer.add_scalar(tensorboard_folder + os.sep + "data" + os.sep + 'lr',
                          lr, epoch)
        for i, a, l, k in zip(range(len(accs)), accs, losses, accs_top5):
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'train_error_' + str(i), a, epoch)
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'train_losses_' + str(i), l, epoch)
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'train_top5_error_' + str(i), k, epoch)

        epoch_result = "\ntrain error: " + str(accs) + "top 5 error: " + str(
            accs_top5) + ", \nloss: " + str(
                losses) + ", \nlearning rate " + str(
                    lr) + ", \ntotal train sum loss " + str(sum(losses))
        LOG(epoch_result, logFile)

        if num_outputs > 1:
            total_train_loss = sum(losses)
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'train_total_sum_losses', total_train_loss, epoch)
            losses.append(total_train_loss)  # append the total sum loss
            LOG("train_total_sum_losses: " + str(total_train_loss), logFile)

        # run on test dataset
        LOG("==> test \n", logFile)
        test_accs, test_losses, test_top5_accs = validate(
            test_loader, model, criterion)

        epochs_test_accs.append(test_accs)
        epochs_test_losses.append(test_losses)
        epochs_test_top5_accs.append(test_top5_accs)

        for i, a, l, k in zip(range(len(test_accs)), test_accs, test_losses,
                              test_top5_accs):
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep + 'test_error_' +
                str(i), a, epoch)
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'test_losses_' + str(i), l, epoch)
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'test_top5_losses_' + str(i), k, epoch)

        test_result_str = "==> Test epoch: \nfinal output classifier error: " + str(
            test_accs
        ) + "test top 5 error: " + str(test_top5_accs) + ", \ntest_loss" + str(
            test_losses) + ", \ntotal test sum loss " + str(sum(test_losses))
        LOG(test_result_str, logFile)

        total_loss = sum(test_losses)

        if num_outputs > 1:
            writer.add_scalar(
                tensorboard_folder + os.sep + "data" + os.sep +
                'test_total_sum_losses', total_loss, epoch)
            test_losses.append(total_loss)  # add the total sum loss
            LOG("test_total_sum_losses: " + str(total_loss), logFile)

        log_stats(path, accs, losses, lr, test_accs, test_losses, accs_top5,
                  test_top5_accs)

        # Remember best prec@1 and save checkpoint
        # "error1" is an error rate, not an accuracy; the variable name is kept as-is
        is_best = test_accs[-1] < lowest_error1

        if is_best:

            lowest_error1 = test_accs[-1]  # caveat: sometimes the second-to-last classifier achieves the better result

            save_checkpoint(
                {
                    'epoch': epoch,
                    'model': args.model_name,
                    'state_dict': model.state_dict(),
                    'best_prec1': lowest_error1,
                    'optimizer': optimizer.state_dict(),
                }, args)

        # apply early_stop with monitoring val_loss
        # EarlyStopping(patience=15, score_function=score_function(val_loss), trainer=model)

        scheduler.step(total_loss)  # adjust learning rate with test_loss

        if epoch == 0:
            prev_epoch_loss = total_loss  # use the summed loss over all intermediate classifiers instead of a single classifier loss
        else:
            if total_loss >= prev_epoch_loss:  # means this current epoch doesn't reduce test losses
                EarlyStopping_epoch_count += 1
        if EarlyStopping_epoch_count > 20:
            LOG(
                "No improvement in test_loss for more than 20 epochs, stopping training",
                logFile)
            break

    # n_flops, n_params = measure_model(model, IMAGE_SIZE, IMAGE_SIZE)
    # FLOPS_result = 'Finished training! FLOPs: %.2fM, Params: %.2fM' % (n_flops / 1e6, n_params / 1e6)
    # LOG(FLOPS_result, logFile)
    # print(FLOPS_result)
    writer.close()

    end_timestamp = datetime.datetime.now()
    end_ts_str = end_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    LOG("program end time: " + end_ts_str + "\n", logFile)

    # here plot figures
    plot_figs(epochs_train_accs, epochs_train_losses, epochs_test_accs,
              epochs_test_losses, args, captionStrDict)
    LOG("============Finish============", logFile)
Example #15
    def __init__(self, args, model, optimizer, lr_policy):
        self.args = args
        self.lr_policy = lr_policy
        self.iter_wise = self.lr_policy.iteration_wise

        # for logging the training
        val_head = [
            "iter" if self.iter_wise else "epoch", "mean_pixel_accuracy"
        ]
        for i in range(self.args.class_num):
            val_head.append("mean_precision_class_{}".format(i))
        for i in range(self.args.class_num):
            val_head.append("mean_IoU_class_{}".format(i))
        self.tlog = self.get_train_logger(
            {
                "train": [
                    "iter" if self.iter_wise else "epoch",
                    "batch_mean_total_loss"
                ],
                "val":
                val_head
            },
            save_dir=self.args.save_dir,
            save_name=self.args.save_name,
            arguments=self.get_argparse_arguments(self.args),
            use_http_server=self.args.use_http_server,
            use_msg_server=self.args.use_msg_server,
            notificate=False,
            visualize_fetch_stride=self.args.viz_fetch_stride,
            http_port=self.args.http_server_port,
            msg_port=self.args.msg_server_port)

        # paths
        self.save_dir = self.tlog.log_save_path
        self.model_param_dir = self.tlog.mkdir("model_param")

        if torch.cuda.is_available() and not self.args.nogpu:
            self.map_device = torch.device('cuda:{}'.format(
                self.args.gpu_device_num))
        else:
            self.map_device = torch.device('cpu')

        self.model = model
        if torch.cuda.is_available() and not args.nogpu:
            self.model = self.model.to(self.map_device)

        self.optimizer = optimizer

        self.train_loader = data_loader.get_train_loader(
            self.args,
            [(0.5, 0.5, 0.5),
             (0.5, 0.5, 0.5)])  #[(0.485, 0.456, 0.406),(0.229, 0.224, 0.225)])
        self.val_loader = data_loader.get_val_loader(self.args,
                                                     [(0.5, 0.5, 0.5),
                                                      (0.5, 0.5, 0.5)])

        self.cmap = self._gen_cmap()

        if self.args.show_parameters:
            for idx, m in enumerate(model.modules()):
                print(idx, '->', m)
            print(args)

        print("\nsaving at {}\n".format(self.save_dir))
#Define a split for train/valid
valid_size = 0.2
batch_size = 10
num_train = len(cities)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))

train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

#Load data generators
train_data_loader = get_train_loader(cities=cities,
                                     labels=labels,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     collate_fn=collate_fn,
                                     sampler=train_sampler)
valid_data_loader = get_train_loader(cities=cities,
                                     labels=labels,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     collate_fn=collate_fn,
                                     sampler=valid_sampler)

#Initialize the model to train
model = LSTMClassifier(27, 10, 14)

# Loss and Optimizer
criterion = nn.NLLLoss()
learning_rate = 0.8
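
This snippet stops after defining the loss and learning rate; a plain completion with an optimizer and a short training loop over train_data_loader is sketched below. The SGD choice, the epoch count, and the assumption that each batch yields (inputs, targets) suitable for the LSTMClassifier are illustrative, not the original notebook's code.

# Illustrative completion: optimizer plus a minimal training loop.
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(10):  # epoch count is arbitrary here
    model.train()
    running_loss = 0.0
    for inputs, targets in train_data_loader:
        optimizer.zero_grad()
        output = model(inputs)  # assumed to return log-probabilities, as NLLLoss expects
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('epoch {}: train loss {:.4f}'.format(
        epoch, running_loss / len(train_data_loader)))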
Example #17
def train(opt):
    # Load models
    print('----------- Network Initialization --------------')
    teacher = select_model(dataset=opt.data_name,
                           model_name=opt.t_name,
                           pretrained=True,
                           pretrained_models_path=opt.t_model,
                           n_classes=opt.num_class).to(opt.device)
    print('finished teacher model init...')

    student = select_model(dataset=opt.data_name,
                           model_name=opt.s_name,
                           pretrained=True,
                           pretrained_models_path=opt.s_model,
                           n_classes=opt.num_class).to(opt.device)
    print('finished student model init...')
    teacher.eval()

    nets = {'snet': student, 'tnet': teacher}

    for param in teacher.parameters():
        param.requires_grad = False

    # initialize optimizer
    optimizer = torch.optim.SGD(student.parameters(),
                                lr=opt.lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay,
                                nesterov=True)

    # define loss functions
    if opt.cuda:
        criterionCls = nn.CrossEntropyLoss().cuda()
        criterionAT = AT(opt.p)
    else:
        criterionCls = nn.CrossEntropyLoss()
        criterionAT = AT(opt.p)

    print('----------- DATA Initialization --------------')
    train_loader = get_train_loader(opt)
    test_clean_loader, test_bad_loader = get_test_loader(opt)

    print('----------- Train Initialization --------------')
    for epoch in range(0, opt.epochs):

        adjust_learning_rate(optimizer, epoch, opt.lr)

        # train every epoch
        criterions = {'criterionCls': criterionCls, 'criterionAT': criterionAT}

        if epoch == 0:
            # before training test firstly
            test(opt, test_clean_loader, test_bad_loader, nets, criterions,
                 epoch)

        train_step(opt, train_loader, nets, optimizer, criterions, epoch + 1)

        # evaluate on testing set
        print('testing the models......')
        acc_clean, acc_bad = test(opt, test_clean_loader, test_bad_loader,
                                  nets, criterions, epoch + 1)

        # remember best precision and save checkpoint
        # save_root = opt.checkpoint_root + '/' + opt.s_name
        if opt.save:
            is_best = acc_clean[0] > opt.threshold_clean
            opt.threshold_clean = min(acc_bad[0], opt.threshold_clean)

            best_clean_acc = acc_clean[0]
            best_bad_acc = acc_bad[0]

            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': student.state_dict(),
                    'best_clean_acc': best_clean_acc,
                    'best_bad_acc': best_bad_acc,
                    'optimizer': optimizer.state_dict(),
                }, is_best, opt.checkpoint_root, opt.s_name)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     factor=0.1,
                                                     patience=20,
                                                     verbose=True)
    print('start: time={}'.format(dt()))

    # optimizer = optim.Adam(net.parameters(), lr=args.lr)
    best_acc = 0
    if not args.train:
        print('Begin train')
        for epoch in range(args.n_epochs):
            train_set, train_loader = get_train_loader(
                image_size=args.img_size,
                batch_size=args.train_batch_size,
                train_steps=args.train_steps,
                val_steps=args.val_steps,
                one_to_zero_train=args.one_to_zero_train,
                one_to_zero_val=args.one_to_zero_val)
            val_loader = get_val_loader(
                image_size=args.img_size,
                batch_size=args.val_batch_size,
                train_steps=args.train_steps,
                val_steps=args.val_steps,
                one_to_zero_train=args.one_to_zero_train,
                one_to_zero_val=args.one_to_zero_val)
            print("epoch:", epoch)

            #    if epoch in args.change_lr_for_epochs:
            #        args.lr *= 0.1
            #        optimizer = optim.SGD(param_groups, lr=args.lr, momentum=0.9, weight_decay=5e-4)