from collections import defaultdict
from pathlib import Path

# ModelSaver is assumed to be imported from the surrounding project.


def basic_callback_dict(identifier, save="val_loss"):
    callback_dict = defaultdict(list)

    path = Path(__file__).resolve().parent / (identifier + ".pt" if identifier
                                              else "best_model.pt")
    if save == "val_loss":
        ModelSaver(path=path).register(callback_dict)
    else:
        raise NotImplementedError(f"Saving by {save} not implemented.")

    return callback_dict
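
For reference, here is a minimal hypothetical sketch of a ModelSaver compatible with the register call above (the actual class is not shown in this snippet): it hooks into the callback dict and checkpoints the model whenever validation loss improves.

# Hypothetical minimal ModelSaver matching the usage above; not the actual
# implementation behind this snippet.
import torch


class ModelSaver:
    def __init__(self, path):
        self.path = path
        self.best = float("inf")

    def register(self, callback_dict):
        # assumes the training loop invokes each "on_epoch_end" hook
        # with (model, val_loss)
        callback_dict["on_epoch_end"].append(self._maybe_save)

    def _maybe_save(self, model, val_loss):
        if val_loss < self.best:
            self.best = val_loss
            torch.save(model.state_dict(), self.path)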
Example #2
def train(args):
    Arguments.save_args(args, args.args_path)
    train_loader, val_loader, _ = get_dataloaders(args)
    model = UNetVgg16(n_classes=args.n_classes).to(args.device)
    optimizer = get_optimizer(args.optimizer, model)
    lr_scheduler = LRScheduler(args.lr_scheduler, optimizer)
    criterion = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)
    model_saver = ModelSaver(args.model_path)
    recorder = Recorder(['train_miou', 'train_acc', 'train_loss',
                         'val_miou', 'val_acc', 'val_loss'])
    for epoch in range(args.n_epochs):
        print(f"{args.experim_name} Epoch {epoch+1}:")
        train_loss, train_acc, train_miou, train_ious = train_epoch(
            model=model,
            dataloader=train_loader,
            n_classes=args.n_classes,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            criterion=criterion,
            device=args.device,
        )
        print(f"train | mIoU: {train_miou:.3f} | accuracy: {train_acc:.3f} | loss: {train_loss:.3f}")
        val_loss, val_scores = eval_epoch(
            model=model,
            dataloader=val_loader,
            n_classes=args.n_classes,
            criterion=criterion,
            device=args.device,
        )
        val_miou, val_ious, val_acc = val_scores['mIoU'], val_scores['IoUs'], val_scores['accuracy']
        print(f"valid | mIoU: {val_miou:.3f} | accuracy: {val_acc:.3f} | loss: {val_loss:.3f}")
        recorder.update([train_miou, train_acc, train_loss, val_miou, val_acc, val_loss])
        recorder.save(args.record_path)
        if args.metric.startswith("IoU"):
            metric = val_ious[int(args.metric.split('_')[1])]
        else:
            metric = val_miou
        model_saver.save_models(metric, epoch+1, model,
                                ious={'train': train_ious, 'val': val_ious})

    print(f"best model at epoch {model_saver.best_epoch} with miou {model_saver.best_score:.5f}")
Example #3
def train(train_sets: tuple,
          test_sets: tuple,
          input_shape: tuple = (1, 128, 128, 1),
          model_version="1.0.0",
          epochs: int = 100,
          classes: int = 2,
          batch_size: int = 1,
          verbose: int = 1,
          out_dir: str = "saved_models"):
    """
    The function to train the model.

    Parameters:
        train_sets (tuple): A tuple of np.array for train images and train labels.
        test_sets (tuple): A tuple of np.array for test images and test labels.
        input_shape (tuple): Input shape of the model. It should be in the form of (1, ..., ...).
        model_version (str): The version of the model in d.d.d format.
        epochs (int): The number of epochs.
        classes (int): The number of classes.
        batch_size (int): The batch size.
        verbose (int): Whether to show the progress of each epoch.
        out_dir (str): The output dir for saving the model in.
    """
    (x_train, y_train), (x_test, y_test) = train_sets, test_sets
    y_train = keras.utils.to_categorical(y_train, classes)
    y_test = keras.utils.to_categorical(y_test, classes)
    m = get_model(model_version)
    if not m:
        return
    model = m.build_model(input_shape)
    model.compile(loss=BinaryCrossentropy(),
                  optimizer=RMSprop(learning_rate=0.0001),
                  metrics=['accuracy'])
    saver = ModelSaver(out_dir)
    csv_logger = CSVLogger(
        "%s/%s/log.csv" %
        (out_dir, datetime.datetime.now().date().strftime("%Y_%m_%d")),
        append=True,
        separator=',')
    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=verbose,
                        validation_data=(x_test, y_test),
                        callbacks=[saver, csv_logger])
    model.save("%s/%s/final.hd5" %
               (out_dir, datetime.datetime.now().date().strftime("%Y_%m_%d")))
    print("Model saved in %s as final.hd5" % out_dir)
    plot_results(history, epochs, out_dir)
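
Since saver is passed to model.fit(callbacks=...), this ModelSaver must be a keras.callbacks.Callback subclass. A minimal sketch of such a callback, assuming it saves the best model by validation loss into out_dir:

# Minimal sketch of a Keras-compatible ModelSaver (assumption: it tracks
# val_loss and writes the best model into out_dir).
import os
import keras


class ModelSaver(keras.callbacks.Callback):
    def __init__(self, out_dir):
        super().__init__()
        self.out_dir = out_dir
        self.best = float("inf")

    def on_epoch_end(self, epoch, logs=None):
        val_loss = (logs or {}).get("val_loss")
        if val_loss is not None and val_loss < self.best:
            self.best = val_loss
            self.model.save(os.path.join(self.out_dir, "best.h5"))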
Example #4
    def __init__(self, weights=None, biases=None):
        # fall back to the class-level defaults when no overrides are given
        self.weights = weights if weights else self.weights
        self.biases = biases if biases else self.biases
        self.datasets = get_datasets(heart_diseases, nr_inputs)
        self.label_data = get_labels(self.datasets)

        self.saver = ModelSaver(save_dir="saved_models/cnn/")

        logs_path = "tensorboard_data/cnn/"
        self.tensorboard_handler = TensorBoardHandler(logs_path)
        self.tensorboard_handler.add_histograms(self.weights)
        self.tensorboard_handler.add_histograms(self.biases)

        self.build()
Example #5
def build_trainer(config, seed, args):

    monitoring_metrics = [
        'epoch', 'iteration', 'total_loss', 'latent_loss', 'seg_loss', 'NET',
        'ED', 'ET'
    ]

    logger = Logger(save_dir=config.save.save_dir,
                    config=config,
                    seed=seed,
                    name=config.save.study_name,
                    monitoring_metrics=monitoring_metrics)

    save_dir_path = logger.log_dir

    checkpoint_callback = ModelSaver(limit_num=10,
                                     monitor=None,
                                     filepath=os.path.join(
                                         save_dir_path,
                                         'ckpt-{epoch:04d}-{total_loss:.2f}'),
                                     save_top_k=-1)

    if config.run.resume_checkpoint:
        print('Training will resume from: {}'.format(
            config.run.resume_checkpoint))
        model = TumorSegmentation.load_from_checkpoint(
            config.run.resume_checkpoint,
            config=config,
            save_dir_path=save_dir_path,
        )
    else:
        model = TumorSegmentation(config, save_dir_path)

    trainer = pl.Trainer(gpus=config.run.visible_devices,
                         num_nodes=1,
                         max_epochs=config.run.n_epochs,
                         progress_bar_refresh_rate=1,
                         automatic_optimization=True,
                         distributed_backend=config.run.distributed_backend,
                         deterministic=True,
                         logger=logger,
                         sync_batchnorm=True,
                         checkpoint_callback=checkpoint_callback,
                         resume_from_checkpoint=config.run.resume_checkpoint,
                         limit_val_batches=10)

    return model, trainer
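
limit_num is not a standard Lightning ModelCheckpoint argument, so this custom ModelSaver presumably prunes old checkpoints so that at most limit_num files remain. A framework-free sketch of such pruning logic (an assumption, not the actual implementation):

# Hypothetical helper illustrating what limit_num might do: keep only the
# newest limit_num checkpoint files in a directory.
import glob
import os


def prune_checkpoints(ckpt_dir, limit_num=10, pattern="ckpt-*"):
    paths = sorted(glob.glob(os.path.join(ckpt_dir, pattern)),
                   key=os.path.getmtime)
    for path in paths[:-limit_num]:
        os.remove(path)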
Example #6
parser.add_argument('--unfreeze', type=str, metavar='UF', default='',
                    help='Provide an option for unfreezing given layers')
parser.add_argument('--freeze', type=str, metavar='F', default='',
                    help='Provide an option for freezing given layers')
parser.add_argument('--pretrain', action='store_true')
parser.add_argument('--fc-only', action='store_true')
parser.add_argument('--except-fc', action='store_true')
parser.add_argument('--load-best', action='store_true')
parser.add_argument('--load-last', action='store_true')
parser.add_argument('--continue-step', action='store_true')
parser.add_argument('--train-all', action='store_true', help='Train all layers')

args = parser.parse_args()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
l2_dist = PairwiseDistance(2)
modelsaver = ModelSaver()


def save_if_best(state, acc):
    modelsaver.save_if_best(acc, state)


def main():
    init_log_just_created("log/valid.csv")
    init_log_just_created("log/train.csv")
    import pandas as pd
    valid = pd.read_csv('log/valid.csv')
    max_acc = valid['acc'].max()

    pretrain = args.pretrain
    fc_only = args.fc_only
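
# `modelsaver.save_if_best(acc, state)` above implies the saver keeps only
# the checkpoint with the best accuracy so far. A minimal sketch consistent
# with that call (hypothetical; the default path is assumed, and `state` is
# whatever dict the caller builds, e.g. model/optimizer states):
import torch


class ModelSaver:
    def __init__(self, path="log/best_state.pt"):
        self.path = path
        self.best = float("-inf")

    def save_if_best(self, score, state):
        if score > self.best:
            self.best = score
            torch.save(state, self.path)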
Example #7
            betas=(
                CONFIG.hyperparam.optimization.beta1,
                CONFIG.hyperparam.optimization.beta2,
            ),
            weight_decay=CONFIG.hyperparam.optimization.weight_decay,
        )
    else:
        raise NotImplementedError("only Adam implemented")
    #########################################################

    ################# evaluator, saver ######################
    print("loading evaluator and model saver...")
    evaluator = NLGEval(no_skipthoughts=True, no_glove=True)
    # evaluator = NLGEval(metrics_to_omit=["METEOR"])
    model_path = os.path.join(outdir, "best_score.ckpt")
    saver = ModelSaver(model_path, init_val=0)
    offset_ep = saver.load_ckpt(model, optimizer, device)
    if offset_ep > CONFIG.hyperparam.misc.max_epoch:
        raise RuntimeError(
            "trying to restart at epoch {} while max training is set to {} "
            "epochs".format(offset_ep, CONFIG.hyperparam.misc.max_epoch))
    ########################################################

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    if CONFIG.use_wandb:
        wandb.watch(model)

    ################### training loop #####################
Example #8
model = NIMA()
model = model.to(device)

#########
# Train #
#########
parameters = [
    {"params": model.base_model.parameters()},
    {"params": model.head.parameters(), "lr": 3e-5},
]
optimizer = torch.optim.Adam(parameters, lr=3e-6)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)
saver = ModelSaver()


def train(model, train_loader, scheduler, optimizer):
    criterion = EMDLoss()  # r=2 for train
    logging.info("Train Phase, Epoch: {}".format(epoch))
    scheduler.step()
    emd_losses = AverageMeter()

    model.train()
    for batch_num, batch in enumerate(train_loader, 1):
        imgs, labels = batch
        imgs, labels = imgs.to(device).float(), labels.to(device).float()

        scores = model(imgs)
        emd_loss = criterion(scores, labels)
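        # (the original snippet is truncated here; a typical continuation,
        # given as an assumption rather than the original code, would be:)
        optimizer.zero_grad()
        emd_loss.backward()
        optimizer.step()
        emd_losses.update(emd_loss.item(), imgs.size(0))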
Example #9
def run(opt):
    train_loader, test_loader = create_loaders(opt)

    # Initialize generator and discriminator
    generator = load_or_init_models(RetouchGenerator(opt.device, opt.pw_guide), opt)

    # Optimizers
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, weight_decay=1e-8)

    # Losses
    # criterion_GAN = torch.nn.MSELoss()
    # criterion_pixelwise = torch.nn.L1Loss()

    # if opt.cuda:
    #     generator = generator.cuda()
    #     discriminator = discriminator.cuda()

    generator, criterion_pixelwise = to_variables((generator, torch.nn.MSELoss()), cuda=opt.cuda, device=opt.device)

    saverG = ModelSaver(f'{opt.checkpoint_dir}/saved_models/{opt.name}')
    train_writer = SummaryWriter(log_dir=os.path.join(opt.checkpoint_dir, 'train'))
    test_writer = SummaryWriter(log_dir=os.path.join(opt.checkpoint_dir, 'test'))

    for epoch in tqdm(range(opt.epoch, opt.n_epochs), desc='Training'):
        ####
        # Train
        ###
        avg_stats = defaultdict(float)
        for i, data in enumerate(train_loader):
            data = to_variables(data, cuda=opt.cuda, device=opt.device)
            y_hat, loss_G = trainG(generator, criterion_pixelwise, optimizer_G, data)
            update_stats(avg_stats, loss_G)

            # Print image to tensorboard
            if (epoch % opt.sample_interval == 0) and (i % 50 == 0):
                train_writer.add_image('RetouchNet', y_hat[0], epoch)
                train_writer.add_image('Edited', data[2][0], epoch)
                train_writer.add_image('Original', data[0][0], epoch)

        # Log Progress
        str_out = '[train] {}/{} '.format(epoch, opt.n_epochs)
        for k, v in avg_stats.items():
            avg = v / len(train_loader)
            train_writer.add_scalar(k, avg, epoch)
            str_out += '{}: {:.6f}  '.format(k, avg)
        print(str_out)

        ####
        # Test
        ###
        avg_stats = defaultdict(float)
        images = None
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                data = to_variables(data, cuda=opt.cuda, device=opt.device, test=True)
                images, losses = test(generator, criterion_pixelwise, data)
                update_stats(avg_stats, losses)

                # Print image to tensorboard
                if (epoch % opt.sample_interval == 0) and (i % 5 == 0):
                    test_writer.add_image('RetouchNet', images[0], epoch)
                    test_writer.add_image('Edited', data[2][0], epoch)
                    test_writer.add_image('Original', data[0][0], epoch)

        # Log Progress
        str_out = '[test] {}/{} '.format(epoch, opt.n_epochs)
        for k, v in avg_stats.items():
            avg = v / len(test_loader)
            test_writer.add_scalar(k, avg, epoch)
            str_out += '{}: {:.6f}  '.format(k, avg)
        print(str_out)

        # If at sample interval save image
        # if epoch % opt.sample_interval == 0:
        #     x_hr, x_lr, y_hr, y_lr = data
        #     test_writer.add_image('RetouchNet', images[0], epoch)
        #     test_writer.add_image('GroundTruth', y_hr[0], epoch)
        #     test_writer.add_image('raw', x_hr[0], epoch)

        if epoch % opt.checkpoint_interval == 0:
            # Save model checkpoints
            saverG.save_if_best(generator, loss_G['loss_G'])
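
Note that save_if_best here receives a loss (lower is better), whereas Example #6 tracked accuracy (higher is better), so the saver presumably supports, or defaults to, minimization. A hypothetical direction-aware sketch:

# Hypothetical direction-aware variant of save_if_best (assumption; here a
# lower loss is better, while Example #6 tracked a higher-is-better accuracy).
import os
import torch


class ModelSaver:
    def __init__(self, save_dir, mode="min"):
        self.save_dir = save_dir
        self.mode = mode
        self.best = float("inf") if mode == "min" else float("-inf")

    def save_if_best(self, model, score):
        improved = score < self.best if self.mode == "min" else score > self.best
        if improved:
            self.best = score
            os.makedirs(self.save_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(self.save_dir, "best.pt"))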
Example #10
    optimizer = optim.Adam(
        model.parameters(),
        lr=CONFIG.hyperparam.optimization.lr,
        betas=(
            CONFIG.hyperparam.optimization.beta1,
            CONFIG.hyperparam.optimization.beta2,
        ),
        weight_decay=CONFIG.hyperparam.optimization.weight_decay,
    )
    logging.info("done!")
    #########################################################

    ################# load model params ######################
    logging.info("loading model params...")
    model_path = os.path.join(outdir, "best_score.ckpt")
    saver = ModelSaver(model_path)
    offset_ep = saver.load_ckpt(model, optimizer, device)
    if offset_ep == 1:
        raise RuntimeError("aborting, no pretrained model")
    logging.info("done!")
    ##########################################################

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    ################# make submission file ######################
    logging.info("making submission file...")
    submission = {
        "version": "VERSION 1.3",
        "external_data": {
            "used":
Example #11
def main():
    logging = get_root_logger(args.log_path, mode='a')
    logging.info('Command Line Arguments:')
    for key, value in vars(args).items():
        logging.info(key + ' = ' + str(value))
    logging.info('End Command Line Arguments')

    batch_size = args.batch_size
    num_epochs = args.num_epochs

    resume_from = args.resume_from
    steps_per_checkpoint = args.steps_per_checkpoint

    gpu_id = args.gpu_id

    configure_process(args, gpu_id)
    if gpu_id > -1:
        logging.info('Using CUDA on GPU ' + str(gpu_id))
        args.cuda = True
    else:
        logging.info('Using CPU')
        args.cuda = False

    '''Load data'''
    logging.info('Data base dir ' + args.data_base_dir)
    logging.info('Loading vocab from ' + args.vocab_file)
    with open(args.vocab_file, "r", encoding='utf-8') as f:
        args.target_vocab_size = len(f.readlines()) + 4
    logging.info('Load training data from ' + args.data_path)
    train_data = UIDataset(args.data_base_dir, args.data_path, args.label_path, args.vocab_file)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                              num_workers=2, drop_last=True, collate_fn=collate_fn)

    logging.info('Load validation data from ' + args.val_data_path)
    val_data = UIDataset(args.data_base_dir, args.val_data_path, args.label_path, args.vocab_file)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True,
                            num_workers=2, drop_last=True, collate_fn=collate_fn)

    # Build model
    logging.info('Building model')
    if args.resume_from:
        logging.info('Loading checkpoint from %s' % resume_from)
        checkpoint = torch.load(resume_from)
    else:
        checkpoint = None
        logging.info('Creating model with fresh parameters')
    model = build_model(args, gpu_id, checkpoint)
    logging.info(model)

    n_params, enc, dec = cal_parameters(model)
    logging.info('encoder: %d' % enc)
    logging.info('decoder: %d' % dec)
    logging.info('number of parameters: %d' % n_params)

    # Build optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
    optim = Optimizer(optimizer)
    if checkpoint:
        optim.load_state_dict(checkpoint['optim'])
        optim.training_step += 1

    # Build model saver
    model_saver = ModelSaver(args.model_dir, model, optim)

    train(model, optim, model_saver, num_epochs, train_loader, val_loader, steps_per_checkpoint,
          args.valid_steps, args.lr_decay, args.start_decay_at, args.cuda)
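
Here ModelSaver is constructed with the model and optimizer up front, suggesting that each checkpoint bundles both states. A sketch under that assumption (relying on the Optimizer wrapper exposing state_dict(), as its load_state_dict usage above suggests):

# Sketch of a saver that bundles model and optimizer state per step
# (assumption; relies on the Optimizer wrapper exposing state_dict()).
import os
import torch


class ModelSaver:
    def __init__(self, model_dir, model, optim):
        self.model_dir = model_dir
        self.model = model
        self.optim = optim

    def save(self, step):
        os.makedirs(self.model_dir, exist_ok=True)
        torch.save({"model": self.model.state_dict(),
                    "optim": self.optim.state_dict(),
                    "step": step},
                   os.path.join(self.model_dir, "checkpoint_%d.pt" % step))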
Example #12
obs_size = 2 * player_size + hand_size + hand_size + 32

num_actions = 4
hidden_1 = 256
hidden_2 = 64

# Actor maps state to actions' probabilities
actor = nn.Sequential(nn.Linear(obs_size, hidden_1), nn.ReLU(),
                      nn.Linear(hidden_1, hidden_2), nn.ReLU(),
                      nn.Linear(hidden_2, num_actions), nn.Softmax(dim=1))

optimizer = optim.Adam(actor.parameters(), lr=0.01)
discounting = 0.99999

saver = ModelSaver({
    'actor': actor,
    'optim_actor': optimizer
}, './models/Sedma/VPG-3')
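
This variant takes a name-to-module mapping plus a directory, and exposes load() with an ignore_errors flag (see the commented-out calls below). A minimal sketch of such a saver, assuming every tracked object implements state_dict()/load_state_dict():

# Minimal sketch of a dict-based ModelSaver (assumption: every entry
# implements state_dict()/load_state_dict(), as modules and optimizers do).
import os
import torch


class ModelSaver:
    def __init__(self, objects, save_dir):
        self.objects = objects
        self.save_dir = save_dir

    def save(self):
        os.makedirs(self.save_dir, exist_ok=True)
        for name, obj in self.objects.items():
            torch.save(obj.state_dict(),
                       os.path.join(self.save_dir, name + ".pt"))

    def load(self, ignore_errors=False):
        for name, obj in self.objects.items():
            try:
                obj.load_state_dict(
                    torch.load(os.path.join(self.save_dir, name + ".pt")))
            except (FileNotFoundError, RuntimeError):
                if not ignore_errors:
                    raise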

# saver.load()

# saver.load(ignore_errors=True)


def card2tensor(c):
    suit, rank = c
    suit = suit2ix[suit]
    rank = rank2ix[rank]
    return torch.cat((one_hot(suit, len(suits)), one_hot(rank, len(ranks))))
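
one_hot is not defined in this snippet; a minimal helper consistent with the torch.cat usage above (returning a 1-D float tensor) could be:

# Assumed helper: one_hot(index, size) -> 1-D float tensor, matching the
# torch.cat usage above.
import torch


def one_hot(index, size):
    v = torch.zeros(size)
    v[index] = 1.0
    return v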


def state2tensor(state):