Example #1
def transform_args(args):
    # shared transform
    config_args = misc.load_config_yaml(args.config_path)
    config_args["training"]["output_folder"] = misc.get_output_folder_from_config(
        saveplace=args.saveplace, config_path=args.config_path
    )
    config.cfg.DEBUG = args.debug
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # recover epoch and checkpoints
    ckpt_path = None
    if os.path.exists(config_args["training"]["output_folder"]):
        if not args.from_scratch:
            ckpt_path = misc.get_previous_ckpt(config_args["training"]["output_folder"])
        else:
            rmtree(config_args["training"]["output_folder"])
    if ckpt_path is not None:
        LOGGER.warning(f"Last checkpoint: {ckpt_path}")
        checkpoint = torch.load(ckpt_path, map_location=device)
        start_epoch = checkpoint["epoch"] + 1
    else:
        os.makedirs(config_args["training"]["output_folder"], exist_ok=True)
        LOGGER.info("Starting from scratch")
        checkpoint = None
        start_epoch = 1
    copyfile(
        args.config_path,
        os.path.join(config_args["training"]["output_folder"], f"config_{start_epoch}.yaml")
    )

    # seed
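    # offset the base seed by the starting epoch so a resumed run does not replay the same random stream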
    config.cfg.RANDOM.SEED = args.seed - 1 + start_epoch
    misc.set_determ(config.cfg.RANDOM.SEED)
    return config_args, start_epoch, device, checkpoint
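
The argument parser that feeds transform_args is not shown in these examples. A minimal argparse sketch, assuming the flag names simply mirror the attributes read above (plus dataplace, which train in Example #4 expects); the real CLI may differ:

import argparse

def parse_args():
    # hypothetical parser: flag names inferred from the attributes read in transform_args
    parser = argparse.ArgumentParser(description="Training entry point (sketch)")
    parser.add_argument("--config_path", required=True, help="Path to the YAML config file")
    parser.add_argument("--saveplace", required=True, help="Root folder under which outputs are written")
    parser.add_argument("--dataplace", required=True, help="Root folder holding the datasets (used by train)")
    parser.add_argument("--gpu", default="0", help="Value assigned to CUDA_VISIBLE_DEVICES")
    parser.add_argument("--seed", type=int, default=0, help="Base random seed")
    parser.add_argument("--debug", action="store_true", help="Sets config.cfg.DEBUG")
    parser.add_argument("--from_scratch", action="store_true",
                        help="Delete any existing output folder instead of resuming from its last checkpoint")
    return parser.parse_args()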
Example #2
def transform_args(args):
    # shared transform
    config_args = misc.load_config_yaml(args.config_path)
    config_args["training"]["output_folder"] = output_folder = misc.get_output_folder_from_config(
        saveplace=args.saveplace, config_path=args.config_path
    )
    config.cfg.DEBUG = args.debug
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # recover epoch and checkpoints
    checkpoint = None
    if os.path.exists(output_folder):
        if not args.from_scratch:
            checkpoint = misc.get_previous_ckpt(output_folder)
        else:
            rmtree(output_folder)
            os.mkdir(output_folder)
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    copyfile(
        args.config_path,
        os.path.join(output_folder, "config.yaml")
    )

    # seed
    config.cfg.RANDOM.SEED = args.seed
    misc.set_determ(config.cfg.RANDOM.SEED)
    return config_args, device, checkpoint
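
misc.get_output_folder_from_config is called in every transform above but its implementation is not shown. A plausible sketch, purely an assumption about its behaviour (one output folder per config file under saveplace):

import os

def get_output_folder_from_config(saveplace, config_path):
    # assumed behaviour: name the output folder after the config file (without extension)
    config_name = os.path.splitext(os.path.basename(config_path))[0]
    return os.path.join(saveplace, config_name)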
Example #3
def transform_args(args):
    # shared transform
    config_args = misc.load_config_yaml(args.config_path)

    config.cfg.DEBUG = args.debug
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # recover epoch and checkpoint
    if args.checkpoint == "best" or misc.is_int(args.checkpoint):
        assert args.saveplace is not None
        checkpoint = misc.get_checkpoint(
            epoch=args.checkpoint,
            output_folder=misc.get_output_folder_from_config(
                saveplace=args.saveplace, config_path=args.config_path))
    elif os.path.exists(args.checkpoint):
        checkpoint = args.checkpoint
    else:
        raise ValueError(
            f"args.checkpoint: {args.checkpoint} should be either 'best', an int or a path to a previously trained checkpoint"
        )

    # seed
    misc.set_determ(config.cfg.RANDOM.SEED)
    return config_args, device, checkpoint
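
misc.is_int is not shown either; the checkpoint resolution above only needs it to tell an epoch number apart from "best" or a filesystem path. A minimal sketch of such a helper, offered as an assumption:

def is_int(value):
    # hypothetical helper: True if the (string) value parses as an integer
    try:
        int(value)
        return True
    except (TypeError, ValueError):
        return False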
Example #4
def train(config_args, device, checkpoint, dataplace):

    # Load dataset
    LOGGER.info(f"Loading dataset {config_args['data']['dataset']}")
    dloader = get_loader(config_args, dataplace=dataplace)

    # Set learner
    learner = Learner(
        config_args=config_args,
        dloader=dloader,
        device=device,
    )

    # Resume existing model or from pretrained one
    if checkpoint is not None:
        LOGGER.warning(f"Load checkpoint: {checkpoint}")
        start_epoch = learner.load_checkpoint(
            checkpoint, include_optimizer=True, return_epoch=True) + 1
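        # shift the seed by the resumed epoch so the restart uses a different but deterministic random stream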
        config.cfg.RANDOM.SEED = config.cfg.RANDOM.SEED - 1 + start_epoch
        misc.set_determ(config.cfg.RANDOM.SEED)
    else:
        LOGGER.info("Starting from scratch")
        start_epoch = 1

    LOGGER.info(f"Saving logs in: {config_args['training']['output_folder']}")

    # Start training
    _config_name = os.path.split(
        os.path.splitext(config_args['training']['config_path'])[0])[-1]

    try:
        epoch = start_epoch
        for epoch in range(start_epoch, config_args["training"]["nb_epochs"] + 1):
            LOGGER.debug(f"Epoch: {epoch} for: {_config_name}")
            learner.dloader.traindatasetwrapper.set_ratio_epoch(
                ratioepoch=epoch / config_args["training"]["nb_epochs"]
            )
            learner.train(epoch)

    except KeyboardInterrupt:
        LOGGER.warning(f"KeyboardInterrupt for: {_config_name}")
        if not click.confirm("continue?", abort=False):
            raise

    except Exception:
        LOGGER.error(f"Exception for: {_config_name}")
        raise

    return epoch
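
The examples do not show how the transforms and the training loop are wired together. A hypothetical entry point, assuming the parse_args sketch after Example #1, the transform_args variant of Example #2, and the module-level LOGGER used throughout; dataplace coming from the CLI is also an assumption:

def main():
    args = parse_args()
    config_args, device, checkpoint = transform_args(args)
    last_epoch = train(config_args, device, checkpoint, dataplace=args.dataplace)
    LOGGER.info(f"Training stopped after epoch {last_epoch}")


if __name__ == "__main__":
    main()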