Example #1
def dqn_per_gridworld():
    hp = DictConfig({})

    hp.steps = 1000
    hp.batch_size = 500
    hp.replay_batch = 100
    hp.replay_size = 1000
    hp.delete_freq = 100 * (hp.batch_size + hp.replay_size)  # every 100 steps

    hp.env_record_freq = 100
    hp.env_record_duration = 25

    hp.max_steps = 50
    hp.grid_size = 4

    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = (GenericConvModel(height=4,
                              width=4,
                              in_channels=4,
                              channels=[50],
                              out_size=4).float().to(device))

    train_dqn_per(
        GridWorldEnvWrapper,
        model,
        hp,
        project_name="SimpleGridWorld",
        run_name="dqn_per",
    )
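As a point of reference, here is a minimal sketch of the proportional prioritized-replay sampling that a train_dqn_per-style loop typically performs with replay_size and replay_batch; the class and method names below are illustrative assumptions, not the project's actual implementation.

import numpy as np

class PrioritizedReplayBuffer:
    """Proportional PER sketch: sampling probability ~ |TD error|^alpha."""

    def __init__(self, size, alpha=0.6):
        self.size, self.alpha = size, alpha
        self.data, self.priorities = [], []

    def add(self, transition, td_error=1.0):
        if len(self.data) >= self.size:  # evict the oldest transition when full
            self.data.pop(0)
            self.priorities.pop(0)
        self.data.append(transition)
        self.priorities.append((abs(td_error) + 1e-6) ** self.alpha)

    def sample(self, batch_size):
        # draw indices in proportion to stored priorities
        probs = np.array(self.priorities) / sum(self.priorities)
        idx = np.random.choice(len(self.data), batch_size, p=probs)
        return idx, [self.data[i] for i in idx]

    def update_priorities(self, idx, td_errors):
        # refresh priorities after the learner recomputes TD errors
        for i, err in zip(idx, td_errors):
            self.priorities[i] = (abs(err) + 1e-6) ** self.alpha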
Example #2
    def test_dqn_vanilla(self, *_):
        from dqn.dqn import train_dqn

        hp = DictConfig({})

        hp.steps = 2
        hp.batch_size = 2
        hp.env_record_freq = 0
        hp.env_record_duration = 0

        hp.max_steps = 50
        hp.grid_size = 4

        hp.lr = 1e-3
        hp.epsilon_exploration = 0.1
        hp.gamma_discount = 0.9

        for case in env_cases:
            print(case["env"].__name__)

            model = GenericLinearModel(
                in_size=case["input"],
                units=[10],
                out_size=case["output"],
                flatten=case.get("flatten", False),
            )

            train_dqn(case["env"], model, hp)
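env_cases is defined elsewhere in the test module; judging from how it is indexed in the loop above, each entry presumably has roughly this shape (the concrete wrappers and sizes below are illustrative guesses):

env_cases = [
    {"env": GridWorldEnvWrapper, "input": 4 * 4 * 4, "output": 4, "flatten": True},
    {"env": TicTacToeEnvWrapper, "input": 18, "output": 9, "flatten": True},
]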
Example #3
def breakout_double_dqn():
    hp = DictConfig({})

    hp.steps = 2000
    hp.batch_size = 50

    hp.replay_batch = 50
    hp.replay_size = 1000

    hp.delete_freq = 50 * (hp.batch_size + hp.replay_size)  # every 50 steps
    hp.delete_percentage = 0.2

    hp.env_record_freq = 100
    hp.env_record_duration = 50

    hp.lr = 1e-3
    hp.gamma_discount = 0.9

    # hp.epsilon_exploration = 0.1
    hp.epsilon_flatten_step = 1500
    hp.epsilon_start = 1
    hp.epsilon_end = 0.1
    hp.epsilon_decay_function = decay_functions.LINEAR

    hp.target_model_sync_freq = 50

    model = GenericConvModel(42, 42, 3, [50, 50, 50], [100], 4)

    train_dqn_double(
        BreakoutEnvWrapper, model, hp, project_name="Breakout", run_name="double_dqn"
    )
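For intuition, a minimal sketch of what decay_functions.LINEAR plausibly computes from epsilon_start, epsilon_end, and epsilon_flatten_step, assuming a straight-line anneal that flattens once the step budget is reached (the real helper may differ):

def linear_epsilon(step: int, hp) -> float:
    # anneal epsilon_start -> epsilon_end over epsilon_flatten_step steps, then hold
    if step >= hp.epsilon_flatten_step:
        return hp.epsilon_end
    frac = step / hp.epsilon_flatten_step
    return hp.epsilon_start + frac * (hp.epsilon_end - hp.epsilon_start)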
Example #4
def train_dqn_connect4():

    hp = DictConfig({})

    hp.steps = 20
    hp.batch_size = 2
    hp.max_steps = 10
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(2 * 6 * 7, [10], 7, flatten=True).float().to(device)

    train_dqn(ConnectXEnvWrapper, model, hp, name="Connect4")
Example #5
def breakout_dqn():

    hp = DictConfig({})

    hp.steps = 2000
    hp.batch_size = 32
    hp.env_record_freq = 500
    hp.env_record_duration = 100
    hp.max_steps = 1000
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(42 * 42 * 3, [100, 100], 4, flatten=True)

    train_dqn(
        BreakoutEnvWrapper, model, hp, project_name="Breakout", run_name="vanilla_dqn"
    )
Example #6
def dqn_double():
    hp = DictConfig({})

    hp.steps = 1000
    hp.batch_size = 500

    hp.replay_batch = 100
    hp.replay_size = 1000

    hp.delete_freq = 100 * (hp.batch_size + hp.replay_size)  # every 100 steps

    hp.env_record_freq = 100
    hp.env_record_duration = 25

    hp.max_steps = 50
    hp.grid_size = 4

    hp.lr = 1e-3
    hp.gamma_discount = 0.9

    # hp.epsilon_exploration = 0.1
    hp.epsilon_flatten_step = 700
    hp.epsilon_start = 1
    hp.epsilon_end = 0.001
    hp.epsilon_decay_function = decay_functions.LINEAR

    hp.target_model_sync_freq = 50

    model = (GenericConvModel(height=4,
                              width=4,
                              in_channels=4,
                              channels=[50],
                              out_size=4).float().to(device))

    train_dqn_double(
        GridWorldEnvWrapper,
        model,
        hp,
        project_name="SimpleGridWorld",
        run_name="dqn_target",
    )
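For reference, the core target computation a double-DQN trainer like train_dqn_double typically performs: the online network picks the greedy action while the target network, synced every hp.target_model_sync_freq steps, evaluates it. The function below is a sketch under those assumptions, not the project's code:

import torch

def double_dqn_targets(model, target_model, rewards, next_states, dones, gamma):
    with torch.no_grad():
        # online network selects the greedy next action...
        next_actions = model(next_states).argmax(dim=1, keepdim=True)
        # ...and the target network evaluates it, curbing Q-value overestimation
        next_q = target_model(next_states).gather(1, next_actions).squeeze(1)
    return rewards + gamma * next_q * (1.0 - dones)  # dones as a 0/1 float tensor

# every hp.target_model_sync_freq steps:
#     target_model.load_state_dict(model.state_dict())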
Example #7
def pg_gridworld():

    hp = DictConfig({})

    hp.episodes = 2
    hp.batch_size = 2

    hp.lr = 1e-3

    hp.gamma_discount_credits = 0.9
    hp.gamma_discount_returns = 0.9

    model = (GenericConvModel(height=4,
                              width=4,
                              in_channels=4,
                              channels=[50],
                              out_size=4).float().to(device))

    train_pg(GridWorldEnvWrapper,
             model,
             hp,
             project_name="SimpleGridWorld",
             run_name="pg")
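The two discount factors suggest the loop discounts both returns and per-step credits; here is a minimal reward-to-go sketch of the kind of discounting train_pg presumably applies (an assumption, not the actual internals):

def discounted_rewards_to_go(rewards, gamma):
    # G_t = r_t + gamma * G_{t+1}, computed backwards over the episode
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.append(g)
    return returns[::-1]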
Example #8
    def test_pg(self, *_):
        from dqn.pg import train_pg

        hp = DictConfig({})

        hp.episodes = 2
        hp.batch_size = 2

        hp.lr = 1e-3

        hp.gamma_discount_returns = 0.9
        hp.gamma_discount_credits = 0.9

        for case in env_cases:
            print(case["env"].__name__)

            model = GenericLinearModel(
                in_size=case["input"],
                units=[10],
                out_size=case["output"],
                flatten=case.get("flatten", False),
            )

            train_pg(case["env"], model, hp)
Example #9
class FrozenLakeEnvWrapper(EnvWrapper):  # base class assumed; the original class declaration is missing from this snippet
    def __init__(self):
        super().__init__()
        self.env = FrozenLakeEnv(map_name="4x4", is_slippery=True)

    def get_legal_actions(self):
        return list(range(4))

    @staticmethod
    def get_state_batch(envs: Iterable) -> torch.Tensor:
        return to_onehot([env.state for env in envs], 16).float()
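to_onehot is imported from elsewhere; given the call above, it presumably maps integer states to one-hot rows, along the lines of this sketch:

import torch

def to_onehot(indices, num_classes):
    # e.g. to_onehot([2, 0], 4) -> [[0, 0, 1, 0], [1, 0, 0, 0]]
    out = torch.zeros(len(indices), num_classes)
    out[torch.arange(len(indices)), torch.tensor(indices)] = 1.0
    return out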


if __name__ == "__main__":

    hp = DictConfig({})

    hp.steps = 5000
    hp.batch_size = 500

    hp.max_steps = 200

    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    hp.units = [10]

    model = GenericLinearModel(16, hp.units, 4).double().to(device)

    train_dqn(FrozenLakeEnvWrapper, model, hp, name="FrozenLake")
Example #10
def get_trainer_from_cfg(cfg: DictConfig,
                         lightning_module,
                         stopper,
                         profiler: str = None) -> pl.Trainer:
    """Gets a PyTorch Lightning Trainer from a configuration

    Supports:
        automatic batch sizing
        automatic learning rate finding (experimental)
        callback instantiation
        logging, both to disk and with TensorBoard

    Parameters
    ----------
    cfg : DictConfig
        configuration
    lightning_module : pl.LightningModule
        Lightning model to train
    stopper : callable
        Method to stop training. Must be passed so that figuring out batch size does not "count" towards stopping
    profiler : str, optional
        https://pytorch-lightning.readthedocs.io/en/latest/advanced/profiler.html, by default None

    Returns
    -------
    pl.Trainer
        https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html
    """
    steps_per_epoch = cfg.train.steps_per_epoch
    for split in ['train', 'val', 'test']:
        if steps_per_epoch[split] is None:
            steps_per_epoch[split] = 1.0

    # reload_dataloaders_every_epoch = True: a bit slower, but enables validation dataloader to get the new, automatic
    # learning rate schedule.

    if cfg.compute.batch_size == 'auto' or cfg.train.lr == 'auto':
        trainer = pl.Trainer(gpus=[cfg.compute.gpu_id],
                             precision=16 if cfg.compute.fp16 else 32,
                             limit_train_batches=1.0,
                             limit_val_batches=1.0,
                             limit_test_batches=1.0,
                             num_sanity_val_steps=0)
        # callbacks=[ExampleImagesCallback()])
        tmp_metrics = lightning_module.metrics
        tmp_workers = lightning_module.hparams.compute.num_workers
        # visualize_examples = lightning_module.visualize_examples

        if lightning_module.model_type != 'sequence':
            # A somewhat common failure: the batch-size auto-tuner pushes VRAM usage to the maximum,
            # but during training we probabilistically sample colorspace transforms. In an "unlucky"
            # batch, every training sample might be converted to HSV, have hue and saturation changed,
            # and be converted back; that spike is rare enough to be missed during auto-tuning, so it
            # surfaces as an out-of-memory error at train time instead. BAD! So we crank the colorspace
            # augmentation probability to 1.0, pick the batch size, then change it back.
            original_gpu_transforms = deepcopy(lightning_module.gpu_transforms)

            log.debug('orig: {}'.format(lightning_module.gpu_transforms))

            original_augs = cfg.augs
            new_augs = deepcopy(cfg.augs)
            new_augs.color_p = 1.0

            arch = lightning_module.hparams[lightning_module.model_type].arch
            mode = '3d' if '3d' in arch.lower() else '2d'
            gpu_transforms = get_gpu_transforms(new_augs, mode)
            lightning_module.gpu_transforms = gpu_transforms
            log.debug('new: {}'.format(lightning_module.gpu_transforms))

        tuner = pl.tuner.tuning.Tuner(trainer)
        # hack for lightning to find the batch size
        cfg.batch_size = 2  # to start

        empty_metrics = EmptyMetrics()
        # don't store metrics when batch size finding
        lightning_module.metrics = empty_metrics
        # don't visualize our model inputs when batch size finding
        # lightning_module.visualize_examples = False
        should_viz = cfg.train.viz_examples
        lightning_module.hparams.train.viz_examples = 0
        # dramatically reduces RAM usage by this process
        lightning_module.hparams.compute.num_workers = min(tmp_workers, 1)
        if cfg.compute.batch_size == 'auto':
            max_trials = int(math.log2(cfg.compute.max_batch_size)) - int(
                math.log2(cfg.compute.min_batch_size))
            log.info('max trials: {}'.format(max_trials))
            new_batch_size = trainer.tuner.scale_batch_size(
                lightning_module,
                mode='power',
                steps_per_trial=30,
                init_val=cfg.compute.min_batch_size,
                max_trials=max_trials)
            cfg.compute.batch_size = new_batch_size
            log.info('auto-tuned batch size: {}'.format(new_batch_size))
        if cfg.train.lr == 'auto':
            lr_finder = trainer.tuner.lr_find(lightning_module,
                                              early_stop_threshold=None,
                                              min_lr=1e-6,
                                              max_lr=10.0)
            # log.info(lr_finder.results)
            plt.style.use('seaborn')
            fig = lr_finder.plot(suggest=True, show=False)
            viz.save_figure(fig, 'auto_lr_finder', False, 0, overwrite=False)
            plt.close(fig)
            new_lr = lr_finder.suggestion()
            log.info('auto-tuned learning rate: {}'.format(new_lr))
            cfg.train.lr = new_lr
            lightning_module.lr = new_lr
            lightning_module.hparams.lr = new_lr
        del trainer, tuner
        #  restore lightning module to original state
        lightning_module.hparams.train.viz_examples = should_viz
        lightning_module.metrics = tmp_metrics
        lightning_module.hparams.compute.num_workers = tmp_workers
        if lightning_module.model_type != 'sequence':
            lightning_module.gpu_transforms = original_gpu_transforms
            log.debug('reverted: {}'.format(lightning_module.gpu_transforms))

    key_metric = lightning_module.metrics.key_metric
    mode = 'min' if 'loss' in key_metric else 'max'
    monitor = f'val/{key_metric}'
    dirpath = os.path.join(cfg.run.dir, 'lightning_checkpoints')
    callback_list = [
        FPSCallback(),
        MetricsCallback(),
        ExampleImagesCallback(),
        CheckpointCallback(),
        StopperCallback(stopper),
        pl.callbacks.ModelCheckpoint(dirpath=dirpath,
                                     save_top_k=1,
                                     save_last=True,
                                     mode=mode,
                                     monitor=monitor,
                                     save_weights_only=True)
    ]
    if 'tune' in cfg and cfg.tune.use and ray:
        callback_list.append(
            TuneReportCallback(OmegaConf.to_container(cfg.tune.metrics),
                               on='validation_end'))
        # https://docs.ray.io/en/master/tune/tutorials/tune-pytorch-lightning.html
        tensorboard_logger = pl.loggers.tensorboard.TensorBoardLogger(
            save_dir=get_trial_dir(),
            name="",
            version=".",
            default_hp_metric=False)
        refresh_rate = 0
    else:
        tensorboard_logger = pl.loggers.tensorboard.TensorBoardLogger(
            os.getcwd())
        refresh_rate = 1

    # tuning messes with the callbacks
    trainer = pl.Trainer(gpus=[cfg.compute.gpu_id],
                         precision=16 if cfg.compute.fp16 else 32,
                         limit_train_batches=steps_per_epoch['train'],
                         limit_val_batches=steps_per_epoch['val'],
                         limit_test_batches=steps_per_epoch['test'],
                         logger=tensorboard_logger,
                         max_epochs=cfg.train.num_epochs,
                         num_sanity_val_steps=0,
                         callbacks=callback_list,
                         reload_dataloaders_every_epoch=True,
                         progress_bar_refresh_rate=refresh_rate,
                         profiler=profiler)
    torch.cuda.empty_cache()
    # gc.collect()

    # import signal
    # signal.signal(signal.SIGTERM, signal.SIG_DFL)
    # log.info('trainer is_slurm_managing_tasks: {}'.format(trainer.is_slurm_managing_tasks))
    return trainer
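A hedged usage sketch: the real cfg schema lives in the project's Hydra/OmegaConf configs, so the keys below are inferred from the function body, and lightning_module and my_stopper stand in for project objects:

cfg = OmegaConf.create({
    "compute": {"gpu_id": 0, "fp16": False, "batch_size": 32, "num_workers": 4,
                "min_batch_size": 8, "max_batch_size": 256},
    "train": {"lr": 1e-4, "num_epochs": 10, "viz_examples": 10,
              "steps_per_epoch": {"train": None, "val": None, "test": None}},
    "run": {"dir": "runs/example"},
    "augs": {"color_p": 0.5},
})
trainer = get_trainer_from_cfg(cfg, lightning_module, stopper=my_stopper)
trainer.fit(lightning_module)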
Example #11
class SokobanV2L0EnvWrapper(EnvWrapper):  # base class assumed; the original class declaration is missing from this snippet
    max_steps = 500
    reward_range = (-10, 10)  # TODO: Fix this

    def __init__(self):
        super().__init__()
        self.env = gym.make(
            "GDY-Sokoban---2-v0",
            global_observer_type=gd.ObserverType.VECTOR,
            player_observer_type=gd.ObserverType.VECTOR,
            level=0,
        )


if __name__ == "__main__":

    hp = DictConfig({})

    hp.steps = 10000
    hp.batch_size = 1000
    hp.env_record_freq = 500
    hp.env_record_duration = 50
    hp.max_steps = 200
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(5 * 7 * 8, [10], 5,
                               flatten=True).float().to(device)

    train_dqn(SokobanV2L0EnvWrapper, model, hp, name="SokobanV2L0")
Example #12
from envs.env_wrapper import (
    PettingZooEnvWrapper,
    NumpyStateMixin,
    petting_zoo_random_player,
)
from models import GenericLinearModel
from settings import device


class TicTacToeEnvWrapper(PettingZooEnvWrapper, NumpyStateMixin):
    def __init__(self):
        super(TicTacToeEnvWrapper, self).__init__(
            env=tictactoe_v3.env(), opponent_policy=petting_zoo_random_player
        )


if __name__ == "__main__":

    hp = DictConfig({})

    hp.steps = 20
    hp.batch_size = 2
    hp.max_steps = 10
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(18, [10], 9, flatten=True).float().to(device)

    train_dqn(TicTacToeEnvWrapper, model, hp, name="TicTacToe")
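petting_zoo_random_player is imported from envs.env_wrapper; for PettingZoo classic games such as tictactoe_v3, observations carry an action_mask, so a random opponent presumably reduces to something like this sketch (a hypothetical re-implementation, not the imported helper):

import random

def random_legal_player(observation):
    # pick uniformly among the moves flagged legal in the action mask
    legal = [i for i, ok in enumerate(observation["action_mask"]) if ok]
    return random.choice(legal)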
Example #13
def run(opt: DictConfig) -> None:
    print(opt)
    # Set DDP variables
    opt.world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    opt.global_rank = int(os.environ["RANK"]) if "RANK" in os.environ else -1
    set_logging(opt.global_rank)
    if opt.global_rank in [-1, 0]:
        os.chdir(
            "/content/drive/My Drive/Colab Notebooks/AITraining/yolo/yolov5/")
        check_git_status()
        check_requirements()

    # Resume
    if opt.resume:  # resume an interrupted run
        ckpt = (
            opt.resume if isinstance(opt.resume, str) else get_latest_run()
        )  # specified or most recent path
        assert os.path.isfile(ckpt), "ERROR: --resume checkpoint does not exist"
        apriori = opt.global_rank, opt.local_rank
        with open(Path(ckpt).parent.parent / "opt.yaml") as f:
            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader))  # replace
        (
            opt.cfg,
            opt.weights,
            opt.resume,
            opt.batch_size,
            opt.global_rank,
            opt.local_rank,
        ) = (
            "",
            ckpt,
            True,
            opt.total_batch_size,
            *apriori,
        )  # reinstate
        logger.info("Resuming training from %s" % ckpt)
    else:
        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
        opt.data, opt.cfg, opt.hyp = (
            check_file(opt.data),
            check_file(opt.cfg),
            check_file(opt.hyp),
        )  # check files
        assert len(opt.cfg) or len(opt.weights), \
            "either --cfg or --weights must be specified"
        opt.img_size.extend([opt.img_size[-1]] *
                            (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
        opt.name = "evolve" if opt.evolve else opt.name
        opt.save_dir = increment_path(
            Path(opt.project) / opt.name,
            exist_ok=opt.exist_ok | opt.evolve)  # increment run

    # DDP mode
    opt.total_batch_size = opt.batch_size
    device = select_device(opt.device, batch_size=opt.batch_size)
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device("cuda", opt.local_rank)
        dist.init_process_group(backend="nccl",
                                init_method="env://")  # distributed backend
        assert (opt.batch_size % opt.world_size == 0
                ), "--batch-size must be multiple of CUDA device count"
        opt.batch_size = opt.total_batch_size // opt.world_size

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps

    # Train
    logger.info(opt)
    try:
        import wandb
    except ImportError:
        wandb = None
        prefix = colorstr("wandb: ")
        logger.info(
            f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)"
        )
    if not opt.evolve:
        tb_writer = None  # init loggers
        if opt.global_rank in [-1, 0]:
            logger.info(
                f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/'
            )
            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
        train(hyp, opt, device, tb_writer, wandb)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            "lr0": (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            "lrf": (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            "momentum": (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            "weight_decay": (1, 0.0, 0.001),  # optimizer weight decay
            "warmup_epochs": (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            "warmup_momentum": (1, 0.0, 0.95),  # warmup initial momentum
            "warmup_bias_lr": (1, 0.0, 0.2),  # warmup initial bias lr
            "box": (1, 0.02, 0.2),  # box loss gain
            "cls": (1, 0.2, 4.0),  # cls loss gain
            "cls_pw": (1, 0.5, 2.0),  # cls BCELoss positive_weight
            "obj": (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            "obj_pw": (1, 0.5, 2.0),  # obj BCELoss positive_weight
            "iou_t": (0, 0.1, 0.7),  # IoU training threshold
            "anchor_t": (1, 2.0, 8.0),  # anchor-multiple threshold
            "anchors": (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            "fl_gamma": (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            "hsv_h": (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            "hsv_s": (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            "hsv_v": (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            "degrees": (1, 0.0, 45.0),  # image rotation (+/- deg)
            "translate": (1, 0.0, 0.9),  # image translation (+/- fraction)
            "scale": (1, 0.0, 0.9),  # image scale (+/- gain)
            "shear": (1, 0.0, 10.0),  # image shear (+/- deg)
            "perspective": (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            "flipud": (1, 0.0, 1.0),  # image flip up-down (probability)
            "fliplr": (0, 0.0, 1.0),  # image flip left-right (probability)
            "mosaic": (1, 0.0, 1.0),  # image mosaic (probability)
            "mixup": (1, 0.0, 1.0),  # image mixup (probability)
        }

        assert opt.local_rank == -1, "DDP mode not implemented for --evolve"
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        yaml_file = Path(opt.save_dir) / "hyp_evolved.yaml"  # save best result here
        if opt.bucket:
            os.system("gsutil cp gs://%s/evolve.txt ." % opt.bucket)  # download evolve.txt if exists

        for _ in range(300):  # generations to evolve
            if Path("evolve.txt").exists():  # select best hyps and mutate
                # Select parent(s)
                parent = "single"  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt("evolve.txt", ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min()  # weights
                if parent == "single" or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == "weighted":
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) *
                         npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    # columns 0-6 of evolve.txt hold result metrics; hyps start at column 7
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, wandb=wandb)

            # Write mutation results
            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)

        # Plot results
        plot_evolution(yaml_file)
        print(
            f"Hyperparameter evolution complete. Best results saved as: {yaml_file}\n"
            f"Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}"
        )
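To make the mutation step above concrete, the same gain-and-clip formula can be run in isolation; the gains below are illustrative values taken from the meta table's first tuple elements:

import numpy as np

mp, s = 0.8, 0.2                # mutation probability, sigma (as in the loop above)
g = np.array([1.0, 0.3, 1.0])   # per-hyperparameter gains (meta's first tuple element)
npr = np.random
v = np.ones(len(g))
while all(v == 1):              # retry until at least one factor actually changes
    v = (g * (npr.random(len(g)) < mp) * npr.randn(len(g)) *
         npr.random() * s + 1).clip(0.3, 3.0)
print(v)  # e.g. [1.12, 0.97, 1.0]; each parent hyp is multiplied by its factor, then clamped to meta limits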