def dqn_per_gridworld():
    hp = DictConfig({})
    hp.steps = 1000
    hp.batch_size = 500
    hp.replay_batch = 100
    hp.replay_size = 1000
    hp.delete_freq = 100 * (hp.batch_size + hp.replay_size)  # every 100 steps
    hp.env_record_freq = 100
    hp.env_record_duration = 25
    hp.max_steps = 50
    hp.grid_size = 4
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = (
        GenericConvModel(height=4, width=4, in_channels=4, channels=[50], out_size=4)
        .float()
        .to(device)
    )

    train_dqn_per(
        GridWorldEnvWrapper,
        model,
        hp,
        project_name="SimpleGridWorld",
        run_name="dqn_per",
    )
def test_dqn_vanilla(self, *_):
    from dqn.dqn import train_dqn

    hp = DictConfig({})
    hp.steps = 2
    hp.batch_size = 2
    hp.env_record_freq = 0
    hp.env_record_duration = 0
    hp.max_steps = 50
    hp.grid_size = 4
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    for case in env_cases:
        print(case["env"].__name__)
        model = GenericLinearModel(
            in_size=case["input"],
            units=[10],
            out_size=case["output"],
            flatten=case.get("flatten", False),
        )
        train_dqn(case["env"], model, hp)
def breakout_double_dqn():
    hp = DictConfig({})
    hp.steps = 2000
    hp.batch_size = 50
    hp.replay_batch = 50
    hp.replay_size = 1000
    hp.delete_freq = 50 * (hp.batch_size + hp.replay_size)  # every 50 steps
    hp.delete_percentage = 0.2
    hp.env_record_freq = 100
    hp.env_record_duration = 50
    hp.lr = 1e-3
    hp.gamma_discount = 0.9

    # hp.epsilon_exploration = 0.1
    hp.epsilon_flatten_step = 1500
    hp.epsilon_start = 1
    hp.epsilon_end = 0.1
    hp.epsilon_decay_function = decay_functions.LINEAR

    hp.target_model_sync_freq = 50

    model = GenericConvModel(42, 42, 3, [50, 50, 50], [100], 4)

    train_dqn_double(
        BreakoutEnvWrapper, model, hp, project_name="Breakout", run_name="double_dqn"
    )
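# A minimal sketch of what `decay_functions.LINEAR` above is assumed to compute
# from `epsilon_start`, `epsilon_end`, and `epsilon_flatten_step`; the actual
# helper in this repo may differ.
def linear_epsilon(step: int, start: float, end: float, flatten_step: int) -> float:
    """Anneal epsilon linearly from `start` to `end`, then hold it flat."""
    if step >= flatten_step:
        return end
    return start + (end - start) * step / flatten_step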
def train_dqn_connect4():
    hp = DictConfig({})
    hp.steps = 20
    hp.batch_size = 2
    hp.max_steps = 10
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(2 * 6 * 7, [10], 7, flatten=True).float().to(device)

    train_dqn(ConnectXEnvWrapper, model, hp, name="Connect4")
def breakout_dqn():
    hp = DictConfig({})
    hp.steps = 2000
    hp.batch_size = 32
    hp.env_record_freq = 500
    hp.env_record_duration = 100
    hp.max_steps = 1000
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(42 * 42 * 3, [100, 100], 4, flatten=True)

    train_dqn(
        BreakoutEnvWrapper, model, hp, project_name="Breakout", run_name="vanilla_dqn"
    )
def dqn_double():
    hp = DictConfig({})
    hp.steps = 1000
    hp.batch_size = 500
    hp.replay_batch = 100
    hp.replay_size = 1000
    hp.delete_freq = 100 * (hp.batch_size + hp.replay_size)  # every 100 steps
    hp.env_record_freq = 100
    hp.env_record_duration = 25
    hp.max_steps = 50
    hp.grid_size = 4
    hp.lr = 1e-3
    hp.gamma_discount = 0.9

    # hp.epsilon_exploration = 0.1
    hp.epsilon_flatten_step = 700
    hp.epsilon_start = 1
    hp.epsilon_end = 0.001
    hp.epsilon_decay_function = decay_functions.LINEAR

    hp.target_model_sync_freq = 50

    model = (
        GenericConvModel(height=4, width=4, in_channels=4, channels=[50], out_size=4)
        .float()
        .to(device)
    )

    train_dqn_double(
        GridWorldEnvWrapper,
        model,
        hp,
        project_name="SimpleGridWorld",
        run_name="dqn_target",
    )
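# `target_model_sync_freq` suggests a periodic hard update of the target network
# used by double DQN. A hypothetical sync step (the real loop lives inside
# `train_dqn_double`; `step`, `model`, and `target_model` are assumed names):
#
#     if step % hp.target_model_sync_freq == 0:
#         target_model.load_state_dict(model.state_dict())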
def pg_gridworld():
    hp = DictConfig({})
    hp.episodes = 2
    hp.batch_size = 2
    hp.lr = 1e-3
    hp.gamma_discount_credits = 0.9
    hp.gamma_discount_returns = 0.9

    model = (
        GenericConvModel(height=4, width=4, in_channels=4, channels=[50], out_size=4)
        .float()
        .to(device)
    )

    train_pg(
        GridWorldEnvWrapper, model, hp, project_name="SimpleGridWorld", run_name="pg"
    )
def test_pg(self, *_):
    from dqn.pg import train_pg

    hp = DictConfig({})
    hp.episodes = 2
    hp.batch_size = 2
    hp.lr = 1e-3
    hp.gamma_discount_returns = 0.9
    hp.gamma_discount_credits = 0.9

    for case in env_cases:
        print(case["env"].__name__)
        model = GenericLinearModel(
            in_size=case["input"],
            units=[10],
            out_size=case["output"],
            flatten=case.get("flatten", False),
        )
        train_pg(case["env"], model, hp)
    def __init__(self):
        super().__init__()
        self.env = FrozenLakeEnv(map_name="4x4", is_slippery=True)

    def get_legal_actions(self):
        return list(range(4))

    @staticmethod
    def get_state_batch(envs: Iterable) -> torch.Tensor:
        return to_onehot([env.state for env in envs], 16).float()


if __name__ == "__main__":
    hp = DictConfig({})
    hp.steps = 5000
    hp.batch_size = 500
    hp.max_steps = 200
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9
    hp.units = [10]

    model = GenericLinearModel(16, hp.units, 4).double().to(device)

    train_dqn(FrozenLakeEnvWrapper, model, hp, name="FrozenLake")
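# `to_onehot` is assumed to map a batch of integer states to one-hot vectors.
# A minimal sketch consistent with its usage in `get_state_batch` above; the
# repo's own helper may differ in detail.
import torch


def to_onehot(indices, num_classes: int) -> torch.Tensor:
    """Return a (batch, num_classes) one-hot encoding of integer indices."""
    return torch.nn.functional.one_hot(
        torch.as_tensor(indices), num_classes=num_classes
    )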
def get_trainer_from_cfg(cfg: DictConfig,
                         lightning_module,
                         stopper,
                         profiler: str = None) -> pl.Trainer:
    """Gets a PyTorch Lightning Trainer from a configuration.

    Supports:
        automatic batch sizing
        automatic learning rate finding (experimental)
        callback instantiation
        logging, both to disk and with TensorBoard

    Parameters
    ----------
    cfg : DictConfig
        configuration
    lightning_module : pl.LightningModule
        Lightning model to train
    stopper : callable
        Method to stop training. Must be passed so that figuring out the batch size
        does not "count" towards stopping
    profiler : str, optional
        https://pytorch-lightning.readthedocs.io/en/latest/advanced/profiler.html, by default None

    Returns
    -------
    pl.Trainer
        https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html
    """
    steps_per_epoch = cfg.train.steps_per_epoch
    for split in ['train', 'val', 'test']:
        steps_per_epoch[split] = steps_per_epoch[split] if steps_per_epoch[split] is not None else 1.0

    # reload_dataloaders_every_epoch = True: a bit slower, but enables the validation
    # dataloader to get the new, automatic learning rate schedule.
    if cfg.compute.batch_size == 'auto' or cfg.train.lr == 'auto':
        trainer = pl.Trainer(gpus=[cfg.compute.gpu_id],
                             precision=16 if cfg.compute.fp16 else 32,
                             limit_train_batches=1.0,
                             limit_val_batches=1.0,
                             limit_test_batches=1.0,
                             num_sanity_val_steps=0)
        # callbacks=[ExampleImagesCallback()])
        tmp_metrics = lightning_module.metrics
        tmp_workers = lightning_module.hparams.compute.num_workers
        # visualize_examples = lightning_module.visualize_examples
        if lightning_module.model_type != 'sequence':
            # There is a somewhat common error where VRAM is maxed out by the GPU auto-tuner.
            # During training, we probabilistically sample colorspace transforms; in an "unlucky"
            # batch, perhaps all of the training samples are converted to HSV, hue and saturation
            # changed, then changed back. This is rare enough to not be encountered during
            # "auto-tuning," so we'd get a train-time error. BAD!
            # So, we crank up the colorspace augmentation probability, then pick the batch size,
            # then change it back.
            original_gpu_transforms = deepcopy(lightning_module.gpu_transforms)
            log.debug('orig: {}'.format(lightning_module.gpu_transforms))

            original_augs = cfg.augs
            new_augs = deepcopy(cfg.augs)
            new_augs.color_p = 1.0

            arch = lightning_module.hparams[lightning_module.model_type].arch
            mode = '2d'
            gpu_transforms = get_gpu_transforms(new_augs, '3d' if '3d' in arch.lower() else '2d')
            lightning_module.gpu_transforms = gpu_transforms
            log.debug('new: {}'.format(lightning_module.gpu_transforms))

        tuner = pl.tuner.tuning.Tuner(trainer)
        # hack for lightning to find the batch size
        cfg.batch_size = 2  # to start

        empty_metrics = EmptyMetrics()  # don't store metrics when batch size finding
        lightning_module.metrics = empty_metrics
        # don't visualize our model inputs when batch size finding
        # lightning_module.visualize_examples = False
        should_viz = cfg.train.viz_examples
        lightning_module.hparams.train.viz_examples = 0
        # dramatically reduces RAM usage by this process
        lightning_module.hparams.compute.num_workers = min(tmp_workers, 1)

        if cfg.compute.batch_size == 'auto':
            max_trials = int(math.log2(cfg.compute.max_batch_size)) - int(math.log2(cfg.compute.min_batch_size))
            log.info('max trials: {}'.format(max_trials))
            new_batch_size = trainer.tuner.scale_batch_size(lightning_module,
                                                            mode='power',
                                                            steps_per_trial=30,
                                                            init_val=cfg.compute.min_batch_size,
                                                            max_trials=max_trials)
            cfg.compute.batch_size = new_batch_size
            log.info('auto-tuned batch size: {}'.format(new_batch_size))

        if cfg.train.lr == 'auto':
            lr_finder = trainer.tuner.lr_find(lightning_module,
                                              early_stop_threshold=None,
                                              min_lr=1e-6,
                                              max_lr=10.0)
            # log.info(lr_finder.results)
            plt.style.use('seaborn')
            fig = lr_finder.plot(suggest=True, show=False)
            viz.save_figure(fig, 'auto_lr_finder', False, 0, overwrite=False)
            plt.close(fig)
            new_lr = lr_finder.suggestion()
            log.info('auto-tuned learning rate: {}'.format(new_lr))
            cfg.train.lr = new_lr
            lightning_module.lr = new_lr
            lightning_module.hparams.lr = new_lr

        del trainer, tuner

        # restore lightning module to its original state
        lightning_module.hparams.train.viz_examples = should_viz
        lightning_module.metrics = tmp_metrics
        lightning_module.hparams.compute.num_workers = tmp_workers
        if lightning_module.model_type != 'sequence':
            lightning_module.gpu_transforms = original_gpu_transforms
            log.debug('reverted: {}'.format(lightning_module.gpu_transforms))

    key_metric = lightning_module.metrics.key_metric
    mode = 'min' if 'loss' in key_metric else 'max'
    monitor = f'val/{key_metric}'
    dirpath = os.path.join(cfg.run.dir, 'lightning_checkpoints')
    callback_list = [
        FPSCallback(),
        MetricsCallback(),
        ExampleImagesCallback(),
        CheckpointCallback(),
        StopperCallback(stopper),
        pl.callbacks.ModelCheckpoint(dirpath=dirpath,
                                     save_top_k=1,
                                     save_last=True,
                                     mode=mode,
                                     monitor=monitor,
                                     save_weights_only=True)
    ]
    if 'tune' in cfg and cfg.tune.use and ray:
        callback_list.append(
            TuneReportCallback(OmegaConf.to_container(cfg.tune.metrics), on='validation_end'))
        # https://docs.ray.io/en/master/tune/tutorials/tune-pytorch-lightning.html
        tensorboard_logger = pl.loggers.tensorboard.TensorBoardLogger(save_dir=get_trial_dir(),
                                                                      name="",
                                                                      version=".",
                                                                      default_hp_metric=False)
        refresh_rate = 0
    else:
        tensorboard_logger = pl.loggers.tensorboard.TensorBoardLogger(os.getcwd())
        refresh_rate = 1

    # tuning messes with the callbacks
    trainer = pl.Trainer(gpus=[cfg.compute.gpu_id],
                         precision=16 if cfg.compute.fp16 else 32,
                         limit_train_batches=steps_per_epoch['train'],
                         limit_val_batches=steps_per_epoch['val'],
                         limit_test_batches=steps_per_epoch['test'],
                         logger=tensorboard_logger,
                         max_epochs=cfg.train.num_epochs,
                         num_sanity_val_steps=0,
                         callbacks=callback_list,
                         reload_dataloaders_every_epoch=True,
                         progress_bar_refresh_rate=refresh_rate,
                         profiler=profiler)
    torch.cuda.empty_cache()
    # gc.collect()
    # import signal
    # signal.signal(signal.SIGTERM, signal.SIG_DFL)
    # log.info('trainer is_slurm_managing_tasks: {}'.format(trainer.is_slurm_managing_tasks))
    return trainer
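# Hypothetical call site for the factory above, assuming `cfg` carries the
# fields it reads (cfg.compute.gpu_id, cfg.compute.fp16, cfg.train.steps_per_epoch, ...):
#
#     trainer = get_trainer_from_cfg(cfg, lightning_module, stopper)
#     trainer.fit(lightning_module)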
    max_steps = 500
    reward_range = (-10, 10)  # TODO: Fix this

    def __init__(self):
        super().__init__()
        self.env = gym.make(
            "GDY-Sokoban---2-v0",
            global_observer_type=gd.ObserverType.VECTOR,
            player_observer_type=gd.ObserverType.VECTOR,
            level=0,
        )


if __name__ == "__main__":
    hp = DictConfig({})
    hp.steps = 10000
    hp.batch_size = 1000
    hp.env_record_freq = 500
    hp.env_record_duration = 50
    hp.max_steps = 200
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(5 * 7 * 8, [10], 5, flatten=True).float().to(device)

    train_dqn(SokobanV2L0EnvWrapper, model, hp, name="SokobanV2L0")
# Assumed import paths for the remaining names used below (DictConfig,
# tictactoe_v3, train_dqn); the repo's actual layout may differ.
from omegaconf import DictConfig
from pettingzoo.classic import tictactoe_v3

from dqn.dqn import train_dqn
from envs.env_wrapper import (
    PettingZooEnvWrapper,
    NumpyStateMixin,
    petting_zoo_random_player,
)
from models import GenericLinearModel
from settings import device


class TicTacToeEnvWrapper(PettingZooEnvWrapper, NumpyStateMixin):
    def __init__(self):
        super(TicTacToeEnvWrapper, self).__init__(
            env=tictactoe_v3.env(), opponent_policy=petting_zoo_random_player
        )


if __name__ == "__main__":
    hp = DictConfig({})
    hp.steps = 20
    hp.batch_size = 2
    hp.max_steps = 10
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(18, [10], 9, flatten=True).float().to(device)

    train_dqn(TicTacToeEnvWrapper, model, hp, name="TicTacToe")
def run(opt: DictConfig) -> None:
    print(opt)

    # Set DDP variables
    opt.world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    opt.global_rank = int(os.environ["RANK"]) if "RANK" in os.environ else -1
    set_logging(opt.global_rank)
    if opt.global_rank in [-1, 0]:
        os.chdir("/content/drive/My Drive/Colab Notebooks/AITraining/yolo/yolov5/")
        check_git_status()
        check_requirements()

    # Resume
    if opt.resume:  # resume an interrupted run
        ckpt = (
            opt.resume if isinstance(opt.resume, str) else get_latest_run()
        )  # specified or most recent path
        assert os.path.isfile(ckpt), "ERROR: --resume checkpoint does not exist"
        apriori = opt.global_rank, opt.local_rank
        with open(Path(ckpt).parent.parent / "opt.yaml") as f:
            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader))  # replace
        (
            opt.cfg,
            opt.weights,
            opt.resume,
            opt.batch_size,
            opt.global_rank,
            opt.local_rank,
        ) = (
            "",
            ckpt,
            True,
            opt.total_batch_size,
            *apriori,
        )  # reinstate
        logger.info("Resuming training from %s" % ckpt)
    else:
        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
        opt.data, opt.cfg, opt.hyp = (
            check_file(opt.data),
            check_file(opt.cfg),
            check_file(opt.hyp),
        )  # check files
        assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
        opt.name = "evolve" if opt.evolve else opt.name
        opt.save_dir = increment_path(
            Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve
        )  # increment run

    # DDP mode
    opt.total_batch_size = opt.batch_size
    device = select_device(opt.device, batch_size=opt.batch_size)
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device("cuda", opt.local_rank)
        dist.init_process_group(backend="nccl", init_method="env://")  # distributed backend
        assert opt.batch_size % opt.world_size == 0, "--batch-size must be multiple of CUDA device count"
        opt.batch_size = opt.total_batch_size // opt.world_size

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps

    # Train
    logger.info(opt)
    try:
        import wandb
    except ImportError:
        wandb = None
        prefix = colorstr("wandb: ")
        logger.info(
            f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)"
        )
    if not opt.evolve:
        tb_writer = None  # init loggers
        if opt.global_rank in [-1, 0]:
            logger.info(
                f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/'
            )
            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
        train(hyp, opt, device, tb_writer, wandb)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            "lr0": (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            "lrf": (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            "momentum": (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            "weight_decay": (1, 0.0, 0.001),  # optimizer weight decay
            "warmup_epochs": (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            "warmup_momentum": (1, 0.0, 0.95),  # warmup initial momentum
            "warmup_bias_lr": (1, 0.0, 0.2),  # warmup initial bias lr
            "box": (1, 0.02, 0.2),  # box loss gain
            "cls": (1, 0.2, 4.0),  # cls loss gain
            "cls_pw": (1, 0.5, 2.0),  # cls BCELoss positive_weight
            "obj": (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            "obj_pw": (1, 0.5, 2.0),  # obj BCELoss positive_weight
            "iou_t": (0, 0.1, 0.7),  # IoU training threshold
            "anchor_t": (1, 2.0, 8.0),  # anchor-multiple threshold
            "anchors": (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            "fl_gamma": (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            "hsv_h": (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            "hsv_s": (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            "hsv_v": (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            "degrees": (1, 0.0, 45.0),  # image rotation (+/- deg)
            "translate": (1, 0.0, 0.9),  # image translation (+/- fraction)
            "scale": (1, 0.0, 0.9),  # image scale (+/- gain)
            "shear": (1, 0.0, 10.0),  # image shear (+/- deg)
            "perspective": (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            "flipud": (1, 0.0, 1.0),  # image flip up-down (probability)
            "fliplr": (0, 0.0, 1.0),  # image flip left-right (probability)
            "mosaic": (1, 0.0, 1.0),  # image mosaic (probability)
            "mixup": (1, 0.0, 1.0),  # image mixup (probability)
        }

        assert opt.local_rank == -1, "DDP mode not implemented for --evolve"
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        yaml_file = Path(opt.save_dir) / "hyp_evolved.yaml"  # save best result here
        if opt.bucket:
            os.system("gsutil cp gs://%s/evolve.txt ." % opt.bucket)  # download evolve.txt if exists

        for _ in range(300):  # generations to evolve
            if Path("evolve.txt").exists():  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = "single"  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt("evolve.txt", ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min()  # weights
                if parent == "single" or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == "weighted":
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, wandb=wandb)

            # Write mutation results
            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)

        # Plot results
        plot_evolution(yaml_file)
        print(
            f"Hyperparameter evolution complete. Best results saved as: {yaml_file}\n"
            f"Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}"
        )