def __init__(
    self,
    env_spec,
    num_candidate_policies: int = 20,
    policy_arch: GDKC = GDKC(CategoricalMLPPolicy, hidden_sizes=(32, 32)),
    baseline_arch: GDKC = GDKC(
        LinearFeatureBaseline
    ),  # Baseline for GAE (Generalized Advantage Estimation).
    discount_factor: float = 0.99,
    max_rollout_length: int = 500,
    parameters_variance: float = 1.0,
):
    """
    Configure an evolution-strategy agent.

    :param env_spec: environment specification handed to the policy constructor
    :param num_candidate_policies: population size per search iteration
    :param policy_arch: spec used to build the policy network
    :param baseline_arch: spec used to build the GAE baseline
    :param discount_factor: reward discount, in [0, 1]
    :param max_rollout_length: cap on rollout length
    :param parameters_variance: initial variance of the parameter search distribution
    """
    # Environment / rollout configuration.
    self._env_spec = env_spec
    self._discount = discount_factor
    self._max_rollout_length = max_rollout_length
    self.max_path_length = max_rollout_length  # TODO: REMOVE THIS..
    self.sampler_cls = RaySampler

    # Networks built from their specs.
    self.policy: Module = policy_arch(env_spec=env_spec)
    self._baseline = baseline_arch()

    # Search configuration.
    self._num_candidate_policies = num_candidate_policies
    self._parameters_variance = parameters_variance

    # Populated lazily once training starts.
    self._evolution_strategy: CMAEvolutionStrategy = None
    self._shared_params = None
    self._all_returns = None
def __init__(
    self,
    *,
    copy_percentage: float = 1e-2,
    batch_size: int = 100,
    discount_factor: float = 0.999,
    target_update_interval: int = 1,
    num_inner_updates: int = 20,
    sac_alpha: float = 1e-2,
    memory_buffer: Memory = None,
    auto_tune_sac_alpha: bool = False,
    auto_tune_sac_alpha_optimiser_spec: GDKC = GDKC(
        constructor=torch.optim.Adam, lr=3e-4
    ),
    actor_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
    critic_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
    actor_arch_spec: GDKC = GDKC(
        ShallowStdNormalMLP, mean_head_activation=torch.tanh
    ),
    critic_arch_spec: GDKC = GDKC(PreConcatInputMLP),
    critic_criterion: callable = mse_loss,
    **kwargs
):
    """
    Soft Actor-Critic agent configuration.

    :param copy_percentage: fraction used for soft target-network updates, in [0, 1]
    :param batch_size: number of transitions sampled per update
    :param discount_factor: reward discount, in [0, 1]
    :param target_update_interval: updates between target-network syncs
    :param num_inner_updates: gradient steps per outer update
    :param sac_alpha: entropy temperature coefficient
    :param memory_buffer: replay buffer; None creates a fresh
        TransitionPointBuffer(1000000) per instance
    :param auto_tune_sac_alpha: learn the entropy temperature automatically
    :param auto_tune_sac_alpha_optimiser_spec: optimiser spec for the temperature
    :param actor_optimiser_spec: optimiser spec for the actor
    :param critic_optimiser_spec: optimiser spec for the critics
    :param actor_arch_spec: architecture spec for the actor network
    :param critic_arch_spec: architecture spec for the critic network
    :param critic_criterion: loss used for the critic regression target
    :param kwargs: forwarded to the superclass
    """
    super().__init__(**kwargs)

    assert 0 <= discount_factor <= 1.0
    assert 0 <= copy_percentage <= 1.0

    if memory_buffer is None:
        # Build per instance: a buffer constructed in the signature would be a
        # mutable default shared by every agent created from this class.
        memory_buffer = TransitionPointBuffer(1000000)

    self._batch_size = batch_size
    self._discount_factor = discount_factor
    self._target_update_interval = target_update_interval
    self._sac_alpha = sac_alpha
    self._copy_percentage = copy_percentage
    self._memory_buffer = memory_buffer
    self._actor_optimiser_spec: GDKC = actor_optimiser_spec
    self._critic_optimiser_spec: GDKC = critic_optimiser_spec
    self._actor_arch_spec = actor_arch_spec
    self._critic_arch_spec = critic_arch_spec
    self._num_inner_updates = num_inner_updates
    self._critic_criterion = critic_criterion
    self._auto_tune_sac_alpha = auto_tune_sac_alpha
    self._auto_tune_sac_alpha_optimiser_spec = auto_tune_sac_alpha_optimiser_spec

    # Counts inner gradient steps; used to time target-network syncs.
    self.inner_update_i = 0
def __init__(
    self,
    random_process_spec: GDKC = GDKC(constructor=OrnsteinUhlenbeckProcess),
    memory_buffer: Memory = None,
    evaluation_function: callable = F.mse_loss,
    actor_arch_spec: GDKC = GDKC(MLP, output_activation=torch.nn.Tanh()),
    critic_arch_spec: GDKC = GDKC(LateConcatInputMLP),
    discount_factor: float = 0.95,
    update_target_interval: int = 1,
    batch_size: int = 128,
    noise_factor: float = 1e-1,
    copy_percentage: float = 0.005,
    actor_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
    critic_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
    **kwargs):
    """
    DDPG-style agent configuration.

    @param random_process_spec: spec for the exploration noise process
    @param memory_buffer: replay buffer; None creates a fresh TransitionPointBuffer() per instance
    @param evaluation_function: loss used as the critic criterion
    @param actor_arch_spec: architecture spec for the actor network
    @param critic_arch_spec: architecture spec for the critic network
    @param discount_factor: reward discount, in [0, 1]
    @param update_target_interval: updates between target-network syncs
    @param batch_size: number of transitions sampled per update
    @param noise_factor: scale applied to the exploration noise
    @param copy_percentage: fraction used for soft target-network updates, in [0, 1]
    @param actor_optimiser_spec: optimiser spec for the actor
    @param critic_optimiser_spec: optimiser spec for the critic
    @param kwargs: forwarded to the superclass
    """
    super().__init__(**kwargs)

    assert 0 <= discount_factor <= 1.0
    assert 0 <= copy_percentage <= 1.0

    if memory_buffer is None:
        # Construct per instance: a buffer built in the signature would be a
        # mutable default shared by every agent created from this class.
        memory_buffer = TransitionPointBuffer()

    self._copy_percentage = copy_percentage
    self._actor_optimiser_spec = actor_optimiser_spec
    self._critic_optimiser_spec = critic_optimiser_spec
    self._actor_arch_spec = actor_arch_spec
    self._critic_arch_spec = critic_arch_spec
    self._random_process_spec = random_process_spec
    self._memory_buffer = memory_buffer
    self._critic_criteria = evaluation_function
    self._discount_factor = discount_factor
    self._update_target_interval = update_target_interval
    self._batch_size = batch_size
    self._noise_factor = noise_factor
def __build__(
    self,
    observation_space: ObservationSpace,
    action_space: ActionSpace,
    signal_space: SignalSpace,
    metric_writer: Writer = MockWriter(),
    print_model_repr: bool = True,
    *,
    distributional_regressor: Module = None,
    optimiser: Optimizer = None,
) -> None:
    """
    Build the policy network, optimiser and (optional) scheduler for this agent.

    @param observation_space: observation space (sizes come via self._input_shape)
    @param action_space: used to pick a categorical vs. normal output head
    @param signal_space: signal (reward) space
    @param metric_writer: writer for metrics (unused here; kept for interface parity)
    @param print_model_repr: whether to print the model repr (handled by caller/superclass)
    @param distributional_regressor: pre-built network to use instead of constructing one
    @param optimiser: pre-built optimiser to use instead of constructing one
    @return: None
    """
    # Explicit None checks: truthiness of torch objects is unreliable — an
    # empty nn.Sequential is falsy via __len__, which would silently discard a
    # caller-supplied module and rebuild the architecture from the spec.
    if distributional_regressor is not None:
        self.distributional_regressor = distributional_regressor
    else:
        self._policy_arch_spec.kwargs["input_shape"] = self._input_shape

        # Pick the output head that matches the action space.
        if action_space.is_discrete:
            self._policy_arch_spec = GDKC(
                constructor=CategoricalMLP, kwargs=self._policy_arch_spec.kwargs
            )
        else:
            self._policy_arch_spec = GDKC(
                constructor=MultiDimensionalNormalMLP,
                kwargs=self._policy_arch_spec.kwargs,
            )

        self._policy_arch_spec.kwargs["output_shape"] = self._output_shape
        self.distributional_regressor: Module = self._policy_arch_spec().to(
            self._device
        )

    if optimiser is not None:
        self._optimiser = optimiser
    else:
        self._optimiser = self._optimiser_spec(
            self.distributional_regressor.parameters()
        )

    if self._scheduler_spec is not None:
        self._scheduler = self._scheduler_spec(self._optimiser)
    else:
        self._scheduler = None
def b():  # DOES NOT WORK!
    """Demo: spin forever until CaptureEarlyStop fires the exit callback."""
    print("start2")
    stopper = CaptureEarlyStop(GDKC(exit, code=0))
    with stopper:
        while True:
            sleep(0.1)
    print("done2")
def __init__(
    self,
    evaluation_function: callable = torch.nn.CrossEntropyLoss(),
    policy_arch_spec: GDKC = GDKC(constructor=CategoricalMLP),
    discount_factor: float = 0.95,
    optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
    scheduler_spec: GDKC = GDKC(
        constructor=torch.optim.lr_scheduler.StepLR, step_size=100, gamma=0.65
    ),
    memory_buffer: Memory = None,
    **kwargs,
) -> None:
    r"""
    Policy-gradient agent configuration.

    :param evaluation_function: loss applied to the policy output
    :param policy_arch_spec: architecture spec for the policy network
    :param discount_factor: reward discount, in [0, 1]
    :param optimiser_spec: optimiser spec for the policy parameters
    :param scheduler_spec: learning-rate scheduler spec
    :param memory_buffer: trajectory buffer; None creates a fresh
        SampleTrajectoryBuffer() per instance
    :param kwargs: forwarded to the superclass
    """
    super().__init__(**kwargs)

    assert 0 <= discount_factor <= 1.0

    if memory_buffer is None:
        # Per-instance buffer: a SampleTrajectoryBuffer() default in the
        # signature would be a mutable default shared by every instance.
        memory_buffer = SampleTrajectoryBuffer()

    self._memory_buffer = memory_buffer
    self._evaluation_function = evaluation_function
    self._policy_arch_spec = policy_arch_spec
    self._discount_factor = discount_factor
    self._optimiser_spec = optimiser_spec
    self._scheduler_spec = scheduler_spec
    self._mask_terminated_signals = False
from neodroidagent.common import ParallelSession from neodroidagent.common.architectures.distributional.categorical import CategoricalMLP from neodroidagent.configs.test_reference.base_dicrete_test_config import * __author__ = "Christian Heider Nielsen" from neodroidagent.entry_points.session_factory import session_factory from warg import GDKC CONFIG_NAME = __name__ CONFIG_FILE_PATH = pathlib.Path(__file__) RENDER_ENVIRONMENT = True OPTIMISER_SPEC = GDKC(torch.optim.Adam, lr=3e-4) SCHEDULER_SPEC = None POLICY_ARCH_SPEC = GDKC(constructor=CategoricalMLP, hidden_layers=128) # RENDER_FREQUENCY = 1 pg_config = globals() def pg_run(skip_confirmation: bool = True, environment_type: Union[bool, str] = True, *, config=None, **kwargs) -> None: if config is None:
from typing import Tuple from draugr.scipy_utilities import mag_decimation_subsample __all__ = [ "annotate_lines", "default_index_decimator", ] from warg import GDKC, passes_kws_to from matplotlib.pyplot import Axes default_index_decimator = GDKC( mag_decimation_subsample, decimation_factor=5, return_indices=True) # finds interesting features? @passes_kws_to(Axes.annotate) def annotate_lines( ax_: Axes, num_lines: int = 1, # None for all index_decimator: callable = default_index_decimator, color: str = "k", # None for auto color xycoords: Tuple[str, str] = ( "data", # 'axes fraction', "data", ), # TODO: NOT DONE! Where to place annotation, use 'axes fraction' for along axes' ha: str = "left",
def __init__(
    self,
    value_arch_spec: Architecture = GDKC(DuelingQMLP),
    exploration_spec: ExplorationSpecification = ExplorationSpecification(
        start=0.95, end=0.05, decay=3000
    ),
    memory_buffer: Memory = None,
    batch_size: int = 256,
    discount_factor: float = 0.95,
    double_dqn: bool = True,
    use_per: bool = True,
    loss_function: callable = smooth_l1_loss,
    optimiser_spec: GDKC = GDKC(torch.optim.Adam, lr=3e-4),
    scheduler_spec: GDKC = None,
    sync_target_model_frequency: int = 1,
    initial_observation_period: int = 1000,
    learning_frequency: int = 1,
    copy_percentage: float = 1e-2,
    **kwargs,
):
    """
    DQN agent configuration.

    @param value_arch_spec: architecture spec for the Q-value network
    @param exploration_spec: epsilon-greedy schedule (start/end/decay)
    @param memory_buffer: replay buffer; None creates a fresh
        TransitionPointPrioritisedBuffer(int(1e5)) per instance
    @param batch_size: number of transitions sampled per update
    @param discount_factor: reward discount, in [0, 1]
    @param double_dqn: https://arxiv.org/abs/1509.06461
    @param use_per: https://arxiv.org/abs/1511.05952 — only honoured together with double_dqn
    @param loss_function: default is huber loss
    @param optimiser_spec: optimiser spec for the value network
    @param scheduler_spec: optional learning-rate scheduler spec
    @param sync_target_model_frequency: updates between target-network syncs
    @param initial_observation_period: steps collected before learning starts
    @param learning_frequency: environment steps per learning step
    @param copy_percentage: fraction used for soft target-network updates, in [0, 1]
    @param kwargs: forwarded to the superclass
    """
    super().__init__(**kwargs)

    self._exploration_spec = exploration_spec
    assert 0 <= self._exploration_spec.end <= self._exploration_spec.start
    assert 0 < self._exploration_spec.decay

    if memory_buffer is None:
        # Build per instance: a buffer constructed in the signature would be a
        # mutable default shared by every agent created from this class.
        memory_buffer = TransitionPointPrioritisedBuffer(int(1e5))
    self._memory_buffer = memory_buffer
    assert self._memory_buffer.capacity > batch_size

    self._value_arch_spec: Architecture = value_arch_spec
    self._optimiser_spec = optimiser_spec
    self._scheduler_spec = scheduler_spec

    self._batch_size = batch_size
    assert batch_size > 0

    self._discount_factor = discount_factor
    assert 0 <= discount_factor <= 1.0

    self._double_dqn = double_dqn
    self._use_per = use_per and double_dqn  # PER is only applied on the double-DQN path
    self._loss_function = loss_function

    self._learning_frequency = learning_frequency
    self._sync_target_model_frequency = sync_target_model_frequency
    self._initial_observation_period = initial_observation_period
    assert initial_observation_period >= 0

    self._copy_percentage = copy_percentage
    assert 0 <= copy_percentage <= 1.0

    # Tensor dtypes used when encoding transitions.
    self._state_type = torch.float
    self._value_type = torch.float
    self._action_type = torch.long
def main():
    """Train and evaluate a Mask R-CNN instance-segmentation model on PennFudanPed."""
    dataset_root = Path.home() / "Data"
    base_path = ensure_existence(PROJECT_APP_PATH.user_data / 'maskrcnn')
    log_path = ensure_existence(PROJECT_APP_PATH.user_log / 'maskrcnn')
    export_root = ensure_existence(base_path / 'models')
    model_name = f'maskrcnn_pennfudanped'

    batch_size = 4
    num_epochs = 10
    optimiser_spec = GDKC(torch.optim.Adam, lr=3e-4)
    scheduler_spec = GDKC(
        torch.optim.lr_scheduler.
        StepLR,  # a learning rate scheduler which decreases the learning rate by
        step_size=3,  # 10x every 3 epochs
        gamma=0.1,
    )
    num_workers = os.cpu_count()
    torch_seed(3825)

    # Training and validation use the same on-disk dataset with different splits.
    dataset = PennFudanDataset(dataset_root / "PennFudanPed",
                               Split.Training,
                               return_variant=ReturnVariant.all)
    dataset_validation = PennFudanDataset(
        dataset_root / "PennFudanPed",
        Split.Validation,
        return_variant=ReturnVariant.all,
    )
    # 70/30 train/validation split over one shared random permutation of indices.
    split = SplitIndexer(len(dataset), validation=0.3, testing=0)
    split_indices = torch.randperm(split.total_num).tolist()

    data_loader = DataLoader(
        Subset(dataset, split_indices[:-split.validation_num]),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_batch_fn,
    )
    data_loader_val = DataLoader(
        Subset(dataset_validation, split_indices[-split.validation_num:]),
        batch_size=1,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_batch_fn,
    )

    model = get_pretrained_instance_segmentation_maskrcnn(
        dataset.response_channels)
    optimiser = optimiser_spec(trainable_parameters(model))
    lr_scheduler = scheduler_spec(optimiser)

    if True:  # NOTE(review): hard-coded toggle — loads previously exported weights
        model = load_model(model_name=model_name, model_directory=export_root)

    if True:  # NOTE(review): hard-coded toggle for the training phase
        with TorchTrainSession(model):
            with TensorBoardPytorchWriter(log_path / model_name) as writer:
                for epoch_i in tqdm(range(num_epochs), desc="Epoch #"):
                    maskrcnn_train_single_epoch(model=model,
                                                optimiser=optimiser,
                                                data_loader=data_loader,
                                                writer=writer)
                    lr_scheduler.step()  # update the learning rate
                    maskrcnn_evaluate(
                        model, data_loader_val, writer=writer
                    )  # evaluate on the validation dataset
                    # Model is exported every epoch, overwriting the previous export.
                    save_model(model,
                               model_name=model_name,
                               save_directory=export_root)

    if True:  # NOTE(review): hard-coded toggle for the qualitative evaluation phase
        with TorchEvalSession(model):  # put the model in evaluation mode
            img, _ = dataset_validation[
                0]  # pick one image from the test set
            with torch.no_grad():
                prediction = model([img.to(global_torch_device())])

            from matplotlib import pyplot

            # Show the input image (CHW float tensor -> HWC byte image).
            pyplot.imshow(
                Image.fromarray(
                    img.mul(255).permute(1, 2, 0).byte().numpy()))
            pyplot.show()

            import cv2

            # Show the first predicted instance mask.
            pyplot.imshow(
                Image.fromarray(prediction[0]["masks"][0, 0].mul(
                    255).byte().cpu().numpy()))
            pyplot.show()

            (boxes, labels, scores) = (
                prediction[0]["boxes"].to('cpu').numpy(),
                prediction[0]["labels"].to('cpu').numpy(),
                # NOTE(review): scores are passed through sigmoid here — torchvision
                # Mask R-CNN scores are usually already in [0, 1]; confirm intended.
                torch.sigmoid(prediction[0]["scores"]).to('cpu').numpy(),
            )

            from draugr.opencv_utilities import draw_bounding_boxes
            from draugr.torch_utilities.images.conversion import quick_to_pil_image

            indices = scores > 0.1  # keep only detections above this confidence

            cv2.namedWindow(model_name, cv2.WINDOW_NORMAL)
            cv2.imshow(
                model_name,
                draw_bounding_boxes(
                    quick_to_pil_image(img),
                    boxes[indices],
                    labels=labels[indices],
                    scores=scores[indices],
                    # categories=categories,
                ))
            cv2.waitKey()
def __init__(self,
             discount_factor: float = 0.95,
             gae_lambda: float = 0.95,
             entropy_reg_coef: float = 0,
             value_reg_coef: float = 5e-1,
             num_inner_updates: int = 10,
             mini_batch_size: int = 64,
             update_target_interval: int = 1,
             surrogate_clipping_value: float = 2e-1,
             copy_percentage: float = 1.0,
             target_kl: float = 1e-2,
             memory_buffer: Any = None,
             critic_criterion: callable = mse_loss,
             optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
             continuous_arch_spec: GDKC = GDKC(constructor=ActorCriticMLP),
             discrete_arch_spec: GDKC = GDKC(
                 constructor=CategoricalActorCriticMLP),
             gradient_norm_clipping: TogglableLowHigh = TogglableLowHigh(
                 True, 0, 0.5),
             **kwargs) -> None:
    """
    Clipped-surrogate actor-critic (PPO-style) agent configuration.

    :param discount_factor: reward discount, in [0, 1]
    :param gae_lambda: GAE lambda, in [0, 1]
    :param entropy_reg_coef: entropy regularisation coefficient
    :param value_reg_coef: value-loss coefficient
    :param num_inner_updates: optimisation passes per collected batch
    :param mini_batch_size: minibatch size for the inner updates
    :param update_target_interval: updates between target syncs
    :param surrogate_clipping_value: clipping range of the surrogate objective
    :param copy_percentage: fraction used for target-network updates, in [0, 1]
    :param target_kl: KL threshold used during the inner updates
    :param memory_buffer: trajectory buffer; None creates a fresh
        TransitionPointTrajectoryBuffer() per instance
    :param critic_criterion: loss used for the value-function regression
    :param optimiser_spec: optimiser spec for the network parameters
    :param continuous_arch_spec: architecture spec for continuous action spaces
    :param discrete_arch_spec: architecture spec for discrete action spaces
    :param gradient_norm_clipping: gradient-norm clipping toggle and bounds
    :param kwargs: forwarded to the superclass
    """
    super().__init__(gradient_norm_clipping=gradient_norm_clipping, **kwargs)

    assert 0 <= discount_factor <= 1.0
    assert 0 <= gae_lambda <= 1.0

    if memory_buffer is None:
        # Per-instance buffer: a buffer constructed in the signature would be
        # a mutable default shared by every agent created from this class.
        memory_buffer = TransitionPointTrajectoryBuffer()

    self._copy_percentage = copy_percentage
    self._memory_buffer = memory_buffer
    self._optimiser_spec: GDKC = optimiser_spec
    self._continuous_arch_spec = continuous_arch_spec
    self._discrete_arch_spec = discrete_arch_spec
    self._discount_factor = discount_factor
    self._gae_lambda = gae_lambda
    self._target_kl = target_kl
    self._mini_batch_size = mini_batch_size
    self._entropy_reg_coefficient = entropy_reg_coef
    self._value_reg_coefficient = value_reg_coef
    self._num_inner_updates = num_inner_updates
    self._update_target_interval = update_target_interval
    self._critic_criterion = critic_criterion
    self._surrogate_clipping_value = surrogate_clipping_value

    # Counts inner optimisation steps; used to time target syncs.
    self.inner_update_i = 0
def __call__(
    self,
    agent: Type[Agent],
    *,
    # NOTE(review): this default is evaluated once at import time, so every call
    # that omits load_time shares one timestamp — confirm intended.
    load_time: Any = str(int(time.time())),
    seed: int = 0,
    save_ending_model: bool = False,
    save_training_resume: bool = False,
    continue_training: bool = True,
    train_agent: bool = True,
    debug: bool = False,
    num_envs: int = cpu_count(),
    **kwargs,
):
    """
    Start a session: build the Agent, start/connect the environment(s), and run the Procedure.

    :param agent: agent class (instantiated here) or a ready agent instance
    :param load_time: timestamp keying the log directory and forwarded to the agent
    :param seed: seed applied to torch and to the environment
    :param save_ending_model: save the agent when the session ends
    :param save_training_resume: persist the returned training statistics
    :param continue_training: attempt to load a previously trained model first
    :param train_agent: train (True) or evaluate (False)
    :param debug: enables torchsnooper and autograd anomaly detection
    :param num_envs: number of parallel environments
    :param kwargs: forwarded to the agent constructor and the procedure
    :return: never returns normally — calls exit(0) at the end
    """
    # Fold session-wide settings into kwargs so they reach agent and procedure.
    kwargs.update(num_envs=num_envs)
    kwargs.update(train_agent=train_agent)
    kwargs.update(debug=debug)
    kwargs.update(environment=self._environment)

    # Both debug wrappers are no-ops unless debug is True.
    with ContextWrapper(torchsnooper.snoop, debug):
        with ContextWrapper(torch.autograd.detect_anomaly, debug):
            if agent is None:
                raise NoAgent

            # A class was passed rather than an instance: seed and instantiate.
            if inspect.isclass(agent):
                sprint("Instantiating Agent",
                       color="crimson",
                       bold=True,
                       italic=True)
                torch_seed(seed)
                self._environment.seed(seed)
                agent = agent(load_time=load_time, seed=seed, **kwargs)

            agent_class_name = agent.__class__.__name__
            # Directory names encode environment name plus the concatenated
            # observation/action/signal shapes, so differing spaces get
            # separate model/log directories.
            total_shape = "_".join([
                str(i) for i in (self._environment.observation_space.shape +
                                 self._environment.action_space.shape +
                                 self._environment.signal_space.shape)
            ])
            environment_name = f"{self._environment.environment_name}_{total_shape}"
            save_directory = (PROJECT_APP_PATH.user_data / environment_name /
                              agent_class_name)
            log_directory = (PROJECT_APP_PATH.user_log / environment_name /
                             agent_class_name / load_time)

            # Pick a rollout visualisation matching the action space.
            if self._environment.action_space.is_discrete:
                rollout_drawer = GDKC(DiscreteScrollPlot,
                                      num_actions=self._environment.
                                      action_space.discrete_steps,
                                      default_delta=None)
            else:
                rollout_drawer = GDKC(SeriesScrollPlot,
                                      window_length=100,
                                      default_delta=None)

            if train_agent:  # TODO: allow metric writing while not training with flag
                metric_writer = GDKC(TensorBoardPytorchWriter,
                                     path=log_directory)
            else:
                metric_writer = GDKC(MockWriter)

            # Writer only active when training; drawer only with a single env.
            with ContextWrapper(metric_writer, train_agent) as metric_writer:
                with ContextWrapper(rollout_drawer,
                                    num_envs == 1) as rollout_drawer:
                    agent.build(
                        self._environment.observation_space,
                        self._environment.action_space,
                        self._environment.signal_space,
                        metric_writer=metric_writer,
                    )

                    kwargs.update(
                        environment_name=(
                            self._environment.environment_name,
                        ),
                        save_directory=save_directory,
                        log_directory=log_directory,
                        load_time=load_time,
                        seed=seed,
                        train_agent=train_agent,
                    )

                    found = False
                    if continue_training:
                        sprint(
                            "Searching for previously trained models for initialisation for this configuration "
                            "(Architecture, Action Space, Observation Space, ...)",
                            color="crimson",
                            bold=True,
                            italic=True,
                        )
                        found = agent.load(save_directory=save_directory,
                                           evaluation=not train_agent)
                        if not found:
                            sprint(
                                "Did not find any previously trained models for this configuration",
                                color="crimson",
                                bold=True,
                                italic=True,
                            )

                    # Toggle train/eval mode on the agent's networks.
                    if not train_agent:
                        agent.eval()
                    else:
                        agent.train()

                    if not found:
                        sprint(
                            "Training from new initialisation",
                            color="crimson",
                            bold=True,
                            italic=True,
                        )

                    session_proc = self._procedure(agent, **kwargs)

                    # Early-stop hooks fire the procedure's stop callback;
                    # KeyboardInterrupt just ends the run without a traceback.
                    with CaptureEarlyStop(
                            callbacks=self._procedure.stop_procedure,
                            **kwargs):
                        with StopWatch() as timer:
                            with suppress(KeyboardInterrupt):
                                training_resume = session_proc(
                                    metric_writer=metric_writer,
                                    rollout_drawer=rollout_drawer,
                                    **kwargs)
                                if training_resume and "stats" in training_resume and save_training_resume:
                                    training_resume.stats.save(**kwargs)

                    end_message = f"Training ended, time elapsed: {timer // 60:.0f}m {timer % 60:.0f}s"
                    line_width = 9
                    sprint(
                        f'\n{"-" * line_width} {end_message} {"-" * line_width}\n',
                        color="crimson",
                        bold=True,
                        italic=True,
                    )

                    if save_ending_model:
                        agent.save(**kwargs)

    # Environment may already have shut its pipe down; ignore that case.
    try:
        self._environment.close()
    except BrokenPipeError:
        pass

    exit(0)