Example 1
    def __init__(
        self,
        env_spec,
        num_candidate_policies: int = 20,
        policy_arch: GDKC = GDKC(CategoricalMLPPolicy, hidden_sizes=(32, 32)),
        baseline_arch: GDKC = GDKC(
            LinearFeatureBaseline
        ),  # Baseline for GAE (Generalized Advantage Estimation).
        discount_factor: float = 0.99,
        max_rollout_length: int = 500,
        parameters_variance: float = 1.0,
    ):
        self.policy: Module = policy_arch(env_spec=env_spec)
        self.max_path_length = max_rollout_length  # TODO: REMOVE THIS..
        self.sampler_cls = RaySampler

        self._baseline = baseline_arch()
        self._max_rollout_length = max_rollout_length

        self._env_spec = env_spec
        self._discount = discount_factor
        self._parameters_variance = parameters_variance
        self._num_candidate_policies = num_candidate_policies

        self._evolution_strategy: Optional[CMAEvolutionStrategy] = None
        self._shared_params = None

        self._all_returns = None
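All of these examples revolve around the same idiom: GDKC (from the warg package) captures a constructor together with default keyword arguments, and the object is only built later, when the spec itself is called, possibly with extra or overriding arguments (as with policy_arch(env_spec=env_spec) above). A minimal, illustrative stand-in for that behaviour, assuming the semantics the examples imply (the real warg.GDKC is richer, e.g. it also exposes its stored kwargs for mutation, as Example 4 shows):

    class DeferredKwargConstruction:
        """Illustrative sketch of the GDKC idea: a late-bound, kwarg-carrying partial."""

        def __init__(self, constructor, *args, **kwargs):
            self.constructor = constructor
            self.args = args
            self.kwargs = kwargs  # exposed, so callers may patch it before construction

        def __call__(self, *args, **overrides):
            # Build now; call-time kwargs override spec-time defaults.
            return self.constructor(*self.args, *args, **{**self.kwargs, **overrides})


    # Declared at configuration time, with only the architecture known...
    policy_arch = DeferredKwargConstruction(dict, hidden_sizes=(32, 32))
    # ...materialised at build time, when runtime-only arguments exist:
    policy = policy_arch(env_spec="some-env-spec")
    assert policy == {"hidden_sizes": (32, 32), "env_spec": "some-env-spec"}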
Example 2
  def __init__(
      self,
      *,
      copy_percentage: float = 1e-2,
      batch_size: int = 100,
      discount_factor: float = 0.999,
      target_update_interval: int = 1,
      num_inner_updates: int = 20,
      sac_alpha: float = 1e-2,
      memory_buffer: Memory = TransitionPointBuffer(1000000),  # NOTE: default is created once at import and shared across instances
      auto_tune_sac_alpha: bool = False,
      auto_tune_sac_alpha_optimiser_spec: GDKC = GDKC(
          constructor=torch.optim.Adam, lr=3e-4
          ),
      actor_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
      critic_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
      actor_arch_spec: GDKC = GDKC(
          ShallowStdNormalMLP, mean_head_activation=torch.tanh
          ),
      critic_arch_spec: GDKC = GDKC(PreConcatInputMLP),
      critic_criterion: callable = mse_loss,
      **kwargs
      ):
    """

:param copy_percentage:
:param signal_clipping:
:param action_clipping:
:param memory_buffer:
:param actor_optimiser_spec:
:param critic_optimiser_spec:
:param actor_arch_spec:
:param critic_arch_spec:
:param random_process_spec:
:param kwargs:
"""
    super().__init__(**kwargs)

    assert 0 <= discount_factor <= 1.0
    assert 0 <= copy_percentage <= 1.0

    self._batch_size = batch_size
    self._discount_factor = discount_factor
    self._target_update_interval = target_update_interval
    self._sac_alpha = sac_alpha
    self._copy_percentage = copy_percentage
    self._memory_buffer = memory_buffer
    self._actor_optimiser_spec: GDKC = actor_optimiser_spec
    self._critic_optimiser_spec: GDKC = critic_optimiser_spec
    self._actor_arch_spec = actor_arch_spec
    self._critic_arch_spec = critic_arch_spec

    self._num_inner_updates = num_inner_updates
    self._critic_criterion = critic_criterion

    self._auto_tune_sac_alpha = auto_tune_sac_alpha
    self._auto_tune_sac_alpha_optimiser_spec = auto_tune_sac_alpha_optimiser_spec
    self.inner_update_i = 0
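When auto_tune_sac_alpha is enabled, SAC (Haarnoja et al., 2018) conventionally treats the entropy temperature as a learnable log-parameter and minimises a temperature loss against a target entropy; under that standard assumption, the optimiser spec above would be applied roughly like this (a sketch, not this repository's exact wiring):

    import torch

    log_sac_alpha = torch.zeros(1, requires_grad=True)
    target_entropy = -4.0  # conventionally -dim(action space); hypothetical value here

    # auto_tune_sac_alpha_optimiser_spec would produce the equivalent of:
    alpha_optimiser = torch.optim.Adam([log_sac_alpha], lr=3e-4)


    def update_sac_alpha(log_prob: torch.Tensor) -> torch.Tensor:
        """One temperature step; log_prob comes from the current actor's samples."""
        alpha_loss = -(log_sac_alpha * (log_prob + target_entropy).detach()).mean()
        alpha_optimiser.zero_grad()
        alpha_loss.backward()
        alpha_optimiser.step()
        return log_sac_alpha.exp()  # the updated sac_alpha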
Example 3
    def __init__(
            self,
            random_process_spec: GDKC = GDKC(
                constructor=OrnsteinUhlenbeckProcess),
            memory_buffer: Memory = TransitionPointBuffer(),
            evaluation_function: callable = F.mse_loss,
            actor_arch_spec: GDKC = GDKC(MLP,
                                         output_activation=torch.nn.Tanh()),
            critic_arch_spec: GDKC = GDKC(LateConcatInputMLP),
            discount_factor: float = 0.95,
            update_target_interval: int = 1,
            batch_size: int = 128,
            noise_factor: float = 1e-1,
            copy_percentage: float = 0.005,
            actor_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam,
                                              lr=3e-4),
            critic_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam,
                                               lr=3e-4),
            **kwargs):
        """

@param random_process_spec:
@param memory_buffer:
@param evaluation_function:
@param actor_arch_spec:
@param critic_arch_spec:
@param discount_factor:
@param update_target_interval:
@param batch_size:
@param noise_factor:
@param copy_percentage:
@param actor_optimiser_spec:
@param critic_optimiser_spec:
@param kwargs:
"""
        super().__init__(**kwargs)

        assert 0 <= discount_factor <= 1.0
        assert 0 <= copy_percentage <= 1.0

        self._copy_percentage = copy_percentage
        self._actor_optimiser_spec = actor_optimiser_spec
        self._critic_optimiser_spec = critic_optimiser_spec
        self._actor_arch_spec = actor_arch_spec
        self._critic_arch_spec = critic_arch_spec
        self._random_process_spec = random_process_spec

        self._memory_buffer = memory_buffer
        self._critic_criteria = evaluation_function
        self._discount_factor = discount_factor
        self._update_target_interval = update_target_interval

        self._batch_size = batch_size
        self._noise_factor = noise_factor
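In both this DDPG-style agent and the SAC agent above, copy_percentage presumably plays the role of the Polyak factor τ in the soft target-network update (hence the 0 <= copy_percentage <= 1.0 assertion). Assuming the standard formulation:

    import torch


    def soft_update(target: torch.nn.Module, source: torch.nn.Module,
                    copy_percentage: float) -> None:
        """θ_target ← τ·θ_source + (1 − τ)·θ_target, with τ = copy_percentage."""
        with torch.no_grad():
            for t_param, s_param in zip(target.parameters(), source.parameters()):
                t_param.lerp_(s_param, copy_percentage)


    # copy_percentage=0.005 (this example) makes targets trail the online network
    # slowly; copy_percentage=1.0 (Example 11) degenerates to a hard copy.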
Example 4
    def __build__(
        self,
        observation_space: ObservationSpace,
        action_space: ActionSpace,
        signal_space: SignalSpace,
        metric_writer: Writer = MockWriter(),
        print_model_repr: bool = True,
        *,
        distributional_regressor: Optional[Module] = None,
        optimiser: Optional[Optimizer] = None,
    ) -> None:
        """

@param observation_space:
@param action_space:
@param signal_space:
@param metric_writer:
@param print_model_repr:
@param distributional_regressor:
@param optimiser:
@return:
"""

        if distributional_regressor is not None:
            self.distributional_regressor = distributional_regressor
        else:
            self._policy_arch_spec.kwargs["input_shape"] = self._input_shape
            if action_space.is_discrete:
                self._policy_arch_spec = GDKC(
                    constructor=CategoricalMLP,
                    kwargs=self._policy_arch_spec.kwargs)
            else:
                self._policy_arch_spec = GDKC(
                    constructor=MultiDimensionalNormalMLP,
                    kwargs=self._policy_arch_spec.kwargs,
                )

            self._policy_arch_spec.kwargs["output_shape"] = self._output_shape

            self.distributional_regressor: Module = self._policy_arch_spec(
            ).to(self._device)

        if optimiser is not None:
            self._optimiser = optimiser
        else:
            self._optimiser = self._optimiser_spec(
                self.distributional_regressor.parameters())

        if self._scheduler_spec:
            self._scheduler = self._scheduler_spec(self._optimiser)
        else:
            self._scheduler = None
Example 5
 def b():  # DOES NOT WORK!
     """ """
     print("start2")
     with CaptureEarlyStop(GDKC(exit, code=0)) as _:
         while True:
             sleep(0.1)
     print("done2")
Example 6
    def __init__(
            self,
            evaluation_function: callable = torch.nn.CrossEntropyLoss(),
            policy_arch_spec: GDKC = GDKC(constructor=CategoricalMLP),
            discount_factor: float = 0.95,
            optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4),
            scheduler_spec: GDKC = GDKC(
                constructor=torch.optim.lr_scheduler.StepLR,
                step_size=100,
                gamma=0.65),
            memory_buffer: Memory = SampleTrajectoryBuffer(),
            **kwargs,
    ) -> None:
        r"""
:param evaluation_function:
:param trajectory_trace:
:param policy_arch_spec:
:param discount_factor:
:param optimiser_spec:
:param state_type:
:param kwargs:
"""
        super().__init__(**kwargs)

        assert 0 <= discount_factor <= 1.0

        self._memory_buffer = memory_buffer

        self._evaluation_function = evaluation_function
        self._policy_arch_spec = policy_arch_spec
        self._discount_factor = discount_factor

        self._optimiser_spec = optimiser_spec
        self._scheduler_spec = scheduler_spec

        self._mask_terminated_signals = False
Example 7
from neodroidagent.common import ParallelSession
from neodroidagent.common.architectures.distributional.categorical import CategoricalMLP
from neodroidagent.configs.test_reference.base_dicrete_test_config import *

__author__ = "Christian Heider Nielsen"

from neodroidagent.entry_points.session_factory import session_factory
from warg import GDKC

CONFIG_NAME = __name__

CONFIG_FILE_PATH = pathlib.Path(__file__)

RENDER_ENVIRONMENT = True

OPTIMISER_SPEC = GDKC(torch.optim.Adam, lr=3e-4)
SCHEDULER_SPEC = None

POLICY_ARCH_SPEC = GDKC(constructor=CategoricalMLP, hidden_layers=128)

# RENDER_FREQUENCY = 1

pg_config = globals()


def pg_run(skip_confirmation: bool = True,
           environment_type: Union[bool, str] = True,
           *,
           config=None,
           **kwargs) -> None:
    if config is None:
        config = pg_config  # presumable fallback to this module's globals
Example 8
from typing import Tuple

from draugr.scipy_utilities import mag_decimation_subsample

__all__ = [
    "annotate_lines",
    "default_index_decimator",
]

from warg import GDKC, passes_kws_to

from matplotlib.pyplot import Axes

default_index_decimator = GDKC(
    mag_decimation_subsample, decimation_factor=5,
    return_indices=True)  # finds interesting features?


@passes_kws_to(Axes.annotate)
def annotate_lines(
    ax_: Axes,
    num_lines: int = 1,  # None for all
    index_decimator: callable = default_index_decimator,
    color: str = "k",  # None for auto color
    xycoords: Tuple[str, str] = (
        "data",
        # 'axes fraction',
        "data",
    ),  # TODO: NOT DONE! Where to place the annotation; use 'axes fraction' to place along the axes
    ha: str = "left",
Example 9
    def __init__(
        self,
        value_arch_spec: Architecture = GDKC(DuelingQMLP),
        exploration_spec: ExplorationSpecification = ExplorationSpecification(
            start=0.95, end=0.05, decay=3000),
        memory_buffer: Memory = TransitionPointPrioritisedBuffer(int(1e5)),
        batch_size: int = 256,
        discount_factor: float = 0.95,
        double_dqn: bool = True,
        use_per: bool = True,
        loss_function: callable = smooth_l1_loss,
        optimiser_spec: GDKC = GDKC(torch.optim.Adam, lr=3e-4),
        scheduler_spec: GDKC = None,
        sync_target_model_frequency: int = 1,
        initial_observation_period: int = 1000,
        learning_frequency: int = 1,
        copy_percentage: float = 1e-2,
        **kwargs,
    ):
        """
@param value_arch_spec:
@param exploration_spec:
@param memory_buffer:
@param batch_size:
@param discount_factor:
@param double_dqn: https://arxiv.org/abs/1509.06461
@param use_per:  https://arxiv.org/abs/1511.05952
@param loss_function:  default is huber loss
@param optimiser_spec:
@param scheduler_spec:
@param sync_target_model_frequency:
@param initial_observation_period:
@param learning_frequency:
@param copy_percentage:
@param kwargs:
"""
        super().__init__(**kwargs)

        self._exploration_spec = exploration_spec
        assert 0 <= self._exploration_spec.end <= self._exploration_spec.start
        assert 0 < self._exploration_spec.decay

        self._memory_buffer = memory_buffer
        assert self._memory_buffer.capacity > batch_size

        self._value_arch_spec: Architecture = value_arch_spec
        self._optimiser_spec = optimiser_spec
        self._scheduler_spec = scheduler_spec

        self._batch_size = batch_size
        assert batch_size > 0

        self._discount_factor = discount_factor
        assert 0 <= discount_factor <= 1.0

        self._double_dqn = double_dqn
        self._use_per = use_per and double_dqn  # PER is only enabled together with double DQN here
        self._loss_function = loss_function

        self._learning_frequency = learning_frequency
        self._sync_target_model_frequency = sync_target_model_frequency

        self._initial_observation_period = initial_observation_period
        assert initial_observation_period >= 0

        self._copy_percentage = copy_percentage
        assert 0 <= copy_percentage <= 1.0

        self._state_type = torch.float
        self._value_type = torch.float
        self._action_type = torch.long
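ExplorationSpecification(start=0.95, end=0.05, decay=3000) above presumably parameterises the usual exponentially annealed ε-greedy schedule for DQN; assuming that standard form:

    import math


    def epsilon(step: int, start: float = 0.95, end: float = 0.05,
                decay: float = 3000) -> float:
        """ε anneals from `start` towards `end` with a time constant of `decay` steps."""
        return end + (start - end) * math.exp(-step / decay)


    # epsilon(0) ≈ 0.95, epsilon(3000) ≈ 0.38, epsilon(30000) ≈ 0.05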
Example 10
    def main():
        dataset_root = Path.home() / "Data"
        base_path = ensure_existence(PROJECT_APP_PATH.user_data / 'maskrcnn')
        log_path = ensure_existence(PROJECT_APP_PATH.user_log / 'maskrcnn')
        export_root = ensure_existence(base_path / 'models')
        model_name = "maskrcnn_pennfudanped"

        batch_size = 4
        num_epochs = 10
        optimiser_spec = GDKC(torch.optim.Adam, lr=3e-4)
        scheduler_spec = GDKC(
            torch.optim.lr_scheduler.StepLR,  # decreases the learning rate by 10x every 3 epochs
            step_size=3,
            gamma=0.1,
        )
        num_workers = os.cpu_count()
        torch_seed(3825)

        dataset = PennFudanDataset(dataset_root / "PennFudanPed",
                                   Split.Training,
                                   return_variant=ReturnVariant.all)
        dataset_validation = PennFudanDataset(
            dataset_root / "PennFudanPed",
            Split.Validation,
            return_variant=ReturnVariant.all,
        )
        split = SplitIndexer(len(dataset), validation=0.3, testing=0)

        split_indices = torch.randperm(split.total_num).tolist()

        data_loader = DataLoader(
            Subset(dataset, split_indices[:-split.validation_num]),
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            collate_fn=collate_batch_fn,
        )

        data_loader_val = DataLoader(
            Subset(dataset_validation, split_indices[-split.validation_num:]),
            batch_size=1,
            shuffle=False,
            num_workers=num_workers,
            collate_fn=collate_batch_fn,
        )

        model = get_pretrained_instance_segmentation_maskrcnn(
            dataset.response_channels)
        optimiser = optimiser_spec(trainable_parameters(model))
        lr_scheduler = scheduler_spec(optimiser)

        if True:  # toggle: warm-start from a previously exported model
            model = load_model(model_name=model_name,
                               model_directory=export_root)

        if True:  # toggle: run the training loop
            with TorchTrainSession(model):
                with TensorBoardPytorchWriter(log_path / model_name) as writer:
                    for epoch_i in tqdm(range(num_epochs), desc="Epoch #"):
                        maskrcnn_train_single_epoch(model=model,
                                                    optimiser=optimiser,
                                                    data_loader=data_loader,
                                                    writer=writer)
                        lr_scheduler.step()  # update the learning rate
                        maskrcnn_evaluate(
                            model, data_loader_val, writer=writer
                        )  # evaluate on the validation dataset
                        save_model(model,
                                   model_name=model_name,
                                   save_directory=export_root)

        if True:  # toggle: qualitative evaluation on one validation image
            with TorchEvalSession(model):  # put the model in evaluation mode
                img, _ = dataset_validation[
                    0]  # pick one image from the validation set

                with torch.no_grad():
                    prediction = model([img.to(global_torch_device())])

                from matplotlib import pyplot
                pyplot.imshow(
                    Image.fromarray(
                        img.mul(255).permute(1, 2, 0).byte().numpy()))
                pyplot.show()

                import cv2

                pyplot.imshow(
                    Image.fromarray(prediction[0]["masks"][0, 0].mul(
                        255).byte().cpu().numpy()))
                pyplot.show()

                (boxes, labels, scores) = (
                    prediction[0]["boxes"].to('cpu').numpy(),
                    prediction[0]["labels"].to('cpu').numpy(),
                    torch.sigmoid(prediction[0]["scores"]).to('cpu').numpy(),
                )

                from draugr.opencv_utilities import draw_bounding_boxes
                from draugr.torch_utilities.images.conversion import quick_to_pil_image

                indices = scores > 0.1

                cv2.namedWindow(model_name, cv2.WINDOW_NORMAL)
                cv2.imshow(
                    model_name,
                    draw_bounding_boxes(
                        quick_to_pil_image(img),
                        boxes[indices],
                        labels=labels[indices],
                        scores=scores[indices],
                        #categories=categories,
                    ))

                cv2.waitKey()
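trainable_parameters above is presumably a small filter over model.parameters(), useful because the Mask R-CNN starts from pretrained weights and some parameters may be frozen. A sketch under that assumption:

    from typing import Iterator

    import torch


    def trainable_parameters_sketch(model: torch.nn.Module) -> Iterator[torch.nn.Parameter]:
        """Yield only the parameters that will actually receive gradients."""
        return (p for p in model.parameters() if p.requires_grad)


    # optimiser = optimiser_spec(trainable_parameters_sketch(model))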
Example 11
    def __init__(self,
                 discount_factor: float = 0.95,
                 gae_lambda: float = 0.95,
                 entropy_reg_coef: float = 0,
                 value_reg_coef: float = 5e-1,
                 num_inner_updates: int = 10,
                 mini_batch_size: int = 64,
                 update_target_interval: int = 1,
                 surrogate_clipping_value: float = 2e-1,
                 copy_percentage: float = 1.0,
                 target_kl: float = 1e-2,
                 memory_buffer: Any = TransitionPointTrajectoryBuffer(),
                 critic_criterion: callable = mse_loss,
                 optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam,
                                             lr=3e-4),
                 continuous_arch_spec: GDKC = GDKC(constructor=ActorCriticMLP),
                 discrete_arch_spec: GDKC = GDKC(
                     constructor=CategoricalActorCriticMLP),
                 gradient_norm_clipping: TogglableLowHigh = TogglableLowHigh(
                     True, 0, 0.5),
                 **kwargs) -> None:
        """

:param discount_factor:
:param gae_lambda:
:param actor_lr:
:param critic_lr:
:param entropy_reg_coef:
:param value_reg_coef:
:param num_inner_updates:
:param copy_percentage:
:param update_target_interval:
:param max_grad_norm:
:param solved_threshold:
:param test_interval:
:param early_stop:
:param rollouts:
:param surrogate_clipping_value:
:param state_type:
:param value_type:
:param action_type:
:param exploration_epsilon_start:
:param exploration_epsilon_end:
:param exploration_epsilon_decay:
:param kwargs:
"""
        super().__init__(gradient_norm_clipping=gradient_norm_clipping,
                         **kwargs)

        assert 0 <= discount_factor <= 1.0
        assert 0 <= gae_lambda <= 1.0

        self._copy_percentage = copy_percentage
        self._memory_buffer = memory_buffer
        self._optimiser_spec: GDKC = optimiser_spec
        self._continuous_arch_spec = continuous_arch_spec
        self._discrete_arch_spec = discrete_arch_spec

        self._discount_factor = discount_factor
        self._gae_lambda = gae_lambda
        self._target_kl = target_kl

        self._mini_batch_size = mini_batch_size
        self._entropy_reg_coefficient = entropy_reg_coef
        self._value_reg_coefficient = value_reg_coef
        self._num_inner_updates = num_inner_updates
        self._update_target_interval = update_target_interval
        self._critic_criterion = critic_criterion
        self._surrogate_clipping_value = surrogate_clipping_value
        self.inner_update_i = 0
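surrogate_clipping_value (0.2 here) matches the ε of PPO's clipped surrogate objective (Schulman et al., 2017). Assuming this agent follows the standard formulation, the per-batch policy loss looks roughly like:

    import torch


    def clipped_surrogate_loss(log_prob: torch.Tensor, old_log_prob: torch.Tensor,
                               advantage: torch.Tensor,
                               epsilon: float = 2e-1) -> torch.Tensor:
        """L^CLIP = −E[min(r·A, clip(r, 1−ε, 1+ε)·A)], with r = π(a|s)/π_old(a|s)."""
        ratio = (log_prob - old_log_prob).exp()
        unclipped = ratio * advantage
        clipped = ratio.clamp(1.0 - epsilon, 1.0 + epsilon) * advantage
        return -torch.min(unclipped, clipped).mean()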
Example 12
    def __call__(
            self,
            agent: Type[Agent],
            *,
            load_time: Any = str(int(time.time())),  # NOTE: evaluated once at import time, not per call
            seed: int = 0,
            save_ending_model: bool = False,
            save_training_resume: bool = False,
            continue_training: bool = True,
            train_agent: bool = True,
            debug: bool = False,
            num_envs: int = cpu_count(),
            **kwargs,
    ):
        """
Start a session, builds Agent and starts/connect environment(s), and runs Procedure


:param args:
:param kwargs:
:return:
"""
        kwargs.update(num_envs=num_envs,
                      train_agent=train_agent,
                      debug=debug,
                      environment=self._environment)

        with ContextWrapper(torchsnooper.snoop, debug):
            with ContextWrapper(torch.autograd.detect_anomaly, debug):

                if agent is None:
                    raise NoAgent

                if inspect.isclass(agent):
                    sprint("Instantiating Agent",
                           color="crimson",
                           bold=True,
                           italic=True)
                    torch_seed(seed)
                    self._environment.seed(seed)

                    agent = agent(load_time=load_time, seed=seed, **kwargs)

                agent_class_name = agent.__class__.__name__

                total_shape = "_".join([
                    str(i)
                    for i in (self._environment.observation_space.shape +
                              self._environment.action_space.shape +
                              self._environment.signal_space.shape)
                ])

                environment_name = f"{self._environment.environment_name}_{total_shape}"

                save_directory = (PROJECT_APP_PATH.user_data /
                                  environment_name / agent_class_name)
                log_directory = (PROJECT_APP_PATH.user_log / environment_name /
                                 agent_class_name / load_time)

                if self._environment.action_space.is_discrete:
                    rollout_drawer = GDKC(DiscreteScrollPlot,
                                          num_actions=self._environment.
                                          action_space.discrete_steps,
                                          default_delta=None)
                else:
                    rollout_drawer = GDKC(SeriesScrollPlot,
                                          window_length=100,
                                          default_delta=None)

                if train_agent:  # TODO: allow metric writing while not training with flag
                    metric_writer = GDKC(TensorBoardPytorchWriter,
                                         path=log_directory)
                else:
                    metric_writer = GDKC(MockWriter)

                with ContextWrapper(metric_writer,
                                    train_agent) as metric_writer:
                    with ContextWrapper(rollout_drawer,
                                        num_envs == 1) as rollout_drawer:

                        agent.build(
                            self._environment.observation_space,
                            self._environment.action_space,
                            self._environment.signal_space,
                            metric_writer=metric_writer,
                        )

                        kwargs.update(
                            environment_name=(
                                self._environment.environment_name, ),
                            save_directory=save_directory,
                            log_directory=log_directory,
                            load_time=load_time,
                            seed=seed,
                            train_agent=train_agent,
                        )

                        found = False
                        if continue_training:
                            sprint(
                                "Searching for previously trained models for initialisation for this configuration "
                                "(Architecture, Action Space, Observation Space, ...)",
                                color="crimson",
                                bold=True,
                                italic=True,
                            )
                            found = agent.load(save_directory=save_directory,
                                               evaluation=not train_agent)
                            if not found:
                                sprint(
                                    "Did not find any previously trained models for this configuration",
                                    color="crimson",
                                    bold=True,
                                    italic=True,
                                )

                        if not train_agent:
                            agent.eval()
                        else:
                            agent.train()

                        if not found:
                            sprint(
                                "Training from new initialisation",
                                color="crimson",
                                bold=True,
                                italic=True,
                            )

                        session_proc = self._procedure(agent, **kwargs)

                        with CaptureEarlyStop(
                                callbacks=self._procedure.stop_procedure,
                                **kwargs):
                            with StopWatch() as timer:
                                with suppress(KeyboardInterrupt):
                                    training_resume = session_proc(
                                        metric_writer=metric_writer,
                                        rollout_drawer=rollout_drawer,
                                        **kwargs)
                                    if training_resume and "stats" in training_resume and save_training_resume:
                                        training_resume.stats.save(**kwargs)

                        end_message = f"Training ended, time elapsed: {timer // 60:.0f}m {timer % 60:.0f}s"
                        line_width = 9
                        sprint(
                            f'\n{"-" * line_width} {end_message} {"-" * line_width}\n',
                            color="crimson",
                            bold=True,
                            italic=True,
                        )

                        if save_ending_model:
                            agent.save(**kwargs)

                        try:
                            self._environment.close()
                        except BrokenPipeError:
                            pass

                        exit(0)
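ContextWrapper is used throughout this session to enter a context manager only when a condition holds: torchsnooper.snoop and anomaly detection only under debug, the rollout drawer only when num_envs == 1, the metric writer only when training. A sketch of such a conditional wrapper (the real one, presumably from warg/draugr, also accepts GDKC specs like metric_writer above):

    from contextlib import contextmanager


    @contextmanager
    def context_wrapper(ctx, condition: bool):
        """Enter `ctx` (constructing it first if callable) only when `condition` holds."""
        if condition:
            with (ctx() if callable(ctx) else ctx) as inner:
                yield inner
        else:
            yield None


    # Usage mirroring the session above:
    # with context_wrapper(torch.autograd.detect_anomaly, debug):
    #     ...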