from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.evaluation.postprocessing import Postprocessing
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.torch_policy import EntropyCoeffSchedule, LearningRateSchedule
from ray.rllib.policy.torch_policy_template import build_torch_policy
from ray.rllib.utils import try_import_torch
from ray.rllib.utils.torch_ops import sequence_mask

from algorithms.data_augmentation.data_augmentation import apply_data_augmentation
from algorithms.data_augmenting_ppo_agent.ppo_utils import (compute_running_mean_and_variance,
                                                            RunningStat,
                                                            ExpWeightedMovingAverageStat)
from algorithms.data_augmenting_ppo_agent.ucb_learner import UCBLearner
from algorithms.data_augmenting_ppo_agent.sync_phasic_optimizer import SyncPhasicOptimizer

# Framework handles for the torch policy defined below; try_import_torch()
# returns the torch module and torch.nn (or a stub if torch is unavailable).
torch, nn = try_import_torch()
# NOTE(review): `logging` is NOT imported in the import block above — the
# `import logging` that appears later in this file belongs to a different,
# spliced-in module. As written, this line would raise NameError at import
# time; confirm and repair this file's import block.
logger = logging.getLogger(__name__)


def compute_ppo_loss(policy, dist_class, model, train_batch, action_dist, state):
    mask = None
    if state:
        max_seq_len = torch.max(train_batch["seq_lens"])
        mask = sequence_mask(train_batch["seq_lens"], max_seq_len)
        mask = torch.reshape(mask, [-1])

    policy.loss_obj = PPOLoss(
        dist_class,
        model,
        train_batch[Postprocessing.VALUE_TARGETS],
        train_batch[Postprocessing.ADVANTAGES],
import logging
import os
from typing import Dict

import ray
from ray.rllib import BaseEnv
from ray.rllib.utils import try_import_torch

# Bind the torch module for this script; the second return value (torch.nn)
# is intentionally discarded here.
torch, _ = try_import_torch()

from ray.rllib.utils.typing import PolicyID
from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.policy import Policy
from grl.utils.strategy_spec import StrategySpec
from grl.rllib_tools.space_saving_logger import get_trainer_logger_creator
from grl.utils.common import find_free_port
from grl.utils.common import data_dir
from grl.envs.oshi_zumo_multi_agent_env import ThousandActionOshiZumoMultiAgentEnv
from grl.rllib_tools.policy_checkpoints import load_pure_strat
from grl.rl_apps.scenarios.catalog import scenario_catalog
from grl.rl_apps.scenarios.nfsp_scenario import NFSPScenario
from ray.rllib.agents.sac.sac import SACTrainer

# Module-level logger named after this module (standard `logging` convention).
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    tmp_br_env = ThousandActionOshiZumoMultiAgentEnv(
        env_config={
            'version': "oshi_zumo",