def __init__(self,
                 env_kwargs_list,
                 rew_kwargs_list,
                 batch_size,
                 action_script,
                 action_scale,
                 to_learn,
                 episode_length_list,
                 env_schedule=None):
        """
        Args:
            env_kwargs_list (list[dict]): list of parameters for the training 
                environments.
            rew_kwargs_list (list[dict]): list of parameters for the reward 
                functions. Should correspond element-wise to 'env_kwargs_list'.
            batch_size (int): number of episodes collected in parallel.
            action_script (str): name of action script. Action wrapper will 
                select actions from this script if they are not learned.
            action_scale (dict, str:float): dictionary mapping action dimensions
                to scaling factors. Action wrapper will rescale actions produced
                by the agent's neural net policy by these factors.
            to_learn (dict, str:bool): dictionary mapping action dimensions to 
                bool flags. Specifies if the action should be learned or scripted.
            episode_length_list (list[callable: int -> int]): list of schedule 
                functions for episode durations. Each schedule function takes 
                the int epoch number as argument and returns the int episode 
                duration for that epoch. The list should correspond to 
                'env_kwargs_list'.
            env_schedule (callable): function mapping the epoch number to the 
                index of the environment (from 'env_kwargs_list') to use during 
                that epoch. Defaults to cycling through the environments one 
                epoch at a time.
        """
        self.env_list, self.driver_list = [], []
        self.episode_length_list = episode_length_list
        for env_kwargs, rew_kwargs in zip(env_kwargs_list, rew_kwargs_list):
            # Create training env and wrap it
            env = gkp_init(batch_size=batch_size,
                           reward_kwargs=rew_kwargs,
                           **env_kwargs)
            action_script_m = getattr(action_scripts, action_script)
            env = wrappers.ActionWrapper(env, action_script_m, action_scale,
                                         to_learn)

            # create dummy placeholder policy to initialize driver
            dummy_policy = PolicyPlaceholder(env.time_step_spec(),
                                             env.action_spec())

            # create driver for this environment
            driver = dynamic_episode_driver.DynamicEpisodeDriver(
                env, dummy_policy, num_episodes=batch_size)

            self.env_list.append(env)
            self.driver_list.append(driver)

        if env_schedule is None:
            # regularly switch between environments
            self.env_schedule = lambda epoch: epoch % len(self.env_list)
        else:
            self.env_schedule = env_schedule
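
# Usage sketch for the schedule arguments documented above (illustrative
# values only, not taken from the source). One episode-length schedule is
# expected per environment in 'env_kwargs_list':
episode_length_list = [lambda epoch: 4,                         # constant duration
                       lambda epoch: min(8, 4 + epoch // 500)]  # grow every 500 epochs

# Optional custom 'env_schedule': spend 10 consecutive epochs on each of the
# two environments instead of the default per-epoch alternation.
env_schedule = lambda epoch: (epoch // 10) % 2
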
    def __init__(self,
                 env_kwargs,
                 reward_kwargs,
                 batch_size,
                 action_script,
                 action_scale,
                 to_learn,
                 episode_length,
                 learn_residuals=False):
        """
        Args:
            env_kwargs (dict): optional parameters for the training environment.
            reward_kwargs (dict): optional parameters for the reward function.
            batch_size (int): number of episodes collected in parallel.
            action_script (str): name of action script. Action wrapper will 
                select actions from this script if they are not learned.
            action_scale (dict, str:float): dictionary mapping action dimensions
                to scaling factors. Action wrapper will rescale actions produced
                by the agent's neural net policy by these factors.
            to_learn (dict, str:bool): dictionary mapping action dimensions to 
                bool flags. Specifies if the action should be learned or scripted.
            episode_length (callable: int -> int): function that defines the 
                schedule for training episode durations. Takes the int epoch 
                number as argument and returns the int episode duration for 
                that epoch.
            learn_residuals (bool): flag to learn residual over the scripted
                protocol. If False, will learn actions from scratch. If True,
                will learn a residual to be added to scripted protocol.        
        """
        self.episode_length = episode_length
        # Create training env and wrap it
        env = gkp_init(batch_size=batch_size,
                       reward_kwargs=reward_kwargs,
                       **env_kwargs)
        action_script = getattr(action_scripts, action_script)
        env = wrappers.ActionWrapper(env,
                                     action_script,
                                     action_scale,
                                     to_learn,
                                     learn_residuals=learn_residuals)

        # create dummy placeholder policy to initialize parent class
        dummy_policy = PolicyPlaceholder(env.time_step_spec(),
                                         env.action_spec())

        super().__init__(env, dummy_policy, num_episodes=batch_size)
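
# Usage sketch for the single-environment constructor above (illustrative
# values, not from the source). 'episode_length' is a schedule mapping the
# epoch number to the episode duration for that epoch:
episode_length = lambda epoch: 2 if epoch < 2000 else 5

# Design note: with learn_residuals=True the ActionWrapper adds the network's
# output as a residual on top of the scripted protocol; with the default
# False the actions are learned from scratch.
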
Example #3
import qutip as qt
from numpy import pi

import gkp.action_script as action_scripts
from gkp.gkp_tf_env import tf_env_wrappers as wrappers
from gkp.gkp_tf_env.gkp_init import gkp_init  # assumed location of gkp_init

env_kwargs = dict(simulate='snap_and_displacement_miscalibrated', init='vac',
                  H=1, T=5, attn_step=1, N=50, batch_size=1000, episode_length=5)

target_state = qt.tensor(qt.basis(2,0), qt.basis(50,3))
reward_kwargs = {'reward_mode' : 'overlap',
                  'target_state' : target_state,
                  'postselect_0' : False}

action_script = 'snap_and_displacements'
action_scale = {'alpha':4, 'theta':pi}
to_learn = {'alpha':True, 'theta':True}

action_script = getattr(action_scripts, action_script)

protocol = 'ideal'
max_epochs = 3000
gate_times = [0.4e-6, 3.4e-6]
seeds = ['seed2']
rewards = {t:{} for t in gate_times}
norms = {t:{} for t in gate_times}

for t in gate_times:
    env = gkp_init(**env_kwargs, reward_kwargs=reward_kwargs)
    env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)
    env._env.SNAP_miscalibrated.T = t
    env._env.bit_string = None # '00000'
    # collect episodes with different policies
    for sim_name in seeds:  # alternatively: os.listdir(root_dir[protocol])