def __init__(self,
                 env_kwargs_list,
                 rew_kwargs_list,
                 batch_size,
                 action_script,
                 action_scale,
                 to_learn,
                 episode_length_list,
                 env_schedule=None):
        """
        Args:
            env_kwargs_list (list[dict]): list of parameter dicts for the 
                training environments.
            rew_kwargs_list (list[dict]): list of parameter dicts for the 
                reward functions. Should correspond to 'env_kwargs_list'.
            batch_size (int): number of episodes collected in parallel.
            action_script (str): name of action script. Action wrapper will 
                select actions from this script if they are not learned.
            action_scale (dict, str:float): dictionary mapping action dimensions
                to scaling factors. Action wrapper will rescale actions produced
                by the agent's neural net policy by these factors.
            to_learn (dict, str:bool): dictionary mapping action dimensions to 
                bool flags. Specifies if the action should be learned or scripted.
            episode_length_list (list[callable: int -> int]): list of schedule 
                functions for episode durations. Each schedule function takes 
                the int epoch number and returns the int episode duration for 
                that epoch. The list should correspond to 'env_kwargs_list'.
            env_schedule (callable): function mapping the epoch number to the 
                index of the environment from the list to use during this 
                epoch. Defaults to cycling through the environments in order.
        """
        self.env_list, self.driver_list = [], []
        self.episode_length_list = episode_length_list
        for env_kwargs, rew_kwargs in zip(env_kwargs_list, rew_kwargs_list):
            # Create training env and wrap it
            env = gkp_init(batch_size=batch_size,
                           reward_kwargs=rew_kwargs,
                           **env_kwargs)
            action_script_m = getattr(action_scripts, action_script)
            env = wrappers.ActionWrapper(env, action_script_m, action_scale,
                                         to_learn)

            # create dummy placeholder policy to initialize driver
            dummy_policy = PolicyPlaceholder(env.time_step_spec(),
                                             env.action_spec())

            # create driver for this environment
            driver = dynamic_episode_driver.DynamicEpisodeDriver(
                env, dummy_policy, num_episodes=batch_size)

            self.env_list.append(env)
            self.driver_list.append(driver)

        if env_schedule is None:
            # regularly switch between environments
            self.env_schedule = lambda epoch: epoch % len(self.env_list)
        else:
            self.env_schedule = env_schedule
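A minimal usage sketch for this constructor; the class name 'MultiEnvTrainer' and all parameter values below are placeholders for illustration, not taken from the original project:

# Hypothetical usage sketch -- class name and parameter values are placeholders.
env_kwargs_list = [dict(T=4), dict(T=6)]                  # placeholder environment settings
rew_kwargs_list = [dict(reward_mode='fidelity'),          # placeholder reward settings
                   dict(reward_mode='fidelity')]
episode_length_list = [lambda epoch: 4, lambda epoch: 6]  # constant-length schedules

trainer = MultiEnvTrainer(
    env_kwargs_list=env_kwargs_list,
    rew_kwargs_list=rew_kwargs_list,
    batch_size=100,
    action_script='v2_phase_estimation_with_trim_4round',
    action_scale={'beta': 1.0, 'phi': 1.0},
    to_learn={'beta': True, 'phi': True},
    episode_length_list=episode_length_list,
    env_schedule=None)   # None -> cycle through the environments epoch by epoch
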
Code example #2
    def __init__(self,
                 env_kwargs,
                 reward_kwargs,
                 batch_size,
                 action_script,
                 action_scale,
                 to_learn,
                 episode_length,
                 learn_residuals=False):
        """
        Args:
            env_kwargs (dict): optional parameters for the training environment.
            reward_kwargs (dict): optional parameters for the reward function.
            batch_size (int): number of episodes collected in parallel.
            action_script (str): name of action script. Action wrapper will 
                select actions from this script if they are not learned.
            action_scale (dict, str:float): dictionary mapping action dimensions
                to scaling factors. Action wrapper will rescale actions produced
                by the agent's neural net policy by these factors.
            to_learn (dict, str:bool): dictionary mapping action dimensions to 
                bool flags. Specifies if the action should be learned or scripted.
            episode_length (callable: int -> int): function that defines the 
                schedule for training episode durations. Takes the int epoch 
                number and returns the int episode duration for that epoch.
            learn_residuals (bool): flag to learn a residual on top of the 
                scripted protocol. If False, actions are learned from scratch; 
                if True, the learned action is added to the scripted protocol.
        """
        self.episode_length = episode_length
        # Create training env and wrap it
        env = gkp_init(batch_size=batch_size,
                       reward_kwargs=reward_kwargs,
                       **env_kwargs)
        action_script = getattr(action_scripts, action_script)
        env = wrappers.ActionWrapper(env,
                                     action_script,
                                     action_scale,
                                     to_learn,
                                     learn_residuals=learn_residuals)

        # create dummy placeholder policy to initialize parent class
        dummy_policy = PolicyPlaceholder(env.time_step_spec(),
                                         env.action_spec())

        super().__init__(env, dummy_policy, num_episodes=batch_size)
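
A corresponding usage sketch for this single-environment variant; 'SingleEnvTrainer' and the parameter values are placeholders for illustration:

# Hypothetical usage sketch -- class name and parameter values are placeholders.
trainer = SingleEnvTrainer(
    env_kwargs=dict(T=6),                               # placeholder environment settings
    reward_kwargs=dict(reward_mode='fidelity'),         # placeholder reward settings
    batch_size=100,
    action_script='Alec_universal_gate_set_6round',
    action_scale={'beta': 1.0, 'phi': 1.0},
    to_learn={'beta': True, 'phi': True},
    episode_length=lambda epoch: min(6, 2 + epoch // 100),  # ramp episode length during training
    learn_residuals=True)
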
Code example #3
               channel='quantum_jumps',
               reward_kwargs=reward_kwargs,
               init='vac',
               H=1,
               T=6,
               attn_step=1,
               batch_size=1,
               episode_length=6,
               encoding='square',
               **kwargs)

# from gkp.action_script import v2_phase_estimation_with_trim_4round as action_script
from gkp.action_script import Alec_universal_gate_set_6round as action_script
# to_learn = {'alpha':True, 'beta':True, 'phi':False, 'theta':False}
to_learn = {'beta': True, 'phi': True}
env = wrappers.ActionWrapper(env, action_script, to_learn)

root_dir = r'E:\VladGoogleDrive\Qulab\GKP\sims\PPO\CT_qubit_rot\fock3_beta3_B100_tomo100_lr1e-3_baseline_2'
policy_dir = r'policy\000076000'
policy = tf.compat.v2.saved_model.load(os.path.join(root_dir, policy_dir))

# from gkp.action_script import v2_phase_estimation_with_trim_4round as action_script
# policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

### Plot cardinal points
if False:  # flip to True to plot Wigner functions of the cardinal-point states
    for state_name in env.states.keys():
        state = tf.reshape(env.states[state_name], [1, env.N])
        hf.plot_wigner_tf_wrapper(state, title=state_name)

### Simulate one episode
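A minimal sketch of what a single-episode rollout with the loaded policy could look like, assuming the wrapped environment and saved policy expose the standard TF-Agents time_step/policy interface (with batch_size=1 so the loop condition below reduces to a scalar check):

# Minimal rollout sketch (assumes the standard TF-Agents policy interface).
time_step = env.reset()
policy_state = policy.get_initial_state(env.batch_size)
steps = 0
while not time_step.is_last():   # scalar check only because batch_size=1
    action_step = policy.action(time_step, policy_state)
    policy_state = action_step.state
    time_step = env.step(action_step.action)
    steps += 1
print('Episode finished after %d steps' % steps)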