Example #1
    def __init__(self,
                 env_kwargs,
                 reward_kwargs,
                 batch_size,
                 action_script,
                 action_scale,
                 to_learn,
                 episode_length,
                 learn_residuals=False,
                 remote=False):
        """
        Args:
            env_kwargs (dict): optional parameters for training environment.
            reward_kwargs (dict): optional parameters for reward function.
            batch_size (int): number of episodes collected in parallel.
            action_script (str): name of action script. Action wrapper will 
                select actions from this script if they are not learned.
            action_scale (dict, str:float): dictionary mapping action dimensions
                to scaling factors. Action wrapper will rescale actions produced
                by the agent's neural net policy by these factors.
            to_learn (dict, str:bool): dictionary mapping action dimensions to 
                bool flags. Specifies if the action should be learned or scripted.
            episode_length (callable: int -> int): function that defines the 
                schedule for training episode durations. Takes as argument int 
                epoch number and returns int episode duration for this epoch.
            learn_residuals (bool): flag to learn residual over the scripted
                protocol. If False, will learn actions from scratch. If True,
                will learn a residual to be added to scripted protocol.
            remote (bool): flag for remote environment to close the connection
                to a client upon finishing the training.
        """
        self.episode_length = episode_length
        self.remote = remote
        # Create training env and wrap it
        env = env_init(batch_size=batch_size,
                       reward_kwargs=reward_kwargs,
                       **env_kwargs)
        module_name = 'rl_tools.action_script.' + action_script
        action_script = importlib.import_module(module_name)
        env = wrappers.ActionWrapper(env,
                                     action_script,
                                     action_scale,
                                     to_learn,
                                     learn_residuals=learn_residuals)

        # create dummy placeholder policy to initialize parent class
        dummy_policy = PolicyPlaceholder(env.time_step_spec(),
                                         env.action_spec())

        super().__init__(env, dummy_policy, num_episodes=batch_size)
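
# For orientation, a minimal sketch of the arguments this constructor expects.
# The enclosing class name is not shown in this fragment, and the values below
# are illustrative, modeled on the other examples on this page:
#
#   env_kwargs = {'control_circuit': 'snap_and_displacement', 'init': 'vac',
#                 'T': 5, 'N': 100}
#   reward_kwargs = {'reward_mode': 'zero'}
#   action_script = 'snap_and_displacements'
#   action_scale = {'alpha': 4, 'theta': pi}
#   to_learn = {'alpha': True, 'theta': True}
#   episode_length = lambda epoch: 5   # constant schedule; may also grow with epoch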
Example #2
 def __init__(self, env_kwargs_list, rew_kwargs_list, batch_size,
              action_script, action_scale, to_learn, episode_length_list,
              env_schedule=None):
     """
     Args:
         env_kwargs_list (list[dict]): list of parameters for training 
             environment.
         rew_kwargs_list (list[dict]): list of parameters for reward
             functions. Should correspond to 'env_kwargs_list'.
         batch_size (int): number of episodes collected in parallel.
         action_script (str): name of action script. Action wrapper will 
             select actions from this script if they are not learned.
         action_scale (dict, str:float): dictionary mapping action dimensions
             to scaling factors. Action wrapper will rescale actions produced
             by the agent's neural net policy by these factors.
         to_learn (dict, str:bool): dictionary mapping action dimensions to 
             bool flags. Specifies if the action should be learned or scripted.
         episode_length_list (list[callable: int -> int]): list of schedule 
             functions for episode durations. Schedule functions take as 
             argument int epoch number and return int episode duration for 
             this epoch. The list should correspond to 'env_kwargs_list'.
         env_schedule (callable): function mapping epoch number to the index
             of the environment from the list to use during this epoch.
     """
     self.env_list, self.driver_list = [], []
     self.episode_length_list = episode_length_list
     for env_kwargs, rew_kwargs in zip(env_kwargs_list, rew_kwargs_list):
         # Create training env and wrap it
         env = env_init(batch_size=batch_size, reward_kwargs=rew_kwargs,
                        **env_kwargs)
         action_script_m = action_scripts.__getattribute__(action_script)
         env = wrappers.ActionWrapper(env, action_script_m, action_scale, 
                                      to_learn)
 
         # create dummy placeholder policy to initialize driver
         dummy_policy = PolicyPlaceholder(
             env.time_step_spec(), env.action_spec())
         
         # create driver for this environment
         driver = dynamic_episode_driver.DynamicEpisodeDriver(
             env, dummy_policy, num_episodes=batch_size)
         
         self.env_list.append(env)
         self.driver_list.append(driver)
     
     if env_schedule is None:
         # regularly switch between environments
         self.env_schedule = lambda epoch: epoch % len(self.env_list)
     else:
         self.env_schedule = env_schedule
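
# The default above simply cycles through the environments each epoch. Any
# callable mapping epoch number to an environment index works as
# 'env_schedule'; the warm-up variant below is a hypothetical illustration,
# not part of the original code:
#
#   def warmup_env_schedule(epoch, num_envs=2, warmup_epochs=100):
#       # stay on the first environment early on, then round-robin
#       if epoch < warmup_epochs:
#           return 0
#       return epoch % num_envs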
Example #3
# Define Kerr sweep range and Kerr-dependent parameters
Kerr = np.linspace(1, 51, 11)
t_gate = 1.2e-6 / np.sqrt(Kerr)  # assume gate time can scale as 1/chi
rotation_angle = 2 * np.pi * Kerr * (1.2e-6 + t_gate) * 20  # simple heuristic

states = ['X+', 'Y+', 'Z+']
lifetimes = {state: np.zeros(len(Kerr)) for state in states}

savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\Kerr\hexagonal_sweep\no_rotation_perfect_qubit'

# Initialize environment and policy
env = env_init(control_circuit='oscillator',
               encoding='hexagonal',
               init='X+',
               H=1,
               batch_size=2000,
               episode_length=200,
               reward_mode='fidelity',
               quantum_circuit_type='v2')

from rl_tools.action_script import hexagonal_phase_estimation_symmetric_6round as action_script

policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

for k in range(len(Kerr)):

    env = env_init(control_circuit='oscillator',
                   encoding='hexagonal',
                   init='X+',
                   H=1,
                   batch_size=2000,
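
# (This example is truncated here. Judging by the Kerr-sweep snippet further
# down the page, the loop presumably re-creates the environment with
# Kerr-dependent parameters such as K_osc=Kerr[k] and t_gate=t_gate[k], runs
# the scripted policy, and fits the logical lifetime per state, e.g. with
# hf.fit_logical_lifetime(env, policy, states=states, save_dir=savepath).)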
Example #4
    reward_kwargs = {'reward_mode': 'zero'}

    # Params for environment
    env_kwargs = {
        'control_circuit': 'snap_and_displacement',
        'init': 'vac',
        'T': min_T,
        'N': 100
    }

    # Params for action wrapper
    action_script = 'snap_and_displacements'
    action_scale = {'alpha': 4, 'theta': pi}
    to_learn = {'alpha': True, 'theta': True}

    env = env_init(batch_size=1, **env_kwargs, episode_length=env_kwargs['T'])

    action_script_obj = importlib.import_module('rl_tools.action_script.' +
                                                action_script)
    env = wrappers.ActionWrapper(env, action_script_obj, action_scale,
                                 to_learn)

    action_names = list(to_learn.keys())
    all_actions = {a: [] for a in action_names}

    time_step = env.reset()
    policy_state = policy.get_initial_state(env.batch_size)
    max_alpha, max_n = 0, 0
    while not time_step.is_last():
        action_step = policy.action(time_step, policy_state)
        policy_state = action_step.state
        self.beta = [b_amp + 0j, 1j * b_amp] * 2 + [eps + 0j, 1j * eps]

        self.alpha = [a_amp + 0j
                      ] + [-1j * delta, delta + 0j] * 2 + [-1j * a_amp]

        self.phi = [pi / 2] * 6


#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

env = env_init(control_circuit='oscillator',
               init='Z+',
               H=1,
               batch_size=800,
               episode_length=60,
               reward_mode='fidelity',
               quantum_circuit_type='v2',
               encoding='square')

savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\osc_sims\test'
feedback_amps = np.linspace(0.15, 0.24, 10, dtype=complex)
trim_amps = np.linspace(0.15, 0.24, 10, dtype=complex)
states = ['Z+']
make_figure = False

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

lifetimes = np.zeros((len(feedback_amps), len(trim_amps)))
        data_dir = os.path.join(train_dir, 'K' + str(K), 'policy')
        policy_dir = os.path.join(data_dir, '000080000')
        policy = tf.compat.v2.saved_model.load(policy_dir)
        
        # Additional simulation parameters
        kwargs = {'K_osc' : K, 't_gate' : 1.2e-6/np.sqrt(np.sqrt(K)), 'T1_osc' : 250e-6}
        if 'perfect' in names[i]:
            kwargs['control_circuit'] = 'oscillator'
        else:
            kwargs['control_circuit'] = 'oscillator_qubit'
            T1_qb = int(names[i][:names[i].find('us')])
            kwargs['T1_qb'] = T1_qb*1e-6
        
        # Initialize environment
        env = env_init(init='X+', H=1, T=6, attn_step=1, batch_size=3000, 
                   episode_length=200, reward_mode = 'fidelity',
                   quantum_circuit_type='v2', encoding='hexagonal', **kwargs)
        env = wrappers.ActionWrapper(env, action_script, to_learn)

        # Fit logical lifetime
        fit_params = hf.fit_logical_lifetime(env, policy, plot=False, reps=1, 
                                             states=['X+'], save_dir=data_dir)
        T1[names[i]].append(fit_params['X+'][1]*1e6) # convert to us

# Plot things
fig, ax = plt.subplots(1,1, figsize=(7,4))
ax.set_title(r'Hexagonal code, $t_{gate}\propto 1\,/\,\sqrt[4]{Kerr}$')
ax.set_ylabel(r'Logical lifetime ($\,\mu s\,$)')
ax.set_xlabel('Kerr (Hz)')
for i in range(len(names)):
    color = palette(i//2)
# Evaluation environment params
eval_env_kwargs = {
    'control_circuit': 'ECD_control_remote',
    'init': 'vac',
    'T': 11,
    'N': 100
}

# Create a target state with a quick simulation of the ECDC sequence
from rl_tools.tf_env import env_init
from rl_tools.tf_env import policy as plc
env = env_init(control_circuit='ECD_control',
               reward_kwargs=dict(reward_mode='zero'),
               init='vac',
               T=env_kwargs['T'],
               batch_size=1,
               N=100,
               episode_length=env_kwargs['T'])

from rl_tools.action_script import ECD_control_residuals_GKP_plusZ as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

time_step = env.reset()
policy_state = policy.get_initial_state(env.batch_size)
while not time_step.is_last()[0]:
    action_step = policy.action(time_step, policy_state)
    policy_state = action_step.state
    time_step = env.step(action_step.action)

target_state = env.info['psi_cached']
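
# The cached target state can then be passed to a reward function; a
# commented-out block in a later example on this page uses it the same way:
#
#   reward_kwargs = {'reward_mode': 'overlap',
#                    'target_state': target_state,
#                    'postselect_0': False}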
Example #8
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = 'true'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np
import matplotlib.pyplot as plt
from rl_tools.tf_env import policy as plc
from rl_tools.tf_env import env_init
from time import time
import tensorflow as tf
from math import sqrt, pi

# initialize environment and policy
env = env_init(control_circuit='oscillator',
               init='Z+',
               H=1,
               batch_size=2000,
               episode_length=30,
               reward_mode='fidelity',
               quantum_circuit_type='v2')

from rl_tools.action_script import phase_estimation_symmetric_with_trim_4round as action_script

policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

# collect trajectories
all_obs = []
reps = 5  # collect in several serial repetitions if memory limits force a small batch size
for i in range(reps):
    time_step = env.reset()
    policy_state = policy.get_initial_state(env.batch_size)
    counter = 0
Example #9
mpl.rcParams['lines.markersize'] = markersize
mpl.rcParams['lines.markeredgewidth'] = linewidth / 2

mpl.rcParams['legend.markerscale'] = 2.0

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
### Initialize the environment and simulation/training parameters
N = 40

env = env_init(control_circuit='snap_and_displacement',
               channel='quantum_jumps',
               init='vac',
               H=1,
               T=3,
               attn_step=1,
               batch_size=1,
               N=N,
               episode_length=3,
               phase_space_rep='wigner')

action_script = 'snap_and_displacements'
action_scale = {'alpha': 4, 'theta': pi}
to_learn = {'alpha': True, 'theta': True}

action_script = getattr(action_scripts, action_script)
env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

root_dir = {
    'bin0': r'E:\data\gkp_sims\PPO\examples\bin0_state_prep_lr3e-4',
    'bin1': r'E:\data\gkp_sims\PPO\examples\bin1_state_prep_lr3e-4'
Example #10
            'alpha': [delta + 0j, -1j * delta],
            'beta': [b_amp + 0j, 1j * b_amp],
            'phi': [pi / 2] * 2,
            'theta': [0.0] * 2
        }


#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

env = env_init(control_circuit='oscillator',
               init='Z+',
               H=1,
               batch_size=1000,
               episode_length=200,
               reward_mode='fidelity',
               channel='diffusion',
               quantum_circuit_type='v2',
               encoding='square',
               N=200)

savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\diffusion_channel'
params = [0j] + list(np.linspace(0.0, 0.5, 11, dtype=complex))
make_figure = True

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

lifetimes = np.zeros(len(params))
from time import time
from tensorflow.keras.backend import batch_dot
from math import sqrt, pi
import tensorflow as tf
from scipy.optimize import curve_fit
from rl_tools.tf_env import helper_functions as hf

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

# initialize environment and policy
env = env_init(control_circuit='oscillator',
               init='X+',
               H=1,
               batch_size=6000,
               episode_length=31,
               reward_mode='zero',
               quantum_circuit_type='v2',
               encoding='square')

from rl_tools.action_script import phase_estimation_symmetric_with_trim_4round as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

translations = np.linspace(-sqrt(pi), sqrt(pi), 100)

T = env.episode_length
R = np.zeros([len(translations), T, T])  # correlation matrix (empty)

for k, a in enumerate(translations):
    # collect trajectories
    time_step = env.reset()
from rl_tools.tf_env import tf_env_wrappers as wrappers
from rl_tools.tf_env import env_init
from rl_tools.tf_env import policy as plc
import rl_tools.action_script as action_scripts
import os
import tensorflow as tf
from math import pi

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
### Initialize env and policy

env = env_init(control_circuit='gkp_qec_autonomous_sBs_osc_qb',
               reward_kwargs={
                   'reward_mode': 'fidelity',
                   'code_flips': True
               },
               init='X+',
               H=1,
               T=2,
               attn_step=1,
               batch_size=100,
               episode_length=12,
               encoding='square')

action_script = 'gkp_qec_autonomous_sBs_2round'
action_scale = {'beta': 1, 'phi': pi, 'eps1': 1, 'eps2': 1}
to_learn = {'beta': True, 'phi': False, 'eps1': True, 'eps2': True}
action_script = getattr(action_scripts, action_script)
env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

root_dir = r'E:\data\gkp_sims\PPO\examples\gkp_qec_autonomous_sBs'
policy_dir = r'policy\001100'
policy = tf.compat.v2.saved_model.load(os.path.join(root_dir, policy_dir))
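
# A rollout with the loaded policy follows the same pattern as the scripted
# rollouts elsewhere on this page (sketch, reusing the variables defined above):
time_step = env.reset()
policy_state = policy.get_initial_state(env.batch_size)
while not time_step.is_last()[0]:
    action_step = policy.action(time_step, policy_state)
    policy_state = action_step.state
    time_step = env.step(action_step.action)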
Example #13
@author: Vladimir Sivak
"""
import os
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]='true'
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# append parent 'gkp-rl' directory to path 
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

import qutip as qt
from rl_tools.tf_env import env_init
from rl_tools.remote_env_tools.remote_env_tools import Client

# Create environment that will produce mock measurement outcomes
env = env_init(control_circuit='ECD_control', reward_kwargs={'reward_mode' : 'zero'},
               init='vac', T=8, batch_size=10, N=100, episode_length=8)

# connect to the agent
client_socket = Client()
(host, port) = '172.28.142.46', 5555
client_socket.connect((host, port))

# training loop
done = False
while not done:
    # receive action data from the agent
    message, done = client_socket.recv_data()
    if done: break
    action_batch = message['action_batch']
    mini_buffer = message['mini_buffer']
    N_msmt = message['N_msmt']
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from time import time
from math import pi
from rl_tools.tf_env import tf_env_wrappers as wrappers
from rl_tools.tf_env import policy as plc
from rl_tools.tf_env import env_init

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

env = env_init(control_circuit='gkp_qec_autonomous_sBs_osc_qb', 
                reward_kwargs={'reward_mode':'zero'},
                init='vac', H=1, T=2, attn_step=1, batch_size=2000, episode_length=60,
                encoding='square')

from rl_tools.action_script import gkp_qec_autonomous_sBs_2round as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# # What operators to measure 
# names = [r'Re($S_1$)', r'Re($S_2$)',
#          r'Im($S_1$)', r'Im($S_2$)']
# # Translation amplitudes
# stabilizers = [np.sqrt(pi), 2j*np.sqrt(pi)]*2
# # Qubit measurement angles
# angles = [0]*2 + [-pi/2]*2
Example #15
import numpy as np
from math import pi, sqrt
import matplotlib.pyplot as plt
from rl_tools.tf_env import policy as plc
from rl_tools.tf_env import helper_functions as hf
from rl_tools.tf_env import tf_env_wrappers as wrappers
from rl_tools.tf_env import env_init
from simulator.utils import expectation
import rl_tools.action_script as action_scripts
import importlib
import os
import tensorflow as tf

if 1:
    env = env_init(control_circuit='snap_and_displacement',
                   encoding='gkp_square',
                   reward_kwargs={'reward_mode': 'zero'},
                   init='Z+',
                   T=4,
                   batch_size=1,
                   N=150,
                   episode_length=4)

    action_script = 'snap_and_displacements'
    action_scale = {'alpha': 4, 'theta': pi}
    to_learn = {'alpha': True, 'theta': True}

    module_name = 'rl_tools.action_script.' + action_script
    action_script = importlib.import_module(module_name)
    env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

    root_dir = r'E:\data\gkp_sims\PPO\paper_data\gates\test\seed2'
    policy_dir = r'policy\004000'
    policy = tf.compat.v2.saved_model.load(os.path.join(root_dir, policy_dir))
Example #16
action_script = 'snap_and_displacements'
action_scale = {'alpha':4, 'theta':pi}
to_learn = {'alpha':True, 'theta':True}

action_script = getattr(action_scripts, action_script)

protocol = 'ideal'
max_epochs = 3000
gate_times = [0.4e-6, 3.4e-6]
seeds = ['seed2']
rewards = {t:{} for t in gate_times}
norms = {t:{} for t in gate_times}

for t in gate_times:
    env = env_init(**env_kwargs, reward_kwargs=reward_kwargs)
    env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)
    env._env.SNAP_miscalibrated.T = t
    env._env.bit_string = None # '00000'
    # collect episodes with different policies
    for sim_name in seeds: #os.listdir(root_dir[protocol]):
        print(sim_name)
        rewards[t][sim_name] = []
        norms[t][sim_name] = []
        sim_dir = os.path.join(root_dir[protocol], sim_name)
        
        for policy_name in os.listdir(os.path.join(sim_dir, 'policy')):
            if int(policy_name) > max_epochs: break
            policy_dir = os.path.join(sim_dir, 'policy', policy_name)
            policy = tf.compat.v2.saved_model.load(policy_dir)
            
Example #17
avg_ideal_stabilizer, delta_effective = {}, {}

for Delta in deltas:
    
    # from rl_tools.tf_env import helper_functions as hf
    # target_state = hf.GKP_1D_state(False, 200, Delta*sqrt(2))
    # reward_kwargs = {'reward_mode' : 'overlap',
    #                   'target_state' : target_state,
    #                   'postselect_0' : False}
    
    reward_kwargs = {'reward_mode' : 'stabilizers_v2',
                      'Delta' : 0.0, 'beta' : sqrt(pi),
                      'sample' : False}
    
    env = env_init(control_circuit='snap_and_displacement', reward_kwargs=reward_kwargs,
                   init='vac', T=9, batch_size=1, N=200, episode_length=9)
    
    action_script = 'snap_and_displacements'
    action_scale = {'alpha':6, 'theta':pi}
    to_learn = {'alpha':True, 'theta':True}
    
    module_name = 'rl_tools.action_script.' + action_script
    action_script = importlib.import_module(module_name)
    env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

    delta_dir = os.path.join(root_dir, 'delta' + str(Delta))
    seed_dir = os.path.join(delta_dir, best_seed[Delta])
    policy_dir = r'policy\010000'
    policy = tf.compat.v2.saved_model.load(os.path.join(seed_dir,policy_dir))
    
Example #18
lifetimes = np.zeros(len(params))
returns = np.zeros(len(params))

gfig, gax = plt.subplots(1, 1, dpi=300, figsize=(10, 6))
gax.set_title(r'Reward curves')
gax.set_ylabel(r'Reward')
gax.set_xlabel('Time')

for j in range(len(params)):
    t = time()
    env = env_init(control_circuit='oscillator_qubit',
                   init='Z+',
                   H=1,
                   batch_size=2500,
                   episode_length=200,
                   reward_mode='fidelity',
                   quantum_circuit_type='v2',
                   encoding='hexagonal',
                   t_feedback=params[j])
    action_script = ActionScript()
    policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

    for state in states:
        if hasattr(env, '_env'):
            env._env.init = state
        else:
            env.init = state

        # Collect batch of episodes
        time_step = env.reset()