Example #1
 def __init__(self, env_kwargs_list, rew_kwargs_list, batch_size,
              action_script, action_scale, to_learn, episode_length_list,
              env_schedule=None):
     """
     Args:
         env_kwargs_list (list[dict]): list of parameters for training 
             environment.
          rew_kwargs_list (list[dict]): list of parameters for reward
             functions. Should correspond to 'env_kwargs_list'.
         batch_size (int): number of episodes collected in parallel.
         action_script (str): name of action script. Action wrapper will 
             select actions from this script if they are not learned.
         action_scale (dict, str:float): dictionary mapping action dimensions
             to scaling factors. Action wrapper will rescale actions produced
             by the agent's neural net policy by these factors.
         to_learn (dict, str:bool): dictionary mapping action dimensions to 
             bool flags. Specifies if the action should be learned or scripted.
         episode_length_list (list[callable: int -> int]): list of schedule 
             functions for episode durations. Schedule functions take as 
             argument int epoch number and return int episode duration for 
             this epoch. The list should correspond to 'env_kwargs_list'.
         env_schedule (callable): function mapping epoch number to index
              of the environment from the list to use during this epoch
              (see the schedule sketch after this method).
     """
     self.env_list, self.driver_list = [], []
     self.episode_length_list = episode_length_list
     for env_kwargs, rew_kwargs in zip(env_kwargs_list, rew_kwargs_list):
         # Create training env and wrap it
         env = gkp_init(batch_size=batch_size, reward_kwargs=rew_kwargs,
                        **env_kwargs)
         action_script_m = action_scripts.__getattribute__(action_script)
         env = wrappers.ActionWrapper(env, action_script_m, action_scale, 
                                      to_learn)
 
         # create dummy placeholder policy to initialize driver
         dummy_policy = PolicyPlaceholder(
             env.time_step_spec(), env.action_spec())
         
         # create driver for this environment
         driver = dynamic_episode_driver.DynamicEpisodeDriver(
             env, dummy_policy, num_episodes=batch_size)
         
         self.env_list.append(env)
         self.driver_list.append(driver)
     
     if env_schedule is None:
         # regularly switch between environments
         self.env_schedule = lambda epoch: epoch % len(self.env_list)
     else:
         self.env_schedule = env_schedule
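
# --- Sketch (not from the source): what the schedule arguments look like. ---
# Each entry of 'episode_length_list' maps an epoch number to an episode duration
# for the corresponding environment, and 'env_schedule' picks which environment
# to train in during each epoch.
episode_length_list = [lambda epoch: 30,                   # fixed-length episodes
                       lambda epoch: min(60, 10 + epoch)]  # duration grows with training
env_schedule = lambda epoch: epoch % 2                     # alternate between env 0 and env 1

for epoch in range(4):
    idx = env_schedule(epoch)
    print(f'epoch {epoch}: env {idx}, episode length {episode_length_list[idx](epoch)}')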
    def __init__(self,
                 env_kwargs,
                 reward_kwargs,
                 batch_size,
                 action_script,
                 action_scale,
                 to_learn,
                 episode_length,
                 learn_residuals=False):
        """
        Args:
            env_kwargs (dict): optional parameters for training environment.
            reward_kwargs (dict): optional parameters for reward function.
            batch_size (int): number of episodes collected in parallel.
            action_script (str): name of action script. Action wrapper will 
                select actions from this script if they are not learned.
            action_scale (dict, str:float): dictionary mapping action dimensions
                to scaling factors. Action wrapper will rescale actions produced
                by the agent's neural net policy by these factors.
            to_learn (dict, str:bool): dictionary mapping action dimensions to 
                bool flags. Specifies if the action should be learned or scripted.
            episode_length (callable: int -> int): function that defines the 
                schedule for training episode durations. Takes as argument int 
                epoch number and returns int episode duration for this epoch.
            learn_residuals (bool): flag to learn residual over the scripted
                protocol. If False, will learn actions from scratch. If True,
                will learn a residual to be added to scripted protocol.        
        """
        self.episode_length = episode_length
        # Create training env and wrap it
        env = gkp_init(batch_size=batch_size,
                       reward_kwargs=reward_kwargs,
                       **env_kwargs)
        action_script = action_scripts.__getattribute__(action_script)
        env = wrappers.ActionWrapper(env,
                                     action_script,
                                     action_scale,
                                     to_learn,
                                     learn_residuals=learn_residuals)

        # create dummy placeholder policy to initialize parent class
        dummy_policy = PolicyPlaceholder(env.time_step_spec(),
                                         env.action_spec())

        super().__init__(env, dummy_policy, num_episodes=batch_size)
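
# --- Sketch (an assumption based on the docstring, not the wrapper's actual code):
# with learn_residuals=True the rescaled network output is added to the scripted
# action; with learn_residuals=False it replaces the scripted action entirely.
import numpy as np

scripted_action = np.array([1.0, 0.0])    # action taken from the action script
nn_output = np.array([0.05, -0.02])       # raw output of the agent's neural net policy
scale = 1.0                               # per-dimension factor from 'action_scale'

action_with_residuals = scripted_action + scale * nn_output   # learn_residuals=True
action_from_scratch = scale * nn_output                       # learn_residuals=False
print(action_with_residuals, action_from_scratch)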
Example #3
"""
import os
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]='true'
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import numpy as np
import matplotlib.pyplot as plt
from gkp.gkp_tf_env import policy as plc
from gkp.gkp_tf_env import gkp_init
from time import time
import tensorflow as tf 
from math import sqrt, pi

# initialize environment and policy
env = gkp_init(simulate='oscillator',
                init='Z+', H=1, batch_size=2000, episode_length=30, 
                reward_mode='fidelity', quantum_circuit_type='v2')

from gkp.action_script import phase_estimation_symmetric_with_trim_4round as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

# collect trajectories
all_obs = []
reps = 5  # collect in several serial repetitions when memory limits force a small batch size
for i in range(reps):
    time_step = env.reset()
    policy_state = policy.get_initial_state(env.batch_size)
    counter = 0
    while not time_step.is_last()[0]:
        t = time()
        action_step = policy.action(time_step, policy_state)      
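        # --- assumed completion of the rollout loop (not from the source): advance
        # the recurrent policy state, step the environment with the scripted action,
        # and keep the final batch of observations for this repetition.
        policy_state = action_step.state
        time_step = env.step(action_step.action)
        counter += 1
        print(f'step {counter}: {time() - t:.3f} s')
    all_obs.append(time_step.observation)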
Example #4
#     p = 1/(1+500*(a['alpha'] - 0.37)**2) # just needed a sharper reward function!
#     z = tfp.distributions.Bernoulli(probs=p).sample()
#     return 2*tf.cast(z, tf.float32)-1


B = 200
actions = ['alpha', 'phi_g', 'phi_e']

action_scale = {'alpha':27, 'phi_g':pi, 'phi_e':pi}


reward_kwargs = {'reward_mode' : 'measurement',
                  'sample' : True}

env = gkp_init(simulate='conditional_displacement_cal',
               reward_kwargs=reward_kwargs,
               init='vac', T=1, batch_size=B, N=50, episode_length=1,
               t_gate = 100e-9)


def reward_sampler(a):
    action = {s : a[s] * action_scale[s] for s in actions}
    time_step = env.reset()
    while not time_step.is_last()[0]:
        time_step = env.step(action)
    r = time_step.reward
    return r
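
# Hypothetical call to reward_sampler (the action values below are illustrative only):
# 'a' holds actions in the agent's normalized range; reward_sampler rescales them by
# 'action_scale', runs one single-step episode, and returns the batch of rewards.
a = {'alpha': 0.01, 'phi_g': 0.5, 'phi_e': -0.5}
r = reward_sampler(a)
print(r)   # batch of sampled measurement rewards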


# eval_reward_kwargs = {'reward_mode' : 'measurement',
#                       'sample' : False}
# Define Kerr sweep range and Kerr-dependent parameters
Kerr = np.linspace(1, 51, 11)
t_gate = 1.2e-6 / np.sqrt(Kerr)  # assume gate time can scale as 1/chi
rotation_angle = 2 * np.pi * Kerr * (1.2e-6 + t_gate) * 20  # simple heuristic

states = ['X+', 'Y+', 'Z+']
lifetimes = {state: np.zeros(len(Kerr)) for state in states}

savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\Kerr\hexagonal_sweep\no_rotation_perfect_qubit'

# Initialize environment and policy
env = gkp_init(simulate='oscillator',
               encoding='hexagonal',
               init='X+',
               H=1,
               batch_size=2000,
               episode_length=200,
               reward_mode='fidelity',
               quantum_circuit_type='v2')

from gkp.action_script import hexagonal_phase_estimation_symmetric_6round as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

for k in range(len(Kerr)):

    env = gkp_init(simulate='oscillator',
                   encoding='hexagonal',
                   init='X+',
                   H=1,
                   batch_size=2000,
                   episode_length=200,
avg_ideal_stabilizer, delta_effective = {}, {}

for Delta in deltas:
    reward_kwargs = {
        'reward_mode': 'stabilizers_v2',
        'Delta': 0.0,
        'beta': sqrt(pi),
        'sample': False
    }

    env = gkp_init(simulate='snap_and_displacement',
                   reward_kwargs=reward_kwargs,
                   init='vac',
                   H=1,
                   T=9,
                   attn_step=1,
                   batch_size=1,
                   N=200,
                   episode_length=9)

    action_script = 'snap_and_displacements'
    action_scale = {'alpha': 6, 'theta': pi}
    to_learn = {'alpha': True, 'theta': True}
    action_script = action_scripts.__getattribute__(action_script)
    env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

    delta_dir = os.path.join(root_dir, 'delta' + str(Delta))
    seed_dir = os.path.join(delta_dir, best_seed[Delta])
    policy_dir = r'policy\010000'
    policy = tf.compat.v2.saved_model.load(os.path.join(seed_dir, policy_dir))
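    # --- assumed continuation (not from the source): roll out one episode with the
    # loaded policy and record the final reward as the ideal-stabilizer value for
    # this Delta (the exact reward bookkeeping is an assumption).
    time_step = env.reset()
    policy_state = policy.get_initial_state(env.batch_size)
    while not time_step.is_last()[0]:
        action_step = policy.action(time_step, policy_state)
        policy_state = action_step.state
        time_step = env.step(action_step.action)
    avg_ideal_stabilizer[Delta] = float(tf.reduce_mean(time_step.reward))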
lifetimes = np.zeros(len(params))
returns = np.zeros(len(params))

gfig, gax = plt.subplots(1, 1, dpi=300, figsize=(10, 6))
gax.set_title(r'Reward curves')
gax.set_ylabel(r'Reward')
gax.set_xlabel('Time')

for j in range(len(params)):
    t = time()
    env = gkp_init(simulate='oscillator_qubit',
                   init='Z+',
                   H=1,
                   batch_size=2500,
                   episode_length=200,
                   reward_mode='fidelity',
                   quantum_circuit_type='v2',
                   encoding='hexagonal',
                   t_feedback=params[j])
    action_script = ActionScript()
    policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

    for state in states:
        # set the initial state on the underlying environment (wrapped envs keep it in _env)
        if hasattr(env, '_env'):
            env._env.init = state
        else:
            env.init = state

        # Collect batch of episodes
        time_step = env.reset()
Example #8
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = 'true'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf
from gkp.gkp_tf_env import helper_functions as hf
from gkp.gkp_tf_env import tf_env_wrappers as wrappers
from gkp.gkp_tf_env import policy as plc
from gkp.gkp_tf_env import gkp_init

env = gkp_init(simulate='phase_estimation_osc_v2',
               channel='quantum_jumps',
               reward_kwargs={
                   'reward_mode': 'fidelity',
                   'code_flips': True
               },
               init='X+',
               H=1,
               T=4,
               attn_step=1,
               batch_size=1000,
               episode_length=50,
               encoding='square')

# from gkp.action_script import v2_phase_estimation_with_trim_4round as action_script
# # # from gkp.action_script import Alec_universal_gate_set_12round as action_script
# # # from gkp.action_script import hexagonal_phase_estimation_symmetric_6round as action_script
# to_learn = {'alpha':True, 'beta':True, 'phi':False, 'theta':False}
# # # to_learn = {'alpha':True, 'beta':True, 'phi':True}
# env = wrappers.ActionWrapper(env, action_script, to_learn)

# root_dir = r'E:\VladGoogleDrive\Qulab\GKP\sims\PPO\August\OscillatorGKP\mlp2_H3T4A4_steps36_64_qec_4'
from time import time
from tensorflow.keras.backend import batch_dot
from math import sqrt, pi
import tensorflow as tf
from scipy.optimize import curve_fit
from gkp.gkp_tf_env import helper_functions as hf

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

# initialize environment and policy
env = gkp_init(simulate='oscillator',
               init='X+',
               H=1,
               batch_size=6000,
               episode_length=31,
               reward_mode='zero',
               quantum_circuit_type='v2',
               encoding='square')

from gkp.action_script import phase_estimation_symmetric_with_trim_4round as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

translations = np.linspace(-sqrt(pi), sqrt(pi), 100)

T = env.episode_length
R = np.zeros([len(translations), T, T])  # correlation matrix (empty)

for k, a in enumerate(translations):
    # collect trajectories
    time_step = env.reset()
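    # --- illustrative continuation (not from the source): given a (batch, T) array
    # 'm' of per-round measurement outcomes (+/-1), the T x T correlation matrix for
    # this translation amplitude is estimated as an average of outer products. The
    # synthetic outcomes below only demonstrate the shape bookkeeping.
    m = np.random.choice([-1.0, 1.0], size=(env.batch_size, T))
    R[k] = m.T @ m / env.batch_size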
Example #10
mpl.rcParams['lines.markersize'] = markersize
mpl.rcParams['lines.markeredgewidth'] = linewidth / 2

mpl.rcParams['legend.markerscale'] = 2.0

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
### Initialize the environment and simulation/training parameters
N = 40

env = gkp_init(simulate='snap_and_displacement',
               channel='quantum_jumps',
               init='vac',
               H=1,
               T=3,
               attn_step=1,
               batch_size=1,
               N=N,
               episode_length=3,
               phase_space_rep='wigner')

action_script = 'snap_and_displacements'
action_scale = {'alpha': 4, 'theta': pi}
to_learn = {'alpha': True, 'theta': True}
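# The wrapper rescales the agent's normalized outputs: displacement amplitudes 'alpha'
# by a factor of 4 and SNAP phases 'theta' by pi; both action groups are learned
# rather than taken from the script.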

action_script = action_scripts.__getattribute__(action_script)
env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

root_dir = {
    'bin0': r'E:\data\gkp_sims\PPO\examples\bin0_state_prep_lr3e-4',
    'bin1': r'E:\data\gkp_sims\PPO\examples\bin1_state_prep_lr3e-4'
Example #11
        b_amp = 2*sqrt(pi)
        a_amp = sqrt(pi)

        self.beta = [b_amp+0j, 1j*b_amp]*2 + [eps+0j, 1j*eps]
        
        self.alpha = [a_amp+0j] + [-1j*delta, delta+0j]*2 + [-1j*a_amp]

        self.phi = [pi/2]*6

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

env = gkp_init(simulate='oscillator',
               init='Z+', H=1, batch_size=800, episode_length=60, 
               reward_mode = 'fidelity', quantum_circuit_type='v2',
               encoding = 'square')

savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\osc_sims\test'
feedback_amps = np.linspace(0.15, 0.24, 10, dtype=complex)
trim_amps = np.linspace(0.15, 0.24, 10, dtype=complex)
states = ['Z+']
make_figure = False

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

lifetimes = np.zeros((len(feedback_amps), len(trim_amps)))
returns = np.zeros((len(feedback_amps), len(trim_amps)))
            't_gate': 1.2e-6 / np.sqrt(np.sqrt(K)),
            'T1_osc': 250e-6
        }
        if 'perfect' in names[i]:
            kwargs['simulate'] = 'oscillator'
        else:
            kwargs['simulate'] = 'oscillator_qubit'
            T1_qb = int(names[i][:names[i].find('us')])
            kwargs['T1_qb'] = T1_qb * 1e-6

        # Initialize environment
        env = gkp_init(init='X+',
                       H=1,
                       T=6,
                       attn_step=1,
                       batch_size=3000,
                       episode_length=200,
                       reward_mode='fidelity',
                       quantum_circuit_type='v2',
                       encoding='hexagonal',
                       **kwargs)
        env = wrappers.ActionWrapper(env, action_script, to_learn)

        # Fit logical lifetime
        fit_params = hf.fit_logical_lifetime(env,
                                             policy,
                                             plot=False,
                                             reps=1,
                                             states=['X+'],
                                             save_dir=data_dir)
        T1[names[i]].append(fit_params['X+'][1] * 1e6)  # convert to us
from gkp.gkp_tf_env import tf_env_wrappers as wrappers
import gkp.action_script as action_scripts
from gkp.gkp_tf_env import policy as plc
from gkp.gkp_tf_env import gkp_init

# env = gkp_init(simulate='phase_estimation_osc_qb_v2',
#                 reward_kwargs={'reward_mode':'fidelity', 'code_flips':True},
#                 init='X+', H=1, T=4, attn_step=1, batch_size=1000, episode_length=100,
#                 encoding='square')

env = gkp_init(simulate='gkp_qec_autonomous_sBs_osc_qb',
               reward_kwargs={
                   'reward_mode': 'fidelity',
                   'code_flips': True
               },
               init='X+',
               H=1,
               T=2,
               attn_step=1,
               batch_size=500,
               episode_length=60,
               encoding='square')

# action_script = 'gkp_qec_autonomous_BsB_2round'
# action_scale = {'beta':1, 'phi':pi, 'epsilon':1}
# to_learn = {'beta':False, 'phi':False, 'epsilon':True}
# action_script = action_scripts.__getattribute__(action_script)
# env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)

# root_dir = r'E:\data\gkp_sims\PPO\examples\gkp_qec_autonomous_BsB'
# policy_dir = r'policy\000200'
# policy = tf.compat.v2.saved_model.load(os.path.join(root_dir,policy_dir))
Example #14
action_script = 'snap_and_displacements'
action_scale = {'alpha':4, 'theta':pi}
to_learn = {'alpha':True, 'theta':True}

action_script = action_scripts.__getattribute__(action_script)

protocol = 'ideal'
max_epochs = 3000
gate_times = [0.4e-6, 3.4e-6]
seeds = ['seed2']
rewards = {t:{} for t in gate_times}
norms = {t:{} for t in gate_times}

for t in gate_times:
    env = gkp_init(**env_kwargs, reward_kwargs=reward_kwargs)
    env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn)
    env._env.SNAP_miscalibrated.T = t
    env._env.bit_string = None # '00000'
    # collect episodes with different policies
    for sim_name in seeds: #os.listdir(root_dir[protocol]):
        print(sim_name)
        rewards[t][sim_name] = []
        norms[t][sim_name] = []
        sim_dir = os.path.join(root_dir[protocol], sim_name)
        
        for policy_name in os.listdir(os.path.join(sim_dir, 'policy')):
            if int(policy_name) > max_epochs: break
            policy_dir = os.path.join(sim_dir, 'policy', policy_name)
            policy = tf.compat.v2.saved_model.load(policy_dir)
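            # --- assumed evaluation of this checkpoint (not from the source): run one
            # batch of episodes and record the mean reward, so rewards[t][sim_name]
            # traces a learning curve over checkpoints (the 'norms' entry is not shown).
            time_step = env.reset()
            policy_state = policy.get_initial_state(env.batch_size)
            while not time_step.is_last()[0]:
                action_step = policy.action(time_step, policy_state)
                policy_state = action_step.state
                time_step = env.step(action_step.action)
            rewards[t][sim_name].append(float(tf.reduce_mean(time_step.reward)))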
            
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from time import time
from math import pi
from gkp.gkp_tf_env import tf_env_wrappers as wrappers
from gkp.gkp_tf_env import policy as plc
from gkp.gkp_tf_env import gkp_init

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

env = gkp_init(simulate='gkp_qec_autonomous_sBs_osc_qb', 
                reward_kwargs={'reward_mode':'zero'},
                init='vac', H=1, T=2, attn_step=1, batch_size=2000, episode_length=60,
                encoding='square')

from gkp.action_script import gkp_qec_autonomous_sBs_2round as action_script
policy = plc.ScriptedPolicy(env.time_step_spec(), action_script)

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# # What operators to measure 
# names = [r'Re($S_1$)', r'Re($S_2$)',
#          r'Im($S_1$)', r'Im($S_2$)']
# # Translation amplitudes
# stabilizers = [np.sqrt(pi), 2j*np.sqrt(pi)]*2
# # Qubit measurement angles
# angles = [0]*2 + [-pi/2]*2
Example #16
from gkp.gkp_tf_env import helper_functions as hf
from gkp.gkp_tf_env import tf_env_wrappers as wrappers
from gkp.gkp_tf_env import gkp_init
from simulator.utils import expectation

N = 40
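# Target for the 'overlap' reward below: the qubit ground state tensored with the
# oscillator Fock state |3> (qt.basis(N, 3) is Fock |3> in an N-dimensional truncation).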
target_state = qt.tensor(qt.basis(2, 0), qt.basis(N, 3))
reward_kwargs = {'reward_mode': 'overlap', 'target_state': target_state}
kwargs = {'N': N}

env = gkp_init(simulate='Alec_universal_gate_set',
               channel='quantum_jumps',
               reward_kwargs=reward_kwargs,
               init='vac',
               H=1,
               T=6,
               attn_step=1,
               batch_size=1,
               episode_length=6,
               encoding='square',
               **kwargs)

# from gkp.action_script import v2_phase_estimation_with_trim_4round as action_script
from gkp.action_script import Alec_universal_gate_set_6round as action_script
# to_learn = {'alpha':True, 'beta':True, 'phi':False, 'theta':False}
to_learn = {'beta': True, 'phi': True}
env = wrappers.ActionWrapper(env, action_script, to_learn)

root_dir = r'E:\VladGoogleDrive\Qulab\GKP\sims\PPO\CT_qubit_rot\fock3_beta3_B100_tomo100_lr1e-3_baseline_2'
policy_dir = r'policy\000076000'
policy = tf.compat.v2.saved_model.load(os.path.join(root_dir, policy_dir))
            'alpha': [delta + 0j, -1j * delta],
            'beta': [b_amp + 0j, 1j * b_amp],
            'phi': [pi / 2] * 2,
            'theta': [0.0] * 2
        }


#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

env = gkp_init(simulate='oscillator',
               init='Z+',
               H=1,
               batch_size=1000,
               episode_length=200,
               reward_mode='fidelity',
               channel='diffusion',
               quantum_circuit_type='v2',
               encoding='square',
               N=200)

savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\diffusion_channel'
params = [0j] + list(np.linspace(0.0, 0.5, 11, dtype=complex))
make_figure = True

#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------

lifetimes = np.zeros(len(params))