def __init__(self, env_kwargs_list, rew_kwargs_list, batch_size,
             action_script, action_scale, to_learn, episode_length_list,
             env_schedule=None):
    """
    Build one wrapped training environment and one episode driver for
    every (env_kwargs, rew_kwargs) pair, and store the schedule that
    selects which of them is used in a given epoch.

    Args:
        env_kwargs_list (list[dict]): list of parameters for training
            environment.
        rew_kwargs_list (list[dict]): list of parameters for reward
            functions. Should correspond to 'env_kwargs_list'; the two
            lists are paired positionally with zip(), so surplus entries
            in the longer list are ignored.
        batch_size (int): number of episodes collected in parallel.
        action_script (str): name of action script. Action wrapper will
            select actions from this script if they are not learned.
        action_scale (dict, str:float): dictionary mapping action
            dimensions to scaling factors. Action wrapper will rescale
            actions produced by the agent's neural net policy by these
            factors.
        to_learn (dict, str:bool): dictionary mapping action dimensions
            to bool flags. Specifies if the action should be learned or
            scripted.
        episode_length_list (list[callable: int -> int]): list of
            schedule functions for episode durations. Schedule functions
            take as argument int epoch number and return int episode
            duration for this epoch. The list should correspond to
            'env_kwargs_list'.
        env_schedule (callable): function mapping epoch number to index
            of the environment from the list to use during this epoch.
            If None, environments are cycled through in order.

    Raises:
        AttributeError: if 'action_scripts' has no attribute named
            'action_script'.
    """
    self.env_list, self.driver_list = [], []
    self.episode_length_list = episode_length_list

    # The same script is shared by every environment, so resolve the name
    # once up front; a misspelled name then fails before any environment
    # is constructed.
    action_script_m = getattr(action_scripts, action_script)

    for env_kwargs, rew_kwargs in zip(env_kwargs_list, rew_kwargs_list):
        # Create training env and wrap it
        env = gkp_init(batch_size=batch_size, reward_kwargs=rew_kwargs,
                       **env_kwargs)
        env = wrappers.ActionWrapper(env, action_script_m, action_scale,
                                     to_learn)

        # create dummy placeholder policy to initialize driver
        dummy_policy = PolicyPlaceholder(
            env.time_step_spec(), env.action_spec())

        # create driver for this environment
        driver = dynamic_episode_driver.DynamicEpisodeDriver(
            env, dummy_policy, num_episodes=batch_size)

        self.env_list.append(env)
        self.driver_list.append(driver)

    if env_schedule is None:
        # regularly switch between environments
        self.env_schedule = lambda epoch: epoch % len(self.env_list)
    else:
        self.env_schedule = env_schedule
def __init__(self, env_kwargs, reward_kwargs, batch_size, action_script,
             action_scale, to_learn, episode_length, learn_residuals=False):
    """
    Create a single wrapped training environment and initialize the parent
    class with it and a placeholder policy.

    Args:
        env_kwargs (dict): keyword arguments forwarded to 'gkp_init' for
            the training environment. Must be a mapping because it is
            unpacked with '**'.
        reward_kwargs (dict): parameters for reward function, passed to
            'gkp_init' as 'reward_kwargs'.
        batch_size (int): number of episodes collected in parallel.
        action_script (str): name of action script. Action wrapper will
            select actions from this script if they are not learned.
        action_scale (dict, str:float): dictionary mapping action
            dimensions to scaling factors. Action wrapper will rescale
            actions produced by the agent's neural net policy by these
            factors.
        to_learn (dict, str:bool): dictionary mapping action dimensions
            to bool flags. Specifies if the action should be learned or
            scripted.
        episode_length (callable: int -> int): function that defines the
            schedule for training episode durations. Takes as argument
            int epoch number and returns int episode duration for this
            epoch.
        learn_residuals (bool): flag to learn residual over the scripted
            protocol. If False, will learn actions from scratch. If True,
            will learn a residual to be added to scripted protocol.

    Raises:
        AttributeError: if 'action_scripts' has no attribute named
            'action_script'.
    """
    self.episode_length = episode_length

    # Create training env and wrap it
    env = gkp_init(batch_size=batch_size, reward_kwargs=reward_kwargs,
                   **env_kwargs)

    # Look the script up under a separate name so the 'action_script'
    # argument keeps referring to the str the caller passed in.
    script = getattr(action_scripts, action_script)
    env = wrappers.ActionWrapper(env, script, action_scale, to_learn,
                                 learn_residuals=learn_residuals)

    # create dummy placeholder policy to initialize parent class
    dummy_policy = PolicyPlaceholder(env.time_step_spec(), env.action_spec())

    super().__init__(env, dummy_policy, num_episodes=batch_size)
""" import os os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]='true' os.environ["CUDA_VISIBLE_DEVICES"]="0" import numpy as np import matplotlib.pyplot as plt from gkp.gkp_tf_env import policy as plc from gkp.gkp_tf_env import gkp_init from time import time import tensorflow as tf from math import sqrt, pi # initialize environment and policy env = gkp_init(simulate='oscillator', init='Z+', H=1, batch_size=2000, episode_length=30, reward_mode='fidelity', quantum_circuit_type='v2') from gkp.action_script import phase_estimation_symmetric_with_trim_4round as action_script policy = plc.ScriptedPolicy(env.time_step_spec(), action_script) # collect trajectories all_obs = [] reps = 5 # serialize if batch size is small due to memory issues for i in range(reps): time_step = env.reset() policy_state = policy.get_initial_state(env.batch_size) counter = 0 while not time_step.is_last()[0]: t = time() action_step = policy.action(time_step, policy_state)
# p = 1/(1+500*(a['alpha'] - 0.37)**2) # just needed a sharper reward function!
# z = tfp.distributions.Bernoulli(probs=p).sample()
# return 2*tf.cast(z, tf.float32)-1

# Number of episodes per batch; passed to gkp_init as batch_size.
B = 200

# Names of the action components, and the factor each one is multiplied by
# in reward_sampler before it is handed to the environment.
actions = ['alpha', 'phi_g', 'phi_e']
action_scale = {'alpha':27, 'phi_g':pi, 'phi_e':pi}

reward_kwargs = {'reward_mode' : 'measurement',
                 'sample' : True}

env = gkp_init(
    simulate='conditional_displacement_cal',
    reward_kwargs=reward_kwargs,
    init='vac',
    T=1,
    batch_size=B,
    N=50,
    episode_length=1,
    t_gate = 100e-9,
)


def reward_sampler(a):
    """
    Play the environment to the end of an episode with one fixed action
    and return the reward of the final time step.

    Args:
        a: mapping indexed by the names in 'actions'; every entry is
            multiplied by the matching factor from 'action_scale'.

    Returns:
        The 'reward' attribute of the last time step produced by 'env'.
    """
    scaled = {name : a[name] * action_scale[name] for name in actions}
    step = env.reset()
    # The first batch element's is_last flag decides when to stop.
    while not step.is_last()[0]:
        step = env.step(scaled)
    return step.reward


# eval_reward_kwargs = {'reward_mode' : 'measurement',
#                       'sample' : False}
# Define Kerr sweep range and Kerr-dependent parameters Kerr = np.linspace(1, 51, 11) t_gate = 1.2e-6 / np.sqrt(Kerr) # assume gate time can scale as 1/chi rotation_angle = 2 * np.pi * Kerr * (1.2e-6 + t_gate) * 20 # simple heuristic states = ['X+', 'Y+', 'Z+'] lifetimes = {state: np.zeros(len(Kerr)) for state in states} savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\Kerr\hexagonal_sweep\no_rotation_perfect_qubit' # Initialize environment and policy env = gkp_init(simulate='oscillator', encoding='hexagonal', init='X+', H=1, batch_size=2000, episode_length=200, reward_mode='fidelity', quantum_circuit_type='v2') from gkp.action_script import hexagonal_phase_estimation_symmetric_6round as action_script policy = plc.ScriptedPolicy(env.time_step_spec(), action_script) for k in range(len(Kerr)): env = gkp_init(simulate='oscillator', encoding='hexagonal', init='X+', H=1, batch_size=2000, episode_length=200,
avg_ideal_stabilizer, delta_effective = {}, {} for Delta in deltas: reward_kwargs = { 'reward_mode': 'stabilizers_v2', 'Delta': 0.0, 'beta': sqrt(pi), 'sample': False } env = gkp_init(simulate='snap_and_displacement', reward_kwargs=reward_kwargs, init='vac', H=1, T=9, attn_step=1, batch_size=1, N=200, episode_length=9) action_script = 'snap_and_displacements' action_scale = {'alpha': 6, 'theta': pi} to_learn = {'alpha': True, 'theta': True} action_script = action_scripts.__getattribute__(action_script) env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn) delta_dir = os.path.join(root_dir, 'delta' + str(Delta)) seed_dir = os.path.join(delta_dir, best_seed[Delta]) policy_dir = r'policy\010000' policy = tf.compat.v2.saved_model.load(os.path.join(seed_dir, policy_dir))
lifetimes = np.zeros(len(params)) returns = np.zeros(len(params)) gfig, gax = plt.subplots(1, 1, dpi=300, figsize=(10, 6)) gax.set_title(r'Reward curves') gax.set_ylabel(r'Reward') gax.set_xlabel('Time') for j in range(len(params)): t = time() env = gkp_init(simulate='oscillator_qubit', init='Z+', H=1, batch_size=2500, episode_length=200, reward_mode='fidelity', quantum_circuit_type='v2', encoding='hexagonal', t_feedback=params[j]) action_script = ActionScript() policy = plc.ScriptedPolicy(env.time_step_spec(), action_script) for state in states: if '_env' in env.__dir__(): env._env.init = state else: env.init = state # Collect batch of episodes time_step = env.reset()
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = 'true' os.environ["CUDA_VISIBLE_DEVICES"] = "0" import tensorflow as tf from gkp.gkp_tf_env import helper_functions as hf from gkp.gkp_tf_env import tf_env_wrappers as wrappers from gkp.gkp_tf_env import policy as plc from gkp.gkp_tf_env import gkp_init env = gkp_init(simulate='phase_estimation_osc_v2', channel='quantum_jumps', reward_kwargs={ 'reward_mode': 'fidelity', 'code_flips': True }, init='X+', H=1, T=4, attn_step=1, batch_size=1000, episode_length=50, encoding='square') # from gkp.action_script import v2_phase_estimation_with_trim_4round as action_script # # # from gkp.action_script import Alec_universal_gate_set_12round as action_script # # # from gkp.action_script import hexagonal_phase_estimation_symmetric_6round as action_script # to_learn = {'alpha':True, 'beta':True, 'phi':False, 'theta':False} # # # to_learn = {'alpha':True, 'beta':True, 'phi':True} # env = wrappers.ActionWrapper(env, action_script, to_learn) # root_dir = r'E:\VladGoogleDrive\Qulab\GKP\sims\PPO\August\OscillatorGKP\mlp2_H3T4A4_steps36_64_qec_4'
from time import time from tensorflow.keras.backend import batch_dot from math import sqrt, pi import tensorflow as tf from scipy.optimize import curve_fit from gkp.gkp_tf_env import helper_functions as hf #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- # initialize environment and policy env = gkp_init(simulate='oscillator', init='X+', H=1, batch_size=6000, episode_length=31, reward_mode='zero', quantum_circuit_type='v2', encoding='square') from gkp.action_script import phase_estimation_symmetric_with_trim_4round as action_script policy = plc.ScriptedPolicy(env.time_step_spec(), action_script) translations = np.linspace(-sqrt(pi), sqrt(pi), 100) T = env.episode_length R = np.zeros([len(translations), T, T]) # correlation matrix (empty) for k, a in enumerate(translations): # collect trajectories time_step = env.reset()
mpl.rcParams['lines.markersize'] = markersize mpl.rcParams['lines.markeredgewidth'] = linewidth / 2 mpl.rcParams['legend.markerscale'] = 2.0 #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- ### Initialize the environment and simulation/training parameters N = 40 env = gkp_init(simulate='snap_and_displacement', channel='quantum_jumps', init='vac', H=1, T=3, attn_step=1, batch_size=1, N=N, episode_length=3, phase_space_rep='wigner') action_script = 'snap_and_displacements' action_scale = {'alpha': 4, 'theta': pi} to_learn = {'alpha': True, 'theta': True} action_script = action_scripts.__getattribute__(action_script) env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn) root_dir = { 'bin0': r'E:\data\gkp_sims\PPO\examples\bin0_state_prep_lr3e-4', 'bin1': r'E:\data\gkp_sims\PPO\examples\bin1_state_prep_lr3e-4'
b_amp = 2*sqrt(pi) a_amp = sqrt(pi) self.beta = [b_amp+0j, 1j*b_amp]*2 + [eps+0j, 1j*eps] self.alpha = [a_amp+0j] + [-1j*delta, delta+0j]*2 + [-1j*a_amp] self.phi = [pi/2]*6 #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- env = gkp_init(simulate='oscillator', init='Z+', H=1, batch_size=800, episode_length=60, reward_mode = 'fidelity', quantum_circuit_type='v2', encoding = 'square') savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\osc_sims\test' feedback_amps = np.linspace(0.15, 0.24, 10, dtype=complex) trim_amps = np.linspace(0.15, 0.24, 10, dtype=complex) states = ['Z+'] make_figure = False #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- lifetimes = np.zeros((len(feedback_amps), len(trim_amps))) returns = np.zeros((len(feedback_amps), len(trim_amps)))
't_gate': 1.2e-6 / np.sqrt(np.sqrt(K)), 'T1_osc': 250e-6 } if 'perfect' in names[i]: kwargs['simulate'] = 'oscillator' else: kwargs['simulate'] = 'oscillator_qubit' T1_qb = int(names[i][:names[i].find('us')]) kwargs['T1_qb'] = T1_qb * 1e-6 # Initialize environment env = gkp_init(init='X+', H=1, T=6, attn_step=1, batch_size=3000, episode_length=200, reward_mode='fidelity', quantum_circuit_type='v2', encoding='hexagonal', **kwargs) env = wrappers.ActionWrapper(env, action_script, to_learn) # Fit logical lifetime fit_params = hf.fit_logical_lifetime(env, policy, plot=False, reps=1, states=['X+'], save_dir=data_dir) T1[names[i]].append(fit_params['X+'][1] * 1e6) # convert to us
from gkp.gkp_tf_env import tf_env_wrappers as wrappers import gkp.action_script as action_scripts from gkp.gkp_tf_env import policy as plc from gkp.gkp_tf_env import gkp_init # env = gkp_init(simulate='phase_estimation_osc_qb_v2', # reward_kwargs={'reward_mode':'fidelity', 'code_flips':True}, # init='X+', H=1, T=4, attn_step=1, batch_size=1000, episode_length=100, # encoding='square') env = gkp_init(simulate='gkp_qec_autonomous_sBs_osc_qb', reward_kwargs={ 'reward_mode': 'fidelity', 'code_flips': True }, init='X+', H=1, T=2, attn_step=1, batch_size=500, episode_length=60, encoding='square') # action_script = 'gkp_qec_autonomous_BsB_2round' # action_scale = {'beta':1, 'phi':pi, 'epsilon':1} # to_learn = {'beta':False, 'phi':False, 'epsilon':True} # action_script = action_scripts.__getattribute__(action_script) # env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn) # root_dir = r'E:\data\gkp_sims\PPO\examples\gkp_qec_autonomous_BsB' # policy_dir = r'policy\000200' # policy = tf.compat.v2.saved_model.load(os.path.join(root_dir,policy_dir))
action_script = 'snap_and_displacements' action_scale = {'alpha':4, 'theta':pi} to_learn = {'alpha':True, 'theta':True} action_script = action_scripts.__getattribute__(action_script) protocol = 'ideal' max_epochs = 3000 gate_times = [0.4e-6, 3.4e-6] seeds = ['seed2'] rewards = {t:{} for t in gate_times} norms = {t:{} for t in gate_times} for t in gate_times: env = gkp_init(**env_kwargs, reward_kwargs=reward_kwargs) env = wrappers.ActionWrapper(env, action_script, action_scale, to_learn) env._env.SNAP_miscalibrated.T = t env._env.bit_string = None # '00000' # collect episodes with different policies for sim_name in seeds: #os.listdir(root_dir[protocol]): print(sim_name) rewards[t][sim_name] = [] norms[t][sim_name] = [] sim_dir = os.path.join(root_dir[protocol], sim_name) for policy_name in os.listdir(os.path.join(sim_dir, 'policy')): if int(policy_name) > max_epochs: break policy_dir = os.path.join(sim_dir, 'policy', policy_name) policy = tf.compat.v2.saved_model.load(policy_dir)
import numpy as np import tensorflow as tf import matplotlib.pyplot as plt from time import time from math import pi from gkp.gkp_tf_env import tf_env_wrappers as wrappers from gkp.gkp_tf_env import policy as plc from gkp.gkp_tf_env import gkp_init #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- env = gkp_init(simulate='gkp_qec_autonomous_sBs_osc_qb', reward_kwargs={'reward_mode':'zero'}, init='vac', H=1, T=2, attn_step=1, batch_size=2000, episode_length=60, encoding='square') from gkp.action_script import gkp_qec_autonomous_sBs_2round as action_script policy = plc.ScriptedPolicy(env.time_step_spec(), action_script) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- # # What operators to measure # names = [r'Re($S_1$)', r'Re($S_2$)', # r'Im($S_1$)', r'Im($S_2$)'] # # Translation amplitudes # stabilizers = [np.sqrt(pi), 2j*np.sqrt(pi)]*2 # # Qubit measurement angles # angles = [0]*2 + [-pi/2]*2
from gkp.gkp_tf_env import helper_functions as hf from gkp.gkp_tf_env import tf_env_wrappers as wrappers from gkp.gkp_tf_env import gkp_init from simulator.utils import expectation N = 40 target_state = qt.tensor(qt.basis(2, 0), qt.basis(N, 3)) reward_kwargs = {'reward_mode': 'overlap', 'target_state': target_state} kwargs = {'N': N} env = gkp_init(simulate='Alec_universal_gate_set', channel='quantum_jumps', reward_kwargs=reward_kwargs, init='vac', H=1, T=6, attn_step=1, batch_size=1, episode_length=6, encoding='square', **kwargs) # from gkp.action_script import v2_phase_estimation_with_trim_4round as action_script from gkp.action_script import Alec_universal_gate_set_6round as action_script # to_learn = {'alpha':True, 'beta':True, 'phi':False, 'theta':False} to_learn = {'beta': True, 'phi': True} env = wrappers.ActionWrapper(env, action_script, to_learn) root_dir = r'E:\VladGoogleDrive\Qulab\GKP\sims\PPO\CT_qubit_rot\fock3_beta3_B100_tomo100_lr1e-3_baseline_2' policy_dir = r'policy\000076000' policy = tf.compat.v2.saved_model.load(os.path.join(root_dir, policy_dir))
'alpha': [delta + 0j, -1j * delta], 'beta': [b_amp + 0j, 1j * b_amp], 'phi': [pi / 2] * 2, 'theta': [0.0] * 2 } #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- env = gkp_init(simulate='oscillator', init='Z+', H=1, batch_size=1000, episode_length=200, reward_mode='fidelity', channel='diffusion', quantum_circuit_type='v2', encoding='square', N=200) savepath = r'E:\VladGoogleDrive\Qulab\GKP\sims\diffusion_channel' params = [0j] + list(np.linspace(0.0, 0.5, 11, dtype=complex)) make_figure = True #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- lifetimes = np.zeros(len(params))