def main(config):
  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy("mixed_float16"))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print("Logdir", config.logdir)

  # Create environments.
  datadir = config.logdir / "episodes"
  writer = tf.summary.create_file_writer(
      str(config.logdir), max_queue=1000, flush_millis=20000)
  writer.set_as_default()
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, "train", datadir, store=True),
          config.parallel)
      for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, "test", datadir, store=False),
          config.parallel)
      for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f"Prefill dataset with {prefill} steps.")
  random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
  tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
  writer.flush()

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f"Simulating agent for {config.steps - step} steps.")
  agent = Dreamer(config, datadir, actspace, writer)
  if (config.logdir / "variables.pkl").exists():
    print("Load checkpoint.")
    agent.load(config.logdir / "variables.pkl")
  state = None
  while step < config.steps:
    print("Start evaluation.")
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    print("Start collection.")
    steps = config.eval_every // config.action_repeat
    state = tools.simulate(agent, train_envs, steps, state=state)
    step = count_steps(datadir, config)
    agent.save(config.logdir / "variables.pkl")
  for env in train_envs + test_envs:
    env.close()
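# The count_steps helper called above is not shown in this excerpt. A minimal
# sketch, assuming (as in the original Dreamer code) that episodes are saved
# as .npz files whose names end with the episode length, e.g. `<id>-<len>.npz`;
# the repository's actual helper may differ.
def count_steps(datadir, config):
  # Sum the episode lengths encoded in the filenames and convert environment
  # steps to raw steps via the action repeat.
  lengths = [int(p.stem.rsplit("-", 1)[-1]) for p in datadir.glob("*.npz")]
  return sum(lengths) * config.action_repeat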
def define_simulation_graph(batch_env, algo_cls, config):
  """Define the algorithm and environment interaction.

  Args:
    batch_env: In-graph environments object.
    algo_cls: Constructor of a batch algorithm.
    config: Configuration object for the algorithm.

  Returns:
    Object providing graph elements via attributes.
  """
  # pylint: disable=unused-variable
  step = tf.Variable(0, False, dtype=tf.int32, name='global_step')
  is_training = tf.placeholder(tf.bool, name='is_training')
  should_log = tf.placeholder(tf.bool, name='should_log')
  do_report = tf.placeholder(tf.bool, name='do_report')
  force_reset = tf.placeholder(tf.bool, name='force_reset')
  # Here algo_cls is ppo.PPOAlgorithm, a vectorized implementation of the
  # PPO algorithm by John Schulman.
  algo = algo_cls(batch_env, step, is_training, should_log, config)
  # tools.simulate returns done flags for the current episodes, possibly
  # intermediate scores for the episodes, and a summary tensor.
  done, score, summary = tools.simulate(batch_env, algo, should_log, force_reset)
  message = 'Graph contains {} trainable variables.'
  tf.logging.info(message.format(tools.count_weights()))
  # pylint: enable=unused-variable
  return tools.AttrDict(locals())
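# tools.AttrDict(locals()) exposes every local variable of the function as an
# attribute of the returned object. A minimal sketch of such a class, assuming
# the common dict-subclass pattern (the repository's actual helper may differ):
class AttrDict(dict):
  """Dictionary whose entries are also readable as attributes."""

  def __getattr__(self, name):
    try:
      return self[name]
    except KeyError:
      raise AttributeError(name)

# Usage: `return AttrDict(locals())` lets callers write `graph.step`,
# `graph.algo`, or `graph.summary` on the returned object.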
def __init__(self, checkpoint_path, obs_type="lidar", action_dist="tanh_normal"):
  config = init_config(obs_type, action_dist)
  writer, datadir, env = make_initialization_env(config)
  random_agent = lambda o, d, s: ([env.action_space['A'].sample()], None)
  tools.simulate([random_agent for _ in range(env.n_agents)], env, config,
                 datadir, writer, prefix='prefill', episodes=10)
  # Initialize the model.
  actspace, obspace = env.action_space, env.observation_space
  super().__init__(config, datadir, actspace, obspace, writer=None)
  shutil.rmtree(datadir)  # Remove the temporary directory.
  self.load(checkpoint_path)
  print(f"[Info] Agent Variables: {len(self.variables)}")
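# Hypothetical usage of the constructor above; the class name RacingDreamer
# and the checkpoint path are illustrative assumptions, not from the source:
agent = RacingDreamer("logs/checkpoints/best/variables.pkl",
                      obs_type="lidar", action_dist="tanh_normal")
# The restored agent can then act like any policy, e.g. inside tools.simulate,
# without repeating the prefill initialization.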
def main(config):
  # Set up logging.
  setup_experiments(config)
  config.steps = int(config.steps)
  config.logdir, datadir, cp_dir = create_log_dirs(config)
  writer = tf.summary.create_file_writer(str(config.logdir), max_queue=1000,
                                         flush_millis=20000)
  writer.set_as_default()
  print(f"[Info] Logdir {config.logdir}")

  # Create environments.
  train_env = make_train_env(config, writer, datadir, gui=False)
  test_env = make_test_env(config, writer, datadir, gui=False)
  agent_ids = train_env.agent_ids
  actspace = train_env.action_space
  obspace = train_env.observation_space

  # Prefill phase.
  step = tools.count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'[Info] Prefill dataset (strategy={config.prefill_agent}) '
        f'with {prefill} steps.')
  # Choose prefill strategy.
  if config.prefill_agent == 'random':
    # Prefill strategy: random actions.
    random_agent = lambda o, d, s: (
        [train_env.action_space[agent_ids[0]].sample()], None)
    agents = [random_agent for _ in range(train_env.n_agents)]
  elif config.prefill_agent == 'gap_follower':
    # Prefill strategy: follow-the-gap with a fixed low speed (negative value
    # because actions are shifted from [-1, 1] into the physical range).
    ftg = GapFollower()
    fix_speed = -0.96
    gap_follower_agent = lambda o, d, s: (
        [np.clip(np.array([fix_speed, ftg.action(o)[-1]]), -1, +1)], None)
    agents = [gap_follower_agent for _ in range(train_env.n_agents)]
  else:
    raise NotImplementedError(
        f'prefill agent {config.prefill_agent} not implemented')
  # Run prefill simulations.
  tools.simulate(agents, train_env, config, datadir, writer, prefix='prefill',
                 steps=prefill / config.action_repeat, agents_ids=agent_ids)
  writer.flush()

  # Initialize the Dreamer model.
  step = tools.count_steps(datadir, config)
  agent = Dreamer(config, datadir, actspace, obspace, writer)

  # Resume the latest checkpoint (checkpoints follow the pattern
  # `{checkpoint_dir}/{step}.pkl`).
  checkpoints = sorted(cp_dir.glob('*pkl'),
                       key=lambda f: int(f.name.split('.')[0]))
  if len(checkpoints):
    try:
      agent.load(checkpoints[-1])
      print('Load checkpoint.')
    except Exception as e:
      raise Exception(f'Resuming checkpoint {checkpoints[-1]} failed') from e

  # Train and evaluate the agent over the simulation process.
  print(f'[Info] Simulating agent for {config.steps - step} steps.')
  simulation_state = None
  best_test_return = 0.0  # For storing the best model so far.
  while step < config.steps:
    # Evaluation phase.
    print('[Info] Start evaluation.')
    eval_agent = functools.partial(agent, training=False)
    # Replicate the evaluation policy for multi-agent compatibility.
    eval_agents = [eval_agent for _ in range(train_env.n_agents)]
    _, cum_reward = tools.simulate(eval_agents, test_env, config, datadir,
                                   writer, prefix='test',
                                   episodes=config.eval_episodes,
                                   agents_ids=agent_ids)
    writer.flush()
    # Save the best model so far.
    if cum_reward > best_test_return:
      best_test_return = cum_reward
      print(f'[Info] Found New Best Model: {best_test_return:.5f}')
      for model in [agent._encode, agent._dynamics, agent._decode,
                    agent._reward, agent._actor]:
        model.save(cp_dir / 'best' / f'{model._name}.pkl')
      agent.save(cp_dir / 'best' / 'variables.pkl')  # Also store the whole model.
    # Save a regular checkpoint.
    step = tools.count_steps(datadir, config)
    agent.save(cp_dir / f'{step}.pkl')
    # Training phase.
    print('[Info] Start collection.')
    steps = config.eval_every // config.action_repeat  # Steps until next evaluation.
    # In the multi-agent setting only one agent trains; the others stay fixed
    # in evaluation mode.
    train_agent = functools.partial(agent, training=True)
    eval_agent = functools.partial(agent, training=False)
    training_agents = [train_agent] + [
        eval_agent for _ in range(train_env.n_agents - 1)]
    simulation_state, _ = tools.simulate(training_agents, train_env, config,
                                         datadir, writer, prefix='train',
                                         steps=steps,
                                         sim_state=simulation_state,
                                         agents_ids=agent_ids)
    step = tools.count_steps(datadir, config)
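# Why fix_speed = -0.96 counts as a "fixed low speed": a standalone sketch of
# the linear action rescaling assumed above (the wrapper's actual speed bounds
# are assumptions for illustration).
def rescale(a, low, high):
    # Map an action from [-1, 1] linearly into the physical range [low, high].
    return low + (a + 1.0) * 0.5 * (high - low)

print(rescale(-0.96, 0.0, 5.0))  # 0.1, i.e. close to the minimum speed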
d = {"A": 2, "W": 2, 'Z': 2, "Y": 1, "X": 2, "H": 1} d['O'] = d['X'] + d['Y'] + d['H'] ### Parameters beta = Id(d['A'], d['W']) pars = { 'M': np.random.poisson(lam=2, size=(d['O'], d['A'])), 'B': np.random.normal(size=(d['O'], d['O'])) / 3, 'beta': beta } np.fill_diagonal(pars['B'], 0) noise_W = np.array([[0.5, 0], [-0.8, 1.5]]) # 1) Simulate n = 100000 data = simulate(n, d, pars, noise_W=noise_W) A, X, Y, W, Z = data['A'], data['X'], data['Y'], data['W'], data['Z'] ### Simulation setups # rotat = np.diag([np.sqrt(2), 1]) rotat = np.array([[1.6, 0.8], [-0.8, .5]]) shift = np.array([3, -3]) # Set lambda eta = shift.reshape(-1, 1) lamb = np.linalg.eigvals(eta @ eta.T).max() - 1 ### Save matrix eigendecomposition for plotting in R radius, U = np.linalg.eig(rotat @ rotat.T) np.savetxt("figures/figures3-4/extra-files/figure-4-matrix-eigvals.csv", np.concatenate(
def main(config):
  print(config)
  os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_id)
  if config.gpu_growth:
    # for gpu in tf.config.experimental.list_physical_devices('GPU'):
    #   tf.config.experimental.set_memory_growth(gpu, True)
    print(tf.config.experimental.list_physical_devices('GPU'))
    tf.config.experimental.set_memory_growth(
        tf.config.experimental.list_physical_devices('GPU')[0], True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print('Logdir', config.logdir)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(str(config.logdir), max_queue=1000,
                                         flush_millis=20000)
  writer.set_as_default()
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'train', datadir, store=True),
          config.parallel) for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', datadir, store=False),
          config.parallel) for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')
  random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
  tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
  writer.flush()

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps - step} steps.')
  agent = Dreamer(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  state = None
  last_time = time.time()
  while step < config.steps:
    print("current_time is:", time.time() - last_time)
    last_time = time.time()
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    print('Start collection.')
    steps = config.eval_every // config.action_repeat
    state = tools.simulate(agent, train_envs, steps, state=state)
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
  for env in train_envs + test_envs:
    env.close()
s2 = (1 - x) / x
# Pack parameter inputs to population mean functions.
params = pack_params(pars, c, d, np.sqrt(s2))
# We only compute the population version once at every value of s^2.
results.append([theo, x] + [
    get_mse_v(gamma_ar(params, lamb), v, params, c),
    get_mse_v(gamma_par(params, lamb), v, params, c),
    get_mse_v(gamma_cross(params, lamb), v, params, c),
    get_mse_v(gamma_ols(params), v, params, c)])

# Loop over simulations.
for n in [250, 2500]:
    for _ in range(5000):
        # Simulate training data.
        data = simulate(n, d, pars, noise_W=np.sqrt(s2), noise_Z=np.sqrt(s2))
        X, Y, A, W, Z = data['X'], data['Y'], data['A'], data['W'], data['Z']
        # Compute estimators from training data.
        gammas = {
            'ar': ar(X, Y, A, lamb=lamb),
            'par': ar(X, Y, W, lamb=lamb),
            'cross': cross(X, Y, W, Z, lamb=lamb),
            'ols': ols(X, Y)}
        # Simulate test data from the intervention do(A := v).
        test_data = simulate(n, d, pars, noise_W=np.sqrt(s2),
                             noise_Z=np.sqrt(s2), v=v)
        # Append results.
        results.append([n, x] + [get_mse(test_data, gamma)
                                 for gamma in gammas.values()])
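# The helpers ols and ar are not shown in this excerpt. A minimal sketch,
# assuming ar implements the anchor regression estimator of Rothenhäusler
# et al. (OLS after partially reweighting the anchor-explained variation);
# the repository's actual implementations may differ.
import numpy as np

def ols(X, Y):
    # Ordinary least squares: gamma = (X'X)^{-1} X'Y.
    return np.linalg.solve(X.T @ X, X.T @ Y)

def ar(X, Y, A, lamb):
    # Projection onto the column space of the anchors A.
    P = A @ np.linalg.solve(A.T @ A, A.T)
    W_mat = np.eye(len(A)) + (np.sqrt(1 + lamb) - 1) * P
    # Plain OLS on the transformed data solves the anchor objective
    # ||(I - P)(Y - X b)||^2 + (1 + lamb) ||P (Y - X b)||^2.
    return ols(W_mat @ X, W_mat @ Y)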
def main(config):
  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print('Logdir', config.logdir)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(str(config.logdir), max_queue=1000,
                                         flush_millis=20000)
  writer.set_as_default()
  train_sim_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'sim_train', datadir, store=True,
                           real_world=False),
          config.parallel) for i in range(config.envs)
  ]
  if config.real_world_prob > 0:
    train_real_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'real_train', datadir, store=True,
                             real_world=True),
            config.parallel) for _ in range(config.envs)
    ]
  else:
    train_real_envs = None
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', datadir, store=False,
                           real_world=True),
          config.parallel) for _ in range(config.envs)
  ]
  actspace = train_sim_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')
  random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
  tools.simulate(random_agent, train_sim_envs, prefill / config.action_repeat)
  writer.flush()
  train_real_step_target = config.sample_real_every * config.time_limit

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps - step} steps.')
  agent = Dreamer(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  else:
    print('Checkpoint not loaded.')
    print(config.logdir / 'variables.pkl')
    print((config.logdir / 'variables.pkl').exists())
  state = None
  while step < config.steps:
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    steps = config.eval_every // config.action_repeat
    print('Start collection from simulator.')
    state = tools.simulate(agent, train_sim_envs, steps, state=state)
    if step >= train_real_step_target and train_real_envs is not None:
      print('Start collection from the real world.')
      state = tools.simulate(agent, train_real_envs, episodes=1, state=state)
      train_real_step_target += config.sample_real_every * config.time_limit
    old_step = step
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
  for env in train_sim_envs + test_envs:
    env.close()
  if train_real_envs is not None:
    for env in train_real_envs:
      env.close()
def main(logdir, config):
  logdir = pathlib.Path(logdir).expanduser()
  config.traindir = config.traindir or logdir / 'train_eps'
  config.evaldir = config.evaldir or logdir / 'eval_eps'
  config.steps //= config.action_repeat
  config.eval_every //= config.action_repeat
  config.log_every //= config.action_repeat
  config.time_limit //= config.action_repeat
  config.act = getattr(tf.nn, config.act)

  if config.debug:
    tf.config.experimental_run_functions_eagerly(True)
  if config.gpu_growth:
    message = 'No GPU found. To actually train on CPU remove this assert.'
    assert tf.config.experimental.list_physical_devices('GPU'), message
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  print('Logdir', logdir)
  logdir.mkdir(parents=True, exist_ok=True)
  step = count_steps(config.traindir)
  logger = tools.Logger(logdir, config.action_repeat * step)

  print('Create envs.')
  if config.offline_traindir:
    directory = config.offline_traindir.format(**vars(config))
  else:
    directory = config.traindir
  train_eps = tools.load_episodes(directory, limit=config.dataset_size)
  if config.offline_evaldir:
    directory = config.offline_evaldir.format(**vars(config))
  else:
    directory = config.evaldir
  eval_eps = tools.load_episodes(directory, limit=1)
  make = lambda mode: make_env(config, logger, mode, train_eps, eval_eps)
  train_envs = [make('train') for _ in range(config.envs)]
  eval_envs = [make('eval') for _ in range(config.envs)]
  acts = train_envs[0].action_space
  config.num_actions = acts.n if hasattr(acts, 'n') else acts.shape[0]

  prefill = max(0, config.prefill - count_steps(config.traindir))
  print(f'Prefill dataset ({prefill} steps).')
  random_agent = lambda o, d, s: ([acts.sample() for _ in d], s)
  tools.simulate(random_agent, train_envs, prefill)
  tools.simulate(random_agent, eval_envs, episodes=1)
  logger.step = config.action_repeat * count_steps(config.traindir)

  print('Simulate agent.')
  train_dataset = make_dataset(train_eps, config)
  eval_dataset = iter(make_dataset(eval_eps, config))
  agent = Dreamer(config, logger, train_dataset)
  if (logdir / 'variables.pkl').exists():
    agent.load(logdir / 'variables.pkl')
    agent._should_pretrain._once = False

  state = None
  while agent._step.numpy().item() < config.steps:
    logger.write()
    print('Start evaluation.')
    video_pred = agent._wm.video_pred(next(eval_dataset))
    logger.video('eval_openl', video_pred)
    eval_policy = functools.partial(agent, training=False)
    tools.simulate(eval_policy, eval_envs, episodes=1)
    print('Start training.')
    state = tools.simulate(agent, train_envs, config.eval_every, state=state)
    agent.save(logdir / 'variables.pkl')
  for env in train_envs + eval_envs:
    try:
      env.close()
    except Exception:
      pass
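# The config.num_actions line above handles both discrete and continuous
# action spaces. A small standalone illustration, assuming Gym-style spaces:
import gym

discrete = gym.spaces.Discrete(4)             # Discrete spaces expose .n.
continuous = gym.spaces.Box(-1.0, 1.0, (6,))  # Box spaces expose .shape.
for acts in (discrete, continuous):
  num_actions = acts.n if hasattr(acts, 'n') else acts.shape[0]
  print(type(acts).__name__, num_actions)     # Discrete 4, Box 6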
def main(config):
  print(config)
  # Set random seeds.
  os.environ['PYTHONHASHSEED'] = str(config.seed)
  os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
  random.seed(config.seed)
  np.random.seed(config.seed)
  tf.random.set_seed(config.seed)

  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  config.logdir = config.logdir / config.task
  config.logdir = config.logdir / 'seed_{}'.format(config.seed)
  config.logdir.mkdir(parents=True, exist_ok=True)
  datadir = config.datadir
  tf_dir = config.logdir / 'tensorboard'
  writer = tf.summary.create_file_writer(str(tf_dir), max_queue=1000,
                                         flush_millis=20000)
  writer.set_as_default()

  # Create environments.
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'train', '.', store=False),
          config.parallel) for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', '.', store=False),
          config.parallel) for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Train and regularly evaluate the agent.
  agent = Lompo(config, datadir, actspace, writer)
  if agent._c.load_model:
    agent.load_model(config.logdir / 'final_model')
    print('Load pretrained model.')
    if agent._c.load_buffer:
      agent.latent_buffer.load(agent._c.logdir / 'buffer.h5py')
    else:
      agent._process_data_to_latent()
      agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')
  else:
    agent.fit_model(agent._c.model_train_steps)
    # agent.save_model(config.logdir)
    # agent._generate_real_data(steps=5000)
    agent._process_data_to_latent()
    agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')
  if agent._c.load_agent:
    agent.load_agent(config.logdir)
    print('Load pretrained actor.')

  while agent.latent_buffer._latent_stored_steps < agent._c.start_training:
    agent._generate_latent_data(next(agent._dataset))

  while agent._agent_step < int(config.agent_train_steps):
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    # agent._latent_evaluate(train_envs[0])
    writer.flush()
    print('Start collection.')
    agent.train_agent(agent._c.agent_itters_per_step)
    # agent._generate_real_data(steps=5)
    if config.sample:
      agent._add_data(num_episodes=1)
    else:
      agent._process_data_to_latent(num_episodes=1)
  for env in train_envs + test_envs:
    env.close()
gammas = {"ols": gamma_ols(params), "par5": gamma_par(params, lamb), "par10": gamma_par(params, lamb2), "cross": gamma_cross(params, lamb), "ar": gamma_ar(params, lamb) } # 2) Simulate interventions for scatter plot results = {k: [] for k in gammas.keys()} for intervention_strength in tqdm(np.arange(50)/8): # Interventions vs = N(int(8*(intervention_strength + 0.1)**1.1), d['A']) for v in vs: # Normalize v *= intervention_strength # /norm(v) # Evaluate estimators in intervened dataset for method, gamma in gammas.items(): results[method].append([intervention_strength, get_mse(simulate(n=50000, d=d, pars=pars, v=v, noise_W=noise_W), gamma), method, v]) # Convert to dataframe df = pd.concat(pd.DataFrame(results[method], columns=( "Strength", "MSE", "Method", "A")) for method in gammas.keys()).reset_index(drop=True) # Add columns with intervened value to plot in A-space df = df.join(pd.DataFrame(df.A.tolist(), index=df.index, columns=[f"A{i}" for i in range(d['A'])])) df.to_csv("figures/figures3-4/figure-3-data.csv")
# Pack parameters in dict
pars = {'M': M, 'B': B, 'beta': Id(d['A']), 'beta_z': Id(d['A'])}
# The variable 'noise' specifies the error variance of the proxies.
# The experiment considers a larger variance in the proxy of A1 than in A2.
noise = np.diag([1 for i in cA1] + [3 for i in cA2])

# 1) Simulate
n = 10000
out = None
# The loop repeats the experiment 1000 times
for _ in tqdm(range(1000)):
    # Simulate data
    data = simulate(n, d, pars, noise_W=noise)
    X, Y, A, W, Z = data['X'], data['Y'], data['A'], data['W'], data['Z']
    # Fit estimators
    par5 = ar(X, Y, W, lamb=5)
    cross5 = cross(X, Y, W, Z, lamb=5)
    ar5 = ar(X, Y, A, lamb=5)
    # Cast to dataframe
    df = pd.DataFrame(cb(par5, cross5, ar5),
                      columns=["par5", "cross5", "ar5"])
    # 'Causal' encodes whether the predictor is causal.
    df['Causal'] = 3 * [1] + 3 * [0]
    # 'X.coord' encodes the variable number (e.g. X_1, X_2, X_3, ...)
    df['X.coord'] = np.arange(1, 7)
    # Melt dataframe
def main(config):
  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print('Logdir', config.logdir)

  # Save the run arguments alongside the logs.
  arg_dict = vars(config).copy()
  del arg_dict['logdir']
  with open(os.path.join(config.logdir, 'args.json'), 'w') as fout:
    import json
    json.dump(arg_dict, fout)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(str(config.logdir), max_queue=1000,
                                         flush_millis=20000)
  writer.set_as_default()
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'train', datadir, train=True),
          config.parallel) for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', datadir, train=False),
          config.parallel) for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')

  def random_agent(o, d, _):
    return ([actspace.sample() for _ in d], None)

  tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
  writer.flush()

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps - step} steps.')
  agent = CVRL(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  state = None
  while step < config.steps:
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    print('Start collection.')
    steps = config.eval_every // config.action_repeat
    state = tools.simulate(agent, train_envs, steps, state=state)
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
  for env in train_envs + test_envs:
    env.close()
"eta_sim": shift, "cov_A_tar": rotat @ rotat.T, "cov_A_sim": rotat @ rotat.T } } # We select lambda such that B B.T + eta eta.T <= (1+lambda) Id (EAA.T = Id) eta = shift.reshape(-1, 1) lamb = np.linalg.eigvals(rotat @ rotat.T + eta @ eta.T).max() - 1 ### Simulate results = [] n = 10000 # training size m = 10000 # test size for i in tqdm(range(10000)): # Simulate training data data = simulate(n, d, pars) A, X, Y, W, Z = data['A'], data['X'], data['Y'], data['W'], data['Z'] # Fit estimators lamb = 4 gammas = {"ols": ols(X, Y), "ar": ar(X, Y, A, lamb=lamb)} for setup, s in sim_setups.items(): # Get simulation settings eta_tar, eta_sim, cov_A_tar, cov_A_sim = list(s.values()) # Target etstimator gamma_tar, alpha_tar = tar(X, Y, A, Sigma=cov_A_tar, nu=eta_tar) # Simulate test data _data = simulate(m, d, pars, v=eta_sim, cov_A=cov_A_sim) #v=eta_sim