Example #1
def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices("GPU"):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy("mixed_float16"))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print("Logdir", config.logdir)

    # Create environments.
    datadir = config.logdir / "episodes"
    writer = tf.summary.create_file_writer(
        str(config.logdir), max_queue=1000, flush_millis=20000
    )
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, "train", datadir, store=True),
            config.parallel,
        )
        for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, "test", datadir, store=False),
            config.parallel,
        )
        for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f"Prefill dataset with {prefill} steps.")
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
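    # Note (assumed from context): prefill is counted in environment steps, while
    # tools.simulate counts policy steps, hence the division by action_repeat below.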
    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f"Simulating agent for {config.steps-step} steps.")
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / "variables.pkl").exists():
        print("Load checkpoint.")
        agent.load(config.logdir / "variables.pkl")
    state = None
    while step < config.steps:
        print("Start evaluation.")
        tools.simulate(functools.partial(agent, training=False), test_envs, episodes=1)
        writer.flush()
        print("Start collection.")
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / "variables.pkl")
    for env in train_envs + test_envs:
        env.close()
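A minimal, self-contained sketch (not from the repository) of the policy interface that the tools.simulate calls above appear to expect: a callable taking (observations, done flags, state) and returning one action per environment plus a recurrent state. The BoxLike class is a hypothetical stand-in for the Gym action space.

import numpy as np

class BoxLike:
    """Hypothetical stand-in for a gym.spaces.Box action space (illustration only)."""
    def sample(self):
        return np.random.uniform(-1.0, 1.0, size=2)

actspace = BoxLike()
random_agent = lambda obs, done, state: ([actspace.sample() for _ in done], None)

actions, state = random_agent(None, [False, False], None)
print(len(actions))  # one action per environment in the batch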
Example #2
def define_simulation_graph(batch_env, algo_cls, config):
    """Define the algortihm and environment interaction.

  Args:
    batch_env: In-graph environments object.
    algo_cls: Constructor of a batch algorithm.
    config: Configuration object for the algorithm.

  Returns:
    Object providing graph elements via attributes.
  """
    # pylint: disable=unused-variable
    step = tf.Variable(0, False, dtype=tf.int32, name='global_step')

    is_training = tf.placeholder(tf.bool, name='is_training')
    should_log = tf.placeholder(tf.bool, name='should_log')
    do_report = tf.placeholder(tf.bool, name='do_report')
    force_reset = tf.placeholder(tf.bool, name='force_reset')

    algo = algo_cls(batch_env, step, is_training, should_log, config)
    # Here algo_cls is ppo.PPOAlgorithm, which provides a vectorized
    # implementation of the PPO algorithm by John Schulman.

    done, score, summary = tools.simulate(batch_env, algo, should_log,
                                          force_reset)
    """"
   tools.simulate Returns:
    Tuple of tensors containing done flags for the current episodes, possibly
    intermediate scores for the episodes, and a summary tensor.
  """
    message = 'Graph contains {} trainable variables.'
    tf.logging.info(message.format(tools.count_weights()))
    # pylint: enable=unused-variable
    return tools.AttrDict(locals())
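The boolean placeholders above act as run-time switches that the training loop feeds on each session call. Below is a minimal, self-contained sketch of that pattern, assuming TF1-style graph execution via tf.compat.v1; it is not the repository's actual loop.

import tensorflow.compat.v1 as tf1
tf1.disable_eager_execution()

# Boolean placeholder acting as a run-time switch, as in the graph above.
is_training = tf1.placeholder(tf1.bool, name='is_training')
lr = tf1.cond(is_training, lambda: tf1.constant(1e-3), lambda: tf1.constant(0.0))

with tf1.Session() as sess:
    for flag in (True, False):
        print(sess.run(lr, feed_dict={is_training: flag}))  # 0.001 then 0.0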
Example #3
def define_simulation_graph(batch_env, algo_cls, config):
    """Define the algortihm and environment interaction.

  Args:
    batch_env: In-graph environments object.
    algo_cls: Constructor of a batch algorithm.
    config: Configuration object for the algorithm.

  Returns:
    Object providing graph elements via attributes.
  """
    # pylint: disable=unused-variable

    step = tf.Variable(0, False, dtype=tf.int32, name='global_step')
    is_training = tf.placeholder(tf.bool, name='is_training')
    should_log = tf.placeholder(tf.bool, name='should_log')
    do_report = tf.placeholder(tf.bool, name='do_report')
    force_reset = tf.placeholder(tf.bool, name='force_reset')
    algo = algo_cls(batch_env, step, is_training, should_log, config)
    done, score, summary = tools.simulate(batch_env, algo, should_log,
                                          force_reset)
    message = 'Graph contains {} trainable variables.'
    tf.logging.info(message.format(tools.count_weights()))
    # pylint: enable=unused-variable
    return tools.AttrDict(locals())
Example #4
def __init__(self,
             checkpoint_path,
             obs_type="lidar",
             action_dist="tanh_normal"):
    config = init_config(obs_type, action_dist)
    writer, datadir, env = make_initialization_env(config)
    random_agent = lambda o, d, s: ([env.action_space['A'].sample()], None)
    tools.simulate([random_agent for _ in range(env.n_agents)],
                   env,
                   config,
                   datadir,
                   writer,
                   prefix='prefill',
                   episodes=10)
    # initialize model
    actspace, obspace = env.action_space, env.observation_space
    super().__init__(config, datadir, actspace, obspace, writer=None)
    shutil.rmtree(datadir)  # remove tmp directory
    self.load(checkpoint_path)
    print(f"[Info] Agent Variables: {len(self.variables)}")
Example #5
def main(config):
    # Setup logging
    setup_experiments(config)
    config.steps = int(config.steps)
    config.logdir, datadir, cp_dir = create_log_dirs(config)
    #set_logging(config)
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    print(f"[Info] Logdir {config.logdir}")

    # Create environments.
    train_env = make_train_env(config, writer, datadir, gui=False)
    test_env = make_test_env(config, writer, datadir, gui=False)
    agent_ids = train_env.agent_ids
    actspace = train_env.action_space
    obspace = train_env.observation_space

    # Prefill phase.
    step = tools.count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(
        f'[Info] Prefill dataset (strategy={config.prefill_agent}) with {prefill} steps.'
    )
    # Choose prefill strategy.
    if config.prefill_agent == 'random':
        # Prefill strategy: random actions
        random_agent = lambda o, d, s: (
            [train_env.action_space[agent_ids[0]].sample()], None)
        agents = [random_agent for _ in range(train_env.n_agents)]
    elif config.prefill_agent == 'gap_follower':
        # Prefill strategy: follow-the-gap (FTG) with a fixed low speed
        # (negative value because of the later shift into [0, 1])
        ftg = GapFollower()
        fix_speed = -0.96
        gap_follower_agent = lambda o, d, s: (
            [np.clip(np.array([fix_speed, ftg.action(o)[-1]]), -1, +1)], None)
        agents = [gap_follower_agent for _ in range(train_env.n_agents)]
    else:
        raise NotImplementedError(
            f'prefill agent {config.prefill_agent} not implemented')
    # Run prefill simulations.
    tools.simulate(agents,
                   train_env,
                   config,
                   datadir,
                   writer,
                   prefix='prefill',
                   steps=prefill / config.action_repeat,
                   agents_ids=agent_ids)
    writer.flush()

    # Initialize Dreamer model
    step = tools.count_steps(datadir, config)
    agent = Dreamer(config, datadir, actspace, obspace, writer)
    # Resume from the last checkpoint (checkpoints follow the pattern `{checkpoint_dir}/{step}.pkl`).
    checkpoints = sorted(cp_dir.glob('*pkl'),
                         key=lambda f: int(f.name.split('.')[0]))
    if len(checkpoints):
        try:
            agent.load(checkpoints[-1])
            print('Load checkpoint.')
        except Exception as e:
            raise RuntimeError(
                f"Resuming from checkpoint {checkpoints[-1]} failed") from e

    # Train and Evaluate the agent over the simulation process
    print(f'[Info] Simulating agent for {config.steps - step} steps.')
    simulation_state = None
    best_test_return = 0.0  # for storing the best model so far
    while step < config.steps:
        # Evaluation phase
        print('[Info] Start evaluation.')
        eval_agent = functools.partial(agent, training=False)
        eval_agents = [eval_agent for _ in range(train_env.n_agents)
                       ]  # for multi-agent compatibility
        _, cum_reward = tools.simulate(eval_agents,
                                       test_env,
                                       config,
                                       datadir,
                                       writer,
                                       prefix='test',
                                       episodes=config.eval_episodes,
                                       agents_ids=agent_ids)
        writer.flush()
        # Save best model
        if cum_reward > best_test_return:
            best_test_return = cum_reward
            print(f'[Info] Found New Best Model: {best_test_return:.5f}')
            for model in [
                    agent._encode, agent._dynamics, agent._decode,
                    agent._reward, agent._actor
            ]:
                model.save(cp_dir / 'best' / f'{model._name}.pkl')
            agent.save(cp_dir / 'best' /
                       'variables.pkl')  # store also the whole model
        # Save regular checkpoint
        step = tools.count_steps(datadir, config)
        agent.save(cp_dir / f'{step}.pkl')
        # Training phase
        print('[Info] Start collection.')
        steps = config.eval_every // config.action_repeat  # number of steps until the next evaluation
        train_agent = functools.partial(
            agent, training=True)  # for multi-agent: only 1 agent is training
        eval_agent = functools.partial(
            agent,
            training=False)  # the other ones are fixed in evaluation mode
        training_agents = [train_agent] + [
            eval_agent for _ in range(train_env.n_agents - 1)
        ]
        simulation_state, _ = tools.simulate(training_agents,
                                             train_env,
                                             config,
                                             datadir,
                                             writer,
                                             prefix='train',
                                             steps=steps,
                                             sim_state=simulation_state,
                                             agents_ids=agent_ids)
        step = tools.count_steps(datadir, config)
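The checkpoint-resume logic above relies on numerically named files. A small self-contained sketch of that sorting pattern (the directory and step values here are temporary and illustrative):

import pathlib
import tempfile

cp_dir = pathlib.Path(tempfile.mkdtemp())
for s in (500, 25000, 3000):
    (cp_dir / f'{s}.pkl').touch()

# Sort numerically on the step encoded in the file name, not lexicographically.
checkpoints = sorted(cp_dir.glob('*.pkl'), key=lambda f: int(f.name.split('.')[0]))
latest = checkpoints[-1] if checkpoints else None
print(latest.name if latest else 'no checkpoint found')  # 25000.pkl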
Example #6
d = {"A": 2, "W": 2, 'Z': 2, "Y": 1, "X": 2, "H": 1}
d['O'] = d['X'] + d['Y'] + d['H']

### Parameters
beta = Id(d['A'], d['W'])
pars = {
    'M': np.random.poisson(lam=2, size=(d['O'], d['A'])),
    'B': np.random.normal(size=(d['O'], d['O'])) / 3,
    'beta': beta
}
np.fill_diagonal(pars['B'], 0)
noise_W = np.array([[0.5, 0], [-0.8, 1.5]])

# 1) Simulate
n = 100000
data = simulate(n, d, pars, noise_W=noise_W)
A, X, Y, W, Z = data['A'], data['X'], data['Y'], data['W'], data['Z']

### Simulation setups
# rotat = np.diag([np.sqrt(2), 1])
rotat = np.array([[1.6, 0.8], [-0.8, .5]])
shift = np.array([3, -3])

# Set lambda
eta = shift.reshape(-1, 1)
lamb = np.linalg.eigvals(eta @ eta.T).max() - 1
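# Sanity check (not in the original script): eta @ eta.T is rank one, so its
# largest eigenvalue is ||shift||^2 and therefore lamb = ||shift||^2 - 1 = 17 here.
assert np.isclose(lamb, (shift ** 2).sum() - 1)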

### Save matrix eigendecomposition for plotting in R
radius, U = np.linalg.eig(rotat @ rotat.T)
np.savetxt("figures/figures3-4/extra-files/figure-4-matrix-eigvals.csv",
           np.concatenate(
Example #7
def main(config):
    print(config)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_id)
    if config.gpu_growth:
        # for gpu in tf.config.experimental.list_physical_devices('GPU'):
        #   tf.config.experimental.set_memory_growth(gpu, True)
        print(tf.config.experimental.list_physical_devices('GPU'))
        tf.config.experimental.set_memory_growth(
            tf.config.experimental.list_physical_devices('GPU')[0], True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print('Logdir', config.logdir)

    # Create environments.
    datadir = config.logdir / 'episodes'
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'train', datadir, store=True),
            config.parallel) for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'test', datadir, store=False),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f'Prefill dataset with {prefill} steps.')
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f'Simulating agent for {config.steps-step} steps.')
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / 'variables.pkl').exists():
        print('Load checkpoint.')
        agent.load(config.logdir / 'variables.pkl')
    state = None
    last_time = time.time()
    while step < config.steps:
        print("current_time is:", time.time() - last_time)
        last_time = time.time()
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        writer.flush()
        print('Start collection.')
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / 'variables.pkl')
    for env in train_envs + test_envs:
        env.close()
Example #8
    s2 = (1-x)/x

    # Pack parameter inputs to population mean functions
    params = pack_params(pars, c, d, np.sqrt(s2))

    # We only compute the population version once at every value of s^2
    results.append([theo, x] + [get_mse_v(gamma_ar(params, lamb), v, params, c),
                                get_mse_v(gamma_par(params, lamb), v, params, c),
                                get_mse_v(gamma_cross(params, lamb), v, params, c),
                                get_mse_v(gamma_ols(params), v, params, c)])

    # Loop over simulations
    for n in [250, 2500]:
        for _ in range(5000):
            # Simulate training data
            data = simulate(n, d, pars, noise_W=np.sqrt(s2), noise_Z=np.sqrt(s2))
            X, Y, A, W, Z = data['X'], data['Y'], data['A'], data['W'], data['Z']

            # Compute estimators from training data
            gammas = {
                'ar': ar(X, Y, A, lamb=lamb),
                "par": ar(X, Y, W, lamb=lamb),
                "cross": cross(X, Y, W, Z, lamb=lamb),
                "ols": ols(X, Y)}

            # Simulate test data from intervention do(A:=v)
            test_data = simulate(n, d, pars, noise_W=np.sqrt(s2), noise_Z=np.sqrt(s2), v=v)

            # Append results
            results.append([n, x] + [get_mse(test_data, gamma)
                                     for gamma in gammas.values()])
Example #9
def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print('Logdir', config.logdir)

    # Create environments.
    datadir = config.logdir / 'episodes'
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    train_sim_envs = [
        wrappers.Async(
            lambda: make_env(config,
                             writer,
                             'sim_train',
                             datadir,
                             store=True,
                             real_world=False), config.parallel)
        for i in range(config.envs)
    ]
    if config.real_world_prob > 0:
        train_real_envs = [
            wrappers.Async(
                lambda: make_env(config,
                                 writer,
                                 'real_train',
                                 datadir,
                                 store=True,
                                 real_world=True), config.parallel)
            for _ in range(config.envs)
        ]
    else:
        train_real_envs = None
    test_envs = [
        wrappers.Async(
            lambda: make_env(
                config, writer, 'test', datadir, store=False, real_world=True),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_sim_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f'Prefill dataset with {prefill} steps.')
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
    tools.simulate(random_agent, train_sim_envs,
                   prefill / config.action_repeat)
    writer.flush()
    train_real_step_target = config.sample_real_every * config.time_limit

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f'Simulating agent for {config.steps-step} steps.')
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / 'variables.pkl').exists():
        print('Load checkpoint.')
        agent.load(config.logdir / 'variables.pkl')
    else:
        print("checkpoint not loaded")
        print(config.logdir / 'variables.pkl')
        print((config.logdir / 'variables.pkl').exists())
    state = None
    while step < config.steps:
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        writer.flush()
        steps = config.eval_every // config.action_repeat
        print('Start collection from simulator.')
        state = tools.simulate(agent, train_sim_envs, steps, state=state)
        if step >= train_real_step_target and train_real_envs is not None:
            print("Start collection from the real world")
            state = tools.simulate(agent,
                                   train_real_envs,
                                   episodes=1,
                                   state=state)
            train_real_step_target += config.sample_real_every * config.time_limit
        old_step = step
        step = count_steps(datadir, config)
        agent.save(config.logdir / 'variables.pkl')
    for env in train_sim_envs + test_envs:
        env.close()
    if train_real_envs is not None:
        for env in train_real_envs:
            env.close()
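A toy illustration of the real-world collection schedule implemented above: one real-world episode is collected roughly every sample_real_every * time_limit environment steps. The values below are made up for the example.

# Illustrative values, not from the config.
sample_real_every, time_limit = 5, 100
train_real_step_target = sample_real_every * time_limit

for step in range(0, 3000, 250):  # pretend step counter from count_steps()
    if step >= train_real_step_target:
        print(f'step {step}: collect one real-world episode')
        train_real_step_target += sample_real_every * time_limit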
Example #10
def main(logdir, config):
    logdir = pathlib.Path(logdir).expanduser()
    config.traindir = config.traindir or logdir / 'train_eps'
    config.evaldir = config.evaldir or logdir / 'eval_eps'
    config.steps //= config.action_repeat
    config.eval_every //= config.action_repeat
    config.log_every //= config.action_repeat
    config.time_limit //= config.action_repeat
    config.act = getattr(tf.nn, config.act)

    if config.debug:
        tf.config.experimental_run_functions_eagerly(True)
    if config.gpu_growth:
        message = 'No GPU found. To actually train on CPU remove this assert.'
        assert tf.config.experimental.list_physical_devices('GPU'), message
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    print('Logdir', logdir)
    logdir.mkdir(parents=True, exist_ok=True)
    step = count_steps(config.traindir)
    logger = tools.Logger(logdir, config.action_repeat * step)

    print('Create envs.')
    if config.offline_traindir:
        directory = config.offline_traindir.format(**vars(config))
    else:
        directory = config.traindir
    train_eps = tools.load_episodes(directory, limit=config.dataset_size)
    if config.offline_evaldir:
        directory = config.offline_evaldir.format(**vars(config))
    else:
        directory = config.evaldir
    eval_eps = tools.load_episodes(directory, limit=1)
    make = lambda mode: make_env(config, logger, mode, train_eps, eval_eps)
    train_envs = [make('train') for _ in range(config.envs)]
    eval_envs = [make('eval') for _ in range(config.envs)]
    acts = train_envs[0].action_space
    config.num_actions = acts.n if hasattr(acts, 'n') else acts.shape[0]

    prefill = max(0, config.prefill - count_steps(config.traindir))
    print(f'Prefill dataset ({prefill} steps).')
    random_agent = lambda o, d, s: ([acts.sample() for _ in d], s)
    tools.simulate(random_agent, train_envs, prefill)
    tools.simulate(random_agent, eval_envs, episodes=1)
    logger.step = config.action_repeat * count_steps(config.traindir)

    print('Simulate agent.')
    train_dataset = make_dataset(train_eps, config)
    eval_dataset = iter(make_dataset(eval_eps, config))
    agent = Dreamer(config, logger, train_dataset)
    if (logdir / 'variables.pkl').exists():
        agent.load(logdir / 'variables.pkl')
        agent._should_pretrain._once = False

    state = None
    while agent._step.numpy().item() < config.steps:
        logger.write()
        print('Start evaluation.')
        video_pred = agent._wm.video_pred(next(eval_dataset))
        logger.video('eval_openl', video_pred)
        eval_policy = functools.partial(agent, training=False)
        tools.simulate(eval_policy, eval_envs, episodes=1)
        print('Start training.')
        state = tools.simulate(agent,
                               train_envs,
                               config.eval_every,
                               state=state)
        agent.save(logdir / 'variables.pkl')
    for env in train_envs + eval_envs:
        try:
            env.close()
        except Exception:
            pass
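The integer divisions at the top of this example convert step budgets from environment steps to policy steps. A quick arithmetic check under assumed values:

# Assumed values for illustration only.
action_repeat = 2
steps, eval_every, log_every, time_limit = 1_000_000, 10_000, 1_000, 1_000

steps //= action_repeat        # 500_000 policy steps in total
eval_every //= action_repeat   # evaluate every 5_000 policy steps
log_every //= action_repeat    # log every 500 policy steps
time_limit //= action_repeat   # episodes last 500 policy steps
print(steps, eval_every, log_every, time_limit)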
Example #11
def main(config):
    print(config)

    #Set random seeds
    os.environ['PYTHONHASHSEED'] = str(config.seed)
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    random.seed(config.seed)
    np.random.seed(config.seed)
    tf.random.set_seed(config.seed)

    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)

    config.logdir = config.logdir / config.task
    config.logdir = config.logdir / 'seed_{}'.format(config.seed)
    config.logdir.mkdir(parents=True, exist_ok=True)
    datadir = config.datadir
    tf_dir = config.logdir / 'tensorboard'
    writer = tf.summary.create_file_writer(str(tf_dir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()

    # Create environments.
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'train', '.', store=False),
            config.parallel) for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'test', '.', store=False),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Train and regularly evaluate the agent.
    agent = Lompo(config, datadir, actspace, writer)

    if agent._c.load_model:
        agent.load_model(config.logdir / 'final_model')
        print('Load pretrained model')
        if agent._c.load_buffer:
            agent.latent_buffer.load(agent._c.logdir / 'buffer.h5py')
        else:
            agent._process_data_to_latent()
            agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')
    else:
        agent.fit_model(agent._c.model_train_steps)
        #agent.save_model(config.logdir)
        #agent._generate_real_data(steps = 5000)
        agent._process_data_to_latent()
        agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')

    if agent._c.load_agent:
        agent.load_agent(config.logdir)
        print('Load pretrained actor')

    while agent.latent_buffer._latent_stored_steps < agent._c.start_training:
        agent._generate_latent_data(next(agent._dataset))

    while agent._agent_step < int(config.agent_train_steps):
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        #agent._latent_evaluate(train_envs[0])
        writer.flush()
        print('Start collection.')
        agent.train_agent(agent._c.agent_itters_per_step)
        #agent._generate_real_data(steps = 5)

        if config.sample:
            agent._add_data(num_episodes=1)
        else:
            agent._process_data_to_latent(num_episodes=1)

    for env in train_envs + test_envs:
        env.close()
gammas = {"ols": gamma_ols(params),
          "par5": gamma_par(params, lamb),
          "par10": gamma_par(params, lamb2),
          "cross": gamma_cross(params, lamb),
          "ar": gamma_ar(params, lamb)
          }

# 2) Simulate interventions for scatter plot
results = {k: [] for k in gammas.keys()}
for intervention_strength in tqdm(np.arange(50)/8):
    # Interventions
    vs = N(int(8*(intervention_strength + 0.1)**1.1), d['A'])
    for v in vs:
        # Normalize
        v *= intervention_strength  # /norm(v)

        # Evaluate estimators in intervened dataset
        for method, gamma in gammas.items():
            results[method].append([intervention_strength,
                                    get_mse(simulate(n=50000, d=d, pars=pars, v=v, noise_W=noise_W), gamma),
                                    method,
                                    v])

# Convert to dataframe
df = pd.concat(pd.DataFrame(results[method], columns=(
    "Strength", "MSE", "Method", "A")) for method in gammas.keys()).reset_index(drop=True)
# Add columns with intervened value to plot in A-space
df = df.join(pd.DataFrame(df.A.tolist(), index=df.index,
                          columns=[f"A{i}" for i in range(d['A'])]))
df.to_csv("figures/figures3-4/figure-3-data.csv")
Example #13
# Pack parameters in dict
pars = {'M': M, 'B': B, 'beta': Id(d['A']), 'beta_z': Id(d['A'])}

# Variable 'noise' specifies the error variance of the proxies.
# The experiment considers a larger variance in the proxy of A1 than in A2.
noise = np.diag([1 for i in cA1] + [3 for i in cA2])

# 1) Simulate
n = 10000
out = None

# Loop repeats experiment 1000 times
for _ in tqdm(range(1000)):
    # Simulate data
    data = simulate(n, d, pars, noise_W=noise)
    X, Y, A, W, Z = data['X'], data['Y'], data['A'], data['W'], data['Z']

    # Fit estimators
    par5 = ar(X, Y, W, lamb=5)
    cross5 = cross(X, Y, W, Z, lamb=5)
    ar5 = ar(X, Y, A, lamb=5)

    # Cast to dataframe
    df = pd.DataFrame(cb(par5, cross5, ar5), columns=["par5", "cross5", "ar5"])

    # 'Causal' encodes for whether predictor is causal.
    df['Causal'] = 3 * [1] + 3 * [0]
    # 'X.coord' encodes variable number (e.g. X_1, X_2, X_3, ...)
    df['X.coord'] = np.arange(1, 7)
    # Melt dataframe
Example #14
def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print('Logdir', config.logdir)

    arg_dict = vars(config).copy()
    del arg_dict['logdir']

    import json
    with open(os.path.join(config.logdir, 'args.json'), 'w') as fout:
        json.dump(arg_dict, fout)

    # Create environments.
    datadir = config.logdir / 'episodes'
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'train', datadir, train=True),
            config.parallel) for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'test', datadir, train=False),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f'Prefill dataset with {prefill} steps.')

    def random_agent(o, d, _):
        return ([actspace.sample() for _ in d], None)

    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f'Simulating agent for {config.steps-step} steps.')
    agent = CVRL(config, datadir, actspace, writer)
    if (config.logdir / 'variables.pkl').exists():
        print('Load checkpoint.')
        agent.load(config.logdir / 'variables.pkl')
    state = None
    while step < config.steps:
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        writer.flush()
        print('Start collection.')
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / 'variables.pkl')
    for env in train_envs + test_envs:
        env.close()
        "eta_sim": shift,
        "cov_A_tar": rotat @ rotat.T,
        "cov_A_sim": rotat @ rotat.T
    }
}
# We select lambda such that B B.T + eta eta.T <= (1 + lambda) Id (using E[A A.T] = Id)
eta = shift.reshape(-1, 1)
lamb = np.linalg.eigvals(rotat @ rotat.T + eta @ eta.T).max() - 1

### Simulate
results = []
n = 10000  # training size
m = 10000  # test size
for i in tqdm(range(10000)):
    # Simulate training data
    data = simulate(n, d, pars)
    A, X, Y, W, Z = data['A'], data['X'], data['Y'], data['W'], data['Z']

    # Fit estimators
    lamb = 4
    gammas = {"ols": ols(X, Y), "ar": ar(X, Y, A, lamb=lamb)}

    for setup, s in sim_setups.items():
        # Get simulation settings
        eta_tar, eta_sim, cov_A_tar, cov_A_sim = list(s.values())

        # Target estimator
        gamma_tar, alpha_tar = tar(X, Y, A, Sigma=cov_A_tar, nu=eta_tar)

        # Simulate test data
        _data = simulate(m, d, pars, v=eta_sim, cov_A=cov_A_sim)  #v=eta_sim