def experiment(log_dir, variant_overwrite, cpu=False):
    """Reload a saved run from `log_dir` and resume SAC training on it.

    Args:
        log_dir: Directory of the saved experiment to load.
        variant_overwrite: Variant entries overriding the stored variant.
        cpu: When False (the default), enable GPU mode.
    """
    if not cpu:
        ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)

    # Restore env, model snapshot and the (merged) variant from disk.
    env, _, data, variant = load_experiment(log_dir, variant_overwrite)
    goal_prior = variant['env_kwargs']['goal_prior']
    assert all(a == b for a, b in zip(env.sampled_goal, goal_prior))

    # Build a human-readable experiment id out of the key hyperparameters.
    algo_kw = variant['algo_kwargs']
    hist_kw = variant['historical_policies_kwargs']
    exp_id = 'eval/ne{}-mpl{}-{}-rs{}/nhp{}'.format(
        algo_kw['num_episodes'],
        algo_kw['max_path_length'],
        ','.join(variant_overwrite['env_kwargs']['shaped_rewards']),
        algo_kw['reward_scale'],
        hist_kw['num_historical_policies'],
    )
    exp_id = create_exp_name(exp_id)
    out_dir = os.path.join(log_dir, exp_id)
    print('Logging to:', out_dir)
    setup_logger(
        log_dir=out_dir,
        variant=variant,
        snapshot_mode='none',
        snapshot_gap=50,
    )

    # Rebuild the SAC algorithm around the restored networks.
    algorithm = SoftActorCritic(
        env=env,
        training_env=env,  # can't clone box2d env cause of swig
        save_environment=False,  # can't save box2d env cause of swig
        policy=data['policy'],
        qf=data['qf'],
        vf=data['vf'],
        **algo_kw,
    )

    # If the model is SMM, hook in p(z) adaptation.
    if variant['intrinsic_reward'] == 'smm':
        SMMHook(
            base_algorithm=algorithm,
            discriminator=data['discriminator'],
            density_model=data['density_model'],
            **variant['smm_kwargs'],
        )

    # Hook in historical averaging when historical policies are requested.
    if hist_kw['num_historical_policies'] > 0:
        HistoricalPoliciesHook(
            base_algorithm=algorithm,
            log_dir=log_dir,
            **hist_kw,
        )

    algorithm.to(ptu.device)
    algorithm.train()
def experiment(log_dir, variant_overwrite, cpu=False):
    """Load a trained model from `log_dir`, rebuild the SAC algorithm around
    it, collect evaluation rollouts, and dump each path's observations to
    ./outtem/out<i>.npy.

    Args:
        log_dir: Directory containing the saved experiment (snapshot + variant).
        variant_overwrite: Dict of variant keys overriding the stored variant.
        cpu: When False (the default), enable GPU mode.
    """
    if not cpu:
        ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)

    # Load experiment from file.
    env, _, data, variant = load_experiment(log_dir, variant_overwrite)

    # Set log directory.
    exp_id = 'eval/ne{}-mpl{}-{}-rs{}/nhp{}'.format(
        variant['algo_kwargs']['num_episodes'],
        variant['algo_kwargs']['max_path_length'],
        ','.join(variant_overwrite['env_kwargs']['shaped_rewards']),
        variant['algo_kwargs']['reward_scale'],
        variant['historical_policies_kwargs']['num_historical_policies'],
    )
    exp_id = create_exp_name(exp_id)
    out_dir = os.path.join(log_dir, exp_id)
    print('Logging to:', out_dir)
    setup_logger(
        log_dir=out_dir,
        variant=variant,
        snapshot_mode='none',
        snapshot_gap=50,
    )

    # Load trained model from file.
    policy = data['policy']
    vf = data['vf']
    qf = data['qf']
    algorithm = SoftActorCritic(
        env=env,
        training_env=env,  # can't clone box2d env cause of swig
        save_environment=False,  # can't save box2d env cause of swig
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_kwargs'],
    )

    # Overwrite algorithm for p(z) adaptation (if model is SMM).
    if variant['intrinsic_reward'] == 'smm':
        discriminator = data['discriminator']
        density_model = data['density_model']
        SMMHook(base_algorithm=algorithm,
                discriminator=discriminator,
                density_model=density_model,
                **variant['smm_kwargs'])

    # Overwrite algorithm for historical averaging.
    if variant['historical_policies_kwargs']['num_historical_policies'] > 0:
        HistoricalPoliciesHook(
            base_algorithm=algorithm,
            log_dir=log_dir,
            **variant['historical_policies_kwargs'],
        )
    algorithm.to(ptu.device)

    # Collect evaluation rollouts instead of training.
    samples = algorithm.get_eval_paths()
    print(env.reset())
    print(samples[0]['observations'])

    # Save each evaluation path's observations to disk.
    # BUGFIX: create the output directory first so np.save cannot fail on a
    # missing path; use enumerate instead of a manual counter.
    os.makedirs('./outtem', exist_ok=True)
    for i, path in enumerate(samples):
        np.save('./outtem/out%i.npy' % i, path['observations'])

    # Roll out the deterministic eval policy with an in-place sampler.
    from rlkit.samplers.in_place import InPlacePathSampler
    eval_sampler = InPlacePathSampler(
        env=env,
        policy=algorithm.eval_policy,
        max_samples=100,
        max_path_length=50,
    )
    # BUGFIX: the original called algorithm.eval_sampler.obtain_samples(),
    # which ignores the sampler constructed above (the attribute may not even
    # exist on the algorithm); use the local sampler.
    path = eval_sampler.obtain_samples()
    print(path[0]['observations'])
def gen_state_space(toposorted_nodes):
    """Yield every 0/1 assignment over the given nodes.

    Returns an iterator of tuples, one 0/1 entry per node, in the same
    order as `toposorted_nodes` (2**len(toposorted_nodes) tuples total).
    """
    return itertools.product((0, 1), repeat=len(toposorted_nodes))


def extract_state_space_configuration_as_dict(configuration, toposorted_nodes, index):
    """Pair each node name with its 0/1 value from `configuration`.

    `configuration` and `toposorted_nodes` are aligned by position.
    NOTE(review): `index` is accepted but never used in this body —
    confirm whether any caller relies on it before removing.
    """
    configuration_dict = {}
    for node_index, node in enumerate(toposorted_nodes):
        configuration_dict[node] = configuration[node_index]
    return configuration_dict


# Enumerate the full state space of each experiment's model.
# (This loop appears truncated at the chunk boundary; `logprobs` is
# populated further down, outside the visible portion.)
for EXPERIMENT_ID in tqdm(EXPERIMENT_RANGE):
    model, intervention, evidence, var_to_predict = load_experiment(EXPERIMENT_ID)
    toposorted_nodes = toposort(model)
    logprobs = []
    for index, configuration in enumerate(gen_state_space(toposorted_nodes)):
        # The values of a particular state space configurations.
        configuration_dict = extract_state_space_configuration_as_dict(
            configuration, toposorted_nodes, index
        )
        # (Note: for endogenous variables, these are the values of
        # its noisy-flipper rather than the endogenous node values themselves.)
def run_pyro(experiment_id, num_samples, type_to_run):
    """Run counterfactual inference for one experiment with Pyro.

    Performs abduction (importance sampling of the exogenous sites given the
    evidence) followed by intervention + prediction, and returns the mean
    predicted value of `var_to_predict` together with the wall-clock time.

    Args:
        experiment_id: Id passed to `load_experiment`.
        num_samples: Number of samples for each of the two phases.
        type_to_run: "pyro_with_guide" or "pyro_without_guide".

    Returns:
        (result, time_took): mean of the sampled predictions (float) and
        elapsed seconds for abduction + prediction.
    """
    # Note that for Pyro we use this number of samples twice because we sample twice:
    # once for abduction step and another time for prediction step.
    # See the paper for more details.
    assert type_to_run == "pyro_with_guide" or type_to_run == "pyro_without_guide"
    model, intervention, evidence, var_to_predict = load_experiment(
        experiment_id)
    toposorted_nodes = toposort(model)
    USE_GUIDE = type_to_run == "pyro_with_guide"

    # Collect the exogenous sample-site names: exogenous variables themselves,
    # plus one noisy-flipper site per endogenous variable.
    exog_sites = set()
    for var_name in toposorted_nodes:
        if model[var_name]['type'] == "exogenous":
            exog_sites.add(var_name)
        else:
            # a noisy flipper of an endogenous variable:
            exog_sites.add(get_noisy_flipper_name(var_name))
    exog_sites_list = list(exog_sites)

    def create_prob_proc_object__pyro(var_name, dict_values, data, guide, exog_values=None):
        # Sample (or deterministically set) one node, writing its value into
        # `dict_values`. When `exog_values` provides a value for a site, that
        # value is used instead of sampling (prediction step).
        if model[var_name]['type'] == "exogenous":
            if exog_values is not None and var_name in exog_values:
                dict_values[var_name] = exog_values[var_name]
            else:
                dict_values[var_name] = pyro.sample(
                    var_name,
                    dist.Bernoulli(model[var_name]['prior']),
                    obs=data.get(var_name, None))
        elif model[var_name]['type'] == "endogenous":
            # Weighted sum of parent values, thresholded at 0.5.
            weights = model[var_name]['parameters']
            parents = model[var_name]['parents']
            sum_parents = 0
            for index, parent in enumerate(parents):
                sum_parents += weights[index] * float(dict_values[parent])
            if sum_parents > 0.5:  # activation function
                val = 1
            else:
                val = 0
            exog_noise_parent_name = get_noisy_flipper_name(var_name)
            if exog_values is not None and exog_noise_parent_name in exog_values:
                dict_values[exog_noise_parent_name] = exog_values[
                    exog_noise_parent_name]
            else:
                if USE_GUIDE and guide == True and var_name in data:
                    # In the guide, force the flipper to whatever makes the
                    # endogenous node match the observed evidence exactly.
                    if round(float(val)) == round(float(data[var_name])):
                        noise_flipper_prob = 0.0
                    else:
                        noise_flipper_prob = 1.0
                else:
                    noise_flipper_prob = model[var_name]['flip_noise']
                dict_values[exog_noise_parent_name] = pyro.sample(
                    exog_noise_parent_name,
                    dist.Bernoulli(noise_flipper_prob),
                )
            # A flipper value of 1 inverts the activation output.
            if round(float(dict_values[exog_noise_parent_name])) == 1:
                val = 1 if val == 0 else 0
            dict_values[var_name] = pyro.sample(
                var_name,
                dist.Delta(torch.tensor(val).float()),
                obs=data.get(var_name, None))

    def generative_model(data={}, exog_values=None):
        # Full model: sample every node in topological order.
        dict_values = {}
        for node in toposorted_nodes:
            create_prob_proc_object__pyro(node, dict_values, data,
                                          guide=False,
                                          exog_values=exog_values)
        return dict_values

    def guide(data={}):
        # Guide used for importance sampling when USE_GUIDE is set.
        dict_values = {}
        for node in toposorted_nodes:
            create_prob_proc_object__pyro(node, dict_values, data,
                                          guide=True,
                                          exog_values=None)
        return dict_values

    def abduction(evidence, n_samples):
        # Importance-sample the exogenous sites conditioned on the evidence.
        # Raises AllSamplesRejectedException when the effective sample size
        # is NaN (all samples rejected).
        evidence = {d: torch.tensor(evidence[d]).float() for d in evidence}
        guide_to_use = None
        if USE_GUIDE:
            guide_to_use = guide
        posterior = pyro.infer.Importance(generative_model, guide_to_use,
                                          n_samples)
        posterior.run(evidence)
        if math.isnan(float(posterior.get_ESS())):
            raise AllSamplesRejectedException
        posterior = pyro.infer.EmpiricalMarginal(posterior,
                                                 sites=exog_sites_list)
        return posterior

    def intervention_prediction(node_of_interest, intervention, posterior, n_samples):
        # Apply the intervention via pyro.do and replay the model with
        # exogenous values drawn from the abduction posterior.
        intervention = {
            k: torch.tensor(intervention[k]).float().flatten()
            for k in intervention
        }
        intervened_model = pyro.do(generative_model, data=intervention)
        estimate = []
        for _ in range(n_samples):
            exog_values_ = posterior.sample()
            exog_values = {}
            # posterior samples are indexed in exog_sites_list order;
            # intervened sites are excluded.
            for index, var_name in enumerate(exog_sites_list):
                if var_name not in intervention.keys():
                    exog_values[var_name] = exog_values_[index]
            intervened_model_with_values = intervened_model(
                exog_values=exog_values)
            result = intervened_model_with_values[node_of_interest]
            estimate.append(result)
        return estimate

    # Abduction + prediction, timed together.
    start = timeit.default_timer()
    posterior = abduction(evidence, num_samples)
    results = intervention_prediction(var_to_predict, intervention, posterior,
                                      num_samples)
    stop = timeit.default_timer()
    time_took = stop - start
    result = float(numpy.mean(results))
    return result, time_took
OUTPUT_FOLDER = "output/"
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Command-line interface: a single optional experiment id.
parser = argparse.ArgumentParser(description='Visualise a network')
parser.add_argument(
    '--exp_id',
    type=int,
    default=0,
    required=False,
    help='an experiment id of a network',
)
args = parser.parse_args()

# Load the network selected on the command line.
model, intervention, evidence, var_to_predict = load_experiment(args.exp_id)
Graph = DiGraph()


def get_noisy_flipper_name(node_id):
    """Return the name of the noisy-flipper node paired with `node_id`."""
    return node_id + "_NF"


# Every model node, plus one noisy-flipper node per endogenous node
# (flipper immediately follows its endogenous node, as before).
all_nodes = [
    name
    for node_id, node_data in model.items()
    for name in (
        (node_id, get_noisy_flipper_name(node_id))
        if node_data['type'] == "endogenous"
        else (node_id,)
    )
]