def record_demo(use_discrete, num_visual=0, num_vector=1):
    """Record demonstrations from a ``RecordEnvironment`` and write them to a ``.demo`` file.

    ``tmpdir_factory`` is not a parameter: it is read from the enclosing
    scope and is used to create the ``tmp_demo`` directory holding the file.

    Args:
        use_discrete: Truthy to describe a discrete action space (action
            size ``[2]``), falsy for a continuous one (action size ``[1]``).
            Also forwarded to ``RecordEnvironment``.
        num_visual: Forwarded to ``RecordEnvironment``.
        num_vector: Forwarded to ``RecordEnvironment``.

    Returns:
        The path of the written demo file, as a ``str``.
    """
    recorder = RecordEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=num_vector,
        n_demos=100,
    )
    # If we want to use true demos, we can solve the env in the usual way.
    # Otherwise, we can just call solve to execute the optimal policy.
    recorder.solve()
    recorded_protos = recorder.demonstration_protos[BRAIN_NAME]
    meta_proto = DemonstrationMetaProto()

    # Everything that depends on the action-space flavour is chosen in one place.
    if use_discrete:
        action_size = [2]
        space_type = discrete
        flavour = "Discrete"
    else:
        action_size = [1]
        space_type = continuous
        flavour = "Continuous"

    brain_proto = BrainParametersProto(
        vector_action_size=action_size,
        vector_action_descriptions=[""],
        vector_action_space_type=space_type,
        brain_name=BRAIN_NAME,
        is_training=True,
    )

    file_name = f"1DTest{flavour}.demo"
    demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(file_name))
    write_demo(demo_path, meta_proto, brain_proto, recorded_protos)
    return demo_path
def record_demo(action_sizes, num_visual=0, num_vector=1):
    """Record demonstrations from a ``RecordEnvironment`` and write them to a ``.demo`` file.

    ``tmpdir_factory`` is not a parameter: it is read from the enclosing
    scope and is used to create the ``tmp_demo`` directory holding the file.

    Args:
        action_sizes: A two-item sequence
            ``(continuous_action_size, discrete_action_size)``. Also
            forwarded unchanged to ``RecordEnvironment``.
        num_visual: Forwarded to ``RecordEnvironment``.
        num_vector: Forwarded to ``RecordEnvironment``.

    Returns:
        The path of the written demo file, as a ``str``. The file is named
        ``1DTestDiscrete.demo`` when ``discrete_action_size > 0`` and
        ``1DTestContinuous.demo`` otherwise.
    """
    env = RecordEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=num_vector,
        n_demos=100,
    )
    # If we want to use true demos, we can solve the env in the usual way.
    # Otherwise, we can just call solve to execute the optimal policy.
    env.solve()
    agent_info_protos = env.demonstration_protos[BRAIN_NAME]
    meta_data_proto = DemonstrationMetaProto()

    continuous_action_size, discrete_action_size = action_sizes
    has_discrete = discrete_action_size > 0
    action_spec_proto = ActionSpecProto(
        num_continuous_actions=continuous_action_size,
        num_discrete_actions=discrete_action_size,
        # NOTE(review): a single branch of size 2 is declared regardless of
        # discrete_action_size -- confirm this matches the environment.
        discrete_branch_sizes=[2] if has_discrete else None,
    )
    brain_param_proto = BrainParametersProto(
        brain_name=BRAIN_NAME,
        is_training=True,
        action_spec=action_spec_proto,
    )

    # Derive the label from the discrete size, not from `action_sizes` itself:
    # a two-item sequence is always truthy, so testing it directly would
    # label every demo "Discrete", including continuous-only ones.
    action_type = "Discrete" if has_discrete else "Continuous"
    demo_path_name = f"1DTest{action_type}.demo"
    demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
    write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
    return demo_path