Example #1
    def create_agent(self):
        """Instantiate the RL agent who interacts with the environment.

        Returns:
            RL agent

        """
        change_print_color.change('CYAN')
        print("\nCreating Agent...")

        policy_params = {
            'network_model': tf_network,  # tf_network, multi_modal_network, multi_modal_network_fp
            'network_params': {
                'n_layers': 2,  # Number of hidden layers
                'dim_hidden': [32, 32],  # Hidden-layer sizes, one entry per layer
                'obs_names': self.env.get_obs_info()['names'],
                'obs_dof': self.env.get_obs_info()['dimensions'],  # DoF for observation data tensor
            },
            # Initialization.
            'init_var': 0.1,  # Initial policy variance.
            'ent_reg': 0.0,  # Entropy regularizer (Used to update policy variance)
            # Solver hyperparameters.
            'iterations': self.task_params['tf_iterations'],  # Optimizer iterations per inner iteration (default: 5000).
            'batch_size': 15,
            'lr': 0.001,  # Base learning rate (by default it's fixed).
            'lr_policy': 'fixed',  # Learning rate policy.
            'momentum': 0.9,  # Momentum.
            'weight_decay': 0.005,  # Weight decay to prevent overfitting.
            'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', 'RMSPROP', 'MOMENTUM', 'ADAGRAD').
            # GPU usage.
            'use_gpu': self.task_params['use_gpu'],  # Whether or not to use the GPU for training.
            'gpu_id': 0,
            'gpu_mem_percentage': self.task_params['gpu_mem_percentage'],
            # Training data.
            'fc_only_iterations': 0,  # Iterations of only FC before normal training
            # Others.
            'random_seed': self.hyperparams['seed'] \
                if self.task_params['tf_seed'] == -1 \
                else self.task_params['tf_seed'],  # TF random seed
            'log_dir': self.hyperparams['log_dir'],
            # 'weights_file_prefix': EXP_DIR + 'policy',
        }

        policy_opt = {'type': PolicyOptTf, 'hyperparams': policy_params}

        agent = GPSAgent(act_dim=self.action_dim,
                         obs_dim=self.obs_dim,
                         state_dim=self.state_dim,
                         policy_opt=policy_opt,
                         agent_name='agent%02d' % self.hyperparams['run_num'])
        print("Agent:%s OK\n" % type(agent).__name__)

        return agent
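The GPU-related entries above ('use_gpu', 'gpu_id', 'gpu_mem_percentage') are handed to PolicyOptTf through policy_opt; how PolicyOptTf applies them internally is not shown here. As a minimal sketch of how such settings typically map onto a TensorFlow 1.x session configuration (make_session_config is a hypothetical helper, not part of this code base):

import tensorflow as tf

def make_session_config(use_gpu, gpu_id, gpu_mem_percentage):
    """Sketch only: build a tf.ConfigProto from the GPU hyperparameters above."""
    if not use_gpu:
        # Hide all GPUs so TensorFlow falls back to the CPU.
        return tf.ConfigProto(device_count={'GPU': 0})
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_mem_percentage,  # e.g. 0.2
        visible_device_list=str(gpu_id),                      # e.g. '0'
    )
    return tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)

# session = tf.Session(config=make_session_config(True, 0, 0.2))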
Example #2
    'lr_policy': 'fixed',  # Learning rate policy.
    'momentum': 0.9,  # Momentum.
    'weight_decay': 0.005,  # Weight decay.
    'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', etc.).
    # GPU usage.
    'use_gpu': 1,  # Whether or not to use the GPU for training.
    'gpu_id': 0,
    'random_seed': 1,
    'fc_only_iterations': 0,  # TODO: check -- iterations training only the FC layers? Only relevant for CNNs?
    'gpu_mem_percentage': 0.2,
    # 'weights_file_prefix': EXP_DIR + 'policy',
}
policy_opt = {'type': PolicyOptTf, 'hyperparams': policy_params}

bigman_agent = GPSAgent(act_dim=action_dim,
                        obs_dim=observation_dim,
                        state_dim=state_dim,
                        policy_opt=policy_opt)
print("Bigman Agent:%s OK\n" % type(bigman_agent))

# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #
# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,  # Target action value
}
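The CostAction entry above weights squared control effort per action dimension through 'wu'. The exact form is defined by the CostAction class; purely as a hypothetical sketch of the quadratic penalty such a weight usually implies (quadratic_action_cost is an illustrative helper, not from this code base):

import numpy as np

def quadratic_action_cost(u, wu, target=None):
    """Sketch only: per-timestep penalty 0.5 * sum_i wu[i] * (u[i] - target[i])**2."""
    diff = u if target is None else u - target
    return 0.5 * np.sum(wu * diff ** 2)

# With wu = 1e-4 * np.ones(action_dim), large torques are penalised only mildly:
# quadratic_action_cost(np.array([0.5, -0.2]), np.ones(2) * 1e-4)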
Example #3
    'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', etc.).
    # GPU usage.
    'use_gpu': 1,  # Whether or not to use the GPU for training.
    'gpu_id': 0,
    'random_seed': 1,
    'fc_only_iterations': 0,  # TODO: check -- iterations training only the FC layers? Only relevant for CNNs?
    'gpu_mem_percentage': 0.2,
    # 'weights_file_prefix': EXP_DIR + 'policy',
}

policy_opt = {
    'type': PolicyOptTf,
    'hyperparams': policy_params
    }

manipulator2d_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim, state_dim=state_dim, policy_opt=policy_opt,
                               agent_name="bigman_agent")


# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #
# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,   # Target action value
}

# State Cost
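# A hypothetical sketch of the kind of state cost that usually accompanies the
# action cost in GPS-style setups (CostState, 'wp' and 'target_state' are
# assumed names, not taken from this example):
#
#     state_cost = {
#         'type': CostState,
#         'data_types': {
#             'link_position': {
#                 'wp': np.ones(3),             # per-dimension state weight
#                 'target_state': np.zeros(3),  # desired state value
#             },
#         },
#     }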
Example #4
    #     'use_gpu': 1,  # Whether or not to use the GPU for training.
    #     'gpu_id': 0,
    #     'random_seed': 1,
    #     'fc_only_iterations': 0,  # TODO: Only forwardcontrol? if it is CNN??
    #     'gpu_mem_percentage': 0.2,
    #     # 'weights_file_prefix': EXP_DIR + 'policy',
    # },
                ]
bigman_agents = list()
for pp, pol_param in enumerate(policy_params):
    policy_opt = {
        'type': PolicyOptTf,
        'hyperparams': pol_param
        }

    bigman_agents.append(GPSAgent(act_dim=action_dim, obs_dim=observation_dim, state_dim=state_dim, policy_opt=policy_opt,
                                  agent_name="bigman_agent"+str(pp)))
    print("Bigman Agent:%s OK\n" % type(bigman_agents[-1]))
print("TOTAL BIGMAN AGENTS: %d" % len(bigman_agents))


# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #
# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,   # Target action value
}
Example #5
        'obs_names': bigman_env.get_obs_info()['names'],
        'obs_dof': bigman_env.get_obs_info()['dimensions'],  # DoF for observation data tensor
        'batch_size': 15,  # TODO: Check whether this value is OK (same as num_samples)
        #'num_filters': [5, 10],
        #'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE],  # Deprecated from original GPS code
        #'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],  # Deprecated from original GPS code
        #'obs_image_data': [RGB_IMAGE],  # Deprecated from original GPS code
        #'sensor_dims': SENSOR_DIMS,  # Deprecated from original GPS code
        #'image_width': IMAGE_WIDTH (80),  # For multi_modal_network
        #'image_height': IMAGE_HEIGHT (64),  # For multi_modal_network
        #'image_channels': IMAGE_CHANNELS (3),  # For multi_modal_network
    }
}
policy = PolicyOptTf(policy_params, observation_dim, action_dim)
#policy = None
bigman_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim, state_dim=state_dim, policy=policy)
# Load previous learned variables
#bigman_agent.load(file_save_restore)
print("Bigman Agent:%s OK\n" % type(bigman_agent))


# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #

# Action Cost  # TODO: I think it doesn't make sense if the control is joint position
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,