Example #1
    def create_agent(self):
        """Instantiate the RL agent who interacts with the environment.

        Returns:
            RL agent

        """
        change_print_color.change('CYAN')
        print("\nCreating Agent...")

        policy_params = {
            'network_model': tf_network,  # tf_network, multi_modal_network, multi_modal_network_fp
            'network_params': {
                'n_layers': 2,  # Number of hidden layers
                'dim_hidden': [32, 32],  # Hidden-layer sizes, one entry per layer
                'obs_names': self.env.get_obs_info()['names'],
                'obs_dof': self.env.get_obs_info()['dimensions'],  # DoF for observation data tensor
            },
            # Initialization.
            'init_var': 0.1,  # Initial policy variance.
            'ent_reg': 0.0,  # Entropy regularizer (Used to update policy variance)
            # Solver hyperparameters.
            'iterations': self.task_params['tf_iterations'],  # Optimizer iterations per inner iteration (default: 5000).
            'batch_size': 15,
            'lr': 0.001,  # Base learning rate (by default it's fixed).
            'lr_policy': 'fixed',  # Learning rate policy.
            'momentum': 0.9,  # Momentum.
            'weight_decay': 0.005,  # Weight decay to prevent overfitting.
            'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', 'RMSPROP', 'MOMENTUM', 'ADAGRAD').
            # GPU usage.
            'use_gpu': self.task_params['use_gpu'],  # Whether or not to use the GPU for training.
            'gpu_id': 0,
            'gpu_mem_percentage': self.task_params['gpu_mem_percentage'],
            # Training data.
            'fc_only_iterations': 0,  # Iterations of only FC before normal training
            # Others.
            'random_seed': self.hyperparams['seed'] \
                if self.task_params['tf_seed'] == -1 \
                else self.task_params['tf_seed'],  # TF random seed
            'log_dir': self.hyperparams['log_dir'],
            # 'weights_file_prefix': EXP_DIR + 'policy',
        }

        policy_opt = {'type': PolicyOptTf, 'hyperparams': policy_params}

        agent = GPSAgent(act_dim=self.action_dim,
                         obs_dim=self.obs_dim,
                         state_dim=self.state_dim,
                         policy_opt=policy_opt,
                         agent_name='agent%02d' % self.hyperparams['run_num'])
        print("Agent:%s OK\n" % type(agent).__name__)

        return agent
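The GPU-related entries above ('use_gpu', 'gpu_id', 'gpu_mem_percentage') are handed to PolicyOptTf through policy_opt; how PolicyOptTf applies them internally is not shown here. As a minimal sketch of how such settings typically map onto a TensorFlow 1.x session configuration (make_session_config is a hypothetical helper, not part of this code base):

import tensorflow as tf

def make_session_config(use_gpu, gpu_id, gpu_mem_percentage):
    """Sketch only: build a tf.ConfigProto from the GPU hyperparameters above."""
    if not use_gpu:
        # Hide all GPUs so TensorFlow falls back to the CPU.
        return tf.ConfigProto(device_count={'GPU': 0})
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_mem_percentage,  # e.g. 0.2
        visible_device_list=str(gpu_id),                      # e.g. '0'
    )
    return tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)

# session = tf.Session(config=make_session_config(True, 0, 0.2))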
Example #2
    'lr_policy': 'fixed',  # Learning rate policy.
    'momentum': 0.9,  # Momentum.
    'weight_decay': 0.005,  # Weight decay.
    'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', etc.).
    # GPU usage.
    'use_gpu': 1,  # Whether or not to use the GPU for training.
    'gpu_id': 0,
    'random_seed': 1,
    'fc_only_iterations': 0,  # TODO: check -- iterations training only the FC layers? Only relevant for CNNs?
    'gpu_mem_percentage': 0.2,
    # 'weights_file_prefix': EXP_DIR + 'policy',
}
policy_opt = {'type': PolicyOptTf, 'hyperparams': policy_params}

bigman_agent = GPSAgent(act_dim=action_dim,
                        obs_dim=observation_dim,
                        state_dim=state_dim,
                        policy_opt=policy_opt)
print("Bigman Agent:%s OK\n" % type(bigman_agent))

# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #
# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,  # Target action value
}
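The CostAction entry above weights squared control effort per action dimension through 'wu'. The exact form is defined by the CostAction class; purely as a hypothetical sketch of the quadratic penalty such a weight usually implies (quadratic_action_cost is an illustrative helper, not from this code base):

import numpy as np

def quadratic_action_cost(u, wu, target=None):
    """Sketch only: per-timestep penalty 0.5 * sum_i wu[i] * (u[i] - target[i])**2."""
    diff = u if target is None else u - target
    return 0.5 * np.sum(wu * diff ** 2)

# With wu = 1e-4 * np.ones(action_dim), large torques are penalised only mildly:
# quadratic_action_cost(np.array([0.5, -0.2]), np.ones(2) * 1e-4)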
Example #3
    'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', etc.).
    # GPU usage.
    'use_gpu': 1,  # Whether or not to use the GPU for training.
    'gpu_id': 0,
    'random_seed': 1,
    'fc_only_iterations': 0,  # TODO: check -- iterations training only the FC layers? Only relevant for CNNs?
    'gpu_mem_percentage': 0.2,
    # 'weights_file_prefix': EXP_DIR + 'policy',
}

policy_opt = {
    'type': PolicyOptTf,
    'hyperparams': policy_params
    }

manipulator2d_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim, state_dim=state_dim, policy_opt=policy_opt,
                               agent_name="bigman_agent")


# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #
# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,   # Target action value
}

# State Cost
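# A hypothetical sketch of the kind of state cost that usually accompanies the
# action cost in GPS-style setups (CostState, 'wp' and 'target_state' are
# assumed names, not taken from this example):
#
#     state_cost = {
#         'type': CostState,
#         'data_types': {
#             'link_position': {
#                 'wp': np.ones(3),             # per-dimension state weight
#                 'target_state': np.zeros(3),  # desired state value
#             },
#         },
#     }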
Example #4
    #     'use_gpu': 1,  # Whether or not to use the GPU for training.
    #     'gpu_id': 0,
    #     'random_seed': 1,
    #     'fc_only_iterations': 0,  # TODO: Only forwardcontrol? if it is CNN??
    #     'gpu_mem_percentage': 0.2,
    #     # 'weights_file_prefix': EXP_DIR + 'policy',
    # },
                ]
bigman_agents = list()
for pp, pol_param in enumerate(policy_params):
    policy_opt = {
        'type': PolicyOptTf,
        'hyperparams': pol_param
        }

    bigman_agents.append(GPSAgent(act_dim=action_dim, obs_dim=observation_dim, state_dim=state_dim, policy_opt=policy_opt,
                                  agent_name="bigman_agent"+str(pp)))
    print("Bigman Agent:%s OK\n" % type(bigman_agents[-1]))
print("TOTAL BIGMAN AGENTS: %d" % len(bigman_agents))


# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #
# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,   # Target action value
}
Example #5
        'obs_names': bigman_env.get_obs_info()['names'],
        'obs_dof': bigman_env.get_obs_info()['dimensions'],  # DoF for observation data tensor
        'batch_size': 15,  # TODO: Check whether this value is OK (same as num_samples)
        #'num_filters': [5, 10],
        #'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE],  # Deprecated from original GPS code
        #'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],  # Deprecated from original GPS code
        #'obs_image_data': [RGB_IMAGE],  # Deprecated from original GPS code
        #'sensor_dims': SENSOR_DIMS,  # Deprecated from original GPS code
        #'image_width': IMAGE_WIDTH (80),  # For multi_modal_network
        #'image_height': IMAGE_HEIGHT (64),  # For multi_modal_network
        #'image_channels': IMAGE_CHANNELS (3),  # For multi_modal_network
    }
}
policy = PolicyOptTf(policy_params, observation_dim, action_dim)
#policy = None
bigman_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim, state_dim=state_dim, policy=policy)
# Load previous learned variables
#bigman_agent.load(file_save_restore)
print("Bigman Agent:%s OK\n" % type(bigman_agent))


# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #

# Action Cost  # TODO: I think it doesn't make sense if the control is joint position
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,