Example #1
    def __init__(self, config, action_mask):
        super(CL_DPG, self).__init__(config)

        # Set Hyper-parameters

        self.initial_phase = not config.true_embeddings and not config.load_embed and not config.restore  # Initial training phase required if learning embeddings
        self.batch_norm = False

        # Function to get state features and action representation
        self.state_features = Basis.get_Basis(config=config)
        self.action_rep = CL_ActionRepresentation.VAE_Action_representation(
            action_dim=self.action_dim,
            state_dim=self.state_features.feature_dim,
            config=config)
        # Create instances for Actor and Q_fn
        self.actor = Actor(action_dim=self.action_rep.reduced_action_dim,
                           state_dim=self.state_features.feature_dim,
                           config=config)
        self.Q = Q_fn(action_dim=self.action_rep.reduced_action_dim,
                      state_dim=self.state_features.feature_dim,
                      config=config)

        # Create target networks
        # Deepcopy not working.
        self.target_state_features = Basis.get_Basis(config=config)
        self.target_actor = Actor(
            action_dim=self.action_rep.reduced_action_dim,
            state_dim=self.state_features.feature_dim,
            config=config)
        self.target_Q = Q_fn(action_dim=self.action_rep.reduced_action_dim,
                             state_dim=self.state_features.feature_dim,
                             config=config)
        # self.target_action_rep = ActionRepresentation.Action_representation_deep(action_dim=self.action_dim, config=config)
        # Copy the initialized values to target
        self.target_state_features.load_state_dict(
            self.state_features.state_dict())
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.target_Q.load_state_dict(self.Q.state_dict())
        # self.target_action_rep.load_state_dict(self.action_rep.state_dict())

        self.memory = MemoryBuffer(
            max_len=self.config.buffer_size,
            state_dim=self.state_dim,
            action_dim=1,
            atype=long,
            config=config,
            dist_dim=self.action_rep.reduced_action_dim)  # off-policy
        self.noise = OrnsteinUhlenbeckActionNoise(
            self.config.reduced_action_dim)

        self.modules = [('actor', self.actor), ('Q', self.Q),
                        ('state_features', self.state_features),
                        ('action_rep', self.action_rep),
                        ('target_actor', self.target_actor),
                        ('target_state_features', self.target_state_features),
                        ('target_Q', self.target_Q)]  #,
        # ('target_action_rep', self.target_action_rep)]

        self.init()
        self.update_mask(action_mask=action_mask)
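The constructor above creates target copies of the state features, actor, and Q-network and synchronizes them with load_state_dict (the inline comment notes that deepcopy was not working). A minimal sketch of that hard-copy pattern, using a hypothetical tiny Q-network in place of the repository's classes:

# Sketch only: hard-copy freshly initialized weights into a target network,
# mirroring the load_state_dict calls in CL_DPG.__init__ above.
import torch
import torch.nn as nn

class TinyQ(nn.Module):
    def __init__(self, state_dim=4, action_dim=2):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(state_dim + action_dim, 32),
                                 nn.ReLU(),
                                 nn.Linear(32, 1))

    def forward(self, state, action):
        return self.net(torch.cat([state, action], dim=-1))

q = TinyQ()
target_q = TinyQ()                        # same architecture, fresh random weights
target_q.load_state_dict(q.state_dict())  # hard copy so both start identical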
Example #2
    def __init__(self, config, action_mask):
        super(CL_ActorCritic, self).__init__(config)

        # Initial training phase required if learning embeddings from scratch
        self.initial_phase = not config.true_embeddings and not config.load_embed

        # Function to get state features and action representation
        self.state_features = Basis.get_Basis(config=config)
        self.action_rep = CL_ActionRepresentation.VAE_Action_representation(
            state_dim=self.state_features.feature_dim,
            action_dim=self.action_dim, config=config)

        # Create instances for Actor and Critic
        self.critic = Critic.Critic_with_traces(state_dim=self.state_features.feature_dim, config=config)
        self.actor = Policy.embed_Gaussian(action_dim=self.action_rep.reduced_action_dim,
                                           state_dim=self.state_features.feature_dim, config=config)

        # Initialize storage containers
        self.memory = MemoryBuffer(max_len=self.config.buffer_size, state_dim=self.state_dim,
                                   action_dim=1, atype=long, config=config,
                                   dist_dim=self.action_rep.reduced_action_dim)  # off-policy
        self.trajectory = Trajectory(max_len=self.config.batch_size, state_dim=self.state_dim,
                                     action_dim=1, atype=long, config=config,
                                     dist_dim=self.action_rep.reduced_action_dim)  # on-policy

        self.modules = [('actor', self.actor), ('critic', self.critic),
                        ('state_features', self.state_features), ('action_rep', self.action_rep)]

        self.init()
        self.update_mask(action_mask=action_mask)
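This constructor keeps two containers: an off-policy MemoryBuffer and an on-policy Trajectory capped at the batch size. The repository's Trajectory class is not shown here; a rough sketch of what such a fixed-size on-policy container might look like (assumed shape, hypothetical TinyTrajectory):

# Sketch only (assumed, not the repository's Trajectory class): a fixed-size
# on-policy container that stores transitions until the batch is full, then resets.
import numpy as np

class TinyTrajectory:
    def __init__(self, max_len, state_dim):
        self.states = np.zeros((max_len, state_dim), dtype=np.float32)
        self.actions = np.zeros((max_len, 1), dtype=np.int64)
        self.rewards = np.zeros(max_len, dtype=np.float32)
        self.size, self.max_len = 0, max_len

    def add(self, state, action, reward):
        self.states[self.size] = state
        self.actions[self.size] = action
        self.rewards[self.size] = reward
        self.size += 1
        return self.size == self.max_len   # signal that a full batch is ready

    def reset(self):
        self.size = 0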
Example #3
    def __init__(self, config):
        super(embed_Reinforce, self).__init__(config)

        self.ep_rewards = []
        self.ep_states = []
        self.ep_actions = []
        self.ep_exec_action_embs = []
        self.ep_chosen_action_embs = []

        # Set Hyper-parameters
        self.memory = MemoryBuffer(size=config.buffer_size)
        self.counter = 0

        self.initial_phase = not config.true_embeddings  # Initial training phase required if learning embeddings

        # Function to get state features and action representation
        if config.fourier_order > 0:
            self.state_features = Basis.Fourier_Basis(config=config)
        else:
            self.state_features = Basis.NN_Basis(config=config)

        # Action representation and baseline
        self.action_rep = Action_representation(
            state_dim=self.state_features.feature_dim,
            action_dim=self.action_dim,
            config=config)
        self.baseline = Critic.Critic(
            state_dim=self.state_features.feature_dim, config=config)

        # Create an instance for the Actor
        self.atype = config.dtype
        self.actor = Policy.embed_Gaussian(
            action_dim=self.action_rep.reduced_action_dim,
            state_dim=self.state_features.feature_dim,
            config=config)
        self.action_size = self.action_dim

        self.modules = [('actor', self.actor), ('baseline', self.baseline),
                        ('state_features', self.state_features),
                        ('action_rep', self.action_rep)]

        self.init()
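Example #3 picks Basis.Fourier_Basis when config.fourier_order > 0 and a neural-network basis otherwise. The repository's implementation is not reproduced here; as an assumption, a standard coupled Fourier state basis of a given order looks like the following sketch:

# Sketch only (assumed form, not the repository's Basis.Fourier_Basis):
# coupled Fourier features phi_c(s) = cos(pi * c . s) for a state normalized
# to [0, 1]^d and all coefficient vectors c in {0, ..., order}^d.
import itertools
import numpy as np

def fourier_basis(state, order):
    state = np.asarray(state, dtype=float)
    coeffs = np.array(list(itertools.product(range(order + 1), repeat=len(state))))
    return np.cos(np.pi * coeffs @ state)

print(fourier_basis([0.2, 0.7], order=2).shape)  # (9,) features for a 2-D state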
Example #4
    def __init__(self, config, action_mask):
        super(CL_Vanilla_ActorCritic, self).__init__(config)
        # Get state features and instances for Actor and Value function
        self.state_features = Basis.get_Basis(config=config)
        self.actor, self.atype, self.action_size = Policy.get_Policy(state_dim=self.state_features.feature_dim, config=config)
        self.critic = Critic.Critic(state_dim=self.state_features.feature_dim, config=config)
        self.trajectory = utils.Trajectory(max_len=self.config.batch_size, state_dim=self.state_dim,
                                           action_dim=self.action_size, atype=self.atype, config=config, dist_dim=1)

        self.modules = [('actor', self.actor), ('baseline', self.critic), ('state_features', self.state_features)]

        self.init()
        self.update_mask(action_mask=action_mask)
Example #5
    def __init__(self, config):
        super(OFPG, self).__init__(config)
        # Get state features and instances for Actor and Value function
        self.state_features = Basis.get_Basis(config=config)
        self.actor, self.atype, self.action_size = NS_utils.get_Policy(
            state_dim=self.state_features.feature_dim, config=config)
        self.memory = utils.TrajectoryBuffer(buffer_size=config.buffer_size,
                                             state_dim=self.state_dim,
                                             action_dim=self.action_size,
                                             atype=self.atype,
                                             config=config,
                                             dist_dim=1)

        self.modules = [('actor', self.actor),
                        ('state_features', self.state_features)]
        self.counter = 0
        self.init()
Example #6
    def __init__(self, config):
        super(ProOLS, self).__init__(config)
        # Get state features and instances for Actor and Value function
        self.state_features = Basis.get_Basis(config=config)
        self.actor, self.atype, self.action_size = NS_utils.get_Policy(
            state_dim=self.state_features.feature_dim, config=config)
        self.memory = utils.TrajectoryBuffer(buffer_size=config.buffer_size,
                                             state_dim=self.state_dim,
                                             action_dim=self.action_size,
                                             atype=self.atype,
                                             config=config,
                                             dist_dim=1)
        self.extrapolator = OLS(max_len=config.buffer_size,
                                delta=config.delta,
                                basis_type=config.extrapolator_basis,
                                k=config.fourier_k)

        self.modules = [('actor', self.actor),
                        ('state_features', self.state_features)]
        self.counter = 0
        self.init()
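ProOLS adds an OLS extrapolator configured with a history length, a confidence level delta, a basis type, and a Fourier order. As an assumed illustration (hypothetical helpers, not the repository's OLS class), one-step least-squares extrapolation over a Fourier time basis could look like:

# Sketch only: fit ordinary least squares on a Fourier basis over normalized
# time and extrapolate the signal one step ahead.
import numpy as np

def fourier_features(t, k):
    # [1, cos(pi * 1 * t), ..., cos(pi * k * t)] for normalized time t in [0, 1]
    return np.concatenate([[1.0], np.cos(np.pi * np.arange(1, k + 1) * t)])

def ols_forecast(values, k=3):
    n = len(values)
    times = np.arange(n) / n                       # past points; the next step is t = 1.0
    X = np.stack([fourier_features(t, k) for t in times])
    w, *_ = np.linalg.lstsq(X, np.asarray(values, dtype=float), rcond=None)
    return fourier_features(1.0, k) @ w            # extrapolated next value

print(ols_forecast([1.0, 1.2, 1.5, 1.9, 2.4]))     # continues the upward trend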