Code Example #1
    def __init__(self, episode_buffer, replay_buffer, action_space=3):
        self.lr = PARAM.LEARNING_RATE
        self.episode_buffer = episode_buffer
        self.replay_buffer = replay_buffer
        self.N = PARAM.N
        self.gamma = PARAM.gamma
        self.seq_len = PARAM.A2C_SEQUENCE_LENGTH
        self.aux_batch_size = PARAM.AUX_TASK_BATCH_SIZE
        self.vfr_weight = PARAM.VFR_LOSS_WEIGHT
        self.rp_weight = PARAM.RP_LOSS_WEIGHT
        self.pc_weight = PARAM.PC_LOSS_WEIGHT

        # A2C network
        self.A = AuxNetwork(state_size=PARAM.STATE_SIZE,
                            action_space=action_space,
                            seq_len=self.seq_len)

        # GPU availability
        self.gpu = torch.cuda.is_available()
        if self.gpu:
            print("Using GPU")
            self.A = self.A.cuda()
        else:
            print("Using CPU")

        # Loss Function and Optimizer
        self.optimizer = optim.Adam(self.A.parameters(),
                                    lr=self.lr,
                                    weight_decay=1e-6)
        self.vfr_criterion = nn.MSELoss()  # Value Function Replay loss
        self.rp_criterion = nn.CrossEntropyLoss()  # Reward Prediction loss
        self.pc_criterion = nn.MSELoss()  # Pixel Control loss
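
For context, a minimal instantiation sketch for the constructor above. The class name A2C, the EpisodeBuffer helper, and the buffer capacities are assumptions for illustration; the original project's wiring is not shown in this excerpt.

# Hypothetical wiring of the trainer above; EpisodeBuffer and the PARAM
# buffer-size attributes are stand-ins for whatever the project provides.
episode_buffer = EpisodeBuffer(PARAM.EPISODE_BUFFER_SIZE)   # assumed helper class
replay_buffer = ReplayBuffer(PARAM.REPLAY_MEMORY_SIZE)
agent = A2C(episode_buffer, replay_buffer, action_space=3)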
Code Example #2
  def __init__(self, episode_buffer, replay_buffer, action_space=3):
    self.lr = PARAM.LEARNING_RATE
    self.episode_buffer = episode_buffer
    self.replay_buffer = replay_buffer
    self.N = PARAM.N
    self.gamma = PARAM.gamma
    self.seq_len = PARAM.A2C_SEQUENCE_LENGTH
    self.aux_batch_size = PARAM.AUX_TASK_BATCH_SIZE
    self.vfr_weight = PARAM.VFR_LOSS_WEIGHT
    self.rp_weight = PARAM.RP_LOSS_WEIGHT
    self.pc_weight = PARAM.PC_LOSS_WEIGHT

    self.ppo_epochs = 10        # PARAM.PPO_EPOCHS
    self.num_mini_batch = 12    # PARAM.PPO_NUM_MINI_BATCH
    self.clip_param = 0.2       # PPO clipped-surrogate epsilon

    #self.max_grad_norm = PARAM.MAX_GRAD_NORM
    #self.use_clipped_value_loss = PARAM.USE_CLIPPED_VALUE_LOSS

    # A2C network
    self.A = AuxNetwork(state_size=PARAM.STATE_SIZE, action_space=action_space, seq_len=self.seq_len)

    # GPU availability
    self.gpu = torch.cuda.is_available()
    if self.gpu:
      print("Using GPU")
      self.A = self.A.cuda()
    else:
      print("Using CPU")

    # Loss Function and Optimizer
    self.optimizer = optim.Adam(self.A.parameters(), lr=self.lr, weight_decay=1e-6)
    self.vfr_criterion = nn.MSELoss()           # Value Function Replay loss
    self.rp_criterion = nn.CrossEntropyLoss()   # Reward Prediction loss
    self.pc_criterion = nn.MSELoss()            # Pixel Control loss
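
Example #2 only stores clip_param, ppo_epochs, and num_mini_batch; the update itself is not shown. As a reference, this is a sketch of the standard PPO clipped surrogate objective that a clip_param of 0.2 would be used in. The tensor names (new_log_probs, old_log_probs, advantages) are illustrative, not taken from the project.

# Standard PPO clipped policy loss (sketch); all tensors are assumed inputs.
ratio = torch.exp(new_log_probs - old_log_probs)   # pi_new / pi_old per action
surr1 = ratio * advantages
surr2 = torch.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * advantages
policy_loss = -torch.min(surr1, surr2).mean()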
Code Example #3
File: A2C.py  Project: karunraju/NFF
    def __init__(self, ReplayBuffer, action_space=3, network=None):
        self.lr = PARAM.LEARNING_RATE
        self.N = PARAM.N
        self.gamma = PARAM.gamma
        self.seq_len = PARAM.A2C_SEQUENCE_LENGTH
        self.aux_batch_size = PARAM.AUX_TASK_BATCH_SIZE
        self.vfr_weight = PARAM.VFR_LOSS_WEIGHT
        self.rp_weight = PARAM.RP_LOSS_WEIGHT
        self.pc_weight = PARAM.PC_LOSS_WEIGHT
        self.gpu = torch.cuda.is_available()

        # A2C network
        if PARAM.ENSEMBLE < 1:
            self.A = AuxNetwork(state_size=PARAM.STATE_SIZE,
                                action_space=action_space,
                                seq_len=self.seq_len)
            # GPU availability
            if self.gpu:
                print("Using GPU")
                self.A = self.A.cuda()
            else:
                print("Using CPU")
            self.replay_buffer = ReplayBuffer(PARAM.REPLAY_MEMORY_SIZE)
            # Loss Function and Optimizer
            self.optimizer = optim.Adam(self.A.parameters(),
                                        lr=self.lr,
                                        weight_decay=1e-6)
        else:
            self.Ensemble = Ensemble(PARAM.ENSEMBLE, action_space,
                                     self.seq_len, ReplayBuffer, network)
            self.source_context()

        self.vfr_criterion = nn.MSELoss()  # Value Function Replay loss
        self.rp_criterion = nn.CrossEntropyLoss()  # Reward Prediction loss
        self.pc_criterion = nn.MSELoss()  # Pixel Control loss
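
The three criteria and weights above follow a UNREAL-style auxiliary-task setup (value function replay, reward prediction, pixel control). The sketch below shows how such weights are typically combined with the main actor-critic loss; the prediction and target tensors are placeholders for whatever the project's training step actually computes, not names from the original code.

# Sketch only: the prediction/target tensors are assumed inputs.
vfr_loss = self.vfr_criterion(value_replay_pred, value_replay_target)
rp_loss  = self.rp_criterion(reward_logits, reward_sign_labels)   # e.g. 3 classes: negative / zero / positive
pc_loss  = self.pc_criterion(pixel_control_pred, pixel_control_target)
total_loss = (a2c_loss
              + self.vfr_weight * vfr_loss
              + self.rp_weight * rp_loss
              + self.pc_weight * pc_loss)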
Code Example #4
def main():
    man = Manager()
    if cuda.is_available():
        list_of_networks = man.list([
            AuxNetwork(state_size=PARAM.STATE_SIZE,
                       action_space=3,
                       seq_len=PARAM.A2C_SEQUENCE_LENGTH).cuda()
            for i in range(PARAM.ENSEMBLE)
        ])
    else:
        list_of_networks = man.list([
            AuxNetwork(state_size=PARAM.STATE_SIZE,
                       action_space=3,
                       seq_len=PARAM.A2C_SEQUENCE_LENGTH)
            for i in range(PARAM.ENSEMBLE)
        ])
    args = parse_arguments()
    p = Pool(PARAM.AGENTS)
    p.map(train, [list_of_networks] * PARAM.AGENTS, chunksize=1)
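
Below is a self-contained sketch of the Manager-list / Pool pattern Example #4 relies on, with a dummy worker standing in for train(). Note that sharing CUDA-resident modules across processes generally requires torch.multiprocessing with the "spawn" start method rather than a plain multiprocessing Pool, so treat this purely as an illustration of the pattern.

from multiprocessing import Manager, Pool

def worker(shared_networks):
    # Each agent would read from (or write back into) the shared list here.
    return len(shared_networks)

if __name__ == "__main__":
    man = Manager()
    shared = man.list(["net-0", "net-1"])   # stand-ins for AuxNetwork instances
    with Pool(2) as p:
        print(p.map(worker, [shared, shared], chunksize=1))   # -> [2, 2]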
Code Example #5
    def __init__(self, size, action_space, seq_len, ReplayBuffer, network):
        self.list_of_networks = network
        if network is None:
            # Build one auxiliary network per ensemble member
            self.list_of_networks = [AuxNetwork(state_size=PARAM.STATE_SIZE,
                                                action_space=3,
                                                seq_len=PARAM.A2C_SEQUENCE_LENGTH)
                                     for _ in range(PARAM.ENSEMBLE)]
            self.gpu = torch.cuda.is_available()
            if self.gpu:
                print("Using GPU")
                self.list_of_networks = [net.cuda() for net in self.list_of_networks]
            else:
                print("Using CPU")
        # One optimizer and one replay buffer per ensemble member
        self.list_of_optimizers = [optim.Adam(net.parameters(),
                                              lr=PARAM.LEARNING_RATE,
                                              weight_decay=1e-6)
                                   for net in self.list_of_networks]
        self.list_of_replay_buffers = [ReplayBuffer(PARAM.REPLAY_MEMORY_SIZE)
                                       for _ in self.list_of_networks]
        self.list_of_action_repeats = PARAM.ACTION_REPEAT
        self.current = len(self.list_of_networks) - 1
        self.update_context()
        if PARAM.USE_ALTERNATE_SWITCHING_POLICY:
            self.analyze_rewards = self.analyze_rewards_1
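
Example #5 keeps a `current` index and calls `update_context()`, which suggests the ensemble switches which network, optimizer, and replay buffer are active. The helper below is a hypothetical illustration of such a round-robin switch; it is not the project's actual update_context implementation, and the attribute names other than the list_of_* fields defined above are guesses.

    # Hypothetical round-robin context switch; self.network, self.optimizer and
    # self.replay_buffer are assumed "active member" attributes, not from the project.
    def switch_member(self):
        self.current = (self.current + 1) % len(self.list_of_networks)
        self.network = self.list_of_networks[self.current]
        self.optimizer = self.list_of_optimizers[self.current]
        self.replay_buffer = self.list_of_replay_buffers[self.current]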