Example #1
    def __init__(self, env, gamma, buffer_size, ddqn):
        self.env = env
        Sdim, Adim = env.get_dims()
        self.model = ConvNet(Sdim[0], Sdim[0], 3, Adim).cuda()  # square inputs assumed: Sdim[0] used for both spatial dims
        self.target_model = copy.deepcopy(self.model).cuda()
        self.her = HER()
        self.gamma = gamma
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0001)
        self.batch_size = 16
        self.epsilon = 0.1
        self.buffer_size = buffer_size
        self.step_counter = 0
        self.epsi_high = 0.9
        self.epsi_low = 0.1
        self.steps = 0
        self.count = 0
        self.decay = 2000
        self.eps = self.epsi_high
        self.update_target_step = 3000
        self.log = logger()
        self.log.add_log('tot_return')
        self.log.add_log('avg_loss')
        self.log.add_log('final_dist')
        self.log.add_log('buffer')
        self.image_mean = 0
        self.image_std = 0
        self.ddqn = ddqn

        self.replay_buffer = deque(maxlen=buffer_size)
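All of the snippets below share the same scaffolding; a minimal import header that would make them runnable is sketched here. The project-local classes (ConvNet, HER, logger, QNet, RND, DQNet, ACNet, Policy, NeuralNet, RandomNetwork) are assumed to come from the surrounding repository.

# Assumed common imports for these examples; the project-local modules are
# placeholders for whatever the surrounding codebase actually provides.
import copy
from copy import deepcopy as dc
from collections import deque

import torch
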
Example #2
    def __init__(self, env, gamma, buffer_size, cuda_flag):
        self.env = env
        self.N = env.N
        self.cuda_flag = cuda_flag
        if self.cuda_flag:
            self.model = Policy(2 * self.N, self.N).cuda()
            self.target_model = copy.deepcopy(self.model).cuda()
        else:
            self.model = Policy(2 * self.N, self.N)
            self.target_model = copy.deepcopy(self.model)
        self.her = HER(self.N)
        self.gamma = gamma
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0005)
        self.batch_size = 64
        self.epsilon = 0.1
        self.buffer_size = buffer_size
        self.step_counter = 0
        self.epsi_high = 0.9
        self.epsi_low = 0.05
        self.steps = 0
        self.count = 0
        self.decay = 200
        self.eps = self.epsi_high
        self.update_target_step = 1000
        self.log = logger()
        self.log.add_log('tot_return')
        self.log.add_log('avg_loss')
        self.log.add_log('final_dist')

        self.replay_buffer = deque(maxlen=buffer_size)
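The `update_target_step` field suggests the usual hard-update schedule for the target network; a sketch of the training-loop fragment these fields typically drive (the method name `try_update_target` is hypothetical, not taken from the class itself):

    # Hypothetical training-loop fragment: copy the online weights into the
    # frozen target network every update_target_step environment steps.
    def try_update_target(self):
        self.step_counter += 1
        if self.step_counter % self.update_target_step == 0:
            self.target_model.load_state_dict(self.model.state_dict())
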
Example #3
    def __init__(self, env, gamma, buffer_size):
        self.env = env
        acts = env.action_space
        obs = env.observation_space
        self.model = QNet(obs.shape[0], acts.n, 64)
        self.target_model = copy.deepcopy(self.model)
        self.rnd = RND(obs.shape[0], 64, 124)  # random network distillation module
        self.gamma = gamma  # discount factor
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=0.001)  # only the DQN parameters are optimized
        self.batch_size = 64
        self.epsilon = 0.1
        self.buffer_size = buffer_size
        self.step_counter = 0
        self.epsi_high = 0.9
        self.epsi_low = 0.05
        self.steps = 0
        self.count = 0
        self.decay = 200
        self.eps = self.epsi_high
        self.update_target_step = 500
        self.log = logger()
        self.log.add_log('real_return')
        self.log.add_log('combined_return')
        self.log.add_log('avg_loss')

        self.replay_buffer = deque(maxlen=buffer_size)
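The `epsi_high`/`epsi_low`/`decay` trio matches the common exponential epsilon-decay schedule; a sketch of the update these fields usually feed (hypothetical, inferred from the attribute names; requires `import math`):

    # Hypothetical schedule: anneal epsilon from epsi_high toward epsi_low
    # with time constant `decay`, called once per action selection.
    def update_eps(self):
        self.steps += 1
        self.eps = self.epsi_low + (self.epsi_high - self.epsi_low) * \
            math.exp(-1.0 * self.steps / self.decay)
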
Example #4
    def __init__(self, problem, gamma=1.0, eps=0.1, lr=1e-4, cuda_flag=True):
        self.problem = problem
        self.G = problem.g
        self.k = problem.k
        self.m = problem.m
        self.ajr = problem.adjacent_reserve
        self.hidden_dim = problem.hidden_dim
        self.n = self.k * self.m
        self.eps = eps
        if cuda_flag:
            self.model = DQNet(k=self.k,
                               m=self.m,
                               ajr=self.ajr,
                               num_head=4,
                               hidden_dim=self.hidden_dim).cuda()
        else:
            self.model = DQNet(k=self.k,
                               m=self.m,
                               ajr=self.ajr,
                               num_head=4,
                               hidden_dim=self.hidden_dim)
        self.gamma = gamma
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.experience_replay_buffer = []
        self.replay_buffer_max_size = 1e3
        self.cuda = cuda_flag
        self.log = logger()
        self.log.add_log('tot_return')
        self.log.add_log('TD_error')
        self.log.add_log('entropy')
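Unlike the deque-based examples, this buffer is a plain list with a cap of `1e3`, so the class presumably trims it by hand; a sketch of that bookkeeping (the `store` helper is hypothetical):

    # Hypothetical append-and-trim for the list-based buffer: drop the oldest
    # transitions once the cap is exceeded.
    def store(self, transition):
        self.experience_replay_buffer.append(transition)
        while len(self.experience_replay_buffer) > self.replay_buffer_max_size:
            self.experience_replay_buffer.pop(0)
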
Example #5
    def __init__(self,
                 problem,
                 action_type='swap',
                 gamma=1.0,
                 eps=0.1,
                 lr=1e-4,
                 action_dropout=1.0,
                 sample_batch_episode=False,
                 replay_buffer_max_size=5000,
                 epi_len=50,
                 new_epi_batch_size=10,
                 edge_info='adj_weight',
                 readout='mlp',
                 explore_method='epsilon_greedy',
                 priority_sampling=False,
                 clip_target=False):

        self.problem = problem
        self.action_type = action_type
        self.G = problem.g  # the graph
        self.k = problem.k  # num of clusters
        self.ajr = problem.adjacent_reserve  # degree of node in graph
        self.hidden_dim = problem.hidden_dim  # hidden dimension for node representation
        self.n = problem.N  # num of nodes
        self.eps = eps  # constant for exploration in dqn
        self.edge_info = edge_info
        self.explore_method = explore_method
        self.clip_target = clip_target
        self.model = DQNet(k=self.k, n=self.n, num_head=2, hidden_dim=self.hidden_dim, edge_info=self.edge_info, readout=readout).cuda()
        self.model_target = dc(self.model)
        self.gamma = gamma  # reward decay const
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.sample_batch_episode = sample_batch_episode
        self.experience_replay_buffer = []
        self.replay_buffer_max_size = replay_buffer_max_size
        self.buf_epi_len = epi_len  # episode length (default 50)
        self.new_epi_batch_size = new_epi_batch_size  # number of new episodes per batch (default 10)
        self.cascade_replay_buffer = [[] for _ in range(self.buf_epi_len)]
        self.cascade_replay_buffer_weight = torch.zeros((self.buf_epi_len, self.new_epi_batch_size))
        self.stage_max_sizes = [self.replay_buffer_max_size // self.buf_epi_len] * self.buf_epi_len  # [100, 100, ..., 100]
        # self.stage_max_sizes = list(range(100,100+4*50, 4))
        self.buffer_actual_size = sum(self.stage_max_sizes)
        self.priority_sampling = priority_sampling
        self.cascade_buffer_kcut_value = torch.zeros((self.buf_epi_len, self.new_epi_batch_size))
        self.action_dropout = action_dropout
        self.log = logger()
        self.Q_err = 0  # Q error
        self.log.add_log('tot_return')
        self.log.add_log('Q_error')
        self.log.add_log('entropy')
        self.log.add_log('R_signal')
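The cascade buffer is indexed by episode step, with one stage per step and a per-stage cap from `stage_max_sizes`; a sketch of how step-`t` transitions would be filed into it (hypothetical, inferred from the shapes initialized above):

    # Hypothetical insertion: transitions collected at step t of the newest
    # batch of episodes land in stage t, trimmed oldest-first to its cap.
    def store_stage(self, t, new_transitions):
        stage = self.cascade_replay_buffer[t]
        stage.extend(new_transitions)
        overflow = len(stage) - self.stage_max_sizes[t]
        if overflow > 0:
            del stage[:overflow]
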
Example #6
    def __init__(self, problem, cuda_flag):
        self.problem = problem
        ndim = self.problem.get_graph_dims()
        if cuda_flag:
            self.model = ACNet(ndim, 264, 1).cuda()
        else:
            # hidden sizes 128 and 264 were both tried here
            self.model = ACNet(ndim, 264, 1)
        self.gamma = 0.98
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0001)
        self.batch_size = 32
        self.num_episodes = 1
        self.cuda = cuda_flag
        self.log = logger()
        self.log.add_log('tot_return')
        self.log.add_log('TD_error')
        self.log.add_log('entropy')
Example #7
    def __init__(self, problem, cuda_flag):
        self.problem = problem
        ndim = self.problem.get_graph_dims()
        self.num_grp = self.problem.num_grp
        self.n = self.problem.N
        if cuda_flag:
            self.model = ACNet(ndim, 128 * 2, self.num_grp, self.n).cuda()
            # self.model = torch.nn.DataParallel(self.model)
        else:
            self.model = ACNet(ndim, 128 * 2, self.num_grp, self.n)
        self.gamma = 0.98
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0003)
        self.batch_size = 32
        self.num_episodes = 1
        self.cuda = cuda_flag
        self.log = logger()
        self.log.add_log('tot_return')
        self.log.add_log('TD_error')
        self.log.add_log('entropy')
Example #8
    def __init__(self, env, gamma, timer, buffer_size, scale_intrinsic):
        self.env = env
        actions = env.action_space
        observations = env.observation_space
        # observations = np.ones((2, 1))
        self.timer = timer
        self.gamma = gamma
        self.buffer_size = buffer_size
        self.scale_intrinsic = scale_intrinsic

        self.model = NeuralNet(observations.shape[0], actions.n, 64)
        self.target_model = copy.deepcopy(self.model)
        self.rnd = RandomNetwork(observations.shape[0], 64, 124)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)

        self.batch_size = 64

        self.epsi_high = 0.9
        # self.epsilon = self.epsi_high
        self.epsi_low = 0.05

        self.step_counter = 0
        self.steps = 0
        self.count = 0
        self.decay = 200
        self.eps = self.epsi_high
        # self.eps = 0.3

        self.update_target_step = 300
        self.log = logger()
        self.log.add_log('real_return')
        self.log.add_log('combined_return')
        self.log.add_log('avg_loss')

        self.replay_buffer = deque(maxlen=buffer_size)
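`scale_intrinsic` together with the `real_return`/`combined_return` logs points at the standard RND reward mix; a sketch of that combination (the `intrinsic_reward` method on RandomNetwork is an assumed API, not confirmed by the source):

    # Hypothetical reward mix for RND-style exploration: the environment's
    # extrinsic reward plus a scaled novelty bonus from the random network.
    def combined_reward(self, state, reward):
        intrinsic = self.rnd.intrinsic_reward(state)  # assumed RND API
        return reward + self.scale_intrinsic * intrinsic
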
Example #9
    def __init__(self,
                 problem,
                 action_type='swap',
                 gamma=1.0,
                 eps=0.1,
                 lr=1e-4,
                 replay_buffer_max_size=10,
                 replay_buffer_max_size2=5000,
                 extended_h=False,
                 time_aware=False,
                 use_x=False,
                 edge_info='adj_weight',
                 readout='mlp',
                 clip_target=False,
                 use_calib_reward=False,
                 cuda_flag=True):

        self.problem = problem
        self.action_type = action_type
        self.G = problem.g  # the graph
        self.k = problem.k  # num of clusters
        self.m = problem.m  # num of nodes in cluster
        self.ajr = problem.adjacent_reserve  # degree of node in graph
        self.hidden_dim = problem.hidden_dim  # hidden dimension for node representation
        self.n = self.k * self.m  # num of nodes
        self.eps = eps  # constant for exploration in dqn
        self.extended_h = extended_h
        self.use_x = use_x
        self.edge_info = edge_info
        self.clip_target = clip_target
        self.use_calib_reward = use_calib_reward
        if cuda_flag:
            self.model = DQNet(k=self.k,
                               m=self.m,
                               ajr=self.ajr,
                               num_head=4,
                               hidden_dim=self.hidden_dim,
                               extended_h=self.extended_h,
                               use_x=self.use_x,
                               edge_info=self.edge_info,
                               readout=readout).cuda()
        else:
            self.model = DQNet(k=self.k,
                               m=self.m,
                               ajr=self.ajr,
                               num_head=4,
                               hidden_dim=self.hidden_dim,
                               extended_h=self.extended_h,
                               use_x=self.use_x,
                               edge_info=self.edge_info,
                               readout=readout)
        # self.model.apply(self.weights_init)  # initialize weight
        self.model_target = dc(self.model)
        self.gamma = gamma  # reward decay const
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.experience_replay_buffer = []
        self.experience_replay_buffer2 = []
        self.buffer_episode_offset = [0]
        self.buffer_indices = []
        self.replay_buffer_max_size = replay_buffer_max_size
        self.replay_buffer_max_size2 = replay_buffer_max_size2
        self.time_aware = time_aware
        self.cuda = cuda_flag
        self.log = logger()
        self.Q_err = 0  # Q error
        self.log.add_log('tot_return')
        self.log.add_log('Q_error')
        self.log.add_log('entropy')
        self.log.add_log('R_signal')
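`model_target` plus the `clip_target` flag suggest a clipped double-Q style TD target; a sketch under that assumption (the helper takes precomputed Q values as inputs, since DQNet here consumes graph inputs rather than flat tensors):

    # Hypothetical TD target: take the target network's next-state value,
    # optionally clipped by the online network's estimate (clipped double Q).
    def td_target(self, reward, q_next_target, q_next_online):
        if self.clip_target:
            q_next_target = torch.min(q_next_target, q_next_online)
        return reward + self.gamma * q_next_target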