# NOTE(review): this physical line arrived whitespace-collapsed. It opens with
# the tail of a function whose `def` (and the if-chain / loop that the `elif`
# belongs to) lies outside this view. That tail is left exactly as found
# because its nesting cannot be recovered from here. As written, the `#print`
# comment swallows the remainder of that line, including
# `return target else: return None` -- restore the line breaks when the full
# function is available.
elif atom == "NObusy": mode = (obs1[-1] == 0) else: print(atom, "atom~~~~~") mode = "None!!" modes.append(mode) if modes[0] == True: #print(" modes[0]...",obs1[-1],"target_move...",target) return target else: return None

# --- Module-level experiment setup (line breaks restored, statements unchanged) ---
n_agents = 5
height = 15
# NOTE(review): the environment is constructed before the seeds below are set,
# so any randomness inside Lift's constructor is not covered by them -- confirm
# whether that is intended.
world = env1.Lift(n_agents, height)
reward_record = []

# Fix the NumPy and `th` (presumably torch -- confirm the import) RNG seeds.
np.random.seed(1234)
th.manual_seed(1234)
#world.seed(1234)

n_states = 4 * height + 1  # derived from `height`; presumably the observation length -- TODO confirm
n_actions = 3
capacity = 1000000  # presumably replay-buffer capacity -- TODO confirm against its consumer
batch_size = 128
n_episode = 90 #120#0#00
max_steps = 150 #0
episodes_before_train = 25
# NOTE(review): both functions below take `self` and read self.data,
# self.pointer and self.capacity, so they are presumably methods of a
# replay-memory class whose header is outside this view -- re-nest them under
# that class when restoring the file's layout.
def store_transition(self, s, a, r, s_):
    """Store one (s, a, r, s_) transition as a single row of ``self.data``.

    The pieces are concatenated with ``np.hstack`` (the scalar reward is
    wrapped in a list so it contributes one column) and written at
    ``self.pointer % self.capacity``, so once the write counter passes the
    capacity the earliest rows are overwritten.
    """
    row = np.hstack((s, a, [r], s_))
    # Wrap the ever-growing write counter onto a valid row index.
    slot = self.pointer % self.capacity
    self.data[slot, :] = row
    self.pointer += 1


def sample(self, n):
    """Return ``n`` rows of ``self.data`` picked at random, with replacement.

    Row indices are drawn by ``np.random.choice`` over ``range(self.capacity)``.
    Asserts that at least ``self.capacity`` transitions have been stored.
    """
    assert self.pointer >= self.capacity, 'Memory has not been fulfilled'
    picked = np.random.choice(self.capacity, size=n)
    return self.data[picked, :]


# --- Environment and problem dimensions ---
height = 6
ag_num = 2
# Stands in for the earlier gym setup: gym.make(ENV_NAME), env.unwrapped, env.seed(1).
env = env1.Lift(ag_num, height)

state_dim = 4 * height + 1  # previously env.observation_space.shape[0]
action_dim = 3  # previously env.action_space.shape[0]
action_bound = 2  # previously env.action_space.high

# --- TensorFlow placeholders (state, reward, next state) ---
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, state_dim], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, shape=[None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, state_dim], name='s_')

sess = tf.Session()
# NOTE(review): this physical line arrived whitespace-collapsed; the line
# breaks below are restored and the statements are otherwise unchanged.
#
# The triple-quoted string that follows is a bare expression statement holding
# disabled setup code (MAWaterWorld_mod world and a visdom client). It is kept
# byte-for-byte on one line.
''' food_reward = 10. poison_reward = -1. encounter_reward = 0.01 world = MAWaterWorld_mod(n_pursuers=2, n_evaders=50, n_poison=50, obstacle_radius=0.04, food_reward=food_reward, poison_reward=poison_reward, encounter_reward=encounter_reward, n_coop=n_coop, sensor_range=0.2, obstacle_loc=None, ) vis = visdom.Visdom(port=5274) '''

# NOTE(review): the environment is constructed before the seeds below are set,
# so any randomness inside Lift's constructor is not covered by them -- confirm
# whether that is intended.
world = env1.Lift(5,8)
reward_record = []

# Fix the NumPy and `th` (presumably torch -- confirm the import) RNG seeds.
np.random.seed(1234)
th.manual_seed(1234)
#world.seed(1234)

n_agents = 5  #world.n_pursuers
# NOTE(review): the first argument to Lift above is also 5 -- presumably these
# must stay in sync; confirm against env1.Lift's signature.
n_states = 33  # NOTE(review): equals 4 * 8 + 1 for the second Lift argument (8) -- presumably tied to it; confirm
n_actions = 3
capacity = 1000000  # presumably replay-buffer capacity -- TODO confirm against its consumer
batch_size = 128
n_episode = 100#0#00
max_steps = 160#0
episodes_before_train = 15