Example #1
        elif atom == "NObusy":
            mode = (obs1[-1] == 0)
        else:
            print("unknown atom:", atom)  # unrecognized predicate name
            mode = False                  # treat unknown atoms as not satisfied
        modes.append(mode)
    if modes[0]:
        #print(" modes[0]...",obs1[-1],"target_move...",target)
        return target
    else:
        return None


n_agents = 5
height = 15
world = env1.Lift(n_agents, height)
reward_record = []

np.random.seed(1234)
th.manual_seed(1234)
#world.seed(1234)

n_states = 4 * height + 1
n_actions = 3
capacity = 1000000
batch_size = 128

n_episode = 90
max_steps = 150
episodes_before_train = 25
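# A minimal sketch of the episode loop these hyperparameters would drive.
# Hedged assumptions: env1.Lift follows the gym-style reset()/step() protocol,
# and `maddpg` is a hypothetical trainer object (select_action, memory.push,
# and update_policy are illustrative names, not confirmed by this snippet).
import numpy as np

for i_episode in range(n_episode):
    obs = world.reset()                              # assumed gym-style reset
    total_reward = 0.0
    for t in range(max_steps):
        actions = maddpg.select_action(obs)          # hypothetical policy call
        obs_, reward, done, _ = world.step(actions)  # assumed gym-style step
        maddpg.memory.push(obs, actions, obs_, reward)  # hypothetical buffer
        obs = obs_
        total_reward += float(np.sum(reward))
        if done:
            break
    if i_episode >= episodes_before_train:
        maddpg.update_policy()                       # hypothetical learning step
    reward_record.append(total_reward)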
Example #2
    def store_transition(self, s, a, r, s_):
        # Flatten (state, action, reward, next_state) into one buffer row.
        transition = np.hstack((s, a, [r], s_))
        index = self.pointer % self.capacity  # overwrite the oldest entry once the buffer wraps
        self.data[index, :] = transition
        self.pointer += 1

    def sample(self, n):
        assert self.pointer >= self.capacity, 'Memory has not been filled yet'
        indices = np.random.choice(self.capacity, size=n)
        return self.data[indices, :]
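# Usage sketch for the ring-buffer memory above. The class name `Memory` and
# its constructor are assumptions (only store_transition/sample appear in the
# snippet); the row width follows the hstack layout: s_dim + a_dim + 1 + s_dim.
import numpy as np

class Memory:
    def __init__(self, capacity, dims):
        self.capacity = capacity
        self.data = np.zeros((capacity, dims), dtype=np.float32)
        self.pointer = 0

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, a, [r], s_))
        index = self.pointer % self.capacity
        self.data[index, :] = transition
        self.pointer += 1

    def sample(self, n):
        assert self.pointer >= self.capacity, 'Memory has not been filled yet'
        indices = np.random.choice(self.capacity, size=n)
        return self.data[indices, :]

s_dim, a_dim = 25, 3                       # example dimensions
mem = Memory(capacity=100, dims=2 * s_dim + a_dim + 1)
for _ in range(mem.capacity):              # fill once so sample() passes its assert
    mem.store_transition(np.zeros(s_dim), np.zeros(a_dim), 0.0, np.zeros(s_dim))
batch = mem.sample(32)                     # -> array of shape (32, 54)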


height = 6
ag_num = 2
env = env1.Lift(ag_num, height)  #gym.make(ENV_NAME)
#env = env.unwrapped
#env.seed(1)
state_dim = 4 * height + 1  #env.observation_space.shape[0]
action_dim = 3  #env.action_space.shape[0]
action_bound = 2  #env.action_space.high

# All placeholders for the TF graph
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, state_dim], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, state_dim], name='s_')

sess = tf.Session()
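# Compatibility note: tf.placeholder and tf.Session are TF1 graph-mode APIs.
# Under TensorFlow 2.x the block above only runs through the v1 compatibility
# layer, imported before any of it executes:
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()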
Example #3
'''
food_reward = 10.
poison_reward = -1.
encounter_reward = 0.01
world = MAWaterWorld_mod(n_pursuers=2, n_evaders=50,
                         n_poison=50, obstacle_radius=0.04,
                         food_reward=food_reward,
                         poison_reward=poison_reward,
                         encounter_reward=encounter_reward,
                         n_coop=n_coop,
                         sensor_range=0.2, obstacle_loc=None, )

vis = visdom.Visdom(port=5274)
'''
world = env1.Lift(5, 8)
reward_record = []

np.random.seed(1234)
th.manual_seed(1234)
#world.seed(1234)
n_agents = 5  # world.n_pursuers in the original WaterWorld setup
n_states = 33  # == 4 * height + 1 with height = 8, as in Example #1
n_actions = 3
capacity = 1000000
batch_size = 128

n_episode = 100
max_steps = 160
episodes_before_train = 15
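# A minimal post-training sketch (assumption: matplotlib is available) for
# inspecting reward_record; the commented-out block above suggests the original
# code used visdom for the same purpose.
import matplotlib.pyplot as plt

plt.plot(reward_record)
plt.xlabel('episode')
plt.ylabel('total reward')
plt.title('MADDPG on Lift(5, 8)')
plt.show()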