Exemple #1
0
class Env:
    """Episode-based environment in which a player must reach a food target.

    ``reset()`` starts a new episode and returns the first observation;
    ``step(action)`` advances the simulation by one tick and returns
    ``(observation, reward, done)``.

    NOTE(review): ``Ballon``, ``Target`` and ``self.get_image`` are not
    defined in this class; they are expected to be supplied elsewhere.
    """

    SIZE = 500
    RETURN_IMAGES = False
    MOVE_PENALTY = 1
    ENEMY_PENALTY = 300
    FOOD_REWARD = 50
    #OBSERVATION_SPACE_VALUES = 8
    ACTION_SPACE_SIZE = 5

    # Values that used to be repeated as bare literals in reset()/step().
    MAX_EPISODE_STEPS = 2500  # step count at which an episode is cut off
    BOUNDS_WIDTH = 1920       # passed to player.boundries(); scales the x delta
    BOUNDS_HEIGHT = 1080      # passed to player.boundries(); scales the y delta
    VEL_SCALE = 153           # divisor applied to both velocity components
    ANGVEL_SCALE = 2.5        # divisor applied to the angular velocity

    def _food_delta(self):
        """Return ``[dx, dy]`` pointing from the player's ``loc`` to the food."""
        return [
            self.food.x - self.player.loc[0],
            self.food.y - self.player.loc[1],
        ]

    def _food_distance(self):
        """Return the Euclidean distance between the player and the food."""
        dx, dy = self._food_delta()
        return np.sqrt(dx ** 2 + dy ** 2)

    def _observe(self, delta):
        """Build the observation for the current state.

        Args:
            delta: ``[dx, dy]`` from player to food for the *current* state.

        Returns:
            ``np.array(self.get_image())`` when ``RETURN_IMAGES`` is set,
            otherwise a 6-element list: scaled dx, dy, vel x, vel y,
            angular velocity and rotation divided by 2*pi.
        """
        if self.RETURN_IMAGES:
            return np.array(self.get_image())
        return [
            delta[0] / self.BOUNDS_WIDTH,
            delta[1] / self.BOUNDS_HEIGHT,
            self.player.vel[0] / self.VEL_SCALE,
            self.player.vel[1] / self.VEL_SCALE,
            self.player.angvel / self.ANGVEL_SCALE,
            self.player.rot / (2 * np.pi),
        ]

    def reset(self):
        """Start a new episode and return its initial observation."""
        self.player = Ballon("ballon", 50, 50, 1, 1, 100, 1)
        self.food = Target(100, 100, [100, 100])
        self.food.instantiate()
        # Re-place the food for as long as check() reports a hit on the player.
        # NOTE(review): this reads player.x / player.y while every other place
        # uses player.loc -- confirm both describe the same position.
        while self.food.check(self.player.x, self.player.y):
            self.food.instantiate()

        self.startdpos = [
            self.player.loc[0] - self.food.x,
            self.player.loc[1] - self.food.y,
        ]
        # Reference distance used to scale the timeout reward in step().
        self.startdis = self._food_distance()

        self.episode_step = 0

        # Always tracked (also in image mode): step() needs the previous
        # delta to compute the shaped movement reward.
        self.dposold = self._food_delta()
        return self._observe(self.dposold)

    def step(self, action):
        """Apply ``action``, advance one tick and score the transition.

        Args:
            action: forwarded unchanged to ``self.player.action``.

        Returns:
            ``(new_observation, reward, done)`` where

            * ``new_observation`` describes the state *after* the move,
            * ``reward`` is ``FOOD_REWARD`` when the food is reached; on
              timeout it is ``100 - 100 * distance / start_distance``;
              otherwise ``-MOVE_PENALTY`` plus the per-axis reduction in
              absolute distance to the food since the previous tick,
            * ``done`` is True when the food was reached or the step limit
              was hit.
        """
        self.episode_step += 1
        self.player.action(action)

        self.player.setVelocity()
        self.player.move()
        self.player.boundries(self.BOUNDS_WIDTH, self.BOUNDS_HEIGHT)

        # Computed unconditionally so the reward below also works when
        # RETURN_IMAGES is enabled.
        self.dpos = self._food_delta()
        # The observation must reflect the post-move delta, not the old one.
        new_observation = self._observe(self.dpos)

        reached_food = bool(
            self.food.check(self.player.loc[0], self.player.loc[1])
        )
        timed_out = self.episode_step >= self.MAX_EPISODE_STEPS

        if reached_food:
            reward = self.FOOD_REWARD
        elif timed_out:
            reward = 100 - (self._food_distance() / self.startdis) * 100
        else:
            progress_x = np.abs(self.dposold[0]) - np.abs(self.dpos[0])
            progress_y = np.abs(self.dposold[1]) - np.abs(self.dpos[1])
            reward = -self.MOVE_PENALTY + progress_x + progress_y

        # Use explicit flags: comparing reward with FOOD_REWARD would also
        # end the episode when a shaped reward happens to equal it.
        done = reached_food or timed_out

        self.dposold[:] = self.dpos

        return new_observation, reward, done

    def render(self, screen):
        """Draw the player and the food onto ``screen``."""
        self.player.draw(screen)
        self.food.draw(screen)
Exemple #2
0
# Scalar state for the interactive demo. The names are kept exactly as they
# were: the event loop below assigns running, force_right and force_left.
rf = 0
oh = ow = 50
force_right = force_left = 0
force_front = force_rot = 0
mass = 1
r = .1
inertia = mass * r * r
running = True

# NOTE(review): `input` and `object` shadow Python builtins; they are left
# unrenamed because later code may refer to these names.
input = Input(1, 1)
object = PO("rect", 100, 100, 1, 1, 100, 1)
target = Target(100, 100, [100, 100])
pygame.display.set_caption("TheProgram")
target.instantiate(100, 100)
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_e:
                force_right = .2
            if event.key == pygame.K_d:
                force_right = .1
            if event.key == pygame.K_c:
                force_right = -.1
            if event.key == pygame.K_q:
                force_left = .2
            if event.key == pygame.K_a:
                force_left = .1