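# NOTE: The imports below are not part of the original excerpt; they are a
# best-guess sketch of the module-level imports these SerpentAI game agents
# rely on. The exact serpent.* module paths (notably RainbowDQNAgent and
# Loggers) vary between SerpentAI versions and should be checked against the
# installed framework. Several names used further down (config, readInfo,
# playAnimation, set_pos, set_pos_aimbot, net, ln, origbox, CONFIDENCE,
# THRESHOLD, KeyboardMouseActionSpace) are defined elsewhere in the original
# projects and are not reconstructed here; SerpentPika2GameAgent also appears
# to use a project-local RainbowDQNAgent with a different constructor than the
# framework agent.
import collections
import gc
import math
import time
from datetime import datetime

import cv2
import numpy as np
import pytesseract
import pywinauto
from colorama import Fore, Style
from mss import mss
from PIL import Image

import serpent.cv
import serpent.utilities
from serpent.enums import InputControlTypes
from serpent.frame_grabber import FrameGrabber
from serpent.game_agent import GameAgent
from serpent.input_controller import KeyboardKey
from serpent.logger import Loggers
from serpent.machine_learning.reinforcement_learning.agents.rainbow_dqn_agent import RainbowDQNAgent
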
class SerpentHorizonChaseTurboGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.frame_handler_pause_callbacks["PLAY"] = self.handle_play_pause

    def setup_play(self):
        self.environment = self.game.environments["RACE"](
            game_api=self.game.api,
            input_controller=self.input_controller,
            episodes_per_race_track=100000000000
        )

        self.game_inputs = [
            {
                "name": "STEERING",
                "control_type": InputControlTypes.DISCRETE,
                "inputs": self.game.api.combine_game_inputs(["STEERING"])
            }
        ]

        # self.agent = RandomAgent(
        #     "horAIzon",
        #     game_inputs=self.game_inputs,
        #     callbacks=dict(
        #         after_observe=self.after_agent_observe
        #     )
        # )

        self.agent = RainbowDQNAgent(
            "horAIzon",
            game_inputs=self.game_inputs,
            callbacks=dict(
                after_observe=self.after_agent_observe,
                before_update=self.before_agent_update,
                after_update=self.after_agent_update
            ),
            rainbow_kwargs=dict(
                replay_memory_capacity=250000,
                observe_steps=10000,
                hidden_size=512,
                conv_layers=3,
                discount=0.9,
                max_steps=2000000,
                noisy_std=0.1
            ),
            logger=Loggers.COMET_ML,
            logger_kwargs=dict(
                api_key=config["comet_ml_api_key"],
                project_name="serpent-ai-hct",
                reward_func=self.reward
            )
        )

        # self.agent = PPOAgent(
        #     "horAIzon",
        #     game_inputs=self.game_inputs,
        #     callbacks=dict(
        #         after_observe=self.after_agent_observe,
        #         before_update=self.before_agent_update,
        #         after_update=self.after_agent_update
        #     ),
        #     input_shape=(100, 100),
        #     ppo_kwargs=dict(
        #         memory_capacity=5120,
        #         discount=0.9,
        #         epochs=10,
        #         batch_size=64,
        #         entropy_regularization_coefficient=0.001,
        #         epsilon=0.2
        #     ),
        #     logger=Loggers.COMET_ML,
        #     logger_kwargs=dict(
        #         api_key=config["comet_ml_api_key"],
        #         project_name="serpent-ai-hct",
        #         reward_func=self.reward
        #     )
        # )

        self.agent.logger.experiment.log_other("race_track", "Midnight")

        self.analytics_client.track(event_key="GAME_NAME", data={"name": "Horizon Chase Turbo"})

        self.environment.new_episode(maximum_steps=2400)  # 5 minutes

    def handle_play(self, game_frame, game_frame_pipeline):
        self.paused_at = None

        valid_game_state = self.environment.update_game_state(game_frame)

        if not valid_game_state:
            return None

        reward = self.reward(self.environment.game_state)

        terminal = (
            self.environment.game_state["is_too_slow"] or
            self.environment.game_state["is_out_of_fuel"] or
            self.environment.game_state["is_race_over"] or
            self.environment.episode_over
        )

        self.agent.observe(reward=reward, terminal=terminal)

        if not terminal:
            game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
            agent_actions = self.agent.generate_actions(game_frame_buffer)

            self.environment.perform_input(agent_actions)
        else:
            self.environment.clear_input()
            self.agent.reset()

            if self.environment.game_state["is_race_over"]:
                time.sleep(5)
                self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                time.sleep(11)

                if (self.environment.episode + 1) % self.environment.episodes_per_race_track == 0:
                    self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                    time.sleep(8)

                    self.game.api.select_random_region_track(self.input_controller)
            else:
                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                time.sleep(1)

                if (self.environment.episode + 1) % self.environment.episodes_per_race_track == 0:
                    for _ in range(3):
                        self.input_controller.tap_key(KeyboardKey.KEY_S)
                        time.sleep(0.1)

                    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                    time.sleep(1)

                    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                    time.sleep(8)

                    self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                    time.sleep(1)
                    self.game.api.select_random_region_track(self.input_controller)

            self.environment.end_episode()
            self.environment.new_episode(maximum_steps=2400)

    def handle_play_pause(self):
        self.input_controller.handle_keys([])

    def reward(self, game_state):
        value = game_state["current_speed"] ** 1.5

        if value > 5200:
            value = 5200

        reward = serpent.cv.normalize(value, 0, 5200)

        if game_state["fuel_levels"][0] > game_state["fuel_levels"][1]:
            reward += 0.5

        time_penalty = 0.1
        reward -= time_penalty

        if game_state["is_race_over"]:
            reward = 1

        if reward > 1:
            reward = 1

        return reward

    def after_agent_observe(self):
        self.environment.episode_step()

    def before_agent_update(self):
        self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        time.sleep(1)

    def after_agent_update(self):
        self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        time.sleep(3)

class SerpentCODGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.frame_handler_pause_callbacks["PLAY"] = self.handle_play_pause

    def setup_play(self):
        self.environment = self.game.environments["GAME"](
            game_api=self.game.api,
            input_controller=self.input_controller,
            episodes_per_startregions_track=100000000000
        )

        self.game_inputs = [
            {
                "name": "CONTROLS",
                "control_type": InputControlTypes.DISCRETE,
                "inputs": self.game.api.combine_game_inputs(["MOVEMENT", "COMBAT", "CURSOR"])
            }
        ]

        self.agent = RainbowDQNAgent(
            "COD",
            game_inputs=self.game_inputs,
            callbacks=dict(
                after_observe=self.after_agent_observe,
                before_update=self.before_agent_update,
                after_update=self.after_agent_update
            ),
            rainbow_kwargs=dict(
                replay_memory_capacity=250000,
                observe_steps=100,
                batch_size=10,
                save_steps=300,
                model="datasets/rainbow_dqn_COD.pth"
            ),
            logger=Loggers.COMET_ML,
            logger_kwargs=dict(
                api_key="api_key_from_comet_ml",
                project_name="serpent-ai-cod",
                reward_func=self.reward
            )
        )

        self.analytics_client.track(event_key="COD", data={"name": "COD"})
        self.agent.logger.experiment.log_other("game", "COD")

        self.environment.new_episode(maximum_steps=350)  # 5 minutes

        self.overs = 0
        self.input_non_lethal = False

    def handle_play(self, game_frame, game_frame_pipeline):
        self.paused_at = None

        with mss() as sct:
            monitor_var = sct.monitors[1]
            monitor = sct.grab(monitor_var)

        valid_game_state = self.environment.update_startregions_state(monitor)

        if not valid_game_state:
            return None

        reward, over_boolean = self.reward(self.environment.startregions_state, 1.0)
        terminal = over_boolean

        self.agent.observe(reward=reward, terminal=terminal)

        if not terminal:
            game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
            agent_actions = self.agent.generate_actions(game_frame_buffer)

            print("Current Action: ")
            print(agent_actions)

            str_agent_actions = str(agent_actions)

            if "MOVE MOUSE X" in str_agent_actions:
                set_pos(200, 0)
            if "MOVE MOUSE Y" in str_agent_actions:
                set_pos(0, 200)
            if "MOVE MOUSE XY" in str_agent_actions:
                set_pos(100, 100)
            if "MOVE MOUSE X2" in str_agent_actions:
                set_pos(-200, 0)
            if "MOVE MOUSE Y2" in str_agent_actions:
                set_pos(0, -200)
            if "MOVE MOUSE XY2" in str_agent_actions:
                set_pos(-100, -100)
            if "MOVE MOUSE XY3" in str_agent_actions:
                set_pos(-100, 100)
            if "MOVE MOUSE XY4" in str_agent_actions:
                set_pos(100, -100)

            if "LETHAL" in str_agent_actions:
                self.input_non_lethal = True

            self.human()

            self.environment.perform_input(agent_actions)
        else:
            self.environment.clear_input()
            self.agent.reset()

            time.sleep(30)

            # To Do
            # Choose Loadout (Medium Range)

            self.environment.end_episode()
            self.environment.new_episode(maximum_steps=350)

            print("New Episode")

    def handle_play_pause(self):
        self.input_controller.handle_keys([])

    def num_there(self, s):
        return any(i.isdigit() for i in s)

    def get_health(self, image):
        img = Image.frombytes('RGB', image.size, image.rgb)

        red_O = 0

        for red in img.getdata():
            if red == (117, 54, 34):
                red_O += 1

        return red_O

    def get_xp(self, image_xp):
        img = Image.frombytes('RGB', image_xp.size, image_xp.rgb)

        pixels = 0

        for pixel in img.getdata():
            if pixel == (255, 194, 21):
                pixels += 1

        return pixels

    def is_startregions_over(self, image):
        image = Image.frombytes("RGB", image.size, image.bgra, "raw", "BGRX")

        ocr_result = pytesseract.image_to_string(image, lang='eng')

        print("Text: ")
        print(ocr_result)

        if "KILLCAM" in ocr_result:
            return True
        else:
            return False

    def human(self):
        with mss() as sct:
            W, H = None, None

            frame = np.array(sct.grab(origbox))
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)

            if W is None or H is None:
                (H, W) = frame.shape[:2]

            frame = cv2.UMat(frame)

            blob = cv2.dnn.blobFromImage(frame, 1 / 260, (150, 150), swapRB=False, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(ln)

            boxes = []
            confidences = []
            classIDs = []

            for output in layerOutputs:
                for detection in output:
                    scores = detection[5:]
                    classID = 0
                    confidence = scores[classID]

                    if confidence > CONFIDENCE:
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")

                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

            idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, THRESHOLD)

            if len(idxs) > 0:
                bestMatch = confidences[np.argmax(confidences)]

                # loop over the indexes we are keeping
                for i in idxs.flatten():
                    # extract the bounding box coordinates
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])

                    # draw target dot on the frame
                    cv2.circle(frame, (int(x + w / 2), int(y + h / 5)), 5, (0, 0, 255), -1)

                    # draw a bounding box rectangle and label on the frame
                    # color = [int(c) for c in COLORS[classIDs[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)

                    text = "TARGET {}%".format(int(confidences[i] * 100))
                    cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                    if bestMatch == confidences[i]:
                        mouseX = origbox[0] + (x + w / 1.5)
                        mouseY = origbox[1] + (y + h / 5)

                        mouseX = int(round(mouseX))
                        mouseY = int(round(mouseY))

                        set_pos_aimbot(mouseX, mouseY)

                        pywinauto.mouse.click(button='left', coords=(mouseX, mouseY))
                        pywinauto.mouse.click(button='left', coords=(mouseX, mouseY))
                        pywinauto.mouse.click(button='left', coords=(mouseX, mouseY))
                        pywinauto.mouse.click(button='left', coords=(mouseX, mouseY))
                        pywinauto.mouse.click(button='left', coords=(mouseX, mouseY))
                        pywinauto.mouse.click(button='left', coords=(mouseX, mouseY))

    def reward(self, game_state, object_reward_func):
        with mss() as sct:
            image = sct.grab(sct.monitors[1])
            value = self.get_health(image)

            print("Health: ")
            print(value * -1)

            monitor = {"top": 452, "left": 1000, "width": 144, "height": 51, "mon": 1}
            image_xp = sct.grab(monitor)
            xp = self.get_xp(image_xp)

            monitor_custom_game = {"top": 47, "left": 50, "width": 230, "height": 66, "mon": 1}
            image_over = sct.grab(monitor_custom_game)
            over_check = self.is_startregions_over(image_over)

        reward = 0.0
        over = False

        if over_check:
            reward = 0.0
            self.overs += 1

            if self.overs >= 6:
                print("Game Over")
                over = True
                self.overs = 0
            else:
                over = False
        else:
            reward = 0.0

            if value >= 1:
                reward += -1.5
            elif value < 1:
                reward += 0.0

            if value >= 1 and self.input_non_lethal:
                reward += 2.5
                self.input_non_lethal = False

            if xp >= 7:
                reward += 3.0

        print("Reward: ")
        print(reward)

        return reward, over

    def after_agent_observe(self):
        self.environment.episode_step()

    def before_agent_update(self):
        self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        time.sleep(1)

    def after_agent_update(self):
        self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        time.sleep(3)

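# NOTE: set_pos and set_pos_aimbot are called by SerpentCODGameAgent above but
# are defined outside this excerpt. The sketch below is a minimal, hypothetical
# relative mouse-move helper for Windows, shown only for reference; it is not
# the original implementation, which may rely on SendInput or another injector.
import ctypes

MOUSEEVENTF_MOVE = 0x0001  # relative movement flag for the mouse_event API


def set_pos(dx, dy):
    # Move the cursor by a relative offset; many games read raw mouse input,
    # so mouse_event/SendInput is generally required rather than SetCursorPos.
    ctypes.windll.user32.mouse_event(MOUSEEVENTF_MOVE, int(dx), int(dy), 0, 0)
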
class SerpentPika2GameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.previous_game_frame = None

        self.lowerY = np.array([255, 255, 0], np.uint8)
        self.upperY = np.array([255, 255, 10], np.uint8)
        self.lowerR = np.array([255, 0, 0], np.uint8)
        self.upperR = np.array([255, 0, 10], np.uint8)

        self.game_state = None
        self._reset_game_state()

    def setup_key(self):
        self.input_mapping = {
            "JUMP": [KeyboardKey.KEY_UP],
            "RIGHT": [KeyboardKey.KEY_RIGHT],
            "LEFT": [KeyboardKey.KEY_LEFT],
            "LEFT JUMP": [KeyboardKey.KEY_LEFT, KeyboardKey.KEY_UP],
            "RIGHT JUMP": [KeyboardKey.KEY_RIGHT, KeyboardKey.KEY_UP],
            "HIT": [KeyboardKey.KEY_RETURN],
            "None": []
        }

        self.key_mapping = {
            KeyboardKey.KEY_UP: "UP",
            KeyboardKey.KEY_RIGHT: "RIGHT",
            KeyboardKey.KEY_DOWN: "DOWN",
            KeyboardKey.KEY_LEFT: "LEFT",
            KeyboardKey.KEY_RETURN: "HIT"
        }

        self.action_space = KeyboardMouseActionSpace(
            action=['None', 'HIT']
        )

        self.move_action_space = KeyboardMouseActionSpace(
            action=['None', 'JUMP', 'RIGHT', 'LEFT']
        )

        '''
        move_inputs = {
            "JUMP": [KeyboardKey.KEY_UP],
            "RIGHT": [KeyboardKey.KEY_RIGHT],
            "LEFT": [KeyboardKey.KEY_LEFT],
            "NO_MOVE": []
        }

        attack_inputs = {
            "Power Hit": [KeyboardKey.KEY_RETURN],
            "NO_HIT": []
        }

        self.game_inputs = dict()

        for move_label, attack_label in itertools.product(move_inputs, attack_inputs):
            label = f"{move_label.ljust(10)}{attack_label}"
            self.game_inputs[label] = move_inputs[move_label] + attack_inputs[attack_label]

        print(self.game_inputs)
        '''

    def setup_play(self):
        # self.cid = 0
        self.trainID = 0

        self.setup_key()

        self.frame_process = False
        self.rewards = list()

        self.started_at = datetime.now()
        self.started_at_str = self.started_at.isoformat()

        self.rainbow_dqn = RainbowDQNAgent(
            replay_memory_capacity=100000,
            history=4,
            discount=0.99,
            multi_step=3,
            priority_weight=0.4,
            priority_exponent=0.5,
            quantile=True,
            quantiles=200,
            atoms=51,
            v_min=-10,
            v_max=10,
            batch_size=32,
            hidden_size=1024,
            target_update=10000,
            save_steps=5000,
            observe_steps=50000,
            max_steps=5000000,
            model='dataset/rainbow_dqn.pth'
        )

        print('Starting Game')
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)

    def getDifference(self, game_frame, previous_game_frame):
        return game_frame.grayscale_frame - previous_game_frame.grayscale_frame

    def handle_play(self, game_frame):
        # append memory data into game state
        (self.game_state["com_x"], self.game_state["com_y"],
         self.ai_x, self.ai_y, self.ball_x, self.ball_y,
         self.com_sc, self.ai_sc, self.col_size,
         self.game_state["col_x"], self.game_state["col_y"]) = readInfo()

        self.game_state["ai_x"].appendleft(self.ai_x)
        self.game_state["ai_y"].appendleft(self.ai_y)
        self.game_state["ball_x"].appendleft(self.ball_x)
        self.game_state["ball_y"].appendleft(self.ball_y)
        self.game_state["ai_score"].appendleft(self.ai_sc)
        self.game_state["com_score"].appendleft(self.com_sc)
        self.game_state["col_size"].appendleft(self.col_size)

        # judge is-in-game by read pixel value (tricky)
        self.game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0].frame

        if self.game_frame_img[91, 49] != 0.3607843137254902:
            self.handle_notInGame()
        else:
            self.game_state["playing"] = True
            self.handle_fight(game_frame)

    def handle_notInGame(self):
        serpent.utilities.clear_terminal()
        print('Currently not in game...please wait..')

        playAnimation(self.game_state["animeIndex"])
        self.game_state["animeIndex"] = self.game_state["animeIndex"] + 1 if self.game_state["animeIndex"] < 3 else 0
        # print(self.game_frame_img[75:97,47:52])
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(0.3)

    def handle_fight(self, game_frame):
        gc.disable()

        reward = self._calculate_reward()
        self.rainbow_dqn.observe(reward=reward, terminal=False)

        game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
        movement_keys = self.rainbow_dqn.generate_action(game_frame_buffer)

        # Every 2000 steps, save latest weights to disk
        if self.rainbow_dqn.current_step % 2000 == 0:
            self.rainbow_dqn.save_model()

        run_time = datetime.now() - self.started_at

        serpent.utilities.clear_terminal()

        print('')
        print(Fore.YELLOW)
        print(Style.BRIGHT)
        print(f"STARTED AT: {self.started_at_str}")
        print(f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s")
        print(Style.RESET_ALL)

        # print("")
        print(Fore.GREEN)
        print(Style.BRIGHT)
        print("MOVEMENT NEURAL NETWORK:\n")
        self.dqn_move.output_step_data()
        print("")
        print("ACTION NEURAL NETWORK:\n")
        print(Style.RESET_ALL)
        print(Style.BRIGHT)
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print("")
        print(f"CURRENT RUN REWARD: {round(self.game_state['reward'], 4)}")
        print(f"CURRENT AI SCORE: {self.game_state['ai_score'][0]}")
        print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
        print("")
        print(f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
        print(Style.RESET_ALL)
        print("")
        print(Fore.GREEN)
        print(Style.BRIGHT)
        # print(movement_keys)
        # print(" + ".join(list(map(lambda k: self.key_mapping.get(k), movement_keys))))
        print(" + ".join(list(map(lambda k: self.key_mapping.get(k).ljust(5), movement_keys))))
        print(Style.RESET_ALL)
        print("")
        print(f"AI: ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})")
        print(f"COM: ({self.game_state['com_x']}, {self.game_state['com_y']})")
        print(f"BALL: ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})")
        print(f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})")
        print(f"Distance: {self.game_state['distance'][0]}")

        self.input_controller.handle_keys(movement_keys)

        self.game_state["current_run"] += 1

        if self.game_state['ai_score'][0] == 15 or self.game_state['com_score'][0] == 15:
            # Game over
            self.game_state["ai_score"].appendleft(0)
            self.game_state["com_score"].appendleft(0)
            self.handle_fight_end(game_frame)

    def handle_fight_end(self, game_frame):
        self.game_state["playing"] = False
        self.input_controller.handle_keys([])
        self.game_state["current_run"] += 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        serpent.utilities.clear_terminal()

        gc.enable()
        gc.collect()
        gc.disable()

        print("TRAIN MODE")

        self.input_controller.handle_keys([])
        self.game_state["run_predicted_actions"] = 0

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(2)

    def _reset_game_state(self):
        self.game_state = {
            "reward": 0,
            "animeIndex": 0,
            "current_run": 1,
            "playing": False,
            "run_predicted_actions": 0,
            "ai_x": collections.deque(np.full((4,), 0), maxlen=4),
            "ai_y": collections.deque(np.full((4,), 0), maxlen=4),
            "ai_score": collections.deque(np.full((4,), 0), maxlen=4),
            "ball_x": collections.deque(np.full((4,), 0), maxlen=4),
            "ball_y": collections.deque(np.full((4,), 0), maxlen=4),
            "com_score": collections.deque(np.full((4,), 0), maxlen=4),
            "col_size": collections.deque(np.full((4,), 6), maxlen=4),
            "com_x": 36,
            "com_y": 244,
            "col_x": 0,
            "col_y": 0,
            "distance": collections.deque(np.full((20,), 100), maxlen=20),
        }

    def _calculate_reward(self):
        reward = 0
        distance = math.sqrt(
            abs(self.game_state["ai_x"][0] - self.game_state["ball_x"][0]) ** 2 +
            abs(self.game_state["ai_y"][0] - self.game_state["ball_y"][0]) ** 2
        )

        self.game_state["distance"].appendleft(int(distance))

        # to make the AI move less
        if self.game_state["ai_x"][0] == self.game_state["ai_x"][1]:
            reward += 0.1

        # collision with ball
        collision = (
            self.game_state["distance"][0] < 80 and
            self.game_state["distance"][1] < 80 and
            self.game_state["distance"][2] < 80 and
            self.game_state["distance"][0] > self.game_state["distance"][1] and
            self.game_state["distance"][1] < self.game_state["distance"][2]
        )

        if collision:
            reward += 0.25

        # power hit
        if self.game_state["col_size"][0] > 0 and self.game_state["distance"][0] < 90 and self.game_state["col_y"] != 272:
            reward += 0.5

        # AI gains score
        if self.game_state["ai_score"][0] > self.game_state["ai_score"][1]:
            reward += 1

        # Com gains score
        if self.game_state["com_score"][0] > self.game_state["com_score"][1]:
            reward += -1

        if reward > 1:
            reward = 1

        self.game_state["reward"] = reward

        return reward