# Shared imports assumed by the snippets below; project-internal helpers
# (preprocess_image, key_check, keys_to_output, keys_to_id, select_key, mse,
# load_model, save_data, ...) come from the TEDD1104 codebase itself.
import logging
import math
import threading
import time
from tkinter import Label, StringVar, Tk

import cv2
import numpy as np
import torch
from torch.cuda.amp import autocast
from torchvision import transforms


def image_sequencer_thread(stop_event: threading.Event) -> None:
    """
    Get the images from img_thread and maintain an updated array seq with the
    last 5 captured images, with a 1/10 sec span between them.

    Input:
    - stop_event: threading.Event that will stop the thread

    Output:
    """
    global back_buffer
    global seq
    global key_out
    global num

    # Frames per second capture rate
    capturerate = 10.0
    while not stop_event.is_set():
        last_time = time.time()
        seq, num, key_out = (
            np.concatenate(
                (seq[1:], [preprocess_image(np.copy(back_buffer))]),
                axis=0,
            ),
            num + 1,
            keys_to_output(key_check()),
        )
        waittime = (1.0 / capturerate) - (time.time() - last_time)
        if waittime > 0.0:
            time.sleep(waittime)

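# A minimal launch sketch (illustrative, assuming this module also defines the
# `img_thread` capture loop referenced in the docstring above):
def _example_launch_sequencer(record_secs: float = 10.0) -> None:
    stop_event = threading.Event()
    th_img = threading.Thread(target=img_thread, args=[stop_event], daemon=True)
    th_seq = threading.Thread(
        target=image_sequencer_thread, args=[stop_event], daemon=True
    )
    th_img.start()
    time.sleep(1)  # img_thread must be filling back_buffer before sequencing starts
    th_seq.start()
    time.sleep(record_secs)
    stop_event.set()  # both threads leave their while-loops
    th_seq.join()
    th_img.join()
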
def _img_thread(self, stop_event: threading.Event) -> None:
    """
    Thread that continuously captures the screen.

    :param threading.Event stop_event: Event to stop the thread
    """
    if self.get_controller_input and self.control_mode == "controller":
        self.controller_reader = XboxControllerReader(total_wait_secs=2)

    while not stop_event.is_set():
        last_time = time.time()
        self.front_buffer = self.screen_grabber.grab(None)

        # Swap buffers
        self.front_buffer, self.back_buffer, self.controller_input = (
            self.back_buffer,
            self.front_buffer,
            None
            if not self.get_controller_input
            else (
                keys_to_id(key_check())
                if self.control_mode == "keyboard"
                else self.controller_reader.read()
            ),
        )

        self.fps = int(1.0 / (time.time() - last_time))

    print("Image capture thread stopped")

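# A condensed sketch of the double-buffering used above, with hypothetical
# names (DoubleBuffer, publish): the producer writes into its own slot and
# publishes a frame by swapping references, so a reader holding `back` always
# sees a complete frame (possibly a stale one) without locking.
class DoubleBuffer:
    def __init__(self, shape=(900, 1600, 4)):
        self.front = np.zeros(shape, dtype=np.uint8)  # producer-owned slot
        self.back = np.zeros(shape, dtype=np.uint8)  # reader-visible slot

    def publish(self, frame: np.ndarray) -> None:
        self.front = frame
        # Swap references; a reader that already grabbed `back` keeps a full frame
        self.front, self.back = self.back, self.front
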
def multi_image_sequencer_thread(
    stop_event: threading.Event, num_sequences: int
) -> None:
    """
    Get the images from img_thread and maintain num_sequences updated arrays,
    each with the last 5 captured images and a 1/10 sec span between them.

    Input:
    - stop_event: threading.Event that will stop the thread
    - num_sequences: Number of sequences to update in parallel

    Output:
    """
    global back_buffer
    global seq
    global key_out
    global num

    # Frames per second capture rate
    capturerate: float = 10.0
    sequence_delay: float = (1.0 / capturerate) / float(num_sequences)
    # num_sequences ring buffers of 5 images each (270x480 RGB)
    sequences: np.ndarray = np.zeros((num_sequences, 5, 270, 480, 3), dtype=np.uint8)
    # Stagger the per-sequence timestamps so updates are evenly interleaved
    last_times: np.ndarray = np.asarray(
        [sequence_delay * x for x in range(num_sequences)]
    )
    first_it: bool = True  # Avoid printing the delay warning during the first iteration

    while not stop_event.is_set():
        for i in range(num_sequences):
            waittime: float = last_times[i] + sequence_delay - time.time()
            if waittime > 0.0:
                time.sleep(waittime)
            else:
                if not first_it:
                    logging.warning(
                        f"{math.fabs(waittime)} secs delay in the sequence capture, "
                        f"consider reducing num_sequences"
                    )

            last_times[i] = time.time()
            sequences[i] = np.concatenate(
                (sequences[i][1:], [preprocess_image(np.copy(back_buffer))]),
                axis=0,
            )
            seq, num, key_out = (
                sequences[i],
                num + 1,
                keys_to_output(key_check()),
            )

        first_it = False

def multi_image_sequencer_thread(
    stop_event: threading.Event, num_sequences: int
) -> None:
    """
    Get the images from img_thread and maintain num_sequences updated arrays,
    each with the last 5 captured images and a 1/10 sec span between them.

    Input:
    - stop_event: threading.Event that will stop the thread
    - num_sequences: Number of sequences to update in parallel

    Output:
    """
    global back_buffer
    global seq
    global key_out
    global num

    # Frames per second capture rate
    capturerate: float = 10.0
    sequence_delay: float = 1.0 / capturerate / num_sequences
    # num_sequences ring buffers of 5 images each (270x480 RGB)
    sequences: np.ndarray = np.zeros((num_sequences, 5, 270, 480, 3), dtype=np.uint8)

    while not stop_event.is_set():
        for i in range(num_sequences):
            start_time: float = time.time()
            # Overwrite the oldest image, then rotate it to the end of the sequence
            sequences[i][0] = preprocess_image(np.copy(back_buffer))
            sequences[i] = sequences[i][[1, 2, 3, 4, 0]]
            seq, num, key_out = (
                sequences[i],
                num + 1,
                keys_to_output(key_check()),
            )
            waittime: float = sequence_delay - (time.time() - start_time)
            if waittime > 0:
                time.sleep(waittime)
            else:
                logging.warning(
                    f"{math.fabs(waittime)} secs delay in the sequence capture, "
                    f"consider reducing num_sequences"
                )

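# Timing sketch for the interleaving above: each sequence still advances at
# `capturerate` (one new frame per 1/10 sec), while the shared `seq` handle is
# refreshed num_sequences times as often. With capturerate=10 and
# num_sequences=4: sequence_delay = 1 / 10 / 4 = 0.025 s between published
# sequences, and 4 * 0.025 = 0.1 s between frames of any single sequence.
def interleave_timings(capturerate: float = 10.0, num_sequences: int = 4):
    sequence_delay = 1.0 / capturerate / num_sequences
    frame_span = sequence_delay * num_sequences  # span between frames of one sequence
    return sequence_delay, frame_span  # (0.025, 0.1) for the defaults
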
def run_ted1104(
    model_dir,
    enable_evasion: bool,
    show_current_control: bool,
    num_parallel_sequences: int = 1,
    evasion_score=1000,
    enable_segmentation: bool = False,
) -> None:
    """
    Run the TEDD1104 model to play a videogame.

    HOWTO:
        Set your game in windowed mode
        Set your game to 1600x900 resolution
        Move the game window to the top left corner, there should be a blue
        line of 1 pixel in the left bezel of your screen and the window top
        bar should start in the top bezel of your screen.
        Let the AI play the game!

    Controls:
        Push QE to exit
        Push L to see the input images
        Push and hold J to use manual control

    Input:
    - model_dir: Directory where the model to use is stored (model.bin and
      model_hyperparameters.json files)
    - enable_evasion: Automatic evasion maneuvers when the car gets stuck
      somewhere. Note: It adds computation time.
    - show_current_control: Show a window with text that indicates if the car
      is currently being driven by the AI or a human
    - num_parallel_sequences: Number of sequences to record. The larger the
      number, the faster the recorded sequence of images will be updated, so
      the model will use more recent images and be able to do more iterations
      per second. However, if num_parallel_sequences is too high the thread
      won't be able to update the sequences with 1/10 sec between images (the
      default capture rate used to generate training examples).
    - evasion_score: Mean squared error value between images to activate the
      evasion maneuvers
    - enable_segmentation: Image segmentation will be performed using a
      pretrained model. Cars, persons, bikes... will be highlighted to help
      the model identify them.

    Output:
    """
    show_what_ai_sees: bool = False
    fp16: bool
    model: TEDD1104
    model, fp16 = load_model(save_dir=model_dir, device=device)

    if enable_segmentation:
        image_segmentation = ImageSegmentation(
            model_name="fcn_resnet101", device=device, fp16=fp16
        )
    else:
        image_segmentation = None

    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )

    model.eval()

    stop_recording: threading.Event = threading.Event()
    th_img: threading.Thread = threading.Thread(
        target=screen_recorder.img_thread, args=[stop_recording]
    )
    th_seq: threading.Thread = threading.Thread(
        target=screen_recorder.multi_image_sequencer_thread,
        args=[stop_recording, num_parallel_sequences],
    )
    th_img.daemon = True
    th_seq.daemon = True
    th_img.start()
    # Wait to launch the image_sequencer_thread, it needs the img_thread to be running
    time.sleep(5)
    th_seq.start()

    if show_current_control:
        root = Tk()
        var = StringVar()
        var.set("T.E.D.D. 1104 Driving")
1104 Driving") l = Label(root, textvariable=var, fg="green", font=("Courier", 44)) l.pack() last_time: float = time.time() model_prediction: np.ndarray = np.asarray([0]) score: np.float = np.float(0) last_num: int = 0 while True: while ( last_num == screen_recorder.num ): # Don't run the same sequence again, the resulted key will be the same time.sleep(0.0001) last_num = screen_recorder.num init_copy_time: float = time.time() if enable_segmentation: img_seq: np.ndarray = image_segmentation.add_segmentation( np.copy(screen_recorder.seq) ) else: img_seq: np.ndarray = np.copy(screen_recorder.seq) keys = key_check() if "J" not in keys: X = torch.stack( ( transform(img_seq[0] / 255.0).half(), transform(img_seq[1] / 255.0).half(), transform(img_seq[2] / 255.0).half(), transform(img_seq[3] / 255.0).half(), transform(img_seq[4] / 255.0).half(), ), dim=0, ).to(device) if fp16: with autocast(): model_prediction: torch.tensor = model.predict(X).cpu().numpy() else: model_prediction: torch.tensor = model.predict(X).cpu().numpy() select_key(int(model_prediction[0])) key_push_time: float = time.time() if show_current_control: var.set("T.E.D.D. 1104 Driving") l.config(fg="green") root.update() if enable_evasion: score = mse(img_seq[0], img_seq[4]) if score < evasion_score: if show_current_control: var.set("Evasion maneuver") l.config(fg="blue") root.update() select_key(4) time.sleep(1) if np.random.rand() > 0.5: select_key(6) else: select_key(8) time.sleep(0.2) if show_current_control: var.set("T.E.D.D. 1104 Driving") l.config(fg="green") root.update() else: if show_current_control: var.set("Manual Control") l.config(fg="red") root.update() key_push_time: float = 0.0 if show_what_ai_sees: cv2.imshow("window1", img_seq[0]) cv2.waitKey(1) cv2.imshow("window2", img_seq[1]) cv2.waitKey(1) cv2.imshow("window3", img_seq[2]) cv2.waitKey(1) cv2.imshow("window4", img_seq[3]) cv2.waitKey(1) cv2.imshow("window5", img_seq[4]) cv2.waitKey(1) if "Q" in keys and "E" in keys: print("\nStopping...") stop_recording.set() th_seq.join() th_img.join() if show_what_ai_sees: cv2.destroyAllWindows() break if "L" in keys: time.sleep(0.1) # Wait for key release if show_what_ai_sees: cv2.destroyAllWindows() show_what_ai_sees = False else: show_what_ai_sees = True time_it: float = time.time() - last_time print( f"Recording at {screen_recorder.fps} FPS\n" f"Actions per second {None if time_it==0 else 1/time_it}\n" f"Reaction time: {round(key_push_time-init_copy_time,3) if key_push_time>0 else 0} secs\n" f"Key predicted by nn: {key_press(int(model_prediction[0]))}\n" f"Difference from img 1 to img 5 {None if not enable_evasion else score}\n" f"Push QE to exit\n" f"Push L to see the input images\n" f"Push J to use to use manual control\n", end="\r", ) last_time = time.time()
def run_ted1104(
    checkpoint_path: str,
    enable_evasion: bool,
    show_current_control: bool,
    num_parallel_sequences: int = 2,
    width: int = 1600,
    height: int = 900,
    full_screen: bool = False,
    evasion_score=1000,
    control_mode: str = "keyboard",
    enable_segmentation: bool = False,
    dtype=torch.float32,
) -> None:
    """
    Run the TEDD1104 model in real-time inference.

    HOWTO:
    - If you play in windowed mode, move the game window to the top left corner of the primary screen.
    - If you play in full screen mode, set the full_screen parameter to True.
    - Set your game to the width x height resolution specified in the parameters.
    - If you want TEDD1104 to use the keyboard for controlling the game, set the control_mode parameter to "keyboard".
    - If you want TEDD1104 to use a vXbox controller for controlling the game, set the control_mode parameter to "controller".
    - Run the script and let TEDD1104 play the game!
    - Detailed instructions can be found in the README.md file.

    :param str checkpoint_path: Path to the model checkpoint file.
    :param bool enable_evasion: Enable evasion; if the vehicle gets stuck we will reverse and randomly turn left/right.
    :param bool show_current_control: Show on screen whether TEDD or the user is driving.
    :param int num_parallel_sequences: Number of sequences to run in parallel.
    :param int width: Width of the game window.
    :param int height: Height of the game window.
    :param bool full_screen: Whether the game is played in full screen mode.
    :param int evasion_score: Threshold to trigger the evasion.
    :param str control_mode: Device that TEDD will use for driving: "keyboard" or "controller" (xbox controller).
    :param bool enable_segmentation: Experimental. Enable segmentation using SegFormer (it will only apply
        segmentation to the images displayed to the user when you push the "L" key). Requires huggingface
        transformers to be installed (https://huggingface.co/docs/transformers/index). Very GPU demanding!
    :param dtype: Data type to use for the model. BF16 is only supported on Nvidia Ampere GPUs and
        requires PyTorch 1.10 or higher.
    """
    assert control_mode in [
        "keyboard",
        "controller",
    ], f"{control_mode} control mode not supported. Supported control modes: [keyboard, controller]."

    if control_mode == "controller" and not _controller_available:
        raise ModuleNotFoundError(
            "Controller emulation not available, see controller/setup.md for more info."
        )

    show_what_ai_sees: bool = False

    model = Tedd1104ModelPL.load_from_checkpoint(
        checkpoint_path=checkpoint_path
    )  # hparams_file=hparams_path

    model.eval()
    model.to(dtype=dtype, device=device)

    image_segformer = None
    if enable_segmentation:
        from segmentation.segmentation_segformer import ImageSegmentation

        image_segformer = ImageSegmentation(device=device)

    if control_mode == "controller":
        xbox_controller: Optional[XboxControllerEmulator] = XboxControllerEmulator()
    else:
        xbox_controller = None

    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )

    img_sequencer = ImageSequencer(
        width=width,
        height=height,
        full_screen=full_screen,
        get_controller_input=False,
        num_sequences=num_parallel_sequences,
        total_wait_secs=5,
    )

    if show_current_control:
        root = Tk()
        var = StringVar()
        var.set("T.E.D.D. 1104 Driving")
1104 Driving") text_label = Label(root, textvariable=var, fg="green", font=("Courier", 44)) text_label.pack() else: root = None var = None text_label = None last_time: float = time.time() score: np.float = np.float(0) last_num: int = 5 # The image sequence starts with images containing zeros, wait until it is filled close_app: bool = False model_prediction = np.zeros(3 if control_mode == "controller" else 1) lt: float = 0 rt: float = 0 lx: float = 0 while not close_app: try: while last_num == img_sequencer.num_sequence: time.sleep(0.01) last_num = img_sequencer.num_sequence img_seq, _ = img_sequencer.get_sequence() init_copy_time: float = time.time() keys = key_check() if "J" not in keys: x: torch.tensor = torch.stack( ( transform(img_seq[0] / 255.0), transform(img_seq[1] / 255.0), transform(img_seq[2] / 255.0), transform(img_seq[3] / 255.0), transform(img_seq[4] / 255.0), ), dim=0, ).to(device=device, dtype=dtype) with torch.no_grad(): model_prediction: torch.tensor = ( model(x, output_mode=control_mode, return_best=True)[0] .cpu() .numpy() ) if control_mode == "controller": if model_prediction[1] > 0: rt = min(1.0, float(model_prediction[1])) * 2 - 1 lt = -1 else: rt = -1 lt = min(1.0, math.fabs(float(model_prediction[1]))) * 2 - 1 lx = max(-1.0, min(1.0, float(model_prediction[0]))) xbox_controller.set_controller_state( lx=lx, lt=lt, rt=rt, ) else: select_key(model_prediction) key_push_time: float = time.time() if show_current_control: var.set("T.E.D.D. 1104 Driving") text_label.config(fg="green") root.update() if enable_evasion: score = mse(img_seq[0], img_seq[4]) if score < evasion_score: if show_current_control: var.set("Evasion maneuver") text_label.config(fg="blue") root.update() if control_mode == "controller": xbox_controller.set_controller_state(lx=0, lt=1.0, rt=-1.0) time.sleep(1) if np.random.rand() > 0.5: xbox_controller.set_controller_state( lx=1.0, lt=0.0, rt=-1.0 ) else: xbox_controller.set_controller_state( lx=-1.0, lt=0.0, rt=-1.0 ) time.sleep(0.2) else: select_key(4) time.sleep(1) if np.random.rand() > 0.5: select_key(6) else: select_key(8) time.sleep(0.2) if show_current_control: var.set("T.E.D.D. 
1104 Driving") text_label.config(fg="green") root.update() else: if show_current_control: var.set("Manual Control") text_label.config(fg="red") root.update() if control_mode == "controller": xbox_controller.set_controller_state(lx=0.0, lt=-1, rt=-1.0) key_push_time: float = 0.0 if show_what_ai_sees: if enable_segmentation: img_seq = image_segformer.add_segmentation(images=img_seq) cv2.imshow("window1", img_seq[0]) cv2.waitKey(1) cv2.imshow("window2", img_seq[1]) cv2.waitKey(1) cv2.imshow("window3", img_seq[2]) cv2.waitKey(1) cv2.imshow("window4", img_seq[3]) cv2.waitKey(1) cv2.imshow("window5", img_seq[4]) cv2.waitKey(1) if "L" in keys: time.sleep(0.1) # Wait for key release if show_what_ai_sees: cv2.destroyAllWindows() show_what_ai_sees = False else: show_what_ai_sees = True time_it: float = time.time() - last_time if control_mode == "controller": info_message = ( f"LX: {int(model_prediction[0] * 100)}%" f"\n LT: {int(lt * 100)}%\n" f"RT: {int(rt * 100)}%" ) else: info_message = f"Predicted Key: {id_to_key(model_prediction)}" print( f"Recording at {img_sequencer.screen_recorder.fps} FPS\n" f"Actions per second {None if time_it == 0 else 1 / time_it}\n" f"Reaction time: {round(key_push_time - init_copy_time, 3) if key_push_time > 0 else 0} secs\n" f"{info_message}\n" f"Difference from img 1 to img 5 {None if not enable_evasion else score}\n" f"Push Ctrl + C to exit\n" f"Push L to see the input images\n" f"Push J to use to use manual control\n", end="\r", ) last_time = time.time() except KeyboardInterrupt: print() img_sequencer.stop() if control_mode == "controller": xbox_controller.stop() close_app = True
def generate_dataset(
    output_dir: str, num_training_examples_per_file: int, use_probability: bool = True
) -> None:
    """
    Generate dataset examples from a human playing a videogame.

    HOWTO:
        Set your game in windowed mode
        Set your game to 1600x900 resolution
        Move the game window to the top left corner, there should be a blue
        line of 1 pixel in the left bezel of your screen and the window top
        bar should start in the top bezel of your screen.
        Play the game! The program will capture your screen and generate the
        training examples. They will be saved as files named
        "training_dataX.npz" (numpy compressed array). Don't worry if you
        re-launch this script, the program will search for already existing
        dataset files in the directory and it won't overwrite them.

    Input:
    - output_dir: Directory where the training files will be saved
    - num_training_examples_per_file: Number of training examples per output file
    - use_probability: Use probability to generate a balanced dataset. Each
      example will have a probability that depends on the number of instances
      with the same key combination in the dataset.

    Output:
    """
    training_data: list = []
    stop_recording: threading.Event = threading.Event()
    th_img: threading.Thread = threading.Thread(
        target=screen_recorder.img_thread, args=[stop_recording]
    )
    th_seq: threading.Thread = threading.Thread(
        target=screen_recorder.image_sequencer_thread, args=[stop_recording]
    )
    th_img.start()
    # Wait to launch the image_sequencer_thread, it needs the img_thread to be running
    time.sleep(1)
    th_seq.start()

    number_of_files: int = get_last_file_num(output_dir) + 1
    # The extra number_of_files compensates the +1 skew applied after each file save (see below)
    total_examples_in_dataset: int = (
        number_of_files * num_training_examples_per_file
    ) + number_of_files

    time.sleep(4)
    # The image sequence starts with images containing zeros, wait until it is filled with real images
    last_num: int = 5
    number_of_keys = np.zeros(9, dtype=int)

    while True:
        while last_num == screen_recorder.num:
            time.sleep(0.01)

        last_num = screen_recorder.num
        img_seq, output = screen_recorder.seq.copy(), screen_recorder.key_out.copy()

        print(
            f"Recording at {screen_recorder.fps} FPS\n"
            f"Images in sequence {len(img_seq)}\n"
            f"Training data len {total_examples_in_dataset - number_of_files} sequences\n"
            f"Number of archives {number_of_files}\n"
            f"Keys pressed: {output}\n"
            f"Key samples recorded: "
            f"None: {number_of_keys[0]} "
            f"A: {number_of_keys[1]} "
            f"D: {number_of_keys[2]} "
            f"W: {number_of_keys[3]} "
            f"S: {number_of_keys[4]} "
            f"AW: {number_of_keys[5]} "
            f"AS: {number_of_keys[6]} "
            f"WD: {number_of_keys[7]} "
            f"SD: {number_of_keys[8]}\n"
            f"Push QE to exit\n",
            end="\r",
        )

        key = counter_keys(output)

        if key != -1:
            if use_probability:
                total = np.sum(number_of_keys)
                key_num = number_of_keys[key]
                if total != 0:
                    # Examples of over-represented keys are kept with lower probability
                    prop = ((total - key_num) / total) ** 2
                    if prop < 0.5:
                        prop = 0.1
                else:
                    prop = 1.0

                if np.random.rand() <= prop:
                    number_of_keys[key] += 1
                    total_examples_in_dataset += 1
                    training_data.append(
                        [
                            img_seq[0],
                            img_seq[1],
                            img_seq[2],
                            img_seq[3],
                            img_seq[4],
                            output,
                        ]
                    )
            else:
                number_of_keys[key] += 1
                total_examples_in_dataset += 1
                training_data.append(
                    [img_seq[0], img_seq[1], img_seq[2], img_seq[3], img_seq[4], output]
                )

        keys = key_check()
        if "Q" in keys and "E" in keys:
            print("\nStopping...")
            stop_recording.set()
            save_thread = threading.Thread(
                target=save_data,
                args=(output_dir, training_data.copy(), number_of_files),
            )
            save_thread.start()
            th_seq.join()
            th_img.join()
            save_thread.join()
            break

        if total_examples_in_dataset % num_training_examples_per_file == 0:
            threading.Thread(
                target=save_data,
                args=(output_dir, training_data.copy(), number_of_files),
            ).start()
            number_of_files += 1
            training_data = []
            # Skew the counter so the save condition doesn't re-trigger on
            # iterations where no new example is appended
            total_examples_in_dataset += 1

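# Illustrative invocation; the output directory and file size are assumptions,
# not values shipped with the project.
if __name__ == "__main__":
    generate_dataset(
        output_dir="training_data",  # hypothetical directory
        num_training_examples_per_file=2000,
        use_probability=True,
    )
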
def run_TED1104(
    model_dir, enable_evasion, show_current_control, evasion_score=1000
) -> None:
    """
    Let the TEDD1104 model play a videogame.

    HOWTO:
        Set your game in windowed mode
        Set your game to 1600x900 resolution
        Move the game window to the top left corner, there should be a blue
        line of 1 pixel in the left bezel of your screen and the window top
        bar should start in the top bezel of your screen.
        Let the AI play the game!

    Controls:
        Push QE to exit
        Push L to see the input images
        Push and hold J to use manual control

    Input:
    - model_dir: Directory where the model to use is stored (model.bin and
      model_hyperparameters.json files)
    - enable_evasion: Automatic evasion maneuvers when the car gets stuck
      somewhere. Note: It adds computation time.
    - show_current_control: Show a window with text that indicates if the car
      is currently being driven by the AI or a human
    - evasion_score: Mean squared error value between images to activate the
      evasion maneuvers

    Output:
    """
    show_what_ai_sees: bool = False
    fp16: bool
    model: TEDD1104
    model, fp16 = load_model(model_dir, device)
    model.eval()

    stop_recording: threading.Event = threading.Event()
    th_img: threading.Thread = threading.Thread(
        target=screen_recorder.img_thread, args=[stop_recording]
    )
    th_seq: threading.Thread = threading.Thread(
        target=screen_recorder.image_sequencer_thread, args=[stop_recording]
    )
    th_img.start()
    # Wait to launch the image_sequencer_thread, it needs the img_thread to be running
    time.sleep(5)
    th_seq.start()

    if show_current_control:
        root = Tk()
        var = StringVar()
        var.set("T.E.D.D. 1104 Driving")
        text_label = Label(root, textvariable=var, fg="green", font=("Courier", 44))
        text_label.pack()

    last_time: float = time.time()
    model_prediction: np.ndarray = np.asarray([0])
    score: float = 0.0

    while True:
        img_seq: np.ndarray = screen_recorder.seq.copy()
        keys = key_check()
        if "J" not in keys:
            X: torch.Tensor = torch.from_numpy(
                reshape_x(np.array([img_seq]), fp=16 if fp16 else 32)
            )
            model_prediction = model.predict(X.to(device)).cpu().numpy()
            select_key(int(model_prediction[0]))

            if show_current_control:
                var.set("T.E.D.D. 1104 Driving")
                text_label.config(fg="green")
                root.update()

            if enable_evasion:
                score = mse(img_seq[0], img_seq[4])
                if score < evasion_score:
                    if show_current_control:
                        var.set("Evasion maneuver")
                        text_label.config(fg="blue")
                        root.update()

                    # Key ids follow the order used in generate_dataset:
                    # 4 = S (reverse), 6 = AS and 8 = SD (reverse while turning)
                    select_key(4)
                    time.sleep(1)
                    if np.random.rand() > 0.5:
                        select_key(6)
                    else:
                        select_key(8)
                    time.sleep(0.2)

                    if show_current_control:
                        var.set("T.E.D.D. 1104 Driving")
                        text_label.config(fg="green")
                        root.update()
        else:
            if show_current_control:
                var.set("Manual Control")
                text_label.config(fg="red")
                root.update()

        if show_what_ai_sees:
            cv2.imshow("window1", img_seq[0])
            cv2.imshow("window2", img_seq[1])
            cv2.imshow("window3", img_seq[2])
            cv2.imshow("window4", img_seq[3])
            cv2.imshow("window5", img_seq[4])
            cv2.waitKey(1)  # required for the OpenCV windows to refresh

        if "Q" in keys and "E" in keys:
            print("\nStopping...")
            stop_recording.set()
            th_seq.join()
            th_img.join()
            if show_what_ai_sees:
                cv2.destroyAllWindows()
            break

        if "L" in keys:
            time.sleep(0.1)  # Wait for key release
            if show_what_ai_sees:
                cv2.destroyAllWindows()
                show_what_ai_sees = False
            else:
                show_what_ai_sees = True

        time_it = time.time() - last_time
        print(
            f"Recording at {screen_recorder.fps} FPS\n"
            f"Actions per second {None if time_it == 0 else 1 / time_it}\n"
            f"Key predicted by nn: {key_press(model_prediction[0])}\n"
            f"Difference from img 1 to img 5 {None if not enable_evasion else score}\n"
            f"Push QE to exit\n"
            f"Push L to see the input images\n"
            f"Push J to use manual control\n",
            end="\r",
        )
        last_time = time.time()

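# Illustrative invocation of this older entry point; the model directory is an
# assumption, not a path shipped with the project.
if __name__ == "__main__":
    run_TED1104(
        model_dir="models/tedd1104",  # hypothetical directory
        enable_evasion=True,
        show_current_control=True,
        evasion_score=1000,
    )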