def beat(previous_state, log_filename): """Run a cycle of heartbeat checks to ensure bot is running.""" # Handle case when run_bot.py script is stuck. If yes, kill its process. task_end_time = tasks.get_task_end_time() if psutil and task_end_time and dates.time_has_expired( task_end_time, seconds=tasks.TASK_COMPLETION_BUFFER): # Get absolute path to |run_bot| script. We use this to identify unique # instances of bot running on a particular host. startup_scripts_directory = environment.get_startup_scripts_directory() bot_file_path = os.path.join(startup_scripts_directory, 'run_bot') for process in psutil.process_iter(): try: command_line = ' '.join(process.cmdline()) except (psutil.AccessDenied, psutil.NoSuchProcess, OSError): utils.exc_clear() continue # Find the process running the main bot script. if bot_file_path not in command_line: continue process_id = process.pid logs.log('Killing stale bot (pid %d) which seems to have stuck.' % process_id) try: process_handler.terminate_root_and_child_processes(process_id) except Exception: logs.log_error('Failed to terminate stale bot processes.') # Minor cleanup to avoid disk space issues on bot restart. process_handler.terminate_stale_application_instances() shell.clear_temp_directory() shell.clear_testcase_directories() # Concerned stale processes should be killed. Now, delete the stale task. tasks.track_task_end() # Figure out when the log file was last modified. try: current_state = str(os.path.getmtime(log_filename)) except Exception: current_state = None # Only update the heartbeat if the log file was modified. if current_state and current_state != previous_state: # Try updating the heartbeat. If an error occurs, just # wait and return None. if not data_handler.update_heartbeat(): return None # Heartbeat is successfully updated. return current_state
def cleanup_task_state(): """Cleans state before and after a task is executed.""" # Cleanup stale processes. process_handler.cleanup_stale_processes() # Clear build urls, temp and testcase directories. shell.clear_build_urls_directory() shell.clear_crash_stacktraces_directory() shell.clear_testcase_directories() shell.clear_temp_directory() shell.clear_system_temp_directory() shell.clear_device_temp_directories() # Reset memory tool environment variables. environment.reset_current_memory_tool_options() # Clear exceptions. utils.exc_clear() # Call python's garbage collector. utils.python_gc()
def main(): """Prepare the configuration options and start requesting tasks.""" logs.configure('run_bot') root_directory = environment.get_value('ROOT_DIR') if not root_directory: print( 'Please set ROOT_DIR environment variable to the root of the source ' 'checkout before running. Exiting.') print('For an example, check init.bash in the local directory.') return dates.initialize_timezone_from_environment() environment.set_bot_environment() monitor.initialize() if not profiler.start_if_needed('python_profiler_bot'): sys.exit(-1) fuzzers_init.run() if environment.is_trusted_host(ensure_connected=False): from bot.untrusted_runner import host host.init() if environment.is_untrusted_worker(): # Track revision since we won't go into the task_loop. update_task.track_revision() from bot.untrusted_runner import untrusted as untrusted_worker untrusted_worker.start_server() assert False, 'Unreachable code' while True: # task_loop should be an infinite loop, # unless we run into an exception. error_stacktrace, clean_exit, task_payload = task_loop() # Print the error trace to the console. if not clean_exit: print('Exception occurred while running "%s".' % task_payload) print('-' * 80) print(error_stacktrace) print('-' * 80) should_terminate = (clean_exit or errors.error_in_list( error_stacktrace, errors.BOT_ERROR_TERMINATION_LIST)) if should_terminate: return logs.log_error('Task exited with exception (payload="%s").' % task_payload, error_stacktrace=error_stacktrace) should_hang = errors.error_in_list(error_stacktrace, errors.BOT_ERROR_HANG_LIST) if should_hang: logs.log('Start hanging forever.') while True: # Sleep to avoid consuming 100% of CPU. time.sleep(60) # See if our run timed out, if yes bail out. if data_handler.bot_run_timed_out(): return # Clear the current exception. utils.exc_clear()