def init():
    """Run the one-time setup that must finish before the management loops.

    Calls the runner, governor and run initializers in that order, makes
    sure a default runner is available (as a local process when running
    all-in-one, otherwise through ``init_default_runner``), and finally
    sets the module-level ``init_complete`` flag.
    """
    global init_complete

    for initializer in (init_runners, init_governors, init_runs):
        initializer()

    if not runtime.running_all_in_one:
        # Only create the default runner when none is registered yet.
        if not AnarchyRunner.get('default'):
            init_default_runner()
    else:
        start_runner_process()

    init_complete = True
    operator_logger.debug("Completed init")
def start_runner_process():
    """
    Launch the anarchy-runner as a child process for the all-in-one pod.

    The 'default' AnarchyRunner is looked up and, when it is not yet
    registered, registered from the default runner definition. The child
    process receives a copy of the current environment extended with the
    component name, the operator URL, the runner name and the runner token.
    """
    operator_logger.info('Starting all-in-one runner')

    default_runner = AnarchyRunner.get('default') or AnarchyRunner.register(
        AnarchyRunner.default_runner_definition(runtime)
    )

    # Start from a copy so the operator's own environment is left untouched.
    runner_env = dict(os.environ)
    runner_env.update({
        'ANARCHY_COMPONENT': 'runner',
        'ANARCHY_URL': 'http://{}:5000'.format(runtime.anarchy_service_name),
        'RUNNER_NAME': 'default',
        'RUNNER_TOKEN': default_runner.runner_token,
    })
    subprocess.Popen(['/opt/app-root/src/.s2i/bin/run'], env=runner_env)
def main_loop():
    """Run the operator's main loop forever.

    Each cycle waits on ``runtime.is_active_condition`` until
    ``runtime.is_active`` is true, makes sure a default runner exists, and
    then starts actions once per second for as long as the runtime stays
    active.
    """
    while True:
        active_condition = runtime.is_active_condition
        # The condition lock is held only while waiting to become active.
        with active_condition:
            while not runtime.is_active:
                active_condition.wait()

        if not runtime.running_all_in_one:
            if not AnarchyRunner.get('default'):
                init_default_runner()
        else:
            start_runner_process()

        while runtime.is_active:
            AnarchyAction.start_actions(runtime)
            time.sleep(1)
def check_runner_auth(auth_header):
    """
    Verify bearer token sent by anarchy runner in API call.

    The header must have the form
    ``Bearer <runner name>:<pod name>:<runner token>``. The runner and the
    pod must both be known, and the presented token must equal the token
    configured for that pod: the runner's own token when running
    all-in-one, otherwise the ``RUNNER_TOKEN`` environment variable of the
    pod's first container.

    Args:
        auth_header: Value of the Authorization header; may be None or
            empty when the client sent none.

    Returns:
        ``(anarchy_runner, runner_pod)`` on success, ``(None, None)`` on
        any failure.
    """
    import hmac

    # A missing header is simply an unauthenticated request, not an error.
    if not auth_header:
        return None, None

    match = re.match(r'Bearer ([^:]+):([^:]+):(.*)', auth_header)
    if not match:
        return None, None
    runner_name, pod_name, runner_token = match.groups()

    anarchy_runner = AnarchyRunner.get(runner_name)
    if not anarchy_runner:
        operator_logger.warning('Failed auth for unknown AnarchyRunner %s %s', runner_name, pod_name)
        return None, None

    runner_pod = anarchy_runner.pods.get(pod_name)
    if not runner_pod:
        operator_logger.warning(
            'Failed auth for AnarchyRunner %s %s, unknown pod',
            runner_name, pod_name)
        return None, None

    pod_runner_token = None
    if runtime.running_all_in_one:
        pod_runner_token = anarchy_runner.runner_token
    else:
        # Guard against a container that carries no env list at all.
        for env_var in runner_pod.spec.containers[0].env or []:
            if env_var.name == 'RUNNER_TOKEN':
                pod_runner_token = env_var.value
                break

    if not pod_runner_token:
        operator_logger.warning(
            'Failed auth for AnarchyRunner %s %s, cannot find RUNNER_TOKEN',
            runner_name, pod_name)
        return None, None

    # Constant-time comparison so response timing does not reveal how much
    # of a guessed token was correct. Bytes are compared because
    # compare_digest rejects non-ASCII str arguments.
    token_matches = isinstance(pod_runner_token, str) and hmac.compare_digest(
        pod_runner_token.encode('utf-8'), runner_token.encode('utf-8'))
    if token_matches:
        return anarchy_runner, runner_pod

    # Log the pod *name*: the pod object itself carries RUNNER_TOKEN in its
    # spec and must not end up in the logs.
    operator_logger.warning('Invalid auth token for AnarchyRunner %s %s', runner_name, pod_name)
    return None, None
def check_runner_auth(auth_header):
    """
    Verify bearer token sent by anarchy runner in API call.

    The header must have the form
    ``Bearer <runner name>:<pod name>:<runner token>`` and the token must
    equal the ``runner_token`` of the named AnarchyRunner.

    Args:
        auth_header: Value of the Authorization header; may be None or
            empty when the client sent none.

    Returns:
        ``(anarchy_runner, runner_pod)`` on success, where ``runner_pod``
        is the pod name taken from the header; ``(None, None)`` on any
        failure.
    """
    import hmac

    # A missing header is simply an unauthenticated request, not an error.
    if not auth_header:
        return None, None

    match = re.match(r'Bearer ([^:]+):([^:]+):(.*)', auth_header)
    if not match:
        return None, None
    runner_name, runner_pod, runner_token = match.groups()

    anarchy_runner = AnarchyRunner.get(runner_name)
    if not anarchy_runner:
        operator_logger.warning('Failed auth for unknown AnarchyRunner %s %s', runner_name, runner_pod)
        return None, None

    expected_token = anarchy_runner.runner_token
    # Constant-time comparison so response timing does not reveal how much
    # of a guessed token was correct. Bytes are compared because
    # compare_digest rejects non-ASCII str arguments.
    token_matches = isinstance(expected_token, str) and hmac.compare_digest(
        expected_token.encode('utf-8'), runner_token.encode('utf-8'))
    if not token_matches:
        # Never log the expected or the presented token: both are secrets.
        operator_logger.warning('Invalid auth token for AnarchyRunner %s %s', runner_name, runner_pod)
        return None, None

    return anarchy_runner, runner_pod
def _run_if_due(task, interval, last_run, error_message):
    """Call ``task(runtime)`` if more than ``interval`` seconds passed since ``last_run``.

    Returns the timestamp to remember as the last successful run: the
    current time when the task ran without raising, otherwise ``last_run``
    unchanged so that a failed task is retried on the next pass.
    """
    if interval < time.time() - last_run:
        try:
            task(runtime)
        except Exception:
            # Log and keep looping; SystemExit and KeyboardInterrupt are
            # deliberately not caught so the process can still be stopped.
            operator_logger.exception(error_message)
        else:
            return time.time()
    return last_run


def main_loop():
    """Run the operator's main loop forever.

    Each cycle waits on ``runtime.is_active_condition`` until
    ``runtime.is_active`` is true and makes sure a default runner exists.
    While the runtime stays active it then, once per second, starts
    actions and runs the periodic maintenance tasks (governor cleanup,
    active run management, runner management) whenever their respective
    interval has elapsed.
    """
    # Zero timestamps make every periodic task due on the first pass.
    last_cleanup = 0
    last_run_check = 0
    last_runner_check = 0

    while True:
        # The condition lock is held only while waiting to become active.
        with runtime.is_active_condition:
            while not runtime.is_active:
                runtime.is_active_condition.wait()

        if runtime.running_all_in_one:
            start_runner_process()
        elif not AnarchyRunner.get('default'):
            init_default_runner()

        while runtime.is_active:
            AnarchyAction.start_actions(runtime)

            last_cleanup = _run_if_due(
                AnarchyGovernor.cleanup, cleanup_interval, last_cleanup,
                'Error in AnarchyGovernor.cleanup!')
            last_run_check = _run_if_due(
                AnarchyRun.manage_active_runs, run_check_interval, last_run_check,
                'Error in AnarchyRun.manage_active_runs!')
            last_runner_check = _run_if_due(
                AnarchyRunner.manage_runners, runner_check_interval, last_runner_check,
                'Error in AnarchyRunner.manage_runners!')

            time.sleep(1)
def manage(self, runtime):
    """Act on this run according to its current runner label value.

    - ``pending`` / ``queued``: nothing to do.
    - ``failed``: set the run back to pending once ``retry_after_datetime``
      has passed.
    - ``<runner name>.<pod name>``: the run is assigned to a runner pod;
      if that pod is no longer known to the runner, the lost runner is
      handled, and if the runner itself is unknown a warning is logged.
    - anything else, including a missing (None) label: ignored.

    Args:
        runtime: Operator runtime, passed through to the helpers called
            here and used for its ``runner_label`` attribute.
    """
    runner_label_value = self.get_runner_label_value(runtime)

    if runner_label_value in ('pending', 'queued'):
        return

    if runner_label_value == 'failed':
        if self.retry_after_datetime < datetime.utcnow():
            self.set_to_pending(runtime)
        return

    # A missing label or an unrecognized state without a dot is not an
    # assignment to a runner pod.
    if not runner_label_value or '.' not in runner_label_value:
        return

    # Running, assigned to a runner pod. Split on the first dot only so a
    # value containing further dots cannot raise on unpacking.
    runner_name, _, runner_pod_name = runner_label_value.partition('.')
    runner = AnarchyRunner.get(runner_name)
    if not runner:
        operator_logger.warning(
            'Unable to find AnarchyRunner %s for AnarchyRun %s',
            runner_name, self.name)
        return

    if runner.pods.get(runner_pod_name):
        return  # FIXME - Timeout?

    # NOTE(review): passes the label *name* (runtime.runner_label), not
    # the pod name; confirm against handle_lost_runner's signature.
    self.handle_lost_runner(runtime.runner_label, runtime)