def subscribe(actor_id, worker_ch):
    """
    Run the executor worker's main loop for a single actor.

    A thread running `process_worker_ch` is started for the worker channel, then the
    actor's message channel is polled and one actor container is executed per message
    received. The loop ends once the module-level `keep_running` flag is false.

    :param actor_id: id of the actor whose message channel is consumed.
    :param worker_ch: this worker's channel; its `name` identifies the worker when
        status is reported, and the channel itself is passed on to `execute_actor`.
    :return: None
    """
    inbox = ActorMsgChannel(actor_id)
    listener = threading.Thread(target=process_worker_ch,
                                args=(worker_ch, actor_id, inbox))
    listener.start()
    print("Worker subscribing to actor channel...")
    while keep_running:
        update_worker_status(actor_id, worker_ch.name, READY)
        try:
            payload = inbox.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            # nothing arrived within the timeout; go around so `keep_running` is re-checked.
            continue
        print("Received message {}. Starting actor container...".format(str(payload)))
        # the "msg" entry is removed and passed on its own; whatever is left of the
        # payload is handed to execute_actor as its final argument.
        body = payload.pop("msg", "")
        try:
            stats, logs = execute_actor(actor_id, worker_ch, image, body, payload)
        except DockerStartContainerError as err:
            print("Got DockerStartContainerError: {}".format(str(err)))
            Actor.set_status(actor_id, ERROR)
        else:
            # record the finished execution and attach its logs.
            print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
            execution_key = Execution.add_execution(actor_id, stats)
            Execution.set_logs(execution_key, logs)
def subscribe(tenant, actor_id, worker_id, api_server, client_id, client_secret,
              access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes
    actor containers when messages arrive. Also starts a thread that processes the worker
    channel.

    :param tenant: tenant of the actor; passed to `get_tenant_verify` and to the worker
        channel thread.
    :param actor_id: id used to open the actor message channel, to look the actor up in
        `actors_store`, and for status/execution updates.
    :param worker_id: id of this worker; used for status updates, the fifo path and the
        execution record.
    :param api_server: with the four credential parameters below, used to build an Agave
        client. The client is only created when all five values are truthy.
    :param client_id: see `api_server`.
    :param client_secret: see `api_server`.
    :param access_token: see `api_server`.
    :param refresh_token: see `api_server`.
    :param worker_ch: this worker's channel; passed to the worker channel thread and to
        `execute_actor`.
    :return: None
    :raises SystemExit: via `sys.exit()` when the actor channel reports it was closed.
    :raises Exception: whatever `os.mkfifo` raises is logged and re-raised.
    """
    global keep_running
    logger.debug("Top of subscribe().")
    actor_ch = ActorMsgChannel(actor_id)
    try:
        leave_containers = Config.get('workers', 'leave_containers')
    except (configparser.NoSectionError, configparser.NoOptionError):
        # same pair of errors that the fifo_host_path_dir lookup below tolerates.
        leave_containers = False
    # NOTE(review): Config.get appears to return the raw option text -- confirm. Any
    # non-empty string (including "False") is truthy, so text is coerced here using the
    # spellings configparser itself accepts as true.
    if isinstance(leave_containers, str):
        leave_containers = leave_containers.strip().lower() in ('true', '1', 'yes', 'on')

    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        logger.info("Creating agave client.")
        verify = get_tenant_verify(tenant)
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret,
                   verify=verify)
    else:
        logger.info("Not creating agave client.")
    logger.info("Starting the process worker channel thread.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id, actor_ch, ag))
    t.start()
    logger.info("Worker subscribing to actor channel.")

    # READY is only written at start-up and after a message was handled, not on every
    # poll timeout.
    update_worker_status = True
    while keep_running:
        if update_worker_status:
            Worker.update_worker_status(actor_id, worker_id, READY)
            update_worker_status = False
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            logger.info("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        update_worker_status = True
        logger.info("Received message {}. Starting actor container...".format(msg))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        content_type = msg['_abaco_Content_Type']
        mounts = actor.mounts
        logger.debug("actor mounts: {}".format(mounts))

        # for binary data, create a fifo in the configured directory. The configured
        # fifo_host_path_dir is equal to the fifo path in the worker container:
        fifo_host_path = None
        if content_type == 'application/octet-stream':
            try:
                fifo_host_path_dir = Config.get('workers', 'fifo_host_path_dir')
            except (configparser.NoSectionError, configparser.NoOptionError):
                logger.error("No fifo_host_path configured. Cannot manage binary data.")
                Actor.set_status(actor_id, ERROR,
                                 msg="Abaco instance not configured for binary data.")
                continue
            fifo_host_path = os.path.join(fifo_host_path_dir, worker_id, execution_id)
            logger.info("Create fifo at path: {}".format(fifo_host_path))
            try:
                os.mkfifo(fifo_host_path)
            except Exception as e:
                logger.error("Could not create fifo_path. Exception: {}".format(e))
                # bare raise keeps the original traceback intact.
                raise
            # add the fifo as a mount:
            mounts.append({'host_path': fifo_host_path,
                           'container_path': '/_abaco_binary_data',
                           'format': 'ro'})

        # the execution object was created by the controller, but we need to add the worker
        # id to it now that we know which worker will be working on the execution.
        logger.debug("Adding worker_id to execution.")
        Execution.add_worker_id(actor_id, execution_id, worker_id)

        # privileged dictates whether the actor container runs in privileged mode and if
        # docker daemon is mounted. Only the boolean True enables it; truthy strings do not.
        privileged = actor['privileged'] is True
        logger.debug("privileged: {}".format(privileged))

        # retrieve the default environment registered with the actor.
        environment = actor['default_environment']
        logger.debug("Actor default environment: {}".format(environment))

        # construct the user field from the actor's uid and gid:
        user = get_container_user(actor)
        logger.debug("Final user valiue: {}".format(user))

        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        logger.debug("Overlayed environment: {}".format(environment))

        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                # the token is a credential, so its value is deliberately not logged.
                logger.info("Refreshed the tokens. Passed a fresh access token to the environment.")
            except Exception as e:
                # best effort: the container still runs, with an empty access token.
                logger.error("Got an exception trying to get an access token: {}".format(e))
        else:
            logger.info("Agave client `ag` is None -- not passing access token.")
        # log a copy with the credential masked; the container gets the real environment.
        loggable_environment = dict(environment)
        if loggable_environment['_abaco_access_token']:
            loggable_environment['_abaco_access_token'] = '<redacted>'
        logger.info("Passing update environment: {}".format(loggable_environment))

        try:
            stats, logs, final_state, exit_code, start_time = execute_actor(
                actor_id, worker_id, worker_ch, image, message, user, environment,
                privileged, mounts, leave_containers, fifo_host_path)
        except DockerStartContainerError as e:
            logger.error("Got DockerStartContainerError: {}".format(e))
            Actor.set_status(actor_id, ERROR, "Error executing container: {}".format(e))
            continue

        # Add the completed stats to the execution
        logger.info("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code, start_time)
        logger.info("Added execution: {}".format(execution_id))

        # Add the logs to the execution
        Execution.set_logs(execution_id, logs)
        logger.info("Added execution logs.")

        # Update the worker's last updated and last execution fields:
        Worker.update_worker_execution_time(actor_id, worker_id)
        logger.info("worker time stamps updated.")
def subscribe(tenant, actor_id, api_server, client_id, client_secret, access_token,
              refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes
    actor containers when messages arrive. Also starts a thread that processes the worker
    channel.

    :param tenant: passed through to the worker channel thread.
    :param actor_id: id used to open the actor message channel, to look the actor up in
        `actors_store`, and for status/execution updates.
    :param api_server: with the four credential parameters below, used to build an Agave
        client. The client is only created when all five values are truthy.
    :param client_id: see `api_server`.
    :param client_secret: see `api_server`.
    :param access_token: see `api_server`.
    :param refresh_token: see `api_server`.
    :param worker_ch: this worker's channel; its `name` identifies the worker in status
        updates, and the channel is passed to the thread and to `execute_actor`.
    :return: None
    :raises SystemExit: via `sys.exit()` when the actor channel reports it was closed.
    """
    global keep_running
    actor_ch = ActorMsgChannel(actor_id)
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret)
    else:
        print("Not creating agave client.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, actor_ch, ag))
    t.start()
    print("Worker subscribing to actor channel...")
    while keep_running:
        Worker.update_worker_status(actor_id, worker_ch.name, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            print("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        print("Received message {}. Starting actor container...".format(str(msg)))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        # in this revision the flag is compared against the string 'TRUE'.
        privileged = actor['privileged'] == 'TRUE'
        environment = actor['default_environment']
        print("Actor default environment: {}".format(environment))
        print("Actor privileged: {}".format(privileged))

        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                # the token is a credential, so its value is deliberately not printed.
                print("Refreshed the tokens. Passed a fresh access token to the environment.")
            except Exception as e:
                # best effort: the container still runs, with an empty access token.
                print("Got an exception trying to get an access token: {}".format(e))
        else:
            print("Agave client `ag` is None -- not passing access token.")
        # print a copy with the credential masked; the container gets the real environment.
        printable_environment = dict(environment)
        if printable_environment['_abaco_access_token']:
            printable_environment['_abaco_access_token'] = '<redacted>'
        print("Passing update environment: {}".format(printable_environment))
        try:
            stats, logs = execute_actor(actor_id, worker_ch, image, message,
                                        environment, privileged)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats)
        print("Added execution: {}".format(execution_id))
        Execution.set_logs(execution_id, logs)
        Worker.update_worker_execution_time(actor_id, worker_ch.name)
def subscribe(tenant, actor_id, worker_id, api_server, client_id, client_secret,
              access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes
    actor containers when messages arrive. Also starts a thread that processes the worker
    channel.

    :param tenant: passed through to the worker channel thread.
    :param actor_id: id used to open the actor message channel, to look the actor up in
        `actors_store`, and for status/execution updates.
    :param worker_id: id of this worker; used for status updates and the execution record.
    :param api_server: with the four credential parameters below, used to build an Agave
        client. The client is only created when all five values are truthy.
    :param client_id: see `api_server`.
    :param client_secret: see `api_server`.
    :param access_token: see `api_server`.
    :param refresh_token: see `api_server`.
    :param worker_ch: this worker's channel; passed to the worker channel thread and to
        `execute_actor`.
    :return: None
    :raises SystemExit: via `sys.exit()` when the actor channel reports it was closed.
    """
    global keep_running
    logger.debug("Top of subscribe().")
    actor_ch = ActorMsgChannel(actor_id)
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        logger.info("Creating agave client.")
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret)
    else:
        logger.info("Not creating agave client.")
    logger.info("Starting the process worker channel thread.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id, actor_ch, ag))
    t.start()
    logger.info("Worker subscribing to actor channel.")

    # READY is only written at start-up and after a message was handled, not on every
    # poll timeout.
    update_worker_status = True
    while keep_running:
        if update_worker_status:
            Worker.update_worker_status(actor_id, worker_id, READY)
            update_worker_status = False
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            logger.info("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        update_worker_status = True
        logger.info("Received message {}. Starting actor container...".format(msg))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']

        # the execution object was created by the controller, but we need to add the worker
        # id to it now that we know which worker will be working on the execution.
        logger.debug("Adding worker_id to execution.")
        Execution.add_worker_id(actor_id, execution_id, worker_id)

        # privileged dictates whether the actor container runs in privileged mode and if
        # docker daemon is mounted. In this revision the flag is the string 'TRUE'.
        privileged = actor['privileged'] == 'TRUE'
        logger.debug("privileged: {}".format(privileged))

        # retrieve the default environment registered with the actor.
        environment = actor['default_environment']
        logger.debug("Actor default environment: {}".format(environment))

        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        logger.debug("Overlayed environment: {}".format(environment))

        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                # the token is a credential, so its value is deliberately not logged.
                logger.info("Refreshed the tokens. Passed a fresh access token to the environment.")
            except Exception as e:
                # best effort: the container still runs, with an empty access token.
                logger.error("Got an exception trying to get an access token: {}".format(e))
        else:
            logger.info("Agave client `ag` is None -- not passing access token.")
        # log a copy with the credential masked; the container gets the real environment.
        loggable_environment = dict(environment)
        if loggable_environment['_abaco_access_token']:
            loggable_environment['_abaco_access_token'] = '<redacted>'
        logger.info("Passing update environment: {}".format(loggable_environment))

        try:
            stats, logs, final_state, exit_code = execute_actor(
                actor_id, worker_id, worker_ch, image, message, environment, privileged)
        except DockerStartContainerError as e:
            logger.error("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue

        # Add the completed stats to the execution
        logger.info("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code)
        logger.info("Added execution: {}".format(execution_id))

        # Add the logs to the execution
        Execution.set_logs(execution_id, logs)
        logger.info("Added execution logs.")

        # Update the worker's last updated and last execution fields:
        Worker.update_worker_execution_time(actor_id, worker_id)
        logger.info("worker time stamps updated.")
def subscribe(tenant, actor_id, worker_id, api_server, client_id, client_secret,
              access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes
    actor containers when messages arrive. Also starts a thread that processes the worker
    channel.

    :param tenant: passed through to the worker channel thread.
    :param actor_id: id used to open the actor message channel, to look the actor up in
        `actors_store`, and for status/execution updates.
    :param worker_id: id of this worker; used for status updates and the execution record.
    :param api_server: with the four credential parameters below, used to build an Agave
        client. The client is only created when all five values are truthy.
    :param client_id: see `api_server`.
    :param client_secret: see `api_server`.
    :param access_token: see `api_server`.
    :param refresh_token: see `api_server`.
    :param worker_ch: this worker's channel; passed to the worker channel thread and to
        `execute_actor`.
    :return: None
    :raises SystemExit: via `sys.exit()` when the actor channel reports it was closed.
    """
    global keep_running
    actor_ch = ActorMsgChannel(actor_id)
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret)
    else:
        print("Not creating agave client.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id, actor_ch, ag))
    t.start()
    print("Worker subscribing to actor channel...")
    while keep_running:
        Worker.update_worker_status(actor_id, worker_id, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            print("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        print("Received message {}. Starting actor container...".format(str(msg)))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        # record which worker picked up the execution.
        Execution.add_worker_id(actor_id, execution_id, worker_id)
        # in this revision the flag is compared against the string 'TRUE'.
        privileged = actor['privileged'] == 'TRUE'
        environment = actor['default_environment']
        print("Actor default environment: {}".format(environment))
        print("Actor privileged: {}".format(privileged))

        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                # the token is a credential, so its value is deliberately not printed.
                print("Refreshed the tokens. Passed a fresh access token to the environment.")
            except Exception as e:
                # best effort: the container still runs, with an empty access token.
                print("Got an exception trying to get an access token: {}".format(e))
        else:
            print("Agave client `ag` is None -- not passing access token.")
        # print a copy with the credential masked; the container gets the real environment.
        printable_environment = dict(environment)
        if printable_environment['_abaco_access_token']:
            printable_environment['_abaco_access_token'] = '<redacted>'
        print("Passing update environment: {}".format(printable_environment))
        try:
            stats, logs, final_state, exit_code = execute_actor(
                actor_id, worker_id, worker_ch, image, message, environment, privileged)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code)
        print("Added execution: {}".format(execution_id))
        Execution.set_logs(execution_id, logs)
        Worker.update_worker_execution_time(actor_id, worker_id)