Example #1
0
def subscribe(actor_id, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when message arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    actor_ch = ActorMsgChannel(actor_id)
    t = threading.Thread(target=process_worker_ch, args=(worker_ch, actor_id, actor_ch))
    t.start()
    print("Worker subscribing to actor channel...")
    while keep_running:
        update_worker_status(actor_id, worker_ch.name, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        print("Received message {}. Starting actor container...".format(str(msg)))
        message = msg.pop("msg", "")
        try:
            stats, logs = execute_actor(actor_id, worker_ch, image, message, msg)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        exc_id = Execution.add_execution(actor_id, stats)
        Execution.set_logs(exc_id, logs)
Example #2
0
def subscribe(tenant, actor_id, worker_id, api_server, client_id,
              client_secret, access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when message arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    logger.debug("Top of subscribe().")
    actor_ch = ActorMsgChannel(actor_id)
    try:
        leave_containers = Config.get('workers', 'leave_containers')
    except configparser.NoOptionError:
        leave_containers = False
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        logger.info("Creating agave client.")
        verify = get_tenant_verify(tenant)
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret,
                   verify=verify)
    else:
        logger.info("Not creating agave client.")
    logger.info("Starting the process worker channel thread.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id,
                               actor_ch, ag))
    t.start()
    logger.info("Worker subscribing to actor channel.")
    update_worker_status = True
    global keep_running
    while keep_running:
        if update_worker_status:
            Worker.update_worker_status(actor_id, worker_id, READY)
            update_worker_status = False
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            logger.info("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        update_worker_status = True
        logger.info(
            "Received message {}. Starting actor container...".format(msg))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        content_type = msg['_abaco_Content_Type']
        mounts = actor.mounts
        logger.debug("actor mounts: {}".format(mounts))
        # for binary data, create a fifo in the configured directory. The configured
        # fifo_host_path_dir is equal to the fifo path in the worker container:
        fifo_host_path = None
        if content_type == 'application/octet-stream':
            try:
                fifo_host_path_dir = Config.get('workers',
                                                'fifo_host_path_dir')
            except (configparser.NoSectionError, configparser.NoOptionError):
                logger.error(
                    "No fifo_host_path configured. Cannot manage binary data.")
                Actor.set_status(
                    actor_id,
                    ERROR,
                    msg="Abaco instance not configured for binary data.")
                continue
            fifo_host_path = os.path.join(fifo_host_path_dir, worker_id,
                                          execution_id)
            logger.info("Create fifo at path: {}".format(fifo_host_path))
            try:
                os.mkfifo(fifo_host_path)
            except Exception as e:
                logger.error(
                    "Could not create fifo_path. Exception: {}".format(e))
                raise e
            # add the fifo as a mount:
            mounts.append({
                'host_path': fifo_host_path,
                'container_path': '/_abaco_binary_data',
                'format': 'ro'
            })

        # the execution object was created by the controller, but we need to add the worker id to it now that we
        # know which worker will be working on the execution.
        logger.debug("Adding worker_id to execution.")
        Execution.add_worker_id(actor_id, execution_id, worker_id)

        # privileged dictates whether the actor container runs in privileged mode and if docker daemon is mounted.
        privileged = False
        if type(actor['privileged']) == bool and actor['privileged']:
            privileged = True
        logger.debug("privileged: {}".format(privileged))

        # retrieve the default environment registered with the actor.
        environment = actor['default_environment']
        logger.debug("Actor default environment: {}".format(environment))

        # construct the user field from the actor's uid and gid:
        user = get_container_user(actor)
        logger.debug("Final user valiue: {}".format(user))
        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        logger.debug("Overlayed environment: {}".format(environment))

        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                logger.info(
                    "Refreshed the tokens. Passed {} to the environment.".
                    format(token))
            except Exception as e:
                logger.error(
                    "Got an exception trying to get an access token: {}".
                    format(e))
        else:
            logger.info(
                "Agave client `ag` is None -- not passing access token.")
        logger.info("Passing update environment: {}".format(environment))
        try:
            stats, logs, final_state, exit_code, start_time = execute_actor(
                actor_id, worker_id, worker_ch, image, message, user,
                environment, privileged, mounts, leave_containers,
                fifo_host_path)
        except DockerStartContainerError as e:
            logger.error("Got DockerStartContainerError: {}".format(e))
            Actor.set_status(actor_id, ERROR,
                             "Error executing container: {}".format(e))
            continue
        # Add the completed stats to the execution
        logger.info(
            "Actor container finished successfully. Got stats object:{}".
            format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code, start_time)
        logger.info("Added execution: {}".format(execution_id))

        # Add the logs to the execution
        Execution.set_logs(execution_id, logs)
        logger.info("Added execution logs.")

        # Update the worker's last updated and last execution fields:
        Worker.update_worker_execution_time(actor_id, worker_id)
        logger.info("worker time stamps updated.")
Example #3
0
File: worker.py Project: TACC/abaco
def subscribe(tenant,
              actor_id,
              api_server,
              client_id,
              client_secret,
              access_token,
              refresh_token,
              worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when message arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    actor_ch = ActorMsgChannel(actor_id)
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret)
    else:
        print("Not creating agave client.")
    t = threading.Thread(target=process_worker_ch, args=(tenant, worker_ch, actor_id, actor_ch, ag))
    t.start()
    print("Worker subscribing to actor channel...")
    global keep_running
    while keep_running:
        Worker.update_worker_status(actor_id, worker_ch.name, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            print("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        print("Received message {}. Starting actor container...".format(str(msg)))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        privileged = False
        if actor['privileged'] == 'TRUE':
            privileged = True
        environment = actor['default_environment']
        print("Actor default environment: {}".format(environment))
        print("Actor privileged: {}".format(privileged))
        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                print("Refreshed the tokens. Passed {} to the environment.".format(token))
            except Exception as e:
                print("Got an exception trying to get an access token: {}".format(e))
        else:
            print("Agave client `ag` is None -- not passing access token.")
        print("Passing update environment: {}".format(environment))
        try:
            stats, logs = execute_actor(actor_id, worker_ch, image, message,
                                        environment, privileged)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats)
        print("Added execution: {}".format(execution_id))
        Execution.set_logs(execution_id, logs)
        Worker.update_worker_execution_time(actor_id, worker_ch.name)
Example #4
0
def subscribe(tenant, actor_id, worker_id, api_server, client_id,
              client_secret, access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when message arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    logger.debug("Top of subscribe().")
    actor_ch = ActorMsgChannel(actor_id)
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        logger.info("Creating agave client.")
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret)
    else:
        logger.info("Not creating agave client.")
    logger.info("Starting the process worker channel thread.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id,
                               actor_ch, ag))
    t.start()
    logger.info("Worker subscribing to actor channel.")
    update_worker_status = True
    global keep_running
    while keep_running:
        if update_worker_status:
            Worker.update_worker_status(actor_id, worker_id, READY)
            update_worker_status = False
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            logger.info("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        update_worker_status = True
        logger.info(
            "Received message {}. Starting actor container...".format(msg))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']

        # the execution object was created by the controller, but we need to add the worker id to it now that we
        # know which worker will be working on the execution.
        logger.debug("Adding worker_id to execution.")
        Execution.add_worker_id(actor_id, execution_id, worker_id)

        # privileged dictates whether the actor container runs in privileged mode and if docker daemon is mounted.
        privileged = False
        if actor['privileged'] == 'TRUE':
            privileged = True
        logger.debug("privileged: {}".format(privileged))

        # retrieve the default environment registered with the actor.
        environment = actor['default_environment']
        logger.debug("Actor default environment: {}".format(environment))

        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        logger.debug("Overlayed environment: {}".format(environment))

        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                logger.info(
                    "Refreshed the tokens. Passed {} to the environment.".
                    format(token))
            except Exception as e:
                logger.error(
                    "Got an exception trying to get an access token: {}".
                    format(e))
        else:
            logger.info(
                "Agave client `ag` is None -- not passing access token.")
        logger.info("Passing update environment: {}".format(environment))
        try:
            stats, logs, final_state, exit_code = execute_actor(
                actor_id, worker_id, worker_ch, image, message, environment,
                privileged)
        except DockerStartContainerError as e:
            logger.error("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # Add the completed stats to the execution
        logger.info(
            "Actor container finished successfully. Got stats object:{}".
            format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code)
        logger.info("Added execution: {}".format(execution_id))

        # Add the logs to the execution
        Execution.set_logs(execution_id, logs)
        logger.info("Added execution logs.")

        # Update the worker's last updated and last execution fields:
        Worker.update_worker_execution_time(actor_id, worker_id)
        logger.info("worker time stamps updated.")
Example #5
0
def subscribe(tenant, actor_id, worker_id, api_server, client_id,
              client_secret, access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when message arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    actor_ch = ActorMsgChannel(actor_id)
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret)
    else:
        print("Not creating agave client.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id,
                               actor_ch, ag))
    t.start()
    print("Worker subscribing to actor channel...")
    global keep_running
    while keep_running:
        Worker.update_worker_status(actor_id, worker_id, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            print("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        print("Received message {}. Starting actor container...".format(
            str(msg)))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        Execution.add_worker_id(actor_id, execution_id, worker_id)
        privileged = False
        if actor['privileged'] == 'TRUE':
            privileged = True
        environment = actor['default_environment']
        print("Actor default environment: {}".format(environment))
        print("Actor privileged: {}".format(privileged))
        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                print("Refreshed the tokens. Passed {} to the environment.".
                      format(token))
            except Exception as e:
                print("Got an exception trying to get an access token: {}".
                      format(e))
        else:
            print("Agave client `ag` is None -- not passing access token.")
        print("Passing update environment: {}".format(environment))
        try:
            stats, logs, final_state, exit_code = execute_actor(
                actor_id, worker_id, worker_ch, image, message, environment,
                privileged)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".
              format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code)
        print("Added execution: {}".format(execution_id))
        Execution.set_logs(execution_id, logs)
        Worker.update_worker_execution_time(actor_id, worker_id)