Example 1
import logging
import os

import daemon  # provided by the python-daemon package


def executor_starter(htex, logdir, endpoint_id, logging_level=logging.DEBUG):

    from funcx.utils.loggers import set_file_logger

    stdout = open(
        os.path.join(logdir, "executor.{}.stdout".format(endpoint_id)), 'w')
    stderr = open(
        os.path.join(logdir, "executor.{}.stderr".format(endpoint_id)), 'w')

    logdir = os.path.abspath(logdir)
    # Detach into a daemon process; the daemon's stdout/stderr are redirected
    # to the files opened above.
    with daemon.DaemonContext(stdout=stdout, stderr=stderr):
        global logger
        print("cwd: ", os.getcwd())
        logger = set_file_logger(
            os.path.join(logdir, "executor.{}.log".format(endpoint_id)),
            level=logging_level)
        htex.start()

    stdout.close()
    stderr.close()
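
Every example on this page obtains its logger through set_file_logger from funcx.utils.loggers. For readers without the funcx source at hand, the helper below is a minimal stand-in that mirrors how the examples call it (a file path plus optional name and level keywords); it is an illustrative sketch, not the funcx implementation, and it omits the rotation keywords (max_bytes, backup_count) that appear in Example 5.

import logging


def set_file_logger(filename, name="funcx", level=logging.DEBUG,
                    format_string=None):
    # Illustrative stand-in only; default values here are assumptions.
    if format_string is None:
        format_string = ("%(asctime)s %(name)s:%(lineno)d "
                         "[%(levelname)s] %(message)s")
    logger = logging.getLogger(name)
    logger.setLevel(level)
    handler = logging.FileHandler(filename)
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter(format_string))
    logger.addHandler(handler)
    return logger
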
Example 2
    def __init__(self,
                 worker_id,
                 address,
                 port,
                 logdir,
                 debug=False,
                 worker_type='RAW'):

        self.worker_id = worker_id
        self.address = address
        self.port = port
        self.logdir = logdir
        self.debug = debug
        self.worker_type = worker_type
        self.serializer = FuncXSerializer()
        self.serialize = self.serializer.serialize
        self.deserialize = self.serializer.deserialize

        global logger
        logger = set_file_logger(
            '{}/funcx_worker_{}.log'.format(logdir, worker_id),
            name="worker_log",
            level=logging.DEBUG if debug else logging.INFO)

        logger.info('Initializing worker {}'.format(worker_id))
        logger.info('Worker is of type: {}'.format(worker_type))

        if debug:
            logger.debug('Debug logging enabled')

        self.context = zmq.Context()
        self.poller = zmq.Poller()
        self.identity = worker_id.encode()

        # DEALER socket with an explicit identity so the peer's ROUTER side
        # can address tasks to this specific worker.
        self.task_socket = self.context.socket(zmq.DEALER)
        self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

        logger.info('Trying to connect to tcp://{}:{}'.format(
            self.address, self.port))
        self.task_socket.connect('tcp://{}:{}'.format(self.address, self.port))
        self.poller.register(self.task_socket, zmq.POLLIN)
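
The snippet ends right after registering the task socket with the poller. Below is a minimal sketch of the receive side that would pair with that registration; poll_for_tasks and its worker argument are illustrative names, not part of the funcx source.

import zmq


def poll_for_tasks(worker, timeout_ms=1000):
    # Illustrative sketch: poll the registered DEALER socket and yield raw
    # message frames as they arrive. `worker` is assumed to be an instance
    # of the class whose __init__ is shown above.
    while True:
        socks = dict(worker.poller.poll(timeout=timeout_ms))
        if socks.get(worker.task_socket) == zmq.POLLIN:
            yield worker.task_socket.recv_multipart()
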
Example 3
    def __init__(self, task_q, result_q, executor, endpoint_id,
                 logdir="forwarder", logging_level=logging.INFO):
        """
        Parameters
        ----------
        task_q : queue object
            Any queue object that has get primitives. This must be a thread-safe queue.

        result_q : queue object
            Any queue object that has put primitives. This must be a thread-safe queue.

        executor : Executor object
            Executor to which tasks are to be forwarded.

        endpoint_id : str
            Usually a uuid4 string that identifies the executor.

        logdir : str
            Path to the log directory.

        logging_level : int
            Logging level as defined in the logging module. Default: logging.INFO (20)
        """
        super().__init__()
        self.logdir = logdir
        os.makedirs(self.logdir, exist_ok=True)

        global logger
        logger = set_file_logger(os.path.join(self.logdir, "forwarder.{}.log".format(endpoint_id)),
                                 level=logging_level)

        logger.info("Initializing forwarder for endpoint: {}".format(endpoint_id))
        self.task_q = task_q
        self.result_q = result_q
        self.executor = executor
        self.endpoint_id = endpoint_id
        self.internal_q = Queue()
        self.client_ports = None
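
A hypothetical instantiation is sketched below; the class name Forwarder, the executor value htex, and the endpoint id are placeholders inferred from the docstring rather than taken from the snippet.

import logging
from queue import Queue

task_q = Queue()      # thread-safe, supports get()
result_q = Queue()    # thread-safe, supports put()

# `Forwarder` and `htex` are assumed names, for illustration only.
forwarder = Forwarder(task_q, result_q,
                      executor=htex,
                      endpoint_id="endpoint-uuid-placeholder",
                      logdir="forwarder",
                      logging_level=logging.INFO)
# start() is assumed to exist if the base class initialized via
# super().__init__() is threading.Thread or multiprocessing.Process.
forwarder.start()
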
Example 4
#!/usr/bin/env python3

import zmq
import time
import pickle
import logging

from funcx.utils.loggers import set_file_logger
from funcx_endpoint.executors.high_throughput.messages import Message

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logger = set_file_logger("zmq_pipe.log", name=__name__)


class CommandClient(object):
    """ CommandClient
    """

    def __init__(self, ip_address, port_range):
        """
        Parameters
        ----------

        ip_address: str
           IP address of the client (where Parsl runs)
        port_range: tuple(int, int)
           Port range for the comms between client and interchange

        """
        self.context = zmq.Context()
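
The constructor is cut off after creating the ZMQ context. As a hedged illustration (not the original method body), a command channel like this is often completed by binding a REQ socket to a random port inside the requested range:

        # Illustrative continuation only; not taken from the funcx source.
        self.zmq_socket = self.context.socket(zmq.REQ)
        self.port = self.zmq_socket.bind_to_random_port(
            "tcp://{}".format(ip_address),
            min_port=port_range[0],
            max_port=port_range[1])
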
Example 5
import argparse
import logging
import os
import sys
import uuid

# set_file_logger and Manager are imported elsewhere in the source module.


def cli_run():

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Enable debug logging")
    parser.add_argument("-l", "--logdir", default="process_worker_pool_logs",
                        help="Process worker pool log directory")
    parser.add_argument("-u", "--uid", default=str(uuid.uuid4()).split('-')[-1],
                        help="Unique identifier string for Manager")
    parser.add_argument("-b", "--block_id", default=None,
                        help="Block identifier string for Manager")
    parser.add_argument("-c", "--cores_per_worker", default="1.0",
                        help="Number of cores assigned to each worker process. Default=1.0")
    parser.add_argument("-t", "--task_url", required=True,
                        help="REQUIRED: ZMQ url for receiving tasks")
    parser.add_argument("--max_workers", default=float('inf'),
                        help="Caps the maximum number of workers that can be launched. Default: infinity")
    parser.add_argument("--hb_period", default=30,
                        help="Heartbeat period in seconds. Uses manager default unless set")
    parser.add_argument("--hb_threshold", default=120,
                        help="Heartbeat threshold in seconds. Uses manager default unless set")
    parser.add_argument("--poll", default=10,
                        help="Poll period used in milliseconds")
    parser.add_argument("--worker_type", default=None,
                        help="Fixed worker type of manager")
    parser.add_argument("--worker_mode", default="singularity_reuse",
                        help=("Choose the mode of operation from "
                              "(no_container, singularity_reuse, singularity_single_use)"))
    parser.add_argument("--scheduler_mode", default="soft",
                        help=("Choose the scheduler mode from "
                              "(hard, soft)"))
    parser.add_argument("-r", "--result_url", required=True,
                        help="REQUIRED: ZMQ url for posting results")
    parser.add_argument("--log_max_bytes", default=256 * 1024 * 1024,
                        help="Maximum size of each log file, in bytes")
    parser.add_argument("--log_backup_count", default=1,
                        help="Number of backup log files to keep (must be non-zero)")

    args = parser.parse_args()

    try:
        os.makedirs(os.path.join(args.logdir, args.uid))
    except FileExistsError:
        pass

    try:
        global logger
        logger = set_file_logger('{}/{}/manager.log'.format(args.logdir, args.uid),
                                 name='funcx_endpoint',
                                 level=logging.DEBUG if args.debug is True else logging.INFO,
                                 max_bytes=float(args.log_max_bytes),
                                 backup_count=int(args.log_backup_count))

        logger.info("Python version: {}".format(sys.version))
        logger.info("Debug logging: {}".format(args.debug))
        logger.info("Log dir: {}".format(args.logdir))
        logger.info("Manager ID: {}".format(args.uid))
        logger.info("Block ID: {}".format(args.block_id))
        logger.info("cores_per_worker: {}".format(args.cores_per_worker))
        logger.info("task_url: {}".format(args.task_url))
        logger.info("result_url: {}".format(args.result_url))
        logger.info("hb_period: {}".format(args.hb_period))
        logger.info("hb_threshold: {}".format(args.hb_threshold))
        logger.info("max_workers: {}".format(args.max_workers))
        logger.info("poll_period: {}".format(args.poll))
        logger.info("worker_mode: {}".format(args.worker_mode))
        logger.info("scheduler_mode: {}".format(args.scheduler_mode))
        logger.info("worker_type: {}".format(args.worker_type))
        logger.info("log_max_bytes: {}".format(args.log_max_bytes))
        logger.info("log_backup_count: {}".format(args.log_backup_count))

        manager = Manager(task_q_url=args.task_url,
                          result_q_url=args.result_url,
                          uid=args.uid,
                          block_id=args.block_id,
                          cores_per_worker=float(args.cores_per_worker),
                          max_workers=args.max_workers if args.max_workers == float('inf') else int(args.max_workers),
                          heartbeat_threshold=int(args.hb_threshold),
                          heartbeat_period=int(args.hb_period),
                          logdir=args.logdir,
                          debug=args.debug,
                          worker_mode=args.worker_mode,
                          scheduler_mode=args.scheduler_mode,
                          worker_type=args.worker_type,
                          poll_period=int(args.poll))
        manager.start()

    except Exception as e:
        logger.critical("process_worker_pool exiting from an exception")
        logger.exception("Caught error: {}".format(e))
        raise
    else:
        logger.info("process_worker_pool exiting")
        print("PROCESS_WORKER_POOL exiting")
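
For reference, the entry point can be driven in-process by populating sys.argv before the call; the ZMQ URLs and log directory below are placeholders, and only the two required arguments plus a few options are shown.

import sys

# Placeholder values for illustration only.
sys.argv = [
    "process_worker_pool",
    "--task_url", "tcp://127.0.0.1:55001",
    "--result_url", "tcp://127.0.0.1:55002",
    "--logdir", "/tmp/process_worker_pool_logs",
    "--debug",
]
cli_run()
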
Example 6
import logging

from parsl.executors.high_throughput import interchange
from parsl.executors.errors import *
from parsl.executors.base import ParslExecutor
from parsl.dataflow.error import ConfigurationError

from parsl.utils import RepresentationMixin
from parsl.providers import LocalProvider

from funcx.serialize import FuncXSerializer  # assumed import path for the serializer
from funcx_endpoint.executors.high_throughput import zmq_pipes
from funcx.utils.loggers import set_file_logger

fx_serializer = FuncXSerializer()

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logger = set_file_logger("executor.log", name=__name__)

BUFFER_THRESHOLD = 1024 * 1024
ITEM_THRESHOLD = 1024


class HighThroughputExecutor(ParslExecutor, RepresentationMixin):
    """Executor designed for cluster-scale workloads.

    The HighThroughputExecutor system has the following components:
      1. The HighThroughputExecutor instance, which runs as part of the Parsl script.
      2. The Interchange, which acts as a load-balancing proxy between workers and Parsl.
      3. The multiprocessing-based worker pool, which coordinates task execution over several
         cores on a node.
      4. ZeroMQ pipes that connect the HighThroughputExecutor, the Interchange, and the process_worker_pool.