def executor_starter(htex, logdir, endpoint_id, logging_level=logging.DEBUG):
    from funcx import set_file_logger

    stdout = open(os.path.join(logdir, "executor.{}.stdout".format(endpoint_id)), 'w')
    stderr = open(os.path.join(logdir, "executor.{}.stderr".format(endpoint_id)), 'w')

    logdir = os.path.abspath(logdir)
    with daemon.DaemonContext(stdout=stdout, stderr=stderr):
        global logger
        print("cwd: ", os.getcwd())
        logger = set_file_logger(os.path.join(logdir, "executor.{}.log".format(endpoint_id)),
                                 level=logging_level)
        htex.start()

    stdout.close()
    stderr.close()
def __init__(self, task_q, result_q, executor, endpoint_id,
             logdir="forwarder", logging_level=logging.INFO):
    """
    Params:
        task_q : A queue object
            Any queue object that has get primitives. This must be a thread-safe queue.

        result_q : A queue object
            Any queue object that has put primitives. This must be a thread-safe queue.

        executor : Executor object
            Executor to which tasks are to be forwarded.

        endpoint_id : str
            Usually a uuid4 string that identifies the executor.

        logdir : str
            Path to the log directory.

        logging_level : int
            Logging level as defined in the logging module. Default: logging.INFO (20)
    """
    super().__init__()
    self.logdir = logdir
    os.makedirs(self.logdir, exist_ok=True)

    global logger
    logger = set_file_logger(os.path.join(self.logdir, "forwarder.{}.log".format(endpoint_id)),
                             level=logging_level)

    logger.info("Initializing forwarder for endpoint:{}".format(endpoint_id))

    self.task_q = task_q
    self.result_q = result_q
    self.executor = executor
    self.endpoint_id = endpoint_id
    self.internal_q = Queue()
    self.client_ports = None
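# A minimal construction sketch (hedged): "Forwarder" is the assumed name of the
# enclosing class, and multiprocessing.Queue is one queue type that provides the
# thread-safe get/put primitives the docstring asks for. `htex` stands in for an
# already-constructed executor (e.g. the HighThroughputExecutor shown further down).
import uuid
from multiprocessing import Queue as MpQueue

task_q, result_q = MpQueue(), MpQueue()
fwd = Forwarder(task_q, result_q, executor=htex, endpoint_id=str(uuid.uuid4()))
fwd.start()  # assumed to subclass multiprocessing.Process, given the super().__init__() call above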
def __init__(self, worker_id, address, port, logdir, debug=False, worker_type='RAW'):

    self.worker_id = worker_id
    self.address = address
    self.port = port
    self.logdir = logdir
    self.debug = debug
    self.worker_type = worker_type
    self.serializer = FuncXSerializer()
    self.serialize = self.serializer.serialize
    self.deserialize = self.serializer.deserialize

    global logger
    logger = set_file_logger('{}/funcx_worker_{}.log'.format(logdir, worker_id),
                             name="worker_log",
                             level=logging.DEBUG if debug else logging.INFO)

    logger.info('Initializing worker {}'.format(worker_id))
    logger.info('Worker is of type: {}'.format(worker_type))

    if debug:
        logger.debug('Debug logging enabled')

    self.context = zmq.Context()
    self.poller = zmq.Poller()
    self.identity = worker_id.encode()

    self.task_socket = self.context.socket(zmq.DEALER)
    self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

    logger.info('Trying to connect to : tcp://{}:{}'.format(self.address, self.port))

    self.task_socket.connect('tcp://{}:{}'.format(self.address, self.port))
    self.poller.register(self.task_socket, zmq.POLLIN)
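# For context, a hedged sketch of the other end of this connection: a manager would
# typically bind a ROUTER socket and address workers by the identity set via
# zmq.IDENTITY above. This is a generic ZMQ ROUTER/DEALER illustration, not the
# actual funcX manager implementation; the port and message bytes are examples.
import zmq

ctx = zmq.Context()
router = ctx.socket(zmq.ROUTER)
router.bind("tcp://*:54000")                 # example port; the worker above connects to it
# Assuming the worker sends a first message (e.g. a registration), the ROUTER
# receives it prefixed with that worker's identity frame:
identity, message = router.recv_multipart()
router.send_multipart([identity, b"task bytes"])  # reply is routed back to that specific worker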
#!/usr/bin/env python3

import zmq
import time
import pickle
import logging

from funcx import set_file_logger
from funcx.executors.high_throughput.messages import Message

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logger = set_file_logger("zmq_pipe.log", name=__name__)


class CommandClient(object):
    """ CommandClient
    """
    def __init__(self, ip_address, port_range):
        """
        Parameters
        ----------

        ip_address: str
            IP address of the client (where Parsl runs)
        port_range: tuple(int, int)
            Port range for the comms between client and interchange
        """
        self.context = zmq.Context()
        self.zmq_socket = self.context.socket(zmq.DEALER)
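# A minimal usage sketch for CommandClient (hedged). The run() call and its reply
# semantics are assumptions based on the Parsl-style zmq_pipes design, where the
# DEALER socket sends a pickled command to the interchange and blocks for a reply;
# check this class's actual methods and the interchange's supported commands.
command_client = CommandClient("127.0.0.1", (50005, 50055))  # example address and port range
# reply = command_client.run("COMMAND_NAME")  # assumed method and command name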
def cli_run():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Enable debug logging")
    parser.add_argument("-l", "--logdir", default="process_worker_pool_logs",
                        help="Process worker pool log directory")
    parser.add_argument("-u", "--uid", default=str(uuid.uuid4()).split('-')[-1],
                        help="Unique identifier string for Manager")
    parser.add_argument("-b", "--block_id", default=None,
                        help="Block identifier string for Manager")
    parser.add_argument("-c", "--cores_per_worker", default="1.0",
                        help="Number of cores assigned to each worker process. Default=1.0")
    parser.add_argument("-t", "--task_url", required=True,
                        help="REQUIRED: ZMQ url for receiving tasks")
    parser.add_argument("--max_workers", default=float('inf'),
                        help="Caps the maximum number of workers that can be launched. Default: infinity")
    parser.add_argument("--hb_period", default=30,
                        help="Heartbeat period in seconds. Uses manager default unless set")
    parser.add_argument("--hb_threshold", default=120,
                        help="Heartbeat threshold in seconds. Uses manager default unless set")
    parser.add_argument("--poll", default=10,
                        help="Poll period in milliseconds")
    parser.add_argument("--container_image", default=None,
                        help="Container image identifier/path")
    parser.add_argument("--mode", default="singularity_reuse",
                        help=("Choose the mode of operation from "
                              "(no_container, singularity_reuse, singularity_single_use)"))
    parser.add_argument("-r", "--result_url", required=True,
                        help="REQUIRED: ZMQ url for posting results")

    args = parser.parse_args()

    try:
        os.makedirs(os.path.join(args.logdir, args.uid))
    except FileExistsError:
        pass

    try:
        global logger
        logger = set_file_logger('{}/{}/manager.log'.format(args.logdir, args.uid),
                                 level=logging.DEBUG if args.debug is True else logging.INFO)

        logger.info("Python version: {}".format(sys.version))
        logger.info("Debug logging: {}".format(args.debug))
        logger.info("Log dir: {}".format(args.logdir))
        logger.info("Manager ID: {}".format(args.uid))
        logger.info("Block ID: {}".format(args.block_id))
        logger.info("cores_per_worker: {}".format(args.cores_per_worker))
        logger.info("task_url: {}".format(args.task_url))
        logger.info("result_url: {}".format(args.result_url))
        logger.info("hb_period: {}".format(args.hb_period))
        logger.info("hb_threshold: {}".format(args.hb_threshold))
        logger.info("max_workers: {}".format(args.max_workers))
        logger.info("poll_period: {}".format(args.poll))
        logger.info("mode: {}".format(args.mode))
        logger.info("container_image: {}".format(args.container_image))

        manager = Manager(task_q_url=args.task_url,
                          result_q_url=args.result_url,
                          uid=args.uid,
                          block_id=args.block_id,
                          cores_per_worker=float(args.cores_per_worker),
                          max_workers=args.max_workers if args.max_workers == float('inf') else int(args.max_workers),
                          heartbeat_threshold=int(args.hb_threshold),
                          heartbeat_period=int(args.hb_period),
                          logdir=args.logdir,
                          debug=args.debug,
                          mode=args.mode,
                          container_image=args.container_image,
                          poll_period=int(args.poll))
        manager.start()

    except Exception as e:
        logger.critical("process_worker_pool exiting from an exception")
        logger.exception("Caught error: {}".format(e))
        raise
    else:
        logger.info("process_worker_pool exiting")
        print("PROCESS_WORKER_POOL exiting")
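# A hedged example of launching the worker pool with the flags parsed above. The
# script name "process_worker_pool.py" and the ZMQ URLs are assumptions for
# illustration; only --task_url and --result_url are required by the parser.
import subprocess

subprocess.run([
    "python", "process_worker_pool.py",
    "--task_url", "tcp://127.0.0.1:54000",    # assumed interchange task port
    "--result_url", "tcp://127.0.0.1:54001",  # assumed interchange result port
    "--mode", "no_container",
    "--debug",
])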
fx_serializer = FuncXSerializer()

from parsl.executors.high_throughput import interchange
from parsl.executors.errors import *
from parsl.executors.base import ParslExecutor
from parsl.dataflow.error import ConfigurationError

from parsl.utils import RepresentationMixin
from parsl.providers import LocalProvider

from funcx.executors.high_throughput import zmq_pipes
from funcx import set_file_logger

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logger = set_file_logger("executor.log", name=__name__)

BUFFER_THRESHOLD = 1024 * 1024
ITEM_THRESHOLD = 1024


class HighThroughputExecutor(ParslExecutor, RepresentationMixin):
    """Executor designed for cluster-scale deployments.

    The HighThroughputExecutor system has the following components:

      1. The HighThroughputExecutor instance, which runs as part of the Parsl script.
      2. The Interchange, which acts as a load-balancing proxy between workers and Parsl.
      3. The multiprocessing-based worker pool, which coordinates task execution over
         several cores on a node.
      4. ZeroMQ pipes, which connect the HighThroughputExecutor, the Interchange, and the
         process_worker_pool.
    """
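# A minimal construction sketch (hedged): the keyword names follow the Parsl-style
# HighThroughputExecutor interface and should be checked against this class's actual
# __init__ signature before use. LocalProvider is already imported above.
htex = HighThroughputExecutor(
    label="htex_local",                      # assumed label keyword
    provider=LocalProvider(init_blocks=1),   # pilot-job provider that launches the worker pool locally
    max_workers=2,                           # assumed capacity knob
)
# The executor is normally started by the endpoint machinery, e.g. the executor_starter()
# excerpt above calls htex.start() inside a daemon context.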
import argparse
import os
import time
import sqlite3

from tqdm.auto import tqdm

import funcx
from coffea.processor.funcx.detail import MappedFuncXFuture

funcx.set_file_logger('funcx.log')
client = funcx.sdk.client.FuncXClient(funcx_service_address='https://dev.funcx.org/api/v1')

parser = argparse.ArgumentParser()
parser.add_argument("--tasks_per_core", default=10, help="number of tasks per core")
parser.add_argument("--sleep", default=60, help="task sleep duration in seconds")
parser.add_argument("--tag", default='after yadu updates (ndcrc)', help="any extra info to save to DB")
parser.add_argument("--cores_per_manager", default=16)
# parser.add_argument("--endpoint", default='07ad6996-3505-4b86-b95a-aa33acf842d8')
parser.add_argument("--endpoint", default='8bd5cb36-1eec-4769-b001-6b34fa8f9dc7')  # ndcrc
parser.add_argument("--batch_size", default=5000)
args = parser.parse_args()

db = sqlite3.connect('data.db')
db.execute("""create table if not exists analyses(