Example #1
import logging
import os

import daemon  # python-daemon, provides DaemonContext


def executor_starter(htex, logdir, endpoint_id, logging_level=logging.DEBUG):

    from funcx import set_file_logger

    stdout = open(os.path.join(logdir, "executor.{}.stdout".format(endpoint_id)), 'w')
    stderr = open(os.path.join(logdir, "executor.{}.stderr".format(endpoint_id)), 'w')

    logdir = os.path.abspath(logdir)
    with daemon.DaemonContext(stdout=stdout, stderr=stderr):
        global logger
        print("cwd: ", os.getcwd())
        logger = set_file_logger(os.path.join(logdir, "executor.{}.log".format(endpoint_id)),
                                 level=logging_level)
        htex.start()

    stdout.close()
    stderr.close()
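
Every example on this page relies on funcx's set_file_logger helper to attach a file handler to a named logger. The sketch below shows roughly what such a helper does, using only the standard logging module; the defaults and format string here are illustrative, not necessarily what funcx uses.

import logging


def set_file_logger(filename, name="funcx", level=logging.DEBUG, format_string=None):
    """Illustrative sketch: attach a FileHandler to a named logger and return it."""
    if format_string is None:
        format_string = "%(asctime)s %(name)s:%(lineno)d [%(levelname)s] %(message)s"
    logger = logging.getLogger(name)
    logger.setLevel(level)
    handler = logging.FileHandler(filename)
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter(format_string))
    logger.addHandler(handler)
    return logger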
Example #2
    def __init__(self,
                 task_q,
                 result_q,
                 executor,
                 endpoint_id,
                 logdir="forwarder",
                 logging_level=logging.INFO):
        """
        Params:
             task_q : A queue object
                Any queue object that has get primitives. This must be a thread-safe queue.

             result_q : A queue object
                Any queue object that has put primitives. This must be a thread-safe queue.

             executor: Executor object
                Executor to which tasks are to be forwarded

             endpoint_id: str
                Usually a uuid4 string that identifies the executor

             logdir: str
                Path to logdir

             logging_level : int
                Logging level as defined in the logging module. Default: logging.INFO (20)

        """
        super().__init__()
        self.logdir = logdir
        os.makedirs(self.logdir, exist_ok=True)

        global logger
        logger = set_file_logger(
            os.path.join(self.logdir, "forwarder.{}.log".format(endpoint_id)),
            level=logging_level)

        logger.info(
            "Initializing forwarder for endpoint:{}".format(endpoint_id))
        self.task_q = task_q
        self.result_q = result_q
        self.executor = executor
        self.endpoint_id = endpoint_id
        self.internal_q = Queue()
        self.client_ports = None
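
A hypothetical way to construct and start this forwarder, assuming the enclosing class is funcx's Forwarder, that plain queue.Queue objects satisfy the get/put requirements, and that an executor instance already exists. Every name below is illustrative, not taken from the original source.

import logging
import uuid
from queue import Queue

task_q = Queue()     # thread-safe queue with a get primitive
result_q = Queue()   # thread-safe queue with a put primitive

fwd = Forwarder(task_q,
                result_q,
                executor,                   # assumed: an already-configured executor
                endpoint_id=str(uuid.uuid4()),
                logdir="forwarder",
                logging_level=logging.DEBUG)
fwd.start()  # the super().__init__() call above suggests a Thread/Process subclass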
Example #3
    def __init__(self,
                 worker_id,
                 address,
                 port,
                 logdir,
                 debug=False,
                 worker_type='RAW'):

        self.worker_id = worker_id
        self.address = address
        self.port = port
        self.logdir = logdir
        self.debug = debug
        self.worker_type = worker_type
        self.serializer = FuncXSerializer()
        self.serialize = self.serializer.serialize
        self.deserialize = self.serializer.deserialize

        global logger
        logger = set_file_logger(
            '{}/funcx_worker_{}.log'.format(logdir, worker_id),
            name="worker_log",
            level=logging.DEBUG if debug else logging.INFO)

        logger.info('Initializing worker {}'.format(worker_id))
        logger.info('Worker is of type: {}'.format(worker_type))

        if debug:
            logger.debug('Debug logging enabled')

        self.context = zmq.Context()
        self.poller = zmq.Poller()
        self.identity = worker_id.encode()

        self.task_socket = self.context.socket(zmq.DEALER)
        self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

        logger.info('Trying to connect to: tcp://{}:{}'.format(
            self.address, self.port))
        self.task_socket.connect('tcp://{}:{}'.format(self.address, self.port))
        self.poller.register(self.task_socket, zmq.POLLIN)
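
Given the poller registration above, a minimal receive loop for this worker could look like the sketch below. It is illustrative only and does not reproduce the actual funcx task protocol.

    def poll_loop(self, timeout_ms=1000):
        """Illustrative sketch: poll the registered task socket and log whatever arrives."""
        while True:
            socks = dict(self.poller.poll(timeout_ms))
            if socks.get(self.task_socket) == zmq.POLLIN:
                frames = self.task_socket.recv_multipart()
                logger.debug('Received a message of {} frames'.format(len(frames)))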
Example #4
#!/usr/bin/env python3

import zmq
import time
import pickle
import logging

from funcx import set_file_logger
from funcx.executors.high_throughput.messages import Message

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logger = set_file_logger("zmq_pipe.log", name=__name__)


class CommandClient(object):
    """ CommandClient
    """
    def __init__(self, ip_address, port_range):
        """
        Parameters
        ----------

        ip_address: str
           IP address of the client (where Parsl runs)
        port_range: tuple(int, int)
           Port range for the comms between client and interchange

        """
        self.context = zmq.Context()
        self.zmq_socket = self.context.socket(zmq.DEALER)
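
The snippet is cut off here; one plausible continuation of the constructor, binding the DEALER socket to a free port within the requested range, is sketched below. This is illustrative and not necessarily the original funcx implementation.

        # Illustrative continuation: bind to some free port in port_range and remember it.
        self.port = self.zmq_socket.bind_to_random_port(
            "tcp://{}".format(ip_address),
            min_port=port_range[0],
            max_port=port_range[1])
        logger.info("CommandClient bound to tcp://{}:{}".format(ip_address, self.port))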
Example #5
def cli_run():

    parser = argparse.ArgumentParser()
    parser.add_argument("-d",
                        "--debug",
                        action='store_true',
                        help="Count of apps to launch")
    parser.add_argument("-l",
                        "--logdir",
                        default="process_worker_pool_logs",
                        help="Process worker pool log directory")
    parser.add_argument("-u",
                        "--uid",
                        default=str(uuid.uuid4()).split('-')[-1],
                        help="Unique identifier string for Manager")
    parser.add_argument("-b",
                        "--block_id",
                        default=None,
                        help="Block identifier string for Manager")
    parser.add_argument(
        "-c",
        "--cores_per_worker",
        default="1.0",
        help="Number of cores assigned to each worker process. Default=1.0")
    parser.add_argument("-t",
                        "--task_url",
                        required=True,
                        help="REQUIRED: ZMQ url for receiving tasks")
    parser.add_argument(
        "--max_workers",
        default=float('inf'),
        help="Caps the maximum workers that can be launched, default:infinity")
    parser.add_argument(
        "--hb_period",
        default=30,
        help="Heartbeat period in seconds. Uses manager default unless set")
    parser.add_argument(
        "--hb_threshold",
        default=120,
        help="Heartbeat threshold in seconds. Uses manager default unless set")
    parser.add_argument("--poll",
                        default=10,
                        help="Poll period used in milliseconds")
    parser.add_argument("--container_image",
                        default=None,
                        help="Container image identifier/path")
    parser.add_argument(
        "--mode",
        default="singularity_reuse",
        help=("Choose the mode of operation from "
              "(no_container, singularity_reuse, singularity_single_use"))
    parser.add_argument("-r",
                        "--result_url",
                        required=True,
                        help="REQUIRED: ZMQ url for posting results")

    args = parser.parse_args()

    try:
        os.makedirs(os.path.join(args.logdir, args.uid))
    except FileExistsError:
        pass

    try:
        global logger
        logger = set_file_logger(
            '{}/{}/manager.log'.format(args.logdir, args.uid),
            level=logging.DEBUG if args.debug is True else logging.INFO)

        logger.info("Python version: {}".format(sys.version))
        logger.info("Debug logging: {}".format(args.debug))
        logger.info("Log dir: {}".format(args.logdir))
        logger.info("Manager ID: {}".format(args.uid))
        logger.info("Block ID: {}".format(args.block_id))
        logger.info("cores_per_worker: {}".format(args.cores_per_worker))
        logger.info("task_url: {}".format(args.task_url))
        logger.info("result_url: {}".format(args.result_url))
        logger.info("hb_period: {}".format(args.hb_period))
        logger.info("hb_threshold: {}".format(args.hb_threshold))
        logger.info("max_workers: {}".format(args.max_workers))
        logger.info("poll_period: {}".format(args.poll))
        logger.info("mode: {}".format(args.mode))
        logger.info("container_image: {}".format(args.container_image))

        manager = Manager(task_q_url=args.task_url,
                          result_q_url=args.result_url,
                          uid=args.uid,
                          block_id=args.block_id,
                          cores_per_worker=float(args.cores_per_worker),
                          max_workers=(args.max_workers
                                       if args.max_workers == float('inf')
                                       else int(args.max_workers)),
                          heartbeat_threshold=int(args.hb_threshold),
                          heartbeat_period=int(args.hb_period),
                          logdir=args.logdir,
                          debug=args.debug,
                          mode=args.mode,
                          container_image=args.container_image,
                          poll_period=int(args.poll))
        manager.start()

    except Exception as e:
        logger.critical("process_worker_pool exiting from an exception")
        logger.exception("Caught error: {}".format(e))
        raise
    else:
        logger.info("process_worker_pool exiting")
        print("PROCESS_WORKER_POOL exiting")
Example #6
fx_serializer = FuncXSerializer()

from parsl.executors.high_throughput import interchange
from parsl.executors.errors import *
from parsl.executors.base import ParslExecutor
from parsl.dataflow.error import ConfigurationError

from parsl.utils import RepresentationMixin
from parsl.providers import LocalProvider

from funcx.executors.high_throughput import zmq_pipes
from funcx import set_file_logger

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logger = set_file_logger("executor.log", name=__name__)

BUFFER_THRESHOLD = 1024 * 1024
ITEM_THRESHOLD = 1024


class HighThroughputExecutor(ParslExecutor, RepresentationMixin):
    """Executor designed for cluster-scale

    The HighThroughputExecutor system has the following components:
      1. The HighThroughputExecutor instance which is run as part of the Parsl script.
      2. The Interchange, which acts as a load-balancing proxy between workers and Parsl.
      3. The multiprocessing based worker pool which coordinates task execution over several
         cores on a node.
      4. ZeroMQ pipes connect the HighThroughputExecutor, Interchange and the process_worker_pool.
    """

Example #7
import argparse
import os
import time
import sqlite3
from tqdm.auto import tqdm

import funcx
from coffea.processor.funcx.detail import MappedFuncXFuture
funcx.set_file_logger('funcx.log')

client = funcx.sdk.client.FuncXClient(
    funcx_service_address='https://dev.funcx.org/api/v1')

parser = argparse.ArgumentParser()
parser.add_argument("--tasks_per_core",
                    default=10,
                    help="number of cores per task")
parser.add_argument("--sleep", default=60, help="number of cores per task")
parser.add_argument("--tag",
                    default='after yadu updates (ndcrc)',
                    help="any extra info to save to DB")
parser.add_argument("--cores_per_manager", default=16)
# parser.add_argument("--endpoint", default='07ad6996-3505-4b86-b95a-aa33acf842d8')
parser.add_argument("--endpoint",
                    default='8bd5cb36-1eec-4769-b001-6b34fa8f9dc7')  # ndcrc
parser.add_argument("--batch_size", default=5000)

args = parser.parse_args()

db = sqlite3.connect('data.db')
db.execute("""create table if not exists analyses(