def serialize_fx_inputs(*args, **kwargs):
    """Serialize call arguments into a single funcX payload.

    Positional and keyword arguments are serialized independently and the
    two buffers are packed into one payload string.
    """
    from funcx.serialize import FuncXSerializer

    serializer = FuncXSerializer()
    buffers = [serializer.serialize(args), serializer.serialize(kwargs)]
    return serializer.pack_buffers(buffers)
def __init__(self, gsearchresult): """ Parameters ---------- gsearchresult : dict """ # wrapper for an array of results results = gsearchresult['results'] super().__init__(results) # track data about where we are in total results self.has_next_page = gsearchresult['has_next_page'] self.offset = gsearchresult['offset'] self.total = gsearchresult['total'] # we can use this to load functions and run them self.serializer = FuncXSerializer() # Reformat for pretty printing and easy viewing self._init_columns() self.table = Texttable(max_width=120) self.table.header(self.columns) for res in self: self.table.add_row([ res[col] for col in self.columns ])
def test(endpoint_id=None, tasks=10, hostname=None, port=None):
    """Push `tasks` slow_double invocations through Redis queues and time them.

    Parameters
    ----------
    endpoint_id : str
        Target endpoint; tasks go to the `task_<endpoint_id>` queue.
    tasks : int
        Number of tasks to submit.
    hostname : str
        Redis host for both queues.
    port : int
        Unused; kept for interface compatibility.

    Returns
    -------
    float
        Seconds taken to complete all tasks.
    """
    tasks_rq = RedisQueue(f'task_{endpoint_id}', hostname)
    results_rq = RedisQueue('results', hostname)
    fxs = FuncXSerializer()
    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    # Connect before using either queue. (Previously the drain loop below
    # called results_rq.get() before connect(), relying on implicit state.)
    tasks_rq.connect()
    results_rq.connect()

    # Drain stale results so the timing below reflects only this run.
    while True:
        try:
            _ = results_rq.get(timeout=1)
        except Exception:
            print("No more results left")
            break

    start = time.time()
    for i in range(tasks):
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({'duration': 0})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        tasks_rq.put(f"0{i}", payload)

    for i in range(tasks):
        res = results_rq.get(timeout=1)
        print("Result : ", res)

    delta = time.time() - start
    print("Time to complete {} tasks: {:8.3f} s".format(tasks, delta))
    print("Throughput : {:8.3f} Tasks/s".format(tasks / delta))
    return delta
def __init__(
    self,
    worker_id,
    address,
    port,
    worker_type="RAW",
    result_size_limit=DEFAULT_RESULT_SIZE_LIMIT_B,
):
    """Set up a worker and connect it to its manager over ZMQ.

    Parameters
    ----------
    worker_id : str
        Unique worker id; also used as the ZMQ socket identity.
    address : str
        Host where the manager listens.
    port : int
        Manager task port.
    worker_type : str
        Worker/container type tag (default "RAW").
    result_size_limit : int
        Maximum allowed serialized result size, in bytes.
    """
    self.worker_id = worker_id
    self.address = address
    self.port = port
    self.worker_type = worker_type
    self.serializer = FuncXSerializer()
    # Bind shortcuts once so callers avoid an extra attribute lookup
    self.serialize = self.serializer.serialize
    self.deserialize = self.serializer.deserialize
    self.result_size_limit = result_size_limit

    log.info(f"Initializing worker {worker_id}")
    log.info(f"Worker is of type: {worker_type}")

    self.context = zmq.Context()
    self.poller = zmq.Poller()
    # The DEALER identity is how the manager routes tasks to this worker
    self.identity = worker_id.encode()

    self.task_socket = self.context.socket(zmq.DEALER)
    self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

    log.info(f"Trying to connect to : tcp://{self.address}:{self.port}")
    self.task_socket.connect(f"tcp://{self.address}:{self.port}")
    self.poller.register(self.task_socket, zmq.POLLIN)
    # Install SIGTERM handler for graceful shutdown
    signal.signal(signal.SIGTERM, self.handler)
def serialize_fx_inputs(*args, **kwargs):
    """Pack and serialize inputs.

    args and kwargs are serialized as two separate buffers, then packed
    into a single payload.
    """
    serializer = FuncXSerializer()
    packed = serializer.pack_buffers(
        [serializer.serialize(args), serializer.serialize(kwargs)]
    )
    return packed
def get_funcx_function_checksum(funcx_function):
    """
    Get the SHA256 checksum of a funcx function

    :returns sha256 hex string of a given funcx function
    """
    serialized = FuncXSerializer().serialize(funcx_function)
    return hashlib.sha256(serialized.encode()).hexdigest()
def __init__(self, task_group_id=None):
    """
    Parameters
    ==========
    task_group_id : str
        UUID indicating the task group that this batch belongs to
    """
    self.task_group_id = task_group_id
    self.fx_serializer = FuncXSerializer()
    # Tasks accumulate here until the batch is submitted
    self.tasks = []
def __init__(self, http_timeout=None, funcx_home=os.path.join('~', '.funcx'),
             force_login=False, fx_authorizer=None,
             funcx_service_address='https://dev.funcx.org/api/v1',
             **kwargs):
    """ Initialize the client

    Parameters
    ----------
    http_timeout: int
        Timeout for any call to service in seconds.
        Default is no timeout
    force_login: bool
        Whether to force a login to get new credentials.
    fx_authorizer:class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`:
        A custom authorizer instance to communicate with funcX.
        Default: ``None``, will be created.
    funcx_service_address: str
        The address of the funcX web service to communicate with.
        Default: https://dev.funcx.org/api/v1

    Keyword arguments are the same as for BaseClient.
    """
    self.ep_registration_path = 'register_endpoint_2'
    self.funcx_home = os.path.expanduser(funcx_home)

    native_client = NativeClient(client_id=self.CLIENT_ID)

    # Globus Auth scope granting access to the funcX web service
    fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"

    if not fx_authorizer:
        # NOTE(review): force_login only matters when no authorizer was
        # supplied — a passed-in fx_authorizer is always used as-is.
        native_client.login(
            requested_scopes=[fx_scope],
            no_local_server=kwargs.get("no_local_server", True),
            no_browser=kwargs.get("no_browser", True),
            refresh_tokens=kwargs.get("refresh_tokens", True),
            force=force_login)

        all_authorizers = native_client.get_authorizers_by_scope(
            requested_scopes=[fx_scope])
        fx_authorizer = all_authorizers[fx_scope]

    super(FuncXClient, self).__init__("funcX",
                                      environment='funcx',
                                      authorizer=fx_authorizer,
                                      http_timeout=http_timeout,
                                      base_url=funcx_service_address,
                                      **kwargs)
    self.fx_serializer = FuncXSerializer()
def __init__(self, dlh_authorizer=None, search_client=None,
             http_timeout=None, force_login=False, fx_authorizer=None,
             **kwargs):
    """Initialize the client

    Args:
        dlh_authorizer (:class:`GlobusAuthorizer
            <globus_sdk.authorizers.base.GlobusAuthorizer>`):
            An authorizer instance used to communicate with DLHub.
            If ``None``, will be created.
        search_client (:class:`SearchClient <globus_sdk.SearchClient>`):
            An authenticated SearchClient to communicate with Globus Search.
            If ``None``, will be created.
        http_timeout (int): Timeout for any call to service in seconds.
            (default is no timeout)
        force_login (bool): Whether to force a login to get new credentials.
            A login will always occur if ``dlh_authorizer`` or ``search_client``
            are not provided.
        no_local_server (bool): Disable spinning up a local server to
            automatically copy-paste the auth code. THIS IS REQUIRED if you
            are on a remote server. When used locally with
            no_local_server=False, the domain is localhost with a randomly
            chosen open port number. **Default**: ``True``.
        fx_authorizer (:class:`GlobusAuthorizer
            <globus_sdk.authorizers.base.GlobusAuthorizer>`):
            An authorizer instance used to communicate with funcX.
            If ``None``, will be created.
        no_browser (bool): Do not automatically open the browser for the
            Globus Auth URL. Display the URL instead and let the user
            navigate to that location manually. **Default**: ``True``.
    Keyword arguments are the same as for BaseClient.
    """
    if force_login or not dlh_authorizer or not search_client or not fx_authorizer:

        fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
        auth_res = login(services=["search", "dlhub",
                                   fx_scope],
                         app_name="DLHub_Client",
                         client_id=CLIENT_ID,
                         clear_old_tokens=force_login,
                         token_dir=_token_dir,
                         no_local_server=kwargs.get("no_local_server", True),
                         no_browser=kwargs.get("no_browser", True))
        dlh_authorizer = auth_res["dlhub"]
        fx_authorizer = auth_res[fx_scope]
        self._search_client = auth_res["search"]
        # NOTE(review): _search_client and _fx_client are only assigned on
        # this login path; if all authorizers are supplied and force_login
        # is False, these attributes appear to be left unset — confirm.
        self._fx_client = FuncXClient(force_login=True, fx_authorizer=fx_authorizer,
                                      funcx_service_address='https://funcx.org/api/v1')

    # funcX endpoint to use
    self.fx_endpoint = '86a47061-f3d9-44f0-90dc-56ddc642c000'
    # self.fx_endpoint = '2c92a06a-015d-4bfa-924c-b3d0c36bdad7'
    self.fx_serializer = FuncXSerializer()
    # Cache of functions already registered with funcX, keyed per servable
    self.fx_cache = {}
    super(DLHubClient, self).__init__("DLHub",
                                      environment='dlhub',
                                      authorizer=dlh_authorizer,
                                      http_timeout=http_timeout,
                                      base_url=DLHUB_SERVICE_ADDRESS,
                                      **kwargs)
def dont_run_yet(endpoint_id=None, tasks=10, duration=1, hostname=None):
    """Benchmark task submission via the Redis pub/sub channel.

    Serializes slow_double once, publishes `tasks` Task objects to the
    endpoint's channel, then polls each task back and tries to
    deserialize its result.

    Returns total elapsed seconds.
    """
    # tasks_rq = EndpointQueue(f'task_{endpoint_id}', hostname)
    tasks_channel = RedisPubSub(hostname)
    tasks_channel.connect()
    redis_client = tasks_channel.redis_client
    # Fail fast if Redis is unreachable
    redis_client.ping()

    fxs = FuncXSerializer()
    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    start = time.time()
    task_ids = {}
    for i in range(tasks):
        # Pace submissions by the expected task duration
        time.sleep(duration)
        task_id = str(uuid.uuid4())
        print("Task_id : ", task_id)
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({"duration": duration})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        container_id = "RAW"
        task = Task(redis_client, task_id, container_id,
                    serializer="", payload=payload)
        task.endpoint = endpoint_id
        task.status = TaskState.WAITING_FOR_EP
        # tasks_rq.enqueue(task)
        tasks_channel.put(endpoint_id, task)
        task_ids[i] = task_id

    d1 = time.time() - start
    print(f"Time to launch {tasks} tasks: {d1:8.3f} s")

    delay = 5
    print(f"Sleeping {delay} seconds")
    time.sleep(delay)
    print(f"Launched {tasks} tasks")

    for i in range(tasks):
        task_id = task_ids[i]
        print("Task_id : ", task_id)
        task = Task.from_id(redis_client, task_id)
        # TODO: wait for task result...
        # Fixed sleep stands in for real completion polling
        time.sleep(duration)
        try:
            result = fxs.deserialize(task.result)
            print(f"Result : {result}")
        except Exception as e:
            print(f"Task failed with exception:{e}")
            pass

    delta = time.time() - start
    print(f"Time to complete {tasks} tasks: {delta:8.3f} s")
    print(f"Throughput : {tasks / delta:8.3f} Tasks/s")
    return delta
class FuncXFuture(Future):
    """Future that resolves by polling the funcX service for a task result.

    NOTE(review): both class attributes are created at class-definition
    (import) time — FuncXClient() may trigger an interactive login there.
    """
    client = FuncXClient()
    serializer = FuncXSerializer()

    def __init__(self, task_id, poll_period=1):
        """
        Parameters
        ----------
        task_id : str
            funcX task id to poll.
        poll_period : int
            Seconds to sleep between polls.
        """
        super().__init__()
        self.task_id = task_id
        self.poll_period = poll_period
        # Cached deserialized result; None means "not yet retrieved".
        # NOTE(review): a task whose legitimate result is None would never
        # register as done under this scheme.
        self.__result = None
        self.submitted = time.time()

    def done(self):
        """Poll once; return True iff the result has been retrieved.

        Side effects: sleeps poll_period when still PENDING, caches the
        result on success, re-raises a deserialized remote exception.
        """
        if self.__result is not None:
            return True
        try:
            data = FuncXFuture.client.get_task_status(self.task_id)
        except Exception:
            # Treat transient service errors as "not done yet"
            return False
        if 'status' in data and data['status'] == 'PENDING':
            time.sleep(
                self.poll_period)  # needed to not overwhelm the FuncX server
            return False
        elif 'result' in data:
            self.__result = FuncXFuture.serializer.deserialize(data['result'])
            self.returned = time.time()  # FIXME AW benchmarking
            self.connected_managers = os.environ.get('connected_managers', -1)
            return True
        elif 'exception' in data:
            # Remote failure: rebuild and re-raise the original exception
            e = FuncXFuture.serializer.deserialize(data['exception'])
            e.reraise()
        else:
            raise NotImplementedError(
                'task {} is neither pending or finished: {}'.format(
                    self.task_id, str(data)))

    def result(self, timeout=None):
        """Block until the result is available.

        Parameters
        ----------
        timeout : float or None
            Approximate seconds to wait (decremented in poll_period steps);
            None waits forever. Raises TimeoutError when exceeded.
        """
        if self.__result is not None:
            return self.__result
        while True:
            if self.done():
                break
            else:
                time.sleep(self.poll_period)
                if timeout is not None:
                    timeout -= self.poll_period
                    if timeout < 0:
                        raise TimeoutError
        return self.__result
def deserialize():
    """Return the deserialized result """
    serializer = FuncXSerializer()

    # Fallback payload returned when deserialization fails
    failure = {'error': 'Failed to deserialize result'}
    try:
        payload = request.json
        response = jsonify(serializer.deserialize(payload))
    except Exception as exc:
        print(exc)
        return jsonify(failure), 500

    return response, 200
def dont_run_yet(endpoint_id=None, tasks=10, duration=1, hostname=None):
    """Benchmark task submission through an EndpointQueue.

    Enqueues `tasks` slow_double tasks, then reads each Task back after a
    fixed sleep and prints its raw result. Returns total elapsed seconds.
    """
    tasks_rq = EndpointQueue(f"task_{endpoint_id}", hostname)
    fxs = FuncXSerializer()
    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    tasks_rq.connect()

    start = time.time()
    task_ids = {}
    for i in range(tasks):
        task_id = str(uuid.uuid4())
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({"duration": duration})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        container_id = "RAW"
        task = Task(tasks_rq.redis_client, task_id, container_id,
                    serializer="", payload=payload)
        tasks_rq.enqueue(task)
        task_ids[i] = task_id

    d1 = time.time() - start
    print(f"Time to launch {tasks} tasks: {d1:8.3f} s")

    print(f"Launched {tasks} tasks")
    for i in range(tasks):
        task_id = task_ids[i]
        task = Task.from_id(tasks_rq.redis_client, task_id)
        # TODO: wait for task result...
        # Fixed sleep stands in for real completion polling
        time.sleep(2)
        print(f"Result: {task.result}")
        # res = results_rq.get('result', timeout=300)
        # print("Result : ", res)

    delta = time.time() - start
    print(f"Time to complete {tasks} tasks: {delta:8.3f} s")
    print(f"Throughput : {tasks / delta:8.3f} Tasks/s")
    return delta
def __init__(self, http_timeout=None, funcx_home=os.path.join('~', '.funcx'),
             force_login=False, fx_authorizer=None, **kwargs):
    """ Initialize the client

    Parameters
    ----------
    http_timeout: int
        Timeout for any call to service in seconds.
        Default is no timeout
    force_login: bool
        Whether to force a login to get new credentials.
    fx_authorizer:class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`:
        A custom authorizer instance to communicate with funcX.
        Default: ``None``, will be created.

    Keyword arguments are the same as for BaseClient.
    """
    self.ep_registration_path = 'register_endpoint_2'
    self.funcx_home = os.path.expanduser(funcx_home)

    if force_login or not fx_authorizer:
        # Interactive login for the funcX scope only
        fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
        auth_res = login(services=[fx_scope],
                         app_name="funcX_Client",
                         client_id=self.CLIENT_ID,
                         clear_old_tokens=force_login,
                         token_dir=self.TOKEN_DIR)
        # NOTE(review): variable is named dlh_authorizer but holds the
        # funcX service authorizer; it shadows any passed-in fx_authorizer
        # on this path.
        dlh_authorizer = auth_res['funcx_service']

    super(FuncXClient, self).__init__("funcX",
                                      environment='funcx',
                                      authorizer=dlh_authorizer,
                                      http_timeout=http_timeout,
                                      base_url=self.FUNCX_SERVICE_ADDRESS,
                                      **kwargs)
    self.fx_serializer = FuncXSerializer()
def __init__(self, worker_id, address, port, logdir, debug=False,
             worker_type='RAW'):
    """Set up a worker with file logging and connect it to its manager.

    Parameters
    ----------
    worker_id : str
        Unique worker id; also used as the ZMQ socket identity.
    address : str
        Host where the manager listens.
    port : int
        Manager task port.
    logdir : str
        Directory for this worker's log file.
    debug : bool
        Enable DEBUG-level logging.
    worker_type : str
        Worker/container type tag (default 'RAW').
    """
    self.worker_id = worker_id
    self.address = address
    self.port = port
    self.logdir = logdir
    self.debug = debug
    self.worker_type = worker_type
    self.serializer = FuncXSerializer()
    # Bind shortcuts once so callers avoid an extra attribute lookup
    self.serialize = self.serializer.serialize
    self.deserialize = self.serializer.deserialize

    # Module-level logger is rebound to a per-worker file logger
    global logger
    logger = set_file_logger(
        '{}/funcx_worker_{}.log'.format(logdir, worker_id),
        name="worker_log",
        level=logging.DEBUG if debug else logging.INFO)

    logger.info('Initializing worker {}'.format(worker_id))
    logger.info('Worker is of type: {}'.format(worker_type))

    if debug:
        logger.debug('Debug logging enabled')

    self.context = zmq.Context()
    self.poller = zmq.Poller()
    # The DEALER identity is how the manager routes tasks to this worker
    self.identity = worker_id.encode()

    self.task_socket = self.context.socket(zmq.DEALER)
    self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

    logger.info('Trying to connect to : tcp://{}:{}'.format(
        self.address, self.port))
    self.task_socket.connect('tcp://{}:{}'.format(self.address, self.port))
    self.poller.register(self.task_socket, zmq.POLLIN)
def server(port=0, host="", debug=False, datasize=102400):
    """Off-process deserialization checker.

    Binds a TCP socket (port 0 lets the OS choose; the bound port is
    printed so the parent can read it), accepts a single connection, and
    answers each pickled message: "PING" -> ("PONG", None); anything else
    is treated as a serialized function and probed with fxs.deserialize,
    replying ("SUCCESS", None) or ("DESERIALIZE_FAIL", <error str>).

    Exits the process on any unexpected failure.
    """
    try:
        from funcx.serialize import FuncXSerializer

        # use_offprocess_checker=False: this IS the checker — avoid recursion
        fxs = FuncXSerializer(use_offprocess_checker=False)

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind((host, port))
            bound_port = s.getsockname()[1]
            # Parent process parses this line to learn the chosen port
            print(f"BINDING TO:{bound_port}", flush=True)
            s.listen(1)
            conn, addr = s.accept()
            # we only expect one incoming connection here.
            with conn:
                while True:
                    b_msg = conn.recv(datasize)
                    if not b_msg:
                        # Peer closed the connection
                        print("Exiting")
                        return
                    msg = pickle.loads(b_msg)
                    if msg == "PING":
                        ret_value = ("PONG", None)
                    else:
                        try:
                            method = fxs.deserialize(msg)  # noqa
                            del method
                        except Exception as e:
                            ret_value = ("DESERIALIZE_FAIL", str(e))
                        else:
                            ret_value = ("SUCCESS", None)

                    ret_buf = pickle.dumps(ret_value)
                    conn.sendall(ret_buf)
    except Exception as e:
        print(f"OFF_PROCESS_CHECKER FAILURE, Exception:{e}")
        sys.exit()
def test(endpoint_id=None, tasks=10, duration=1, hostname=None, port=None):
    """Push `tasks` slow_double invocations through Redis queues and time them.

    Tasks alternate between "odd" and "even" container ids. Returns total
    elapsed seconds. `port` is accepted but not used here.
    """
    tasks_rq = RedisQueue(f'task_{endpoint_id}', hostname)
    results_rq = RedisQueue('results', hostname)
    fxs = FuncXSerializer()
    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    tasks_rq.connect()
    results_rq.connect()

    # Drain stale results so the timing below reflects only this run
    while True:
        try:
            _ = results_rq.get(timeout=1)
        except Exception:
            print("No more results left")
            break

    start = time.time()
    for i in range(tasks):
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({'duration': duration})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        # Alternate container ids to exercise multi-container routing
        container_id = "odd" if i % 2 else "even"
        tasks_rq.put(f"0{i};{container_id}", payload)

    d1 = time.time() - start
    print("Time to launch {} tasks: {:8.3f} s".format(tasks, d1))

    print(f"Launched {tasks} tasks")
    for i in range(tasks):
        _ = results_rq.get(timeout=300)
        # print("Result : ", res)

    delta = time.time() - start
    print("Time to complete {} tasks: {:8.3f} s".format(tasks, delta))
    print("Throughput : {:8.3f} Tasks/s".format(tasks / delta))
    return delta
def __init__(self, http_timeout=None, funcx_home=os.path.join('~', '.funcx'),
             force_login=False, fx_authorizer=None,
             funcx_service_address='https://api.funcx.org/v1', **kwargs):
    """ Initialize the client

    Parameters
    ----------
    http_timeout: int
        Timeout for any call to service in seconds.
        Default is no timeout
    force_login: bool
        Whether to force a login to get new credentials.
    fx_authorizer:class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`:
        A custom authorizer instance to communicate with funcX.
        Default: ``None``, will be created.
    funcx_service_address: str
        The address of the funcX web service to communicate with.
        Default: https://api.funcx.org/v1

    Keyword arguments are the same as for BaseClient.
    """
    # Cache of registered functions, keyed by function id
    self.func_table = {}
    self.ep_registration_path = 'register_endpoint_2'
    self.funcx_home = os.path.expanduser(funcx_home)

    if not os.path.exists(self.TOKEN_DIR):
        os.makedirs(self.TOKEN_DIR)

    tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME)
    self.native_client = NativeClient(
        client_id=self.CLIENT_ID,
        app_name="FuncX SDK",
        token_storage=JSONTokenStorage(tokens_filename))

    # TODO: if fx_authorizer is given, we still need to get an authorizer for Search
    fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
    search_scope = "urn:globus:auth:scope:search.api.globus.org:all"
    scopes = [fx_scope, search_scope, "openid"]

    search_authorizer = None

    if not fx_authorizer:
        self.native_client.login(
            requested_scopes=scopes,
            no_local_server=kwargs.get("no_local_server", True),
            no_browser=kwargs.get("no_browser", True),
            refresh_tokens=kwargs.get("refresh_tokens", True),
            force=force_login)

        all_authorizers = self.native_client.get_authorizers_by_scope(
            requested_scopes=scopes)
        fx_authorizer = all_authorizers[fx_scope]
        search_authorizer = all_authorizers[search_scope]
        openid_authorizer = all_authorizers["openid"]
        # NOTE(review): openid_authorizer is only bound on this login path;
        # its use below would fail if fx_authorizer was supplied — confirm.

    super(FuncXClient, self).__init__("funcX",
                                      environment='funcx',
                                      authorizer=fx_authorizer,
                                      http_timeout=http_timeout,
                                      base_url=funcx_service_address,
                                      **kwargs)
    self.fx_serializer = FuncXSerializer()

    # Resolve the user's identity to scope search results to their records
    authclient = AuthClient(authorizer=openid_authorizer)
    user_info = authclient.oauth2_userinfo()
    self.searcher = SearchHelper(authorizer=search_authorizer,
                                 owner_uuid=user_info['sub'])
    self.funcx_service_address = funcx_service_address
def __init__(self, task_q, result_q, executor, endpoint_id,
             heartbeat_threshold=60, endpoint_addr=None, redis_address=None,
             logdir="forwarder_logs", logging_level=logging.INFO,
             max_heartbeats_missed=2):
    """
    Parameters
    ----------
    task_q : A queue object
        Any queue object that has get primitives. This must be a thread-safe queue.

    result_q : A queue object
        Any queue object that has put primitives. This must be a thread-safe queue.

    executor: Executor object
        Executor to which tasks are to be forwarded

    endpoint_id: str
        Usually a uuid4 as string that identifies the executor

    endpoint_addr: str
        Endpoint ip address as a string

    heartbeat_threshold : int
        Heartbeat threshold in seconds

    logdir: str
        Path to logdir

    logging_level : int
        Logging level as defined in the logging module. Default: logging.INFO (20)

    max_heartbeats_missed : int
        The maximum heartbeats missed before the forwarder terminates
    """
    super().__init__()
    self.logdir = logdir
    os.makedirs(self.logdir, exist_ok=True)

    # Rebind the module-level logger to a per-endpoint file logger,
    # but only once per endpoint (skip if handlers already attached)
    global logger
    logger = logging.getLogger(endpoint_id)

    if len(logger.handlers) == 0:
        logger = set_file_logger(os.path.join(
            self.logdir, "forwarder.{}.log".format(endpoint_id)),
            name=endpoint_id,
            level=logging_level)

    logger.info(
        "Initializing forwarder for endpoint:{}".format(endpoint_id))
    logger.info("Log level set to {}".format(loglevels[logging_level]))

    self.endpoint_addr = endpoint_addr
    self.task_q = task_q
    self.result_q = result_q
    self.heartbeat_threshold = heartbeat_threshold
    self.executor = executor
    self.endpoint_id = endpoint_id
    # NOTE(review): endpoint_addr is assigned twice (also above); harmless
    self.endpoint_addr = endpoint_addr
    self.redis_address = redis_address
    self.internal_q = Queue()
    self.client_ports = None
    self.fx_serializer = FuncXSerializer()
    self.kill_event = threading.Event()
    self.max_heartbeats_missed = max_heartbeats_missed
def __init__(self,
             config,
             client_address="127.0.0.1",
             interchange_address="127.0.0.1",
             client_ports=(50055, 50056, 50057),
             worker_ports=None,
             worker_port_range=(54000, 55000),
             cores_per_worker=1.0,
             worker_debug=False,
             launch_cmd=None,
             heartbeat_threshold=60,
             logdir=".",
             logging_level=logging.INFO,
             poll_period=10,
             endpoint_id=None,
             suppress_failure=False,
             max_heartbeats_missed=2
             ):
    """
    Parameters
    ----------
    config : funcx.Config object
        Funcx config object that describes how compute should be provisioned

    client_address : str
        The ip address at which the parsl client can be reached. Default: "127.0.0.1"

    interchange_address : str
        The ip address at which the workers will be able to reach the Interchange. Default: "127.0.0.1"

    client_ports : triple(int, int, int)
        The ports at which the client can be reached

    launch_cmd : str
        TODO : update

    worker_ports : tuple(int, int)
        The specific two ports at which workers will connect to the Interchange. Default: None

    worker_port_range : tuple(int, int)
        The interchange picks ports at random from the range which will be used by workers.
        This is overridden when the worker_ports option is set. Default: (54000, 55000)

    cores_per_worker : float
        cores to be assigned to each worker. Oversubscription is possible
        by setting cores_per_worker < 1.0. Default=1

    worker_debug : Bool
        Enables worker debug logging.

    heartbeat_threshold : int
        Number of seconds since the last heartbeat after which worker is considered lost.

    logdir : str
        Parsl log directory paths. Logs and temp files go here. Default: '.'

    logging_level : int
        Logging level as defined in the logging module. Default: logging.INFO (20)

    endpoint_id : str
        Identity string that identifies the endpoint to the broker

    poll_period : int
        The main thread polling period, in milliseconds. Default: 10ms

    suppress_failure : Bool
        When set to True, the interchange will attempt to suppress failures. Default: False

    max_heartbeats_missed : int
        Number of heartbeats missed before setting kill_event
    """
    self.logdir = logdir
    try:
        os.makedirs(self.logdir)
    except FileExistsError:
        pass

    start_file_logger("{}/interchange.log".format(self.logdir),
                      level=logging_level)
    logger.info("logger location {}".format(logger.handlers))
    logger.info("Initializing Interchange process with Endpoint ID: {}".format(endpoint_id))
    self.config = config
    logger.info("Got config : {}".format(config))

    self.strategy = self.config.strategy
    self.client_address = client_address
    self.interchange_address = interchange_address
    self.suppress_failure = suppress_failure
    self.poll_period = poll_period

    self.serializer = FuncXSerializer()
    logger.info("Attempting connection to client at {} on ports: {},{},{}".format(
        client_address, client_ports[0], client_ports[1], client_ports[2]))

    # Three DEALER sockets back to the client: tasks in, results out, commands
    self.context = zmq.Context()
    self.task_incoming = self.context.socket(zmq.DEALER)
    self.task_incoming.set_hwm(0)
    self.task_incoming.RCVTIMEO = 10  # in milliseconds
    logger.info("Task incoming on tcp://{}:{}".format(client_address, client_ports[0]))
    self.task_incoming.connect("tcp://{}:{}".format(client_address, client_ports[0]))

    self.results_outgoing = self.context.socket(zmq.DEALER)
    self.results_outgoing.set_hwm(0)
    logger.info("Results outgoing on tcp://{}:{}".format(client_address, client_ports[1]))
    self.results_outgoing.connect("tcp://{}:{}".format(client_address, client_ports[1]))

    self.command_channel = self.context.socket(zmq.DEALER)
    self.command_channel.RCVTIMEO = 1000  # in milliseconds
    # self.command_channel.set_hwm(0)
    logger.info("Command channel on tcp://{}:{}".format(client_address, client_ports[2]))
    self.command_channel.connect("tcp://{}:{}".format(client_address, client_ports[2]))
    logger.info("Connected to client")

    # Per-container pending task queues
    self.pending_task_queue = {}
    self.containers = {}
    self.total_pending_task_count = 0
    self.fxs = FuncXClient()

    logger.info("Interchange address is {}".format(self.interchange_address))
    self.worker_ports = worker_ports
    self.worker_port_range = worker_port_range

    # ROUTER sockets facing the workers/managers
    self.task_outgoing = self.context.socket(zmq.ROUTER)
    self.task_outgoing.set_hwm(0)
    self.results_incoming = self.context.socket(zmq.ROUTER)
    self.results_incoming.set_hwm(0)

    # initalize the last heartbeat time to start the loop
    self.last_heartbeat = time.time()
    self.max_heartbeats_missed = max_heartbeats_missed

    self.endpoint_id = endpoint_id
    if self.worker_ports:
        # Fixed ports supplied by the caller
        self.worker_task_port = self.worker_ports[0]
        self.worker_result_port = self.worker_ports[1]

        self.task_outgoing.bind("tcp://*:{}".format(self.worker_task_port))
        self.results_incoming.bind("tcp://*:{}".format(self.worker_result_port))

    else:
        # Pick free ports at random from the configured range
        self.worker_task_port = self.task_outgoing.bind_to_random_port('tcp://*',
                                                                       min_port=worker_port_range[0],
                                                                       max_port=worker_port_range[1],
                                                                       max_tries=100)
        self.worker_result_port = self.results_incoming.bind_to_random_port('tcp://*',
                                                                            min_port=worker_port_range[0],
                                                                            max_port=worker_port_range[1],
                                                                            max_tries=100)

    logger.info("Bound to ports {},{} for incoming worker connections".format(
        self.worker_task_port, self.worker_result_port))

    self._ready_manager_queue = {}

    self.heartbeat_threshold = heartbeat_threshold
    self.blocks = {}  # type: Dict[str, str]
    self.block_id_map = {}
    self.launch_cmd = launch_cmd
    self.last_core_hr_counter = 0
    if not launch_cmd:
        # {{block_id}} and {{worker_type}} are left as literal placeholders
        # for a second .format() pass at launch time
        self.launch_cmd = ("funcx-manager {debug} {max_workers} "
                           "-c {cores_per_worker} "
                           "--poll {poll_period} "
                           "--task_url={task_url} "
                           "--result_url={result_url} "
                           "--logdir={logdir} "
                           "--block_id={{block_id}} "
                           "--hb_period={heartbeat_period} "
                           "--hb_threshold={heartbeat_threshold} "
                           "--worker_mode={worker_mode} "
                           "--scheduler_mode={scheduler_mode} "
                           "--worker_type={{worker_type}} ")

    self.current_platform = {'parsl_v': PARSL_VERSION,
                             'python_v': "{}.{}.{}".format(sys.version_info.major,
                                                           sys.version_info.minor,
                                                           sys.version_info.micro),
                             'os': platform.system(),
                             'hname': platform.node(),
                             'dir': os.getcwd()}

    logger.info("Platform info: {}".format(self.current_platform))
    self._block_counter = 0
    try:
        self.load_config()
    except Exception as e:
        logger.exception("Caught exception")
        raise
def _get_packed_code(
    func: t.Callable, serializer: t.Optional[FuncXSerializer] = None
) -> str:
    """Serialize *func* and pack it into a single buffer string.

    A fresh FuncXSerializer is created when none is supplied.
    """
    if serializer is None:
        serializer = FuncXSerializer()
    return serializer.pack_buffers([serializer.serialize(func)])
def __init__(self, funcx_eid, mdata_store_path,
             source_eid=None, dest_eid=None, gdrive_token=None,
             extractor_finder='gdrive',
             prefetch_remote=False, data_prefetch_path=None,
             dataset_mdata=None):
    """Metadata-extraction orchestrator over funcX.

    Sets up extractor mappings, funcX request bookkeeping, work queues,
    and background threads for validation, family fetching, and batched
    submission to funcX.

    NOTE(review): this method reads self.headers and self.crawl_type,
    which are not assigned here — presumably set by a base class or
    subclass before this runs; verify.
    """
    prefetch_remote = False  # TODO -- fix this. (forces prefetching off)

    # self.crawl_type = 'from_file'
    self.write_cpe = False
    self.dataset_mdata = dataset_mdata

    # Timing bookkeeping for the crawl
    self.t_crawl_start = time.time()
    self.t_send_batch = 0
    self.t_transfer = 0

    self.prefetch_remote = prefetch_remote
    self.data_prefetch_path = data_prefetch_path

    self.extractor_finder = extractor_finder

    self.funcx_eid = funcx_eid
    # Map file-type label -> extractor instance
    self.func_dict = {
        "image": xtract_images.ImageExtractor(),
        "images": xtract_images.ImageExtractor(),
        "tabular": xtract_tabular.TabularExtractor(),
        "text": xtract_keyword.KeywordExtractor(),
        "matio": xtract_matio.MatioExtractor()
    }

    self.fx_ser = FuncXSerializer()

    self.send_status = "STARTING"
    self.poll_status = "STARTING"
    self.commit_completed = False

    self.source_endpoint = source_eid
    self.dest_endpoint = dest_eid
    self.gdrive_token = gdrive_token

    self.num_families_fetched = 0
    self.get_families_start_time = time.time()
    self.last_checked = time.time()

    self.pre_launch_counter = 0

    self.success_returns = 0
    self.failed_returns = 0

    self.to_send_queue = Queue()

    self.poll_gap_s = 5
    self.get_families_status = "STARTING"

    # Task queues bucketed by state
    self.task_dict = {
        "active": Queue(),
        "pending": Queue(),
        "failed": Queue()
    }

    # Batch size we use to send tasks to funcx. (and the subbatch size)
    self.map_size = 8
    self.fx_batch_size = 16
    self.fx_task_sublist_size = 500

    # Want to store attributes about funcX requests/responses.
    self.tot_fx_send_payload_size = 0
    self.tot_fx_poll_payload_size = 0
    self.tot_fx_poll_result_size = 0
    self.num_send_reqs = 0
    self.num_poll_reqs = 0
    self.t_first_funcx_invoke = None
    self.max_result_size = 0

    # Number (current and max) of number of tasks sent to funcX for extraction.
    self.max_extracting_tasks = 5
    self.num_extracting_tasks = 0

    self.max_pre_prefetch = 15000  # TODO: Integrate this to actually fix timing bug.

    self.status_things = Queue()

    # If this is turned on, should mean that we hit our local task maximum
    # and don't want to pull down new work...
    self.pause_q_consume = False

    self.file_count = 0
    self.current_batch = []
    self.extract_end = None

    self.mdata_store_path = mdata_store_path

    self.n_fams_transferred = 0

    self.prefetcher_tid = None
    self.prefetch_status = None

    # Auth headers for funcX (and optionally Petrel) requests;
    # self.headers must already hold the raw tokens
    self.fx_headers = {
        "Authorization": f"Bearer {self.headers['FuncX']}",
        'FuncX': self.headers['FuncX']
    }
    self.family_headers = None
    if 'Petrel' in self.headers:
        self.fx_headers['Petrel'] = self.headers['Petrel']
        self.family_headers = {
            'Authorization': f"Bearer {self.headers['Petrel']}",
            'Transfer': self.headers['Transfer'],
            'FuncX': self.headers['FuncX'],
            'Petrel': self.headers['Petrel']
        }

    self.logger = logging.getLogger(__name__)
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
    handler.setFormatter(formatter)
    self.logger.addHandler(handler)
    self.logger.setLevel(
        logging.INFO)  # TODO: let's make this configurable.

    self.families_to_process = Queue()
    self.to_validate_q = Queue()

    self.sqs_push_threads = {}
    self.thr_ls = []
    self.commit_threads = 1
    self.get_family_threads = 20

    if self.prefetch_remote:
        # NOTE(review): dead branch — prefetch_remote is forced False at
        # the top of this method, and self.prefetcher is never assigned
        # here; this would raise AttributeError if re-enabled as-is.
        self.logger.info("Launching prefetcher...")

        self.logger.info("Prefetcher successfully launched!")
        prefetch_thread = threading.Thread(
            target=self.prefetcher.main_poller_loop, args=())
        prefetch_thread.start()

    # Validation/commit worker threads
    for i in range(0, self.commit_threads):
        thr = threading.Thread(target=self.validate_enqueue_loop,
                               args=(i, ))
        self.thr_ls.append(thr)
        thr.start()
        self.sqs_push_threads[i] = True
    self.logger.info(
        f"Successfully started {len(self.sqs_push_threads)} SQS push threads!"
    )

    if self.crawl_type != 'from_file':
        # Family-fetching consumer threads
        for i in range(0, self.get_family_threads):
            self.logger.info(
                f"Attempting to start get_next_families() as its own thread [{i}]... "
            )
            consumer_thr = threading.Thread(
                target=self.get_next_families_loop, args=())
            consumer_thr.start()
            print(
                f"Successfully started the get_next_families() thread number {i} "
            )
    else:
        print("ATTEMPTING TO LAUNCH **FILE** CRAWL THREAD. ")
        file_crawl_thr = threading.Thread(
            target=self.read_next_families_from_file_loop, args=())
        file_crawl_thr.start()
        print("Successfully started the **FILE** CRAWL thread!")

    # Sub-batch sender threads
    for i in range(0, 15):
        fx_push_thr = threading.Thread(target=self.send_subbatch_thread,
                                       args=())
        fx_push_thr.start()
    # NOTE(review): missing f-prefix below — "{i}" prints literally
    print("Successfully spun up {i} send threads!")

    # Truncate the CPE timing file for this run
    with open("cpe_times.csv", 'w') as f:
        f.close()
def __init__(self, endpoints, strategy='round-robin', runtime_predictor='rolling-average', last_n=3, train_every=1, log_level='INFO', import_model_file=None, transfer_model_file=None, sync_level='exists', max_backups=0, backup_delay_threshold=2.0, *args, **kwargs): self._fxc = FuncXClient(*args, **kwargs) # Initialize a transfer client self._transfer_manger = TransferManager(endpoints=endpoints, sync_level=sync_level, log_level=log_level) # Info about FuncX endpoints we can execute on self._endpoints = endpoints self._dead_endpoints = set() self.last_result_time = defaultdict(float) self.temperature = defaultdict(lambda: 'WARM') self._imports = defaultdict(list) self._imports_required = defaultdict(list) # Track which endpoints a function can't run on self._blocked = defaultdict(set) # Track pending tasks # We will provide the client our own task ids, since we may submit the # same task multiple times to the FuncX service, and sometimes we may # wait to submit a task to FuncX (e.g., wait for a data transfer). self._task_id_translation = {} self._pending = {} self._pending_by_endpoint = defaultdict(set) self._task_info = {} # List of endpoints a (virtual) task was scheduled to self._endpoints_sent_to = defaultdict(list) self.max_backups = max_backups self.backup_delay_threshold = backup_delay_threshold self._latest_status = {} self._last_task_ETA = defaultdict(float) # Maximum ETA, if any, of a task which we allow to be scheduled on an # endpoint. This is to prevent backfill tasks to be longer than the # estimated time for when a pending data transfer will finish. self._transfer_ETAs = defaultdict(dict) # Estimated error in the pending-task time of an endpoint. # Updated every time a task result is received from an endpoint. 
self._queue_error = defaultdict(float) # Set logging levels logger.setLevel(log_level) self.execution_log = [] # Intialize serializer self.fx_serializer = FuncXSerializer() self.fx_serializer.use_custom('03\n', 'code') # Initialize runtime predictor self.runtime = init_runtime_predictor(runtime_predictor, endpoints=endpoints, last_n=last_n, train_every=train_every) logger.info(f"Runtime predictor using strategy {self.runtime}") # Initialize transfer-time predictor self.transfer_time = TransferPredictor(endpoints=endpoints, train_every=train_every, state_file=transfer_model_file) # Initialize import-time predictor self.import_predictor = ImportPredictor(endpoints=endpoints, state_file=import_model_file) # Initialize scheduling strategy self.strategy = init_strategy(strategy, endpoints=endpoints, runtime_predictor=self.runtime, queue_predictor=self.queue_delay, cold_start_predictor=self.cold_start, transfer_predictor=self.transfer_time) logger.info(f"Scheduler using strategy {self.strategy}") # Start thread to check on endpoints regularly self._endpoint_watchdog = Thread(target=self._check_endpoints) self._endpoint_watchdog.start() # Start thread to monitor tasks and send tasks to FuncX service self._scheduled_tasks = Queue() self._task_watchdog_sleep = 0.15 self._task_watchdog = Thread(target=self._monitor_tasks) self._task_watchdog.start()
def __init__( self, http_timeout=None, funcx_home=_FUNCX_HOME, force_login=False, fx_authorizer=None, search_authorizer=None, openid_authorizer=None, funcx_service_address=None, check_endpoint_version=False, asynchronous=False, loop=None, results_ws_uri=None, use_offprocess_checker=True, environment=None, **kwargs, ): """ Initialize the client Parameters ---------- http_timeout: int Timeout for any call to service in seconds. Default is no timeout force_login: bool Whether to force a login to get new credentials. fx_authorizer:class:`GlobusAuthorizer \ <globus_sdk.authorizers.base.GlobusAuthorizer>`: A custom authorizer instance to communicate with funcX. Default: ``None``, will be created. search_authorizer:class:`GlobusAuthorizer \ <globus_sdk.authorizers.base.GlobusAuthorizer>`: A custom authorizer instance to communicate with Globus Search. Default: ``None``, will be created. openid_authorizer:class:`GlobusAuthorizer \ <globus_sdk.authorizers.base.GlobusAuthorizer>`: A custom authorizer instance to communicate with OpenID. Default: ``None``, will be created. funcx_service_address: str For internal use only. The address of the web service. results_ws_uri: str For internal use only. The address of the websocket service. environment: str For internal use only. The name of the environment to use. asynchronous: bool Should the API use asynchronous interactions with the web service? Currently only impacts the run method Default: False loop: AbstractEventLoop If asynchronous mode is requested, then you can provide an optional event loop instance. If None, then we will access asyncio.get_event_loop() Default: None use_offprocess_checker: Bool, Use this option to disable the offprocess_checker in the FuncXSerializer used by the client. Default: True Keyword arguments are the same as for BaseClient. 
""" # resolve URLs if not set if funcx_service_address is None: funcx_service_address = get_web_service_url(environment) if results_ws_uri is None: results_ws_uri = get_web_socket_url(environment) self.func_table = {} self.use_offprocess_checker = use_offprocess_checker self.funcx_home = os.path.expanduser(funcx_home) self.session_task_group_id = str(uuid.uuid4()) if not os.path.exists(self.TOKEN_DIR): os.makedirs(self.TOKEN_DIR) tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME) self.native_client = NativeClient( client_id=self.FUNCX_SDK_CLIENT_ID, app_name="FuncX SDK", token_storage=JSONTokenStorage(tokens_filename), ) # TODO: if fx_authorizer is given, we still need to get an authorizer for Search search_scope = "urn:globus:auth:scope:search.api.globus.org:all" scopes = [self.FUNCX_SCOPE, search_scope, "openid"] if not fx_authorizer or not search_authorizer or not openid_authorizer: self.native_client.login( requested_scopes=scopes, no_local_server=kwargs.get("no_local_server", True), no_browser=kwargs.get("no_browser", True), refresh_tokens=kwargs.get("refresh_tokens", True), force=force_login, ) all_authorizers = self.native_client.get_authorizers_by_scope( requested_scopes=scopes ) fx_authorizer = all_authorizers[self.FUNCX_SCOPE] search_authorizer = all_authorizers[search_scope] openid_authorizer = all_authorizers["openid"] self.web_client = FuncxWebClient( base_url=funcx_service_address, authorizer=fx_authorizer ) self.fx_serializer = FuncXSerializer( use_offprocess_checker=self.use_offprocess_checker ) authclient = AuthClient(authorizer=openid_authorizer) user_info = authclient.oauth2_userinfo() self.searcher = SearchHelper( authorizer=search_authorizer, owner_uuid=user_info["sub"] ) self.funcx_service_address = funcx_service_address self.check_endpoint_version = check_endpoint_version self.version_check() self.results_ws_uri = results_ws_uri self.asynchronous = asynchronous if asynchronous: self.loop = loop if loop else 
asyncio.get_event_loop() # Start up an asynchronous polling loop in the background self.ws_polling_task = WebSocketPollingTask( self, self.loop, init_task_group_id=self.session_task_group_id, results_ws_uri=self.results_ws_uri, ) else: self.loop = None
def __init__(self):
    """Initialize with an empty task list and a fresh FuncXSerializer."""
    # Serializer used for task payloads.
    self.fx_serializer = FuncXSerializer()
    # No tasks accumulated yet.
    self.tasks = []
import json
import sys
import argparse
import time
import funcx
from funcx import FuncXClient
from funcx.serialize import FuncXSerializer

# Module-level serializer instance.
# NOTE(review): not referenced by the code visible below -- confirm whether
# it is used elsewhere before removing.
fxs = FuncXSerializer()

# funcx.set_stream_logger()


def double(x):
    """Return twice the input value."""
    return x * 2


def test(fxc, ep_id, task_count=10):
    """Register ``double``, map it over ``range(task_count)`` on an
    endpoint, and poll the batch status a few times.

    Parameters
    ----------
    fxc : FuncXClient
        Client used to register the function and launch tasks.
    ep_id : str
        Endpoint id to run on.
    task_count : int
        Number of inputs to map. Default: 10
    """
    fn_uuid = fxc.register_function(double,
                                    description="Yadu double")
    print("FN_UUID : ", fn_uuid)

    start = time.time()
    task_ids = fxc.map_run(list(range(task_count)),
                           endpoint_id=ep_id,
                           function_id=fn_uuid)
    delta = time.time() - start
    print("Time to launch {} tasks: {:8.3f} s".format(task_count, delta))
    print("Got {} tasks_ids ".format(len(task_ids)))

    # NOTE(review): polls the batch status three times and discards every
    # result; looks like leftover debugging or a truncated wait loop --
    # confirm intent before relying on this.
    for i in range(3):
        x = fxc.get_batch_status(task_ids)
def __init__(self,
             task_q_url="tcp://127.0.0.1:50097",
             result_q_url="tcp://127.0.0.1:50098",
             max_queue_size=10,
             cores_per_worker=1,
             max_workers=float('inf'),
             uid=None,
             heartbeat_threshold=120,
             heartbeat_period=30,
             logdir=None,
             debug=False,
             block_id=None,
             internal_worker_port_range=(50000, 60000),
             mode="singularity_reuse",
             container_image=None,
             # TODO : This should be 10ms
             poll_period=100):
    """
    Parameters
    ----------
    task_q_url : str
        Interchange URL from which tasks are received.
    result_q_url : str
        Interchange URL to which results are pushed.
    uid : str
        string unique identifier
    cores_per_worker : float
        cores to be assigned to each worker. Oversubscription is possible
        by setting cores_per_worker < 1.0. Default=1
    max_workers : int
        caps the maximum number of workers that can be launched.
        default: infinity
    heartbeat_threshold : int
        Number of seconds since the last message from the interchange
        after which the interchange is assumed to be un-available, and
        the manager initiates shutdown. Default: 120s
    heartbeat_period : int
        Number of seconds after which a heartbeat message is sent to the
        interchange
    internal_worker_port_range : tuple(int, int)
        Port range from which the port(s) for the workers to connect to
        the manager is picked. Default: (50000,60000)
    mode : str
        Pick between 3 supported modes for the worker:
        1. no_container : Worker launched without containers
        2. singularity_reuse : Worker launched inside a singularity
           container that will be reused
        3. singularity_single_use : Each worker and task runs inside a new
           container instance.
    container_image : str
        Path or identifier for the container to be used. Default: None
    poll_period : int
        Timeout period used by the manager in milliseconds. Default: 100
    """
    logger.info("Manager started")

    self.context = zmq.Context()
    # DEALER socket back to the interchange for incoming tasks, identified
    # by this manager's uid.
    self.task_incoming = self.context.socket(zmq.DEALER)
    self.task_incoming.setsockopt(zmq.IDENTITY, uid.encode('utf-8'))
    # Linger is set to 0, so that the manager can exit even when there might be
    # messages in the pipe
    self.task_incoming.setsockopt(zmq.LINGER, 0)
    self.task_incoming.connect(task_q_url)

    self.logdir = logdir
    self.debug = debug
    self.block_id = block_id

    # DEALER socket used to push results back to the interchange.
    self.result_outgoing = self.context.socket(zmq.DEALER)
    self.result_outgoing.setsockopt(zmq.IDENTITY, uid.encode('utf-8'))
    self.result_outgoing.setsockopt(zmq.LINGER, 0)
    self.result_outgoing.connect(result_q_url)

    logger.info("Manager connected")

    self.uid = uid
    self.mode = mode
    self.container_image = container_image
    self.cores_on_node = multiprocessing.cpu_count()
    self.max_workers = max_workers
    # NOTE(review): attribute name pluralization ("cores_per_workers")
    # looks like a typo, but is kept since other code may reference it.
    self.cores_per_workers = cores_per_worker
    # Available memory in GiB, rounded to one decimal place.
    self.available_mem_on_node = round(
        psutil.virtual_memory().available / (2**30), 1)
    # Worker count is capped both by max_workers and by how many workers
    # fit on the node at cores_per_worker cores each.
    self.worker_count = min(
        max_workers,
        math.floor(self.cores_on_node / cores_per_worker))
    self.worker_map = WorkerMap(self.worker_count)

    self.internal_worker_port_range = internal_worker_port_range

    # ROUTER socket on which local workers connect back to this manager;
    # high-water mark of 0 means no message limit on the socket.
    self.funcx_task_socket = self.context.socket(zmq.ROUTER)
    self.funcx_task_socket.set_hwm(0)
    self.address = '127.0.0.1'
    # Bind to a random port within the configured internal range.
    self.worker_port = self.funcx_task_socket.bind_to_random_port(
        "tcp://*",
        min_port=self.internal_worker_port_range[0],
        max_port=self.internal_worker_port_range[1])

    logger.info(
        "Manager listening on {} port for incoming worker connections".
        format(self.worker_port))

    self.task_queues = {'RAW': queue.Queue()}

    self.pending_result_queue = multiprocessing.Queue()

    # Allow headroom beyond max_queue_size: one extra slot per worker.
    self.max_queue_size = max_queue_size + self.worker_count
    self.tasks_per_round = 1

    self.heartbeat_period = heartbeat_period
    self.heartbeat_threshold = heartbeat_threshold
    self.poll_period = poll_period
    self.serializer = FuncXSerializer()
    self.next_worker_q = []  # FIFO queue for spinning up workers.
def __init__(
    self,
    config,
    client_address="127.0.0.1",
    interchange_address="127.0.0.1",
    client_ports: Tuple[int, int, int] = (50055, 50056, 50057),
    launch_cmd=None,
    logdir=".",
    endpoint_id=None,
    keys_dir=".curve",
    suppress_failure=True,
    endpoint_dir=".",
    endpoint_name="default",
    reg_info=None,
    funcx_client_options=None,
    results_ack_handler=None,
):
    """
    Parameters
    ----------
    config : funcx.Config object
        Funcx config object that describes how compute should be
        provisioned
    client_address : str
        The ip address at which the parsl client can be reached.
        Default: "127.0.0.1"
    interchange_address : str
        The ip address at which the workers will be able to reach the
        Interchange. Default: "127.0.0.1"
    client_ports : Tuple[int, int, int]
        The ports at which the client can be reached
    launch_cmd : str
        TODO : update
    logdir : str
        Parsl log directory paths. Logs and temp files go here.
        Default: '.'
    keys_dir : str
        Directory from where keys used for communicating with the funcX
        service (forwarders) are stored
    endpoint_id : str
        Identity string that identifies the endpoint to the broker
    suppress_failure : Bool
        When set to True, the interchange will attempt to suppress
        failures. Default: True
    endpoint_dir : str
        Endpoint directory path to store registration info in
    endpoint_name : str
        Name of endpoint
    reg_info : Dict
        Registration info from initial registration on endpoint start,
        if it succeeded
    funcx_client_options : Dict
        FuncXClient initialization options
    results_ack_handler :
        Handler stored for acknowledging results; used elsewhere in the
        class.
    """
    self.logdir = logdir
    log.info(
        "Initializing EndpointInterchange process with Endpoint ID: {}".
        format(endpoint_id))
    self.config = config
    log.info(f"Got config: {config}")

    self.client_address = client_address
    self.interchange_address = interchange_address
    self.client_ports = client_ports
    self.suppress_failure = suppress_failure

    self.endpoint_dir = endpoint_dir
    self.endpoint_name = endpoint_name

    if funcx_client_options is None:
        funcx_client_options = {}
    self.funcx_client = FuncXClient(**funcx_client_options)

    # If registration info was provided at startup, apply it now and skip
    # the initial registration step.
    self.initial_registration_complete = False
    if reg_info:
        self.initial_registration_complete = True
        self.apply_reg_info(reg_info)

    # Heartbeat settings come from the config, not from parameters.
    self.heartbeat_period = self.config.heartbeat_period
    self.heartbeat_threshold = self.config.heartbeat_threshold
    # initialize the last heartbeat time to start the loop
    self.last_heartbeat = time.time()
    self.keys_dir = keys_dir
    self.serializer = FuncXSerializer()

    self.pending_task_queue = Queue()
    self.containers = {}
    self.total_pending_task_count = 0

    # Events used to quiesce or kill the interchange loops.
    self._quiesce_event = threading.Event()
    self._kill_event = threading.Event()

    self.results_ack_handler = results_ack_handler

    log.info(f"Interchange address is {self.interchange_address}")

    self.endpoint_id = endpoint_id

    # Snapshot of the runtime environment, reported to the service.
    self.current_platform = {
        "parsl_v": PARSL_VERSION,
        "python_v": "{}.{}.{}".format(sys.version_info.major,
                                      sys.version_info.minor,
                                      sys.version_info.micro),
        "libzmq_v": zmq.zmq_version(),
        "pyzmq_v": zmq.pyzmq_version(),
        "os": platform.system(),
        "hname": platform.node(),
        "funcx_sdk_version": funcx_sdk_version,
        "funcx_endpoint_version": funcx_endpoint_version,
        "registration": self.endpoint_id,
        "dir": os.getcwd(),
    }
    log.info(f"Platform info: {self.current_platform}")

    # Load the executor configuration; log and re-raise on any failure so
    # startup aborts visibly rather than half-initialized.
    try:
        self.load_config()
    except Exception:
        log.exception("Caught exception")
        raise

    self.tasks = set()
    self.task_status_deltas = {}

    self._test_start = False
from funcx_endpoint.executors.high_throughput.messages import Task


def double(x):
    """Return twice the input value."""
    return x * 2


if __name__ == "__main__":
    results_queue = Queue()

    # set_file_logger('executor.log', name='funcx_endpoint', level=logging.DEBUG)
    htex = HighThroughputExecutor(interchange_local=True, passthrough=True)
    htex.start(results_passthrough=results_queue)
    htex._start_remote_interchange_process()

    fx_serializer = FuncXSerializer()

    # The serialized function payload is identical for every task, so build
    # it once outside the loop instead of re-serializing on each iteration.
    fn_code = fx_serializer.serialize(double)
    ser_code = fx_serializer.pack_buffers([fn_code])

    # Submit 10 RAW tasks, each doubling its loop index, spaced 0.5s apart.
    for i in range(10):
        task_id = str(uuid.uuid4())
        args = (i, )
        kwargs = {}

        # Per-task arguments differ, so these are serialized inside the loop.
        ser_params = fx_serializer.pack_buffers(
            [fx_serializer.serialize(args), fx_serializer.serialize(kwargs)])

        payload = Task(task_id, "RAW", ser_code + ser_params)

        f = htex.submit_raw(payload.pack())
        time.sleep(0.5)
def __init__(
    self,
    task_q_url="tcp://127.0.0.1:50097",
    result_q_url="tcp://127.0.0.1:50098",
    max_queue_size=10,
    cores_per_worker=1,
    max_workers=float("inf"),
    uid=None,
    heartbeat_threshold=120,
    heartbeat_period=30,
    logdir=None,
    debug=False,
    block_id=None,
    internal_worker_port_range=(50000, 60000),
    worker_mode="singularity_reuse",
    container_cmd_options="",
    scheduler_mode="hard",
    worker_type=None,
    worker_max_idletime=60,
    # TODO : This should be 10ms
    poll_period=100,
):
    """
    Parameters
    ----------
    task_q_url : str
        Interchange URL from which tasks are received.
    result_q_url : str
        Interchange URL to which results are pushed.
    uid : str
        string unique identifier
    cores_per_worker : float
        cores to be assigned to each worker. Oversubscription is possible
        by setting cores_per_worker < 1.0. Default=1
    max_workers : int
        caps the maximum number of workers that can be launched.
        default: infinity
    heartbeat_threshold : int
        Number of seconds since the last message from the interchange
        after which the interchange is assumed to be un-available, and
        the manager initiates shutdown. Default: 120s
    heartbeat_period : int
        Number of seconds after which a heartbeat message is sent to the
        interchange
    internal_worker_port_range : tuple(int, int)
        Port range from which the port(s) for the workers to connect to
        the manager is picked. Default: (50000,60000)
    worker_mode : str
        Pick between 3 supported modes for the worker:
        1. no_container : Worker launched without containers
        2. singularity_reuse : Worker launched inside a singularity
           container that will be reused
        3. singularity_single_use : Each worker and task runs inside a new
           container instance.
    container_cmd_options: str
        Container command strings to be added to associated container
        command. For example, singularity exec {container_cmd_options}
    scheduler_mode : str
        Pick between 2 supported modes for the manager:
        1. hard: the manager cannot change the launched container type
        2. soft: the manager can decide whether to launch different
           containers
    worker_type : str
        If set, the worker type for this manager is fixed. Default: None
    worker_max_idletime : int
        Seconds a worker may sit idle; stored for use elsewhere in the
        class. Default: 60
    poll_period : int
        Timeout period used by the manager in milliseconds. Default: 100
    """
    log.info("Manager started")

    self.context = zmq.Context()
    # DEALER socket back to the interchange for incoming tasks, identified
    # by this manager's uid.
    self.task_incoming = self.context.socket(zmq.DEALER)
    self.task_incoming.setsockopt(zmq.IDENTITY, uid.encode("utf-8"))
    # Linger is set to 0, so that the manager can exit even when there might be
    # messages in the pipe
    self.task_incoming.setsockopt(zmq.LINGER, 0)
    self.task_incoming.connect(task_q_url)

    self.logdir = logdir
    self.debug = debug
    self.block_id = block_id

    # DEALER socket used to push results back to the interchange.
    self.result_outgoing = self.context.socket(zmq.DEALER)
    self.result_outgoing.setsockopt(zmq.IDENTITY, uid.encode("utf-8"))
    self.result_outgoing.setsockopt(zmq.LINGER, 0)
    self.result_outgoing.connect(result_q_url)

    log.info("Manager connected")

    self.uid = uid
    self.worker_mode = worker_mode
    self.container_cmd_options = container_cmd_options
    self.scheduler_mode = scheduler_mode
    self.worker_type = worker_type
    self.worker_max_idletime = worker_max_idletime
    self.cores_on_node = multiprocessing.cpu_count()
    self.max_workers = max_workers
    # NOTE(review): attribute name pluralization ("cores_per_workers")
    # looks like a typo, but is kept since other code may reference it.
    self.cores_per_workers = cores_per_worker
    # Available memory in GiB, rounded to one decimal place.
    self.available_mem_on_node = round(
        psutil.virtual_memory().available / (2**30), 1)
    # Worker cap from both max_workers and how many workers fit on the
    # node at cores_per_worker cores each.
    self.max_worker_count = min(
        max_workers, math.floor(self.cores_on_node / cores_per_worker))
    self.worker_map = WorkerMap(self.max_worker_count)

    self.internal_worker_port_range = internal_worker_port_range

    # ROUTER socket on which local workers connect back to this manager;
    # high-water mark of 0 means no message limit on the socket.
    self.funcx_task_socket = self.context.socket(zmq.ROUTER)
    self.funcx_task_socket.set_hwm(0)
    self.address = "127.0.0.1"
    # Bind to a random port within the configured internal range.
    self.worker_port = self.funcx_task_socket.bind_to_random_port(
        "tcp://*",
        min_port=self.internal_worker_port_range[0],
        max_port=self.internal_worker_port_range[1],
    )

    log.info(
        "Manager listening on {} port for incoming worker connections".
        format(self.worker_port))

    # One task queue per worker type; pre-create the queue only when a
    # fixed worker_type was configured.
    self.task_queues = {}
    if worker_type:
        self.task_queues[worker_type] = queue.Queue()
    self.outstanding_task_count = {}
    self.task_type_mapping = {}

    self.pending_result_queue = mpQueue()

    # Allow headroom beyond max_queue_size: one extra slot per worker.
    self.max_queue_size = max_queue_size + self.max_worker_count
    self.tasks_per_round = 1

    self.heartbeat_period = heartbeat_period
    self.heartbeat_threshold = heartbeat_threshold
    self.poll_period = poll_period
    self.serializer = FuncXSerializer()
    self.next_worker_q = []  # FIFO queue for spinning up workers.
    self.worker_procs = {}

    self.task_status_deltas = {}

    # Background threads are created here but not started; presumably
    # started by a separate start/run method -- confirm in the class.
    self._kill_event = threading.Event()
    self._result_pusher_thread = threading.Thread(
        target=self.push_results, args=(self._kill_event, ))
    self._status_report_thread = threading.Thread(
        target=self._status_report_loop, args=(self._kill_event, ))
    self.container_switch_count = 0

    # Poll both the interchange task socket and the local worker socket.
    self.poller = zmq.Poller()
    self.poller.register(self.task_incoming, zmq.POLLIN)
    self.poller.register(self.funcx_task_socket, zmq.POLLIN)

    self.task_worker_map = {}

    self.task_done_counter = 0
    self.task_finalization_lock = threading.Lock()