Example #1
def serialize_fx_inputs(*args, **kwargs):
    from funcx.serialize import FuncXSerializer
    fx_serializer = FuncXSerializer()
    ser_args = fx_serializer.serialize(args)
    ser_kwargs = fx_serializer.serialize(kwargs)
    payload = fx_serializer.pack_buffers([ser_args, ser_kwargs])
    return payload
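
To close the loop, here is a minimal round-trip sketch. It assumes FuncXSerializer exposes unpack_and_deserialize, as the worker snippets further down this page suggest; treat it as illustrative rather than canonical.

# Hedged round-trip sketch: recover the packed (args, kwargs).
# Assumes unpack_and_deserialize() splits the payload into its buffers
# and deserializes each one.
from funcx.serialize import FuncXSerializer

payload = serialize_fx_inputs(1, 2, duration=0)
fxs = FuncXSerializer()
args, kwargs = fxs.unpack_and_deserialize(payload)
assert args == (1, 2) and kwargs == {'duration': 0}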
Example #2
    def __init__(self, gsearchresult):
        """

        Parameters
        ----------
        gsearchresult : dict
        """
        # wrapper for an array of results
        results = gsearchresult['results']
        super().__init__(results)

        # track data about where we are in total results
        self.has_next_page = gsearchresult['has_next_page']
        self.offset = gsearchresult['offset']
        self.total = gsearchresult['total']

        # we can use this to load functions and run them
        self.serializer = FuncXSerializer()

        # Reformat for pretty printing and easy viewing
        self._init_columns()
        self.table = Texttable(max_width=120)
        self.table.header(self.columns)
        for res in self:
            self.table.add_row([
                res[col] for col in self.columns
            ])
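
For reference, a hedged sketch of the input this constructor expects. Only the top-level keys are grounded in the code above; the per-result keys are hypothetical and depend on _init_columns(), which is not shown.

# Hypothetical gsearchresult input; only the top-level keys are grounded
# in the constructor above.
gsearchresult = {
    'results': [{'function_name': 'double', 'function_uuid': 'abc-123'}],
    'has_next_page': False,
    'offset': 0,
    'total': 1,
}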
Example #3
def test(endpoint_id=None, tasks=10, hostname=None, port=None):
    tasks_rq = RedisQueue(f'task_{endpoint_id}', hostname)
    results_rq = RedisQueue('results', hostname)
    fxs = FuncXSerializer()

    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    tasks_rq.connect()
    results_rq.connect()

    # Drain any stale results before timing a fresh batch
    while True:
        try:
            _ = results_rq.get(timeout=1)
        except Exception:
            print("No more results left")
            break

    start = time.time()
    for i in range(tasks):
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({'duration':0})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        tasks_rq.put(f"0{i}", payload)

    for i in range(tasks):
        res = results_rq.get(timeout=1)
        print("Result : ", res)

    delta = time.time() - start
    print("Time to complete {} tasks: {:8.3f} s".format(tasks, delta))
    print("Throughput : {:8.3f} Tasks/s".format(tasks / delta))
    return delta
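
slow_double is not defined in this snippet. A plausible stand-in, consistent with the duration keyword passed above and the double function in Example #26, might be:

# Hypothetical stand-in for slow_double; the real project's version may differ.
import time

def slow_double(x, duration=0):
    time.sleep(duration)
    return x * 2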
Example #4
    def __init__(
        self,
        worker_id,
        address,
        port,
        worker_type="RAW",
        result_size_limit=DEFAULT_RESULT_SIZE_LIMIT_B,
    ):

        self.worker_id = worker_id
        self.address = address
        self.port = port
        self.worker_type = worker_type
        self.serializer = FuncXSerializer()
        self.serialize = self.serializer.serialize
        self.deserialize = self.serializer.deserialize
        self.result_size_limit = result_size_limit

        log.info(f"Initializing worker {worker_id}")
        log.info(f"Worker is of type: {worker_type}")

        self.context = zmq.Context()
        self.poller = zmq.Poller()
        self.identity = worker_id.encode()

        self.task_socket = self.context.socket(zmq.DEALER)
        self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

        log.info(f"Trying to connect to : tcp://{self.address}:{self.port}")
        self.task_socket.connect(f"tcp://{self.address}:{self.port}")
        self.poller.register(self.task_socket, zmq.POLLIN)
        signal.signal(signal.SIGTERM, self.handler)
Example #5
def serialize_fx_inputs(*args, **kwargs):
    """Pack and serialize inputs
    """
    fx_serializer = FuncXSerializer()
    ser_args = fx_serializer.serialize(args)
    ser_kwargs = fx_serializer.serialize(kwargs)
    payload = fx_serializer.pack_buffers([ser_args, ser_kwargs])
    return payload
Example #6
File: client.py Project: ravescovi/gladier
def get_funcx_function_checksum(funcx_function):
    """
    Get the SHA256 checksum of a funcx function
    :returns: sha256 hex string of a given funcx function
    """
    fxs = FuncXSerializer()
    serialized_func = fxs.serialize(funcx_function).encode()
    return hashlib.sha256(serialized_func).hexdigest()
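
A quick usage sketch for the checksum helper above:

# Hash a toy function; the digest is a 64-character sha256 hex string.
def add(a, b):
    return a + b

print(get_funcx_function_checksum(add))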
Example #7
File: batch.py Project: funcx-faas/funcX
    def __init__(self, task_group_id=None):
        """
        Parameters
        ==========

        task_group_id : str
            UUID indicating the task group that this batch belongs to
        """
        self.tasks = []
        self.fx_serializer = FuncXSerializer()
        self.task_group_id = task_group_id
Example #8
File: client.py Project: NickolausDS/funcX
    def __init__(self,
                 http_timeout=None,
                 funcx_home=os.path.join('~', '.funcx'),
                 force_login=False,
                 fx_authorizer=None,
                 funcx_service_address='https://dev.funcx.org/api/v1',
                 **kwargs):
        """ Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout

        force_login: bool
            Whether to force a login to get new credentials.

        fx_authorizer: :class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.

        funcx_service_address: str
            The address of the funcX web service to communicate with.
            Default: https://dev.funcx.org/api/v1

        Keyword arguments are the same as for BaseClient.
        """
        self.ep_registration_path = 'register_endpoint_2'
        self.funcx_home = os.path.expanduser(funcx_home)

        native_client = NativeClient(client_id=self.CLIENT_ID)

        fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"

        if not fx_authorizer:
            native_client.login(
                requested_scopes=[fx_scope],
                no_local_server=kwargs.get("no_local_server", True),
                no_browser=kwargs.get("no_browser", True),
                refresh_tokens=kwargs.get("refresh_tokens", True),
                force=force_login)

            all_authorizers = native_client.get_authorizers_by_scope(
                requested_scopes=[fx_scope])
            fx_authorizer = all_authorizers[fx_scope]

        super(FuncXClient, self).__init__("funcX",
                                          environment='funcx',
                                          authorizer=fx_authorizer,
                                          http_timeout=http_timeout,
                                          base_url=funcx_service_address,
                                          **kwargs)
        self.fx_serializer = FuncXSerializer()
Example #9
    def __init__(self, dlh_authorizer=None, search_client=None, http_timeout=None,
                 force_login=False, fx_authorizer=None, **kwargs):
        """Initialize the client

        Args:
            dlh_authorizer (:class:`GlobusAuthorizer
                            <globus_sdk.authorizers.base.GlobusAuthorizer>`):
                An authorizer instance used to communicate with DLHub.
                If ``None``, will be created.
            search_client (:class:`SearchClient <globus_sdk.SearchClient>`):
                An authenticated SearchClient to communicate with Globus Search.
                If ``None``, will be created.
            http_timeout (int): Timeout for any call to service in seconds. (default is no timeout)
            force_login (bool): Whether to force a login to get new credentials.
                A login will always occur if ``dlh_authorizer`` or ``search_client``
                are not provided.
            no_local_server (bool): Disable spinning up a local server to automatically
                copy-paste the auth code. THIS IS REQUIRED if you are on a remote server.
                When used locally with no_local_server=False, the domain is localhost with
                a randomly chosen open port number.
                **Default**: ``True``.
            fx_authorizer (:class:`GlobusAuthorizer
                            <globus_sdk.authorizers.base.GlobusAuthorizer>`):
                An authorizer instance used to communicate with funcX.
                If ``None``, will be created.
            no_browser (bool): Do not automatically open the browser for the Globus Auth URL.
                Display the URL instead and let the user navigate to that location manually.
                **Default**: ``True``.
        Keyword arguments are the same as for BaseClient.
        """
        if force_login or not dlh_authorizer or not search_client or not fx_authorizer:

            fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
            auth_res = login(services=["search", "dlhub",
                                       fx_scope],
                             app_name="DLHub_Client",
                             client_id=CLIENT_ID, clear_old_tokens=force_login,
                             token_dir=_token_dir, no_local_server=kwargs.get("no_local_server", True),
                             no_browser=kwargs.get("no_browser", True))
            dlh_authorizer = auth_res["dlhub"]
            fx_authorizer = auth_res[fx_scope]
            self._search_client = auth_res["search"]
            self._fx_client = FuncXClient(force_login=True, fx_authorizer=fx_authorizer,
                                          funcx_service_address='https://funcx.org/api/v1')

        # funcX endpoint to use
        self.fx_endpoint = '86a47061-f3d9-44f0-90dc-56ddc642c000'
        # self.fx_endpoint = '2c92a06a-015d-4bfa-924c-b3d0c36bdad7'
        self.fx_serializer = FuncXSerializer()
        self.fx_cache = {}
        super(DLHubClient, self).__init__("DLHub", environment='dlhub', authorizer=dlh_authorizer,
                                          http_timeout=http_timeout, base_url=DLHUB_SERVICE_ADDRESS,
                                          **kwargs)
Example #10
def dont_run_yet(endpoint_id=None, tasks=10, duration=1, hostname=None):
    # tasks_rq = EndpointQueue(f'task_{endpoint_id}', hostname)
    tasks_channel = RedisPubSub(hostname)
    tasks_channel.connect()
    redis_client = tasks_channel.redis_client
    redis_client.ping()
    fxs = FuncXSerializer()

    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    start = time.time()
    task_ids = {}
    for i in range(tasks):
        time.sleep(duration)
        task_id = str(uuid.uuid4())
        print("Task_id : ", task_id)
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({"duration": duration})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        container_id = "RAW"
        task = Task(redis_client, task_id, container_id, serializer="", payload=payload)
        task.endpoint = endpoint_id
        task.status = TaskState.WAITING_FOR_EP
        # tasks_rq.enqueue(task)
        tasks_channel.put(endpoint_id, task)
        task_ids[i] = task_id

    d1 = time.time() - start
    print(f"Time to launch {tasks} tasks: {d1:8.3f} s")

    delay = 5
    print(f"Sleeping {delay} seconds")
    time.sleep(delay)
    print(f"Launched {tasks} tasks")
    for i in range(tasks):
        task_id = task_ids[i]
        print("Task_id : ", task_id)
        task = Task.from_id(redis_client, task_id)
        # TODO: wait for task result...
        time.sleep(duration)
        try:
            result = fxs.deserialize(task.result)
            print(f"Result : {result}")
        except Exception as e:
            print(f"Task failed with exception: {e}")

    delta = time.time() - start
    print(f"Time to complete {tasks} tasks: {delta:8.3f} s")
    print(f"Throughput : {tasks / delta:8.3f} Tasks/s")
    return delta
Example #11
class FuncXFuture(Future):
    client = FuncXClient()
    serializer = FuncXSerializer()

    def __init__(self, task_id, poll_period=1):
        super().__init__()
        self.task_id = task_id
        self.poll_period = poll_period
        self.__result = None
        self.submitted = time.time()

    def done(self):
        if self.__result is not None:
            return True
        try:
            data = FuncXFuture.client.get_task_status(self.task_id)
        except Exception:
            return False
        if 'status' in data and data['status'] == 'PENDING':
            time.sleep(
                self.poll_period)  # needed to not overwhelm the FuncX server
            return False
        elif 'result' in data:
            self.__result = FuncXFuture.serializer.deserialize(data['result'])
            self.returned = time.time()
            # FIXME AW benchmarking
            self.connected_managers = os.environ.get('connected_managers', -1)

            return True
        elif 'exception' in data:
            e = FuncXFuture.serializer.deserialize(data['exception'])
            e.reraise()
        else:
            raise NotImplementedError(
                'task {} is neither pending nor finished: {}'.format(
                    self.task_id, str(data)))

    def result(self, timeout=None):
        if self.__result is not None:
            return self.__result
        while True:
            if self.done():
                break
            else:
                time.sleep(self.poll_period)
                if timeout is not None:
                    timeout -= self.poll_period
                    if timeout < 0:
                        raise TimeoutError

        return self.__result
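
A hedged usage sketch for the future class above. fn_uuid and ep_id are hypothetical placeholders for a registered function and endpoint, and FuncXClient.run is used as in the funcX SDK:

# Hedged usage: submit a task, wrap its id, and block on the result.
# fn_uuid and ep_id are placeholders, not real identifiers.
task_id = FuncXFuture.client.run(21, endpoint_id=ep_id, function_id=fn_uuid)
future = FuncXFuture(task_id, poll_period=1)
print(future.result(timeout=60))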
Example #12
from flask import jsonify, request

from funcx.serialize import FuncXSerializer


def deserialize():
    """Return the deserialized result"""
    fx_serializer = FuncXSerializer()
    # Return a failure message if all else fails
    ret_package = {'error': 'Failed to deserialize result'}
    try:
        inputs = request.json
        res = fx_serializer.deserialize(inputs)
        ret_package = jsonify(res)
    except Exception as e:
        print(e)
        return jsonify(ret_package), 500
    return ret_package, 200
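
For context, a hedged client-side call against the view above. The /deserialize route is an assumption, since the snippet does not show how the function is registered with Flask:

# Hypothetical client call; the route path is an assumption.
import requests
from funcx.serialize import FuncXSerializer

ser_result = FuncXSerializer().serialize({"answer": 42})
resp = requests.post("http://localhost:5000/deserialize", json=ser_result)
print(resp.status_code, resp.json())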
Example #13
def dont_run_yet(endpoint_id=None, tasks=10, duration=1, hostname=None):
    tasks_rq = EndpointQueue(f"task_{endpoint_id}", hostname)
    fxs = FuncXSerializer()

    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    tasks_rq.connect()
    start = time.time()
    task_ids = {}
    for i in range(tasks):
        task_id = str(uuid.uuid4())
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({"duration": duration})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        container_id = "RAW"
        task = Task(tasks_rq.redis_client,
                    task_id,
                    container_id,
                    serializer="",
                    payload=payload)
        tasks_rq.enqueue(task)
        task_ids[i] = task_id

    d1 = time.time() - start
    print(f"Time to launch {tasks} tasks: {d1:8.3f} s")

    print(f"Launched {tasks} tasks")
    for i in range(tasks):
        task_id = task_ids[i]
        task = Task.from_id(tasks_rq.redis_client, task_id)
        # TODO: wait for task result...
        time.sleep(2)
        print(f"Result: {task.result}")
        # res = results_rq.get('result', timeout=300)
        # print("Result : ", res)

    delta = time.time() - start
    print(f"Time to complete {tasks} tasks: {delta:8.3f} s")
    print(f"Throughput : {tasks / delta:8.3f} Tasks/s")
    return delta
Example #14
    def __init__(self,
                 http_timeout=None,
                 funcx_home=os.path.join('~', '.funcx'),
                 force_login=False,
                 fx_authorizer=None,
                 **kwargs):
        """ Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout

        force_login: bool
            Whether to force a login to get new credentials.

        fx_authorizer: :class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.

        Keyword arguments are the same as for BaseClient.
        """
        self.ep_registration_path = 'register_endpoint_2'
        self.funcx_home = os.path.expanduser(funcx_home)

        if force_login or not fx_authorizer:
            fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
            auth_res = login(services=[fx_scope],
                             app_name="funcX_Client",
                             client_id=self.CLIENT_ID,
                             clear_old_tokens=force_login,
                             token_dir=self.TOKEN_DIR)
            fx_authorizer = auth_res['funcx_service']

        super(FuncXClient, self).__init__("funcX",
                                          environment='funcx',
                                          authorizer=fx_authorizer,
                                          http_timeout=http_timeout,
                                          base_url=self.FUNCX_SERVICE_ADDRESS,
                                          **kwargs)
        self.fx_serializer = FuncXSerializer()
Example #15
    def __init__(self,
                 worker_id,
                 address,
                 port,
                 logdir,
                 debug=False,
                 worker_type='RAW'):

        self.worker_id = worker_id
        self.address = address
        self.port = port
        self.logdir = logdir
        self.debug = debug
        self.worker_type = worker_type
        self.serializer = FuncXSerializer()
        self.serialize = self.serializer.serialize
        self.deserialize = self.serializer.deserialize

        global logger
        logger = set_file_logger(
            '{}/funcx_worker_{}.log'.format(logdir, worker_id),
            name="worker_log",
            level=logging.DEBUG if debug else logging.INFO)

        logger.info('Initializing worker {}'.format(worker_id))
        logger.info('Worker is of type: {}'.format(worker_type))

        if debug:
            logger.debug('Debug logging enabled')

        self.context = zmq.Context()
        self.poller = zmq.Poller()
        self.identity = worker_id.encode()

        self.task_socket = self.context.socket(zmq.DEALER)
        self.task_socket.setsockopt(zmq.IDENTITY, self.identity)

        logger.info('Trying to connect to : tcp://{}:{}'.format(
            self.address, self.port))
        self.task_socket.connect('tcp://{}:{}'.format(self.address, self.port))
        self.poller.register(self.task_socket, zmq.POLLIN)
Example #16
def server(port=0, host="", debug=False, datasize=102400):

    try:
        from funcx.serialize import FuncXSerializer

        fxs = FuncXSerializer(use_offprocess_checker=False)
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind((host, port))
            bound_port = s.getsockname()[1]
            print(f"BINDING TO:{bound_port}", flush=True)
            s.listen(1)
            conn, addr = s.accept()  # we only expect one incoming connection here.
            with conn:
                while True:

                    b_msg = conn.recv(datasize)
                    if not b_msg:
                        print("Exiting")
                        return

                    msg = pickle.loads(b_msg)

                    if msg == "PING":
                        ret_value = ("PONG", None)
                    else:
                        try:
                            method = fxs.deserialize(msg)  # noqa
                            del method
                        except Exception as e:
                            ret_value = ("DESERIALIZE_FAIL", str(e))
                        else:
                            ret_value = ("SUCCESS", None)

                    ret_buf = pickle.dumps(ret_value)
                    conn.sendall(ret_buf)
    except Exception as e:
        print(f"OFF_PROCESS_CHECKER FAILURE, Exception:{e}")
        sys.exit()
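
The wire protocol above is just pickle over a TCP socket. A hedged client-side probe, using the port the server prints at startup, might look like:

# Hedged probe: send a pickled "PING" and expect ("PONG", None) back.
import pickle
import socket

def ping(port, host="localhost", datasize=102400):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))
        s.sendall(pickle.dumps("PING"))
        status, _info = pickle.loads(s.recv(datasize))
        return status == "PONG"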
Example #17
def test(endpoint_id=None, tasks=10, duration=1, hostname=None, port=None):
    tasks_rq = RedisQueue(f'task_{endpoint_id}', hostname)
    results_rq = RedisQueue('results', hostname)
    fxs = FuncXSerializer()

    ser_code = fxs.serialize(slow_double)
    fn_code = fxs.pack_buffers([ser_code])

    tasks_rq.connect()
    results_rq.connect()

    while True:
        try:
            _ = results_rq.get(timeout=1)
        except Exception:
            print("No more results left")
            break

    start = time.time()
    for i in range(tasks):
        ser_args = fxs.serialize([i])
        ser_kwargs = fxs.serialize({'duration': duration})
        input_data = fxs.pack_buffers([ser_args, ser_kwargs])
        payload = fn_code + input_data
        container_id = "odd" if i % 2 else "even"
        tasks_rq.put(f"0{i};{container_id}", payload)

    d1 = time.time() - start
    print("Time to launch {} tasks: {:8.3f} s".format(tasks, d1))

    print(f"Launched {tasks} tasks")
    for i in range(tasks):
        _ = results_rq.get(timeout=300)
        # print("Result : ", res)

    delta = time.time() - start
    print("Time to complete {} tasks: {:8.3f} s".format(tasks, delta))
    print("Throughput : {:8.3f} Tasks/s".format(tasks / delta))
    return delta
Example #18
    def __init__(self,
                 http_timeout=None,
                 funcx_home=os.path.join('~', '.funcx'),
                 force_login=False,
                 fx_authorizer=None,
                 funcx_service_address='https://api.funcx.org/v1',
                 **kwargs):
        """ Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout

        force_login: bool
            Whether to force a login to get new credentials.

        fx_authorizer: :class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.

        funcx_service_address: str
            The address of the funcX web service to communicate with.
            Default: https://api.funcx.org/v1

        Keyword arguments are the same as for BaseClient.
        """
        self.func_table = {}
        self.ep_registration_path = 'register_endpoint_2'
        self.funcx_home = os.path.expanduser(funcx_home)

        if not os.path.exists(self.TOKEN_DIR):
            os.makedirs(self.TOKEN_DIR)

        tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME)
        self.native_client = NativeClient(
            client_id=self.CLIENT_ID,
            app_name="FuncX SDK",
            token_storage=JSONTokenStorage(tokens_filename))

        # TODO: if fx_authorizer is given, we still need to get an authorizer for Search
        fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
        search_scope = "urn:globus:auth:scope:search.api.globus.org:all"
        scopes = [fx_scope, search_scope, "openid"]

        search_authorizer = None
        openid_authorizer = None

        if not fx_authorizer:
            self.native_client.login(
                requested_scopes=scopes,
                no_local_server=kwargs.get("no_local_server", True),
                no_browser=kwargs.get("no_browser", True),
                refresh_tokens=kwargs.get("refresh_tokens", True),
                force=force_login)

            all_authorizers = self.native_client.get_authorizers_by_scope(
                requested_scopes=scopes)
            fx_authorizer = all_authorizers[fx_scope]
            search_authorizer = all_authorizers[search_scope]
            openid_authorizer = all_authorizers["openid"]

        super(FuncXClient, self).__init__("funcX",
                                          environment='funcx',
                                          authorizer=fx_authorizer,
                                          http_timeout=http_timeout,
                                          base_url=funcx_service_address,
                                          **kwargs)
        self.fx_serializer = FuncXSerializer()

        authclient = AuthClient(authorizer=openid_authorizer)
        user_info = authclient.oauth2_userinfo()
        self.searcher = SearchHelper(authorizer=search_authorizer,
                                     owner_uuid=user_info['sub'])
        self.funcx_service_address = funcx_service_address
Example #19
    def __init__(self,
                 task_q,
                 result_q,
                 executor,
                 endpoint_id,
                 heartbeat_threshold=60,
                 endpoint_addr=None,
                 redis_address=None,
                 logdir="forwarder_logs",
                 logging_level=logging.INFO,
                 max_heartbeats_missed=2):
        """
        Parameters
        ----------
        task_q : queue object
            Any queue object that has get primitives. This must be a thread-safe queue.

        result_q : queue object
            Any queue object that has put primitives. This must be a thread-safe queue.

        executor : Executor object
            Executor to which tasks are to be forwarded

        endpoint_id : str
            Usually a uuid4 string that identifies the executor

        endpoint_addr : str
            Endpoint ip address as a string

        heartbeat_threshold : int
            Heartbeat threshold in seconds

        logdir : str
            Path to logdir

        logging_level : int
            Logging level as defined in the logging module. Default: logging.INFO (20)

        max_heartbeats_missed : int
            The maximum heartbeats missed before the forwarder terminates
        """
        super().__init__()
        self.logdir = logdir
        os.makedirs(self.logdir, exist_ok=True)

        global logger
        logger = logging.getLogger(endpoint_id)

        if len(logger.handlers) == 0:
            logger = set_file_logger(os.path.join(
                self.logdir, "forwarder.{}.log".format(endpoint_id)),
                                     name=endpoint_id,
                                     level=logging_level)

        logger.info(
            "Initializing forwarder for endpoint:{}".format(endpoint_id))
        logger.info("Log level set to {}".format(loglevels[logging_level]))

        self.endpoint_addr = endpoint_addr
        self.task_q = task_q
        self.result_q = result_q
        self.heartbeat_threshold = heartbeat_threshold
        self.executor = executor
        self.endpoint_id = endpoint_id
        self.redis_address = redis_address
        self.internal_q = Queue()
        self.client_ports = None
        self.fx_serializer = FuncXSerializer()
        self.kill_event = threading.Event()
        self.max_heartbeats_missed = max_heartbeats_missed
Example #20
    def __init__(self,
                 config,
                 client_address="127.0.0.1",
                 interchange_address="127.0.0.1",
                 client_ports=(50055, 50056, 50057),
                 worker_ports=None,
                 worker_port_range=(54000, 55000),
                 cores_per_worker=1.0,
                 worker_debug=False,
                 launch_cmd=None,
                 heartbeat_threshold=60,
                 logdir=".",
                 logging_level=logging.INFO,
                 poll_period=10,
                 endpoint_id=None,
                 suppress_failure=False,
                 max_heartbeats_missed=2
                 ):
        """
        Parameters
        ----------
        config : funcx.Config object
             Funcx config object that describes how compute should be provisioned

        client_address : str
             The ip address at which the parsl client can be reached. Default: "127.0.0.1"

        interchange_address : str
             The ip address at which the workers will be able to reach the Interchange. Default: "127.0.0.1"

        client_ports : tuple(int, int, int)
             The ports at which the client can be reached

        launch_cmd : str
             TODO : update

        worker_ports : tuple(int, int)
             The specific two ports at which workers will connect to the Interchange. Default: None

        worker_port_range : tuple(int, int)
             The interchange picks ports at random from the range which will be used by workers.
             This is overridden when the worker_ports option is set. Default: (54000, 55000)

        cores_per_worker : float
             cores to be assigned to each worker. Oversubscription is possible
             by setting cores_per_worker < 1.0. Default=1

        worker_debug : Bool
             Enables worker debug logging.

        heartbeat_threshold : int
             Number of seconds since the last heartbeat after which worker is considered lost.

        logdir : str
             Parsl log directory paths. Logs and temp files go here. Default: '.'

        logging_level : int
             Logging level as defined in the logging module. Default: logging.INFO (20)

        endpoint_id : str
             Identity string that identifies the endpoint to the broker

        poll_period : int
             The main thread polling period, in milliseconds. Default: 10ms

        suppress_failure : Bool
             When set to True, the interchange will attempt to suppress failures. Default: False

        max_heartbeats_missed : int
             Number of heartbeats missed before setting kill_event

        """
        self.logdir = logdir
        try:
            os.makedirs(self.logdir)
        except FileExistsError:
            pass

        start_file_logger("{}/interchange.log".format(self.logdir), level=logging_level)
        logger.info("logger location {}".format(logger.handlers))
        logger.info("Initializing Interchange process with Endpoint ID: {}".format(endpoint_id))
        self.config = config
        logger.info("Got config : {}".format(config))

        self.strategy = self.config.strategy
        self.client_address = client_address
        self.interchange_address = interchange_address
        self.suppress_failure = suppress_failure
        self.poll_period = poll_period

        self.serializer = FuncXSerializer()
        logger.info("Attempting connection to client at {} on ports: {},{},{}".format(
            client_address, client_ports[0], client_ports[1], client_ports[2]))
        self.context = zmq.Context()
        self.task_incoming = self.context.socket(zmq.DEALER)
        self.task_incoming.set_hwm(0)
        self.task_incoming.RCVTIMEO = 10  # in milliseconds
        logger.info("Task incoming on tcp://{}:{}".format(client_address, client_ports[0]))
        self.task_incoming.connect("tcp://{}:{}".format(client_address, client_ports[0]))

        self.results_outgoing = self.context.socket(zmq.DEALER)
        self.results_outgoing.set_hwm(0)
        logger.info("Results outgoing on tcp://{}:{}".format(client_address, client_ports[1]))
        self.results_outgoing.connect("tcp://{}:{}".format(client_address, client_ports[1]))

        self.command_channel = self.context.socket(zmq.DEALER)
        self.command_channel.RCVTIMEO = 1000  # in milliseconds
        # self.command_channel.set_hwm(0)
        logger.info("Command channel on tcp://{}:{}".format(client_address, client_ports[2]))
        self.command_channel.connect("tcp://{}:{}".format(client_address, client_ports[2]))
        logger.info("Connected to client")

        self.pending_task_queue = {}
        self.containers = {}
        self.total_pending_task_count = 0
        self.fxs = FuncXClient()

        logger.info("Interchange address is {}".format(self.interchange_address))
        self.worker_ports = worker_ports
        self.worker_port_range = worker_port_range

        self.task_outgoing = self.context.socket(zmq.ROUTER)
        self.task_outgoing.set_hwm(0)
        self.results_incoming = self.context.socket(zmq.ROUTER)
        self.results_incoming.set_hwm(0)

        # initialize the last heartbeat time to start the loop
        self.last_heartbeat = time.time()
        self.max_heartbeats_missed = max_heartbeats_missed

        self.endpoint_id = endpoint_id
        if self.worker_ports:
            self.worker_task_port = self.worker_ports[0]
            self.worker_result_port = self.worker_ports[1]

            self.task_outgoing.bind("tcp://*:{}".format(self.worker_task_port))
            self.results_incoming.bind("tcp://*:{}".format(self.worker_result_port))

        else:
            self.worker_task_port = self.task_outgoing.bind_to_random_port('tcp://*',
                                                                           min_port=worker_port_range[0],
                                                                           max_port=worker_port_range[1], max_tries=100)
            self.worker_result_port = self.results_incoming.bind_to_random_port('tcp://*',
                                                                                min_port=worker_port_range[0],
                                                                                max_port=worker_port_range[1], max_tries=100)

        logger.info("Bound to ports {},{} for incoming worker connections".format(
            self.worker_task_port, self.worker_result_port))

        self._ready_manager_queue = {}

        self.heartbeat_threshold = heartbeat_threshold
        self.blocks = {}  # type: Dict[str, str]
        self.block_id_map = {}
        self.launch_cmd = launch_cmd
        self.last_core_hr_counter = 0
        if not launch_cmd:
            self.launch_cmd = ("funcx-manager {debug} {max_workers} "
                               "-c {cores_per_worker} "
                               "--poll {poll_period} "
                               "--task_url={task_url} "
                               "--result_url={result_url} "
                               "--logdir={logdir} "
                               "--block_id={{block_id}} "
                               "--hb_period={heartbeat_period} "
                               "--hb_threshold={heartbeat_threshold} "
                               "--worker_mode={worker_mode} "
                               "--scheduler_mode={scheduler_mode} "
                               "--worker_type={{worker_type}} ")

        self.current_platform = {'parsl_v': PARSL_VERSION,
                                 'python_v': "{}.{}.{}".format(sys.version_info.major,
                                                               sys.version_info.minor,
                                                               sys.version_info.micro),
                                 'os': platform.system(),
                                 'hname': platform.node(),
                                 'dir': os.getcwd()}

        logger.info("Platform info: {}".format(self.current_platform))
        self._block_counter = 0
        try:
            self.load_config()
        except Exception:
            logger.exception("Caught exception")
            raise
Example #21
def _get_packed_code(
    func: t.Callable, serializer: t.Optional[FuncXSerializer] = None
) -> str:
    serializer = serializer if serializer else FuncXSerializer()
    return serializer.pack_buffers([serializer.serialize(func)])
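
A small usage sketch for _get_packed_code; the packed buffer is a string that other examples on this page concatenate with packed argument buffers:

# Usage sketch: pack a function for submission.
def double(x):
    return x * 2

packed_fn = _get_packed_code(double)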
Example #22
    def __init__(self,
                 funcx_eid,
                 mdata_store_path,
                 source_eid=None,
                 dest_eid=None,
                 gdrive_token=None,
                 extractor_finder='gdrive',
                 prefetch_remote=False,
                 data_prefetch_path=None,
                 dataset_mdata=None):

        prefetch_remote = False  # force-disabled for now; overrides the argument above

        # TODO -- fix this.
        # self.crawl_type = 'from_file'

        self.write_cpe = False

        self.dataset_mdata = dataset_mdata

        self.t_crawl_start = time.time()
        self.t_send_batch = 0
        self.t_transfer = 0

        self.prefetch_remote = prefetch_remote
        self.data_prefetch_path = data_prefetch_path

        self.extractor_finder = extractor_finder

        self.funcx_eid = funcx_eid
        self.func_dict = {
            "image": xtract_images.ImageExtractor(),
            "images": xtract_images.ImageExtractor(),
            "tabular": xtract_tabular.TabularExtractor(),
            "text": xtract_keyword.KeywordExtractor(),
            "matio": xtract_matio.MatioExtractor()
        }

        self.fx_ser = FuncXSerializer()

        self.send_status = "STARTING"
        self.poll_status = "STARTING"
        self.commit_completed = False

        self.source_endpoint = source_eid
        self.dest_endpoint = dest_eid
        self.gdrive_token = gdrive_token

        self.num_families_fetched = 0
        self.get_families_start_time = time.time()
        self.last_checked = time.time()

        self.pre_launch_counter = 0

        self.success_returns = 0
        self.failed_returns = 0

        self.to_send_queue = Queue()

        self.poll_gap_s = 5

        self.get_families_status = "STARTING"

        self.task_dict = {
            "active": Queue(),
            "pending": Queue(),
            "failed": Queue()
        }

        # Batch size we use to send tasks to funcx.  (and the subbatch size)
        self.map_size = 8
        self.fx_batch_size = 16
        self.fx_task_sublist_size = 500

        # Want to store attributes about funcX requests/responses.
        self.tot_fx_send_payload_size = 0
        self.tot_fx_poll_payload_size = 0
        self.tot_fx_poll_result_size = 0
        self.num_send_reqs = 0
        self.num_poll_reqs = 0
        self.t_first_funcx_invoke = None
        self.max_result_size = 0

        # Number (current and max) of number of tasks sent to funcX for extraction.
        self.max_extracting_tasks = 5
        self.num_extracting_tasks = 0

        self.max_pre_prefetch = 15000  # TODO: Integrate this to actually fix timing bug.

        self.status_things = Queue()

        # If this is turned on, should mean that we hit our local task maximum and don't want to pull down new work...
        self.pause_q_consume = False

        self.file_count = 0
        self.current_batch = []
        self.extract_end = None

        self.mdata_store_path = mdata_store_path
        self.n_fams_transferred = 0

        self.prefetcher_tid = None
        self.prefetch_status = None

        self.fx_headers = {
            "Authorization": f"Bearer {self.headers['FuncX']}",
            'FuncX': self.headers['FuncX']
        }

        self.family_headers = None
        if 'Petrel' in self.headers:
            self.fx_headers['Petrel'] = self.headers['Petrel']
            self.family_headers = {
                'Authorization': f"Bearer {self.headers['Petrel']}",
                'Transfer': self.headers['Transfer'],
                'FuncX': self.headers['FuncX'],
                'Petrel': self.headers['Petrel']
            }

        self.logger = logging.getLogger(__name__)
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
        self.logger.setLevel(
            logging.INFO)  # TODO: let's make this configurable.
        self.families_to_process = Queue()
        self.to_validate_q = Queue()

        self.sqs_push_threads = {}
        self.thr_ls = []
        self.commit_threads = 1
        self.get_family_threads = 20

        if self.prefetch_remote:
            self.logger.info("Launching prefetcher...")

            self.logger.info("Prefetcher successfully launched!")

            prefetch_thread = threading.Thread(
                target=self.prefetcher.main_poller_loop, args=())
            prefetch_thread.start()

        for i in range(0, self.commit_threads):
            thr = threading.Thread(target=self.validate_enqueue_loop,
                                   args=(i, ))
            self.thr_ls.append(thr)
            thr.start()
            self.sqs_push_threads[i] = True
        self.logger.info(
            f"Successfully started {len(self.sqs_push_threads)} SQS push threads!"
        )

        if self.crawl_type != 'from_file':
            for i in range(0, self.get_family_threads):
                self.logger.info(
                    f"Attempting to start get_next_families() as its own thread [{i}]... "
                )
                consumer_thr = threading.Thread(
                    target=self.get_next_families_loop, args=())
                consumer_thr.start()
                print(
                    f"Successfully started the get_next_families() thread number {i} "
                )
        else:
            print("ATTEMPTING TO LAUNCH **FILE** CRAWL THREAD. ")
            file_crawl_thr = threading.Thread(
                target=self.read_next_families_from_file_loop, args=())
            file_crawl_thr.start()
            print("Successfully started the **FILE** CRAWL thread!")

        n_send_threads = 15
        for _ in range(n_send_threads):
            fx_push_thr = threading.Thread(target=self.send_subbatch_thread,
                                           args=())
            fx_push_thr.start()
        print(f"Successfully spun up {n_send_threads} send threads!")

        # Truncate the cpe timing log; the with-block closes the file itself
        with open("cpe_times.csv", 'w'):
            pass
Example #23
    def __init__(self,
                 endpoints,
                 strategy='round-robin',
                 runtime_predictor='rolling-average',
                 last_n=3,
                 train_every=1,
                 log_level='INFO',
                 import_model_file=None,
                 transfer_model_file=None,
                 sync_level='exists',
                 max_backups=0,
                 backup_delay_threshold=2.0,
                 *args,
                 **kwargs):
        self._fxc = FuncXClient(*args, **kwargs)

        # Initialize a transfer client
        self._transfer_manger = TransferManager(endpoints=endpoints,
                                                sync_level=sync_level,
                                                log_level=log_level)

        # Info about FuncX endpoints we can execute on
        self._endpoints = endpoints
        self._dead_endpoints = set()
        self.last_result_time = defaultdict(float)
        self.temperature = defaultdict(lambda: 'WARM')
        self._imports = defaultdict(list)
        self._imports_required = defaultdict(list)

        # Track which endpoints a function can't run on
        self._blocked = defaultdict(set)

        # Track pending tasks
        # We will provide the client our own task ids, since we may submit the
        # same task multiple times to the FuncX service, and sometimes we may
        # wait to submit a task to FuncX (e.g., wait for a data transfer).
        self._task_id_translation = {}
        self._pending = {}
        self._pending_by_endpoint = defaultdict(set)
        self._task_info = {}
        # List of endpoints a (virtual) task was scheduled to
        self._endpoints_sent_to = defaultdict(list)
        self.max_backups = max_backups
        self.backup_delay_threshold = backup_delay_threshold
        self._latest_status = {}
        self._last_task_ETA = defaultdict(float)
        # Maximum ETA, if any, of a task which we allow to be scheduled on an
        # endpoint. This is to prevent backfill tasks to be longer than the
        # estimated time for when a pending data transfer will finish.
        self._transfer_ETAs = defaultdict(dict)
        # Estimated error in the pending-task time of an endpoint.
        # Updated every time a task result is received from an endpoint.
        self._queue_error = defaultdict(float)

        # Set logging levels
        logger.setLevel(log_level)
        self.execution_log = []

        # Initialize serializer
        self.fx_serializer = FuncXSerializer()
        self.fx_serializer.use_custom('03\n', 'code')

        # Initialize runtime predictor
        self.runtime = init_runtime_predictor(runtime_predictor,
                                              endpoints=endpoints,
                                              last_n=last_n,
                                              train_every=train_every)
        logger.info(f"Runtime predictor using strategy {self.runtime}")

        # Initialize transfer-time predictor
        self.transfer_time = TransferPredictor(endpoints=endpoints,
                                               train_every=train_every,
                                               state_file=transfer_model_file)

        # Initialize import-time predictor
        self.import_predictor = ImportPredictor(endpoints=endpoints,
                                                state_file=import_model_file)

        # Initialize scheduling strategy
        self.strategy = init_strategy(strategy,
                                      endpoints=endpoints,
                                      runtime_predictor=self.runtime,
                                      queue_predictor=self.queue_delay,
                                      cold_start_predictor=self.cold_start,
                                      transfer_predictor=self.transfer_time)
        logger.info(f"Scheduler using strategy {self.strategy}")

        # Start thread to check on endpoints regularly
        self._endpoint_watchdog = Thread(target=self._check_endpoints)
        self._endpoint_watchdog.start()

        # Start thread to monitor tasks and send tasks to FuncX service
        self._scheduled_tasks = Queue()
        self._task_watchdog_sleep = 0.15
        self._task_watchdog = Thread(target=self._monitor_tasks)
        self._task_watchdog.start()
Example #24
File: client.py Project: funcx-faas/funcX
    def __init__(
        self,
        http_timeout=None,
        funcx_home=_FUNCX_HOME,
        force_login=False,
        fx_authorizer=None,
        search_authorizer=None,
        openid_authorizer=None,
        funcx_service_address=None,
        check_endpoint_version=False,
        asynchronous=False,
        loop=None,
        results_ws_uri=None,
        use_offprocess_checker=True,
        environment=None,
        **kwargs,
    ):
        """
        Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout

        force_login: bool
            Whether to force a login to get new credentials.

        fx_authorizer: :class:`GlobusAuthorizer \
            <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.

        search_authorizer: :class:`GlobusAuthorizer \
            <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with Globus Search.
            Default: ``None``, will be created.

        openid_authorizer: :class:`GlobusAuthorizer \
            <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with OpenID.
            Default: ``None``, will be created.

        funcx_service_address: str
            For internal use only. The address of the web service.

        results_ws_uri: str
            For internal use only. The address of the websocket service.

        environment: str
            For internal use only. The name of the environment to use.

        asynchronous: bool
            Should the API use asynchronous interactions with the web service?
            Currently only impacts the run method.
            Default: False

        loop: AbstractEventLoop
            If asynchronous mode is requested, then you can provide an optional
            event loop instance. If None, then we will access
            asyncio.get_event_loop()
            Default: None

        use_offprocess_checker: bool
            Set to False to disable the off-process checker in the FuncXSerializer
            used by the client.
            Default: True

        Keyword arguments are the same as for BaseClient.

        """
        # resolve URLs if not set
        if funcx_service_address is None:
            funcx_service_address = get_web_service_url(environment)
        if results_ws_uri is None:
            results_ws_uri = get_web_socket_url(environment)

        self.func_table = {}
        self.use_offprocess_checker = use_offprocess_checker
        self.funcx_home = os.path.expanduser(funcx_home)
        self.session_task_group_id = str(uuid.uuid4())

        if not os.path.exists(self.TOKEN_DIR):
            os.makedirs(self.TOKEN_DIR)

        tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME)
        self.native_client = NativeClient(
            client_id=self.FUNCX_SDK_CLIENT_ID,
            app_name="FuncX SDK",
            token_storage=JSONTokenStorage(tokens_filename),
        )

        # TODO: if fx_authorizer is given, we still need to get an authorizer for Search
        search_scope = "urn:globus:auth:scope:search.api.globus.org:all"
        scopes = [self.FUNCX_SCOPE, search_scope, "openid"]

        if not fx_authorizer or not search_authorizer or not openid_authorizer:
            self.native_client.login(
                requested_scopes=scopes,
                no_local_server=kwargs.get("no_local_server", True),
                no_browser=kwargs.get("no_browser", True),
                refresh_tokens=kwargs.get("refresh_tokens", True),
                force=force_login,
            )

            all_authorizers = self.native_client.get_authorizers_by_scope(
                requested_scopes=scopes
            )
            fx_authorizer = all_authorizers[self.FUNCX_SCOPE]
            search_authorizer = all_authorizers[search_scope]
            openid_authorizer = all_authorizers["openid"]

        self.web_client = FuncxWebClient(
            base_url=funcx_service_address, authorizer=fx_authorizer
        )
        self.fx_serializer = FuncXSerializer(
            use_offprocess_checker=self.use_offprocess_checker
        )

        authclient = AuthClient(authorizer=openid_authorizer)
        user_info = authclient.oauth2_userinfo()
        self.searcher = SearchHelper(
            authorizer=search_authorizer, owner_uuid=user_info["sub"]
        )
        self.funcx_service_address = funcx_service_address
        self.check_endpoint_version = check_endpoint_version

        self.version_check()

        self.results_ws_uri = results_ws_uri
        self.asynchronous = asynchronous
        if asynchronous:
            self.loop = loop if loop else asyncio.get_event_loop()

            # Start up an asynchronous polling loop in the background
            self.ws_polling_task = WebSocketPollingTask(
                self,
                self.loop,
                init_task_group_id=self.session_task_group_id,
                results_ws_uri=self.results_ws_uri,
            )
        else:
            self.loop = None
Example #25
    def __init__(self):
        self.tasks = []
        self.fx_serializer = FuncXSerializer()
Example #26
import json
import sys
import argparse
import time
import funcx
from funcx import FuncXClient
from funcx.serialize import FuncXSerializer
fxs = FuncXSerializer()

# funcx.set_stream_logger()


def double(x):
    return x * 2


def test(fxc, ep_id, task_count=10):

    fn_uuid = fxc.register_function(double, description="Yadu double")
    print("FN_UUID : ", fn_uuid)

    start = time.time()
    task_ids = fxc.map_run(list(range(task_count)),
                           endpoint_id=ep_id,
                           function_id=fn_uuid)
    delta = time.time() - start
    print("Time to launch {} tasks: {:8.3f} s".format(task_count, delta))
    print("Got {} tasks_ids ".format(len(task_ids)))

    for _ in range(3):
        status = fxc.get_batch_status(task_ids)
        print("Batch status : ", status)
Example #27
    def __init__(
            self,
            task_q_url="tcp://127.0.0.1:50097",
            result_q_url="tcp://127.0.0.1:50098",
            max_queue_size=10,
            cores_per_worker=1,
            max_workers=float('inf'),
            uid=None,
            heartbeat_threshold=120,
            heartbeat_period=30,
            logdir=None,
            debug=False,
            block_id=None,
            internal_worker_port_range=(50000, 60000),
            mode="singularity_reuse",
            container_image=None,
            # TODO : This should be 10ms
            poll_period=100):
        """
        Parameters
        ----------
        task_q_url : str
             Url of the queue from which the manager pulls tasks

        result_q_url : str
             Url of the queue to which the manager pushes results

        uid : str
             string unique identifier

        cores_per_worker : float
             cores to be assigned to each worker. Oversubscription is possible
             by setting cores_per_worker < 1.0. Default=1

        max_workers : int
             caps the maximum number of workers that can be launched.
             default: infinity

        heartbeat_threshold : int
             Number of seconds since the last message from the interchange after which
             the interchange is assumed to be unavailable and the manager initiates
             shutdown. Default: 120s

        heartbeat_period : int
             Number of seconds after which a heartbeat message is sent to the interchange

        internal_worker_port_range : tuple(int, int)
             Port range from which the port(s) for the workers to connect to the manager is picked.
             Default: (50000,60000)

        mode : str
             Pick between 3 supported modes for the worker:
              1. no_container : Worker launched without containers
              2. singularity_reuse : Worker launched inside a singularity container that will be reused
              3. singularity_single_use : Each worker and task runs inside a new container instance.

        container_image : str
             Path or identifier for the container to be used. Default: None

        poll_period : int
             Timeout period used by the manager in milliseconds. Default: 100ms
             (the TODO above notes this should eventually be 10ms)
        """

        logger.info("Manager started")

        self.context = zmq.Context()
        self.task_incoming = self.context.socket(zmq.DEALER)
        self.task_incoming.setsockopt(zmq.IDENTITY, uid.encode('utf-8'))
        # Linger is set to 0, so that the manager can exit even when there might be
        # messages in the pipe
        self.task_incoming.setsockopt(zmq.LINGER, 0)
        self.task_incoming.connect(task_q_url)

        self.logdir = logdir
        self.debug = debug
        self.block_id = block_id
        self.result_outgoing = self.context.socket(zmq.DEALER)
        self.result_outgoing.setsockopt(zmq.IDENTITY, uid.encode('utf-8'))
        self.result_outgoing.setsockopt(zmq.LINGER, 0)
        self.result_outgoing.connect(result_q_url)
        logger.info("Manager connected")

        self.uid = uid

        self.mode = mode
        self.container_image = container_image
        self.cores_on_node = multiprocessing.cpu_count()
        self.max_workers = max_workers
        self.cores_per_workers = cores_per_worker
        self.available_mem_on_node = round(
            psutil.virtual_memory().available / (2**30), 1)
        self.worker_count = min(
            max_workers, math.floor(self.cores_on_node / cores_per_worker))
        self.worker_map = WorkerMap(self.worker_count)

        self.internal_worker_port_range = internal_worker_port_range

        self.funcx_task_socket = self.context.socket(zmq.ROUTER)
        self.funcx_task_socket.set_hwm(0)
        self.address = '127.0.0.1'
        self.worker_port = self.funcx_task_socket.bind_to_random_port(
            "tcp://*",
            min_port=self.internal_worker_port_range[0],
            max_port=self.internal_worker_port_range[1])

        logger.info(
            "Manager listening on {} port for incoming worker connections".
            format(self.worker_port))

        self.task_queues = {'RAW': queue.Queue()}

        self.pending_result_queue = multiprocessing.Queue()

        self.max_queue_size = max_queue_size + self.worker_count
        self.tasks_per_round = 1

        self.heartbeat_period = heartbeat_period
        self.heartbeat_threshold = heartbeat_threshold
        self.poll_period = poll_period
        self.serializer = FuncXSerializer()
        self.next_worker_q = []  # FIFO queue for spinning up workers.
Example #28
    def __init__(
        self,
        config,
        client_address="127.0.0.1",
        interchange_address="127.0.0.1",
        client_ports: Tuple[int, int, int] = (50055, 50056, 50057),
        launch_cmd=None,
        logdir=".",
        endpoint_id=None,
        keys_dir=".curve",
        suppress_failure=True,
        endpoint_dir=".",
        endpoint_name="default",
        reg_info=None,
        funcx_client_options=None,
        results_ack_handler=None,
    ):
        """
        Parameters
        ----------
        config : funcx.Config object
             funcX config object that describes how compute should be provisioned

        client_address : str
             The ip address at which the parsl client can be reached.
             Default: "127.0.0.1"

        interchange_address : str
             The ip address at which the workers will be able to reach the Interchange.
             Default: "127.0.0.1"

        client_ports : Tuple[int, int, int]
             The ports at which the client can be reached

        launch_cmd : str
             TODO : update

        logdir : str
             Parsl log directory path; logs and temp files go here. Default: '.'

        keys_dir : str
             Directory where the keys used for communicating with the funcX
             service (forwarders) are stored

        endpoint_id : str
             Identity string that identifies the endpoint to the broker

        suppress_failure : bool
             When set to True, the interchange will attempt to suppress failures.
             Default: True

        endpoint_dir : str
             Endpoint directory path to store registration info in

        endpoint_name : str
             Name of endpoint

        reg_info : Dict
             Registration info from initial registration on endpoint start, if it
             succeeded

        funcx_client_options : Dict
             FuncXClient initialization options
        """
        self.logdir = logdir
        log.info(
            f"Initializing EndpointInterchange process with Endpoint ID: {endpoint_id}"
        )
        self.config = config
        log.info(f"Got config: {config}")

        self.client_address = client_address
        self.interchange_address = interchange_address
        self.client_ports = client_ports
        self.suppress_failure = suppress_failure

        self.endpoint_dir = endpoint_dir
        self.endpoint_name = endpoint_name

        if funcx_client_options is None:
            funcx_client_options = {}
        self.funcx_client = FuncXClient(**funcx_client_options)

        self.initial_registration_complete = False
        if reg_info:
            self.initial_registration_complete = True
            self.apply_reg_info(reg_info)

        self.heartbeat_period = self.config.heartbeat_period
        self.heartbeat_threshold = self.config.heartbeat_threshold
        # initialize the last heartbeat time to start the loop
        self.last_heartbeat = time.time()
        self.keys_dir = keys_dir
        self.serializer = FuncXSerializer()

        self.pending_task_queue = Queue()
        self.containers = {}
        self.total_pending_task_count = 0

        self._quiesce_event = threading.Event()
        self._kill_event = threading.Event()

        self.results_ack_handler = results_ack_handler

        log.info(f"Interchange address is {self.interchange_address}")

        self.endpoint_id = endpoint_id

        self.current_platform = {
            "parsl_v": PARSL_VERSION,
            "python_v": "{}.{}.{}".format(
                sys.version_info.major,
                sys.version_info.minor,
                sys.version_info.micro,
            ),
            "libzmq_v": zmq.zmq_version(),
            "pyzmq_v": zmq.pyzmq_version(),
            "os": platform.system(),
            "hname": platform.node(),
            "funcx_sdk_version": funcx_sdk_version,
            "funcx_endpoint_version": funcx_endpoint_version,
            "registration": self.endpoint_id,
            "dir": os.getcwd(),
        }

        log.info(f"Platform info: {self.current_platform}")
        try:
            self.load_config()
        except Exception:
            log.exception("Caught exception")
            raise

        self.tasks = set()
        self.task_status_deltas = {}

        self._test_start = False
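
The current_platform dictionary above is a plain snapshot of the runtime
environment, and the same information can be gathered outside the class. A
minimal sketch follows; PARSL_VERSION and the funcx version strings are
replaced with a placeholder here, since the real module imports them from
parsl and funcx.

import os
import platform
import sys

import zmq

# Placeholder for a value the interchange imports from parsl
PARSL_VERSION = "unknown"

current_platform = {
    "parsl_v": PARSL_VERSION,
    "python_v": "{}.{}.{}".format(
        sys.version_info.major, sys.version_info.minor, sys.version_info.micro
    ),
    "libzmq_v": zmq.zmq_version(),
    "pyzmq_v": zmq.pyzmq_version(),
    "os": platform.system(),
    "hname": platform.node(),
    "dir": os.getcwd(),
}
print(current_platform)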
Example #29
import time
import uuid
from multiprocessing import Queue  # assumed; the executor hands this queue across processes

from funcx.serialize import FuncXSerializer
from funcx_endpoint.executors import HighThroughputExecutor
from funcx_endpoint.executors.high_throughput.messages import Task


def double(x):
    return x * 2


if __name__ == "__main__":

    results_queue = Queue()
    #    set_file_logger('executor.log', name='funcx_endpoint', level=logging.DEBUG)
    htex = HighThroughputExecutor(interchange_local=True, passthrough=True)

    htex.start(results_passthrough=results_queue)
    htex._start_remote_interchange_process()
    fx_serializer = FuncXSerializer()

    for i in range(10):
        task_id = str(uuid.uuid4())
        args = (i, )
        kwargs = {}

        fn_code = fx_serializer.serialize(double)
        ser_code = fx_serializer.pack_buffers([fn_code])
        ser_params = fx_serializer.pack_buffers(
            [fx_serializer.serialize(args),
             fx_serializer.serialize(kwargs)])

        payload = Task(task_id, "RAW", ser_code + ser_params)
        f = htex.submit_raw(payload.pack())
        time.sleep(0.5)
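
The snippet above submits ten tasks but never consumes what the executor
pushes onto results_queue. A hedged sketch of draining that queue follows;
the structure of the messages on the queue is an assumption made for
illustration, not a documented funcX contract.

from queue import Empty

# Drain whatever the executor has pushed through so far. multiprocessing
# queues raise queue.Empty on timeout, just like queue.Queue.
while True:
    try:
        msg = results_queue.get(timeout=5)
    except Empty:
        break
    # The payload layout is an assumption here; inspect it before attempting
    # to deserialize with FuncXSerializer.
    print("Raw result message:", msg)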
Example #30
    def __init__(
        self,
        task_q_url="tcp://127.0.0.1:50097",
        result_q_url="tcp://127.0.0.1:50098",
        max_queue_size=10,
        cores_per_worker=1,
        max_workers=float("inf"),
        uid=None,
        heartbeat_threshold=120,
        heartbeat_period=30,
        logdir=None,
        debug=False,
        block_id=None,
        internal_worker_port_range=(50000, 60000),
        worker_mode="singularity_reuse",
        container_cmd_options="",
        scheduler_mode="hard",
        worker_type=None,
        worker_max_idletime=60,
        # TODO : This should be 10ms
        poll_period=100,
    ):
        """
        Parameters
        ----------
        task_q_url : str
             URL of the interchange task queue that the manager connects to

        result_q_url : str
             URL of the interchange result queue that the manager connects to

        uid : str
             Unique string identifier for this manager

        cores_per_worker : float
             Cores to be assigned to each worker. Oversubscription is possible
             by setting cores_per_worker < 1.0. Default: 1

        max_workers : int
             Caps the maximum number of workers that can be launched.
             Default: infinity

        heartbeat_threshold : int
             Number of seconds since the last message from the interchange after which
             the manager assumes the interchange is lost and initiates shutdown.
             Default: 120

        heartbeat_period : int
             Interval, in seconds, at which heartbeat messages are sent to the
             interchange. Default: 30

        internal_worker_port_range : tuple(int, int)
             Port range from which the manager picks the port(s) on which workers
             connect back to it. Default: (50000, 60000)

        worker_mode : str
             One of three supported modes for the worker:
              1. no_container : worker launched without a container
              2. singularity_reuse : worker launched inside a Singularity container
                                     that is reused across tasks
              3. singularity_single_use : each worker and task runs inside a fresh
                                          container instance

        container_cmd_options : str
             Extra command-line options appended to the container launch command,
             e.g. singularity exec {container_cmd_options}

        scheduler_mode : str
             Pick between 2 supported modes for the manager:
              1. hard: the manager cannot change the launched container type
              2. soft: the manager can decide whether to launch different containers

        worker_type : str
             If set, the worker type for this manager is fixed. Default: None

        worker_max_idletime : int
             Seconds a worker may remain idle before it becomes eligible for
             termination. Default: 60

        poll_period : int
             Poll timeout, in milliseconds, used by the manager's event loop.
             Default: 100 (the TODO in the signature notes this should eventually be 10 ms)
        """
        log.info("Manager started")

        self.context = zmq.Context()
        self.task_incoming = self.context.socket(zmq.DEALER)
        self.task_incoming.setsockopt(zmq.IDENTITY, uid.encode("utf-8"))
        # Linger is set to 0, so that the manager can exit even when there might be
        # messages in the pipe
        self.task_incoming.setsockopt(zmq.LINGER, 0)
        self.task_incoming.connect(task_q_url)

        self.logdir = logdir
        self.debug = debug
        self.block_id = block_id
        self.result_outgoing = self.context.socket(zmq.DEALER)
        self.result_outgoing.setsockopt(zmq.IDENTITY, uid.encode("utf-8"))
        self.result_outgoing.setsockopt(zmq.LINGER, 0)
        self.result_outgoing.connect(result_q_url)

        log.info("Manager connected")

        self.uid = uid

        self.worker_mode = worker_mode
        self.container_cmd_options = container_cmd_options
        self.scheduler_mode = scheduler_mode
        self.worker_type = worker_type
        self.worker_max_idletime = worker_max_idletime
        self.cores_on_node = multiprocessing.cpu_count()
        self.max_workers = max_workers
        self.cores_per_workers = cores_per_worker
        self.available_mem_on_node = round(
            psutil.virtual_memory().available / (2**30), 1)
        self.max_worker_count = min(
            max_workers, math.floor(self.cores_on_node / cores_per_worker))
        self.worker_map = WorkerMap(self.max_worker_count)

        self.internal_worker_port_range = internal_worker_port_range

        self.funcx_task_socket = self.context.socket(zmq.ROUTER)
        self.funcx_task_socket.set_hwm(0)
        self.address = "127.0.0.1"
        self.worker_port = self.funcx_task_socket.bind_to_random_port(
            "tcp://*",
            min_port=self.internal_worker_port_range[0],
            max_port=self.internal_worker_port_range[1],
        )

        log.info(
            f"Manager listening on port {self.worker_port} "
            "for incoming worker connections")

        self.task_queues = {}
        if worker_type:
            self.task_queues[worker_type] = queue.Queue()
        self.outstanding_task_count = {}
        self.task_type_mapping = {}

        self.pending_result_queue = mpQueue()

        self.max_queue_size = max_queue_size + self.max_worker_count
        self.tasks_per_round = 1

        self.heartbeat_period = heartbeat_period
        self.heartbeat_threshold = heartbeat_threshold
        self.poll_period = poll_period
        self.serializer = FuncXSerializer()
        self.next_worker_q = []  # FIFO queue for spinning up workers.
        self.worker_procs = {}

        self.task_status_deltas = {}

        self._kill_event = threading.Event()
        self._result_pusher_thread = threading.Thread(
            target=self.push_results, args=(self._kill_event, ))
        self._status_report_thread = threading.Thread(
            target=self._status_report_loop, args=(self._kill_event, ))
        self.container_switch_count = 0

        self.poller = zmq.Poller()
        self.poller.register(self.task_incoming, zmq.POLLIN)
        self.poller.register(self.funcx_task_socket, zmq.POLLIN)
        self.task_worker_map = {}

        self.task_done_counter = 0
        self.task_finalization_lock = threading.Lock()
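
The constructor above registers both sockets with a zmq.Poller, but the run
loop itself is not shown. The following is a minimal sketch, not the class's
actual loop, of how such a poller is typically driven with the poll_period
configured above; frame handling is elided.

import zmq

def poll_once(manager):
    # poll() takes a timeout in milliseconds and returns the ready sockets
    socks = dict(manager.poller.poll(timeout=manager.poll_period))

    if socks.get(manager.task_incoming) == zmq.POLLIN:
        # Tasks arrive from the interchange on the DEALER socket
        frames = manager.task_incoming.recv_multipart()
        # ... unpack the task and enqueue it for a worker ...

    if socks.get(manager.funcx_task_socket) == zmq.POLLIN:
        # On a ROUTER socket the first frame is the worker's identity
        frames = manager.funcx_task_socket.recv_multipart()
        # ... route results / heartbeats based on the remaining frames ...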