예제 #1
0
파일: executor.py 프로젝트: Parsl/parsl
    def __init__(self,
                 label: str = 'HighThroughputExecutor',
                 provider: ExecutionProvider = LocalProvider(),
                 launch_cmd: Optional[str] = None,
                 address: Optional[str] = None,
                 worker_ports: Optional[Tuple[int, int]] = None,
                 worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
                 interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
                 storage_access: Optional[List[Staging]] = None,
                 working_dir: Optional[str] = None,
                 worker_debug: bool = False,
                 cores_per_worker: float = 1.0,
                 mem_per_worker: Optional[float] = None,
                 max_workers: Union[int, float] = float('inf'),
                 cpu_affinity: str = 'none',
                 available_accelerators: Union[int, Sequence[str]] = (),
                 prefetch_capacity: int = 0,
                 heartbeat_threshold: int = 120,
                 heartbeat_period: int = 30,
                 poll_period: int = 10,
                 address_probe_timeout: Optional[int] = None,
                 managed: bool = True,
                 worker_logdir_root: Optional[str] = None,
                 block_error_handler: bool = True):

        logger.debug("Initializing HighThroughputExecutor")

        BlockProviderExecutor.__init__(self, provider=provider, block_error_handler=block_error_handler)
        self.label = label
        self.launch_cmd = launch_cmd
        self.worker_debug = worker_debug
        self.storage_access = storage_access
        self.working_dir = working_dir
        self.managed = managed
        self.cores_per_worker = cores_per_worker
        self.mem_per_worker = mem_per_worker
        self.max_workers = max_workers
        self.prefetch_capacity = prefetch_capacity
        self.address = address
        self.address_probe_timeout = address_probe_timeout
        if self.address:
            self.all_addresses = address
        else:
            self.all_addresses = ','.join(get_all_addresses())

        mem_slots = max_workers
        cpu_slots = max_workers
        if hasattr(self.provider, 'mem_per_node') and \
                self.provider.mem_per_node is not None and \
                mem_per_worker is not None and \
                mem_per_worker > 0:
            mem_slots = math.floor(self.provider.mem_per_node / mem_per_worker)
        if hasattr(self.provider, 'cores_per_node') and \
                self.provider.cores_per_node is not None:
            cpu_slots = math.floor(self.provider.cores_per_node / cores_per_worker)

        # Set the list of available accelerators
        if isinstance(available_accelerators, int):
            # If the user provide an integer, create some names for them
            available_accelerators = list(map(str, range(available_accelerators)))
        self.available_accelerators = list(available_accelerators)

        # Determine the number of workers per node
        self._workers_per_node = min(max_workers, mem_slots, cpu_slots)
        if len(self.available_accelerators) > 0:
            self._workers_per_node = min(self._workers_per_node, len(available_accelerators))
        if self._workers_per_node == float('inf'):
            self._workers_per_node = 1  # our best guess-- we do not have any provider hints

        self._task_counter = 0
        self.run_id = None  # set to the correct run_id in dfk
        self.hub_address = None  # set to the correct hub address in dfk
        self.hub_port = None  # set to the correct hub port in dfk
        self.worker_ports = worker_ports
        self.worker_port_range = worker_port_range
        self.interchange_port_range = interchange_port_range
        self.heartbeat_threshold = heartbeat_threshold
        self.heartbeat_period = heartbeat_period
        self.poll_period = poll_period
        self.run_dir = '.'
        self.worker_logdir_root = worker_logdir_root
        self.cpu_affinity = cpu_affinity

        if not launch_cmd:
            self.launch_cmd = ("process_worker_pool.py {debug} {max_workers} "
                               "-a {addresses} "
                               "-p {prefetch_capacity} "
                               "-c {cores_per_worker} "
                               "-m {mem_per_worker} "
                               "--poll {poll_period} "
                               "--task_port={task_port} "
                               "--result_port={result_port} "
                               "--logdir={logdir} "
                               "--block_id={{block_id}} "
                               "--hb_period={heartbeat_period} "
                               "{address_probe_timeout_string} "
                               "--hb_threshold={heartbeat_threshold} "
                               "--cpu-affinity {cpu_affinity} "
                               "--available-accelerators {accelerators}")
예제 #2
0
    def __init__(self,
                 label: str = 'HighThroughputExecutor',
                 provider: ExecutionProvider = LocalProvider(),
                 launch_cmd: Optional[str] = None,
                 address: Optional[str] = None,
                 worker_ports: Optional[Tuple[int, int]] = None,
                 worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
                 interchange_port_range: Optional[Tuple[int,
                                                        int]] = (55000, 56000),
                 storage_access: Optional[List[Staging]] = None,
                 working_dir: Optional[str] = None,
                 worker_debug: bool = False,
                 cores_per_worker: float = 1.0,
                 mem_per_worker: Optional[float] = None,
                 max_workers: Union[int, float] = float('inf'),
                 cpu_affinity: str = 'none',
                 prefetch_capacity: int = 0,
                 heartbeat_threshold: int = 120,
                 heartbeat_period: int = 30,
                 poll_period: int = 10,
                 address_probe_timeout: Optional[int] = None,
                 managed: bool = True,
                 worker_logdir_root: Optional[str] = None):

        logger.debug("Initializing HighThroughputExecutor")

        StatusHandlingExecutor.__init__(self, provider)
        self.label = label
        self.launch_cmd = launch_cmd
        self.worker_debug = worker_debug
        self.storage_access = storage_access
        self.working_dir = working_dir
        self.managed = managed
        self.blocks = {}  # type: Dict[str, str]
        self.cores_per_worker = cores_per_worker
        self.mem_per_worker = mem_per_worker
        self.max_workers = max_workers
        self.prefetch_capacity = prefetch_capacity
        self.address = address
        self.address_probe_timeout = address_probe_timeout
        if self.address:
            self.all_addresses = address
        else:
            self.all_addresses = ','.join(get_all_addresses())

        mem_slots = max_workers
        cpu_slots = max_workers
        if hasattr(self.provider, 'mem_per_node') and \
                self.provider.mem_per_node is not None and \
                mem_per_worker is not None and \
                mem_per_worker > 0:
            mem_slots = math.floor(self.provider.mem_per_node / mem_per_worker)
        if hasattr(self.provider, 'cores_per_node') and \
                self.provider.cores_per_node is not None:
            cpu_slots = math.floor(self.provider.cores_per_node /
                                   cores_per_worker)

        self.workers_per_node = min(max_workers, mem_slots, cpu_slots)
        if self.workers_per_node == float('inf'):
            self.workers_per_node = 1  # our best guess-- we do not have any provider hints

        self._task_counter = 0
        self.hub_address = None  # set to the correct hub address in dfk
        self.hub_port = None  # set to the correct hub port in dfk
        self.worker_ports = worker_ports
        self.worker_port_range = worker_port_range
        self.interchange_port_range = interchange_port_range
        self.heartbeat_threshold = heartbeat_threshold
        self.heartbeat_period = heartbeat_period
        self.poll_period = poll_period
        self.run_dir = '.'
        self.worker_logdir_root = worker_logdir_root
        self.cpu_affinity = cpu_affinity

        if not launch_cmd:
            self.launch_cmd = ("process_worker_pool.py {debug} {max_workers} "
                               "-a {addresses} "
                               "-p {prefetch_capacity} "
                               "-c {cores_per_worker} "
                               "-m {mem_per_worker} "
                               "--poll {poll_period} "
                               "--task_port={task_port} "
                               "--result_port={result_port} "
                               "--logdir={logdir} "
                               "--block_id={{block_id}} "
                               "--hb_period={heartbeat_period} "
                               "{address_probe_timeout_string} "
                               "--hb_threshold={heartbeat_threshold} "
                               "--cpu-affinity {cpu_affinity} ")
예제 #3
0
파일: probe.py 프로젝트: xinyixiang/parsl
        # Linger is set to 0, so that the manager can exit even when there might be
        # messages in the pipe
        self.task_incoming.setsockopt(zmq.LINGER, 0)

        address = probe_addresses(addresses, port)
        print("Viable address :", address)
        self.task_incoming.connect("tcp://{}:{}".format(address, port))
        print("Here")

    def heartbeat(self):
        """ Send heartbeat to the incoming task queue
        """
        HEARTBEAT_CODE = (2**32) - 1
        heartbeat = (HEARTBEAT_CODE).to_bytes(4, "little")
        r = self.task_incoming.send(heartbeat)
        print("Return from heartbeat: {}".format(r))


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-p",
                        "--port",
                        required=True,
                        help="Port to connect to")

    args = parser.parse_args()
    addresses = get_all_addresses()
    worker = TestWorker(addresses, args.port)
    worker.heartbeat()