def partition_by(self, partitioner_func=default_hash, num_of_partitions=-1):
    """ Redistribute the data into partitions selected by a function.

    Every current partition is split into ``nop`` buckets by
    ``distribute_partition``; afterwards, the i-th buckets coming from all
    the current partitions are merged to build the i-th new partition.

    :param partitioner_func: Function forwarded to ``distribute_partition``
                             to distribute the data among the partitions
                             (for example, a hash function).
    :param num_of_partitions: Number of partitions to be created. With the
                              default value (-1) the current number of
                              partitions is kept.
    :return: A new DDS built from the redistributed partitions.

    >>> dds = DDS().load(range(6)).map(lambda x: (x, x))
    >>> dds.partition_by(num_of_partitions=3).collect(True)
    [[(0, 0), (3, 3)], [(1, 1), (4, 4)], [(2, 2), (5, 5)]]
    """

    def combine_lists(_partition):
        # The elements of the partition are grouped by their previous
        # partitions: flatten them into one single list.
        return [_item for _chunk in _partition for _item in _chunk]

    if num_of_partitions == -1:
        nop = len(self.partitions)
    else:
        nop = num_of_partitions

    # Read the flag once: it selects how each partition is handed over to
    # distribute_partition.
    partitions_are_collections = self.paac

    grouped = defaultdict(list)
    for _source in self.partitions:
        buckets = [[] for _ in range(nop)]
        with event_master(3002):
            if partitions_are_collections:
                distribute_partition(buckets, self.func, partitioner_func,
                                     None, _source)
                cdo(_source)
            else:
                distribute_partition(buckets, self.func, partitioner_func,
                                     _source)
        # Bucket number i of every source partition feeds new partition i
        for _i in range(nop):
            grouped[_i].append(buckets[_i])

    future_partitions = [grouped[_key] for _key in sorted(grouped)]

    new_dds = DDS().load(future_partitions, -1, True)
    return new_dds.map_partitions(combine_lists)
def start_runtime(log_level="off", tracing=0, interactive=False):
    # type: (str, int, bool) -> None
    """ Boot the COMPSs runtime.

    Loads and starts the runtime through the external python library that
    calls the bindings-common.

    :param log_level: Log level [ "trace" | "debug" | "info" | "api" | "off" ].
    :param tracing: Tracing level [0 (deactivated) | 1 (basic) | 2 (advanced)].
    :param interactive: Boolean if interactive (ipython or jupyter).
    :return: None
    """
    if __debug__:
        logger.info("Starting COMPSs...")

    # Tracing in the master is enabled only if not interactive - extrae
    # issues within jupyter.
    trace_in_master = tracing > 0 and not interactive
    if trace_in_master:
        enable_trace_master()

    with event_master(START_RUNTIME_EVENT):
        # An external process is requested only for interactive sessions
        # running in the master.
        as_external_process = bool(interactive and context.in_master())
        COMPSs.load_runtime(external_process=as_external_process)

        if log_level == "trace":
            # Could also be "debug" or True, but we only show the C extension
            # debug in the maximum tracing level.
            COMPSs.set_debug(True)
            OT.enable_report()

        COMPSs.start_runtime()

    if __debug__:
        logger.info("COMPSs started")
def delete_object(obj):
    # type: (object) -> bool
    """ Remove object.

    Removes a used object from the internal structures and calls the
    external python library (that calls the bindings-common) in order to
    request its corresponding file removal.

    The removal is best effort: if a KeyError is raised while looking up the
    file name, requesting the deletion or untracking the object, the error
    is reported in the debug log (instead of being silently discarded) and
    the function still returns True, as it did before.

    :param obj: Object to remove.
    :return: False if the object is not being tracked. True otherwise.
    """
    with event_master(DELETE_OBJECT_EVENT):
        app_id = 0
        obj_id = OT.is_tracked(obj)
        if obj_id is None:
            # Not being tracked
            return False
        try:
            file_name = OT.get_file_name(obj_id)
            COMPSs.delete_file(app_id, file_name, False)
            OT.stop_tracking(obj)
        except KeyError as key_error:
            # Keep the best-effort contract (no exception reaches the
            # caller), but leave a trace of what happened.
            if __debug__:
                logger.debug("Could not completely remove object %s: %s" %
                             (str(obj_id), str(key_error)))
        return True
def master_stop_storage(logger):
    # type: (typing.Any) -> None
    """ Shut down the persistent storage from the master.

    Delegates on ``__stop_storage__`` while emitting the
    MASTER_STOP_STORAGE_EVENT event in the master.

    :param logger: Logger where to log the messages.
    :return: None
    """
    stop_storage_event = event_master(MASTER_STOP_STORAGE_EVENT)
    with stop_storage_event:
        __stop_storage__(logger)
def master_init_storage(storage_conf, logger):  # noqa
    # type: (str, typing.Any) -> bool
    """ Initialize the persistent storage from the master.

    Delegates on ``__init_storage__`` while emitting the
    MASTER_INIT_STORAGE_EVENT event in the master.

    :param storage_conf: Storage configuration file.
    :param logger: Logger where to log the messages.
    :return: Whatever ``__init_storage__`` returns (True if initialized,
             False on the contrary).
    """
    init_storage_event = event_master(MASTER_INIT_STORAGE_EVENT)
    with init_storage_event:
        return __init_storage__(storage_conf, logger)
def close_task_group(group_name):
    # type: (str) -> None
    """ Request the closure of a task group.

    Calls the external python library (that calls the bindings-common)
    within the CLOSE_TASK_GROUP_EVENT master event.

    :param group_name: Group name.
    :return: None
    """
    with event_master(CLOSE_TASK_GROUP_EVENT):
        # The application identifier sent to the runtime is always 0
        COMPSs.close_task_group(group_name, 0)
def stop_runtime(code=0, hard_stop=False):
    # type: (int, bool) -> None
    """ Shut down the COMPSs runtime.

    Stops the runtime by calling the external python library that calls
    the bindings-common. Before stopping, any pending wall clock alarm is
    disabled and the tracked objects are cleaned; afterwards the temporary
    files are cleaned and the PyCOMPSs context is set to OUT_OF_SCOPE.
    If the code is different from 0, the application tasks are cancelled
    first.

    :param code: Stop code (if code != 0 ==> cancel application tasks).
    :param hard_stop: Stop compss when runtime has died (forwarded to the
                      object cleanup).
    :return: None
    """
    with event_master(STOP_RUNTIME_EVENT):
        app_id = 0
        cancel_tasks = code != 0

        if __debug__:
            logger.info("Stopping runtime...")

        # Disarm a possible wall clock limit alarm
        signal.alarm(0)

        if cancel_tasks:
            if __debug__:
                logger.info("Canceling all application tasks...")
            COMPSs.cancel_application_tasks(app_id, 0)

        if __debug__:
            logger.info("Cleaning objects...")
        _clean_objects(hard_stop=hard_stop)

        # The object tracker report is only produced when running with
        # __debug__ and the report has been enabled.
        if __debug__ and OT.is_report_enabled():
            logger.info("Generating Object tracker report...")
            OT.generate_report(get_log_path())
            OT.clean_report()

        if __debug__:
            logger.info("Stopping COMPSs...")
        COMPSs.stop_runtime(code)

        if __debug__:
            logger.info("Cleaning temps...")
        _clean_temps()

        context.set_pycompss_context(context.OUT_OF_SCOPE)
        if __debug__:
            logger.info("COMPSs stopped")
def open_task_group(group_name, implicit_barrier):
    # type: (str, bool) -> None
    """ Request the opening of a task group.

    Calls the external python library (that calls the bindings-common)
    within the OPEN_TASK_GROUP_EVENT master event.

    :param group_name: Group name.
    :param implicit_barrier: Perform a wait on all group tasks before closing.
    :return: None
    """
    with event_master(OPEN_TASK_GROUP_EVENT):
        # The application identifier sent to the runtime is always 0
        COMPSs.open_task_group(group_name, implicit_barrier, 0)
def barrier_group(group_name):
    # type: (str) -> str
    """ Wait for all the tasks of the given group.

    Calls the external python library (that calls the bindings-common) in
    order to request a barrier of a group.

    :param group_name: Group name.
    :return: The value returned by the runtime group barrier converted to
             string (the original documentation describes it as None or the
             exception message).
    """
    with event_master(BARRIER_GROUP_EVENT):
        # Call the Runtime group barrier (application id is always 0)
        barrier_outcome = COMPSs.barrier_group(0, group_name)
        return str(barrier_outcome)
def get_file(file_name):
    # type: (str) -> None
    """ Retrieve a file.

    Calls the external python library (that calls the bindings-common) in
    order to request the last version of the file.

    :param file_name: Name of the file to retrieve.
    :return: None
    """
    with event_master(GET_FILE_EVENT):
        if __debug__:
            message = "Getting file %s" % file_name
            logger.debug(message)
        # The application identifier sent to the runtime is always 0
        COMPSs.get_file(0, file_name)
def get_directory(dir_name):
    # type: (str) -> None
    """ Retrieve a directory.

    Calls the external python library (that calls the bindings-common) in
    order to request the last version of the directory.

    :param dir_name: Name of the directory to retrieve.
    :return: None
    """
    with event_master(GET_DIRECTORY_EVENT):
        if __debug__:
            message = "Getting directory %s" % dir_name
            logger.debug(message)
        # The application identifier sent to the runtime is always 0
        COMPSs.get_directory(0, dir_name)
def get_log_path():
    # type: () -> str
    """ Ask the runtime for the logging path.

    Requests the logging path to the external python library (that calls
    the bindings-common).

    :return: The path where to store the logs.
    """
    with event_master(GET_LOG_PATH_EVENT):
        if __debug__:
            logger.debug("Requesting log path")
        received_path = COMPSs.get_logging_path()
        if __debug__:
            logger.debug("Log path received: %s" % received_path)
        return received_path
def get_number_of_resources():
    # type: () -> int
    """ Retrieve the number of active resources.

    Calls the external python library (that calls the bindings-common) in
    order to request for the number of active resources.

    :return: Number of active resources.
    """
    with event_master(GET_NUMBER_RESOURCES_EVENT):
        if __debug__:
            logger.debug("Request the number of active resources")
        # Call the Runtime (application id is always 0)
        active_resources = COMPSs.get_number_of_resources(0)
        return active_resources
def accessed_file(file_name):
    # type: (str) -> bool
    """ Check if the file has been accessed.

    A file that exists in the file system is directly considered as
    accessed. Otherwise, the external python library (that calls the
    bindings-common) is asked.

    :param file_name: <String> File name.
    :return: True if accessed, False otherwise.
    """
    with event_master(ACCESSED_FILE_EVENT):
        if __debug__:
            logger.debug("Checking if file %s has been accessed." % file_name)
        # Short-circuit: the runtime is queried only if the file does not
        # exist (application id is always 0).
        return os.path.exists(file_name) or \
            COMPSs.accessed_file(0, file_name)
def set_wall_clock(wall_clock_limit):
    # type: (int) -> None
    """ Establish the application wall clock limit.

    Registers ``_wall_clock_exceed`` as SIGALRM handler, arms the alarm with
    the given limit and notifies the limit to the runtime.

    :param wall_clock_limit: Wall clock limit in seconds.
    :return: None
    """
    with event_master(WALL_CLOCK_LIMIT_EVENT):
        if __debug__:
            message = "".join(("Set a wall clock limit of ",
                               str(wall_clock_limit)))
            logger.debug(message)

        # Activate wall clock limit alarm
        signal.signal(signal.SIGALRM, _wall_clock_exceed)
        signal.alarm(wall_clock_limit)

        # Call the Runtime to set a timer in case wall clock is reached in a
        # synch (application id is always 0)
        COMPSs.set_wall_clock(0, wall_clock_limit)
def barrier(no_more_tasks=False):
    # type: (bool) -> None
    """ Wait for all the submitted tasks.

    Calls the external python library (that calls the bindings-common) in
    order to request a barrier.

    :param no_more_tasks: If no more tasks are going to be submitted, remove
                          all objects.
    :return: None
    """
    with event_master(BARRIER_EVENT):
        if __debug__:
            logger.debug("Barrier. No more tasks? %s" % str(no_more_tasks))

        if no_more_tasks:
            # Nothing else will be submitted: clean up the objects
            _clean_objects()

        # Call the Runtime barrier (appId 0, not needed for the signature)
        COMPSs.barrier(0, no_more_tasks)
def delete_file(file_name):
    # type: (str) -> bool
    """ Remove a file.

    Calls the external python library (that calls the bindings-common) in
    order to request a file removal. The removal is considered successful
    when the runtime answers the string "true".

    :param file_name: File name to remove.
    :return: True if success. False otherwise.
    """
    with event_master(DELETE_FILE_EVENT):
        if __debug__:
            logger.debug("Deleting file %s" % file_name)

        # Application id is always 0
        runtime_answer = COMPSs.delete_file(0, file_name, True)
        deleted = runtime_answer == "true"

        if __debug__:
            if deleted:
                logger.debug("File %s successfully deleted." % file_name)
            else:
                logger.error("Failed to remove file %s." % file_name)
        return deleted
def open_file(file_name, mode):
    # type: (str, str) -> str
    """ Open a file (retrieving it if necessary).

    Translates the open mode with ``get_compss_direction`` and calls the
    external python library (that calls the bindings-common) in order to
    request the file.

    :param file_name: <String> File name.
    :param mode: Open file mode ('r', 'rw', etc.).
    :return: The current name of the file requested (that may have been
             renamed during runtime).
    """
    with event_master(OPEN_FILE_EVENT):
        direction = get_compss_direction(mode)
        if __debug__:
            logger.debug("Getting file %s with mode %s" %
                         (file_name, direction))
        # Application id is always 0
        runtime_name = COMPSs.open_file(0, file_name, direction)
        if __debug__:
            logger.debug("COMPSs file name is %s" % runtime_name)
        return runtime_name
def free_resources(num_resources, group_name):
    # type: (int, str) -> None
    """ Liberate resources.

    Calls the external python library (that calls the bindings-common) in
    order to request for the destruction of the given resources.

    :param num_resources: Number of resources to destroy.
    :param group_name: Task group to notify. None is sent to the runtime as
                       the "NULL" string.
    :return: None
    """
    with event_master(FREE_RESOURCES_EVENT):
        target_group = "NULL" if group_name is None else group_name
        if __debug__:
            message = "".join((
                "Request the destruction of ",
                str(num_resources),
                " resources with notification to task group ",
                str(target_group)))
            logger.debug(message)
        # Call the Runtime (application id is always 0)
        COMPSs.free_resources(0, num_resources, target_group)
def wait_on(*args, **kwargs):
    # type: (*typing.Any, **typing.Any) -> typing.Any
    """ Wait on a set of objects.

    Waits on the objects defined in args with the options defined in kwargs
    by delegating on ``__wait_on__`` (that receives args and kwargs
    untouched, ``master_event`` included).

    :param args: Objects to wait on.
    :param kwargs: Options: Write enable? [True | False] Default = True.
                   May include:
                   master_event: Emit master event. [Default: True | False]
                                 False will emit the event inside task
                                 (for nested).
    :return: Real value of the objects requested.
    """
    # Choose where the event is emitted: master (default) or inside worker
    if kwargs.get("master_event", True):
        emit_event = event_master
    else:
        emit_event = event_inside_worker

    with emit_event(WAIT_ON_EVENT):
        return __wait_on__(*args, **kwargs)
def nested_barrier():
    # type: () -> None
    """ Wait for all the tasks submitted within a nested task.

    Cleans the objects and calls the external python library (that calls
    the bindings-common) in order to request a barrier with
    no_more_tasks == True.

    CAUTION (kept from the original documentation):
    When using agents (nesting), we can not remove all object tracker
    objects as with normal barrier (and no_more_tasks==True), nor leave all
    objects with (no_more_tasks==False). In this case, it is necessary to
    perform a smart object tracker cleanup (remove in, but not inout nor
    out).

    :return: None
    """
    with event_master(BARRIER_EVENT):
        if __debug__:
            logger.debug("Nested Barrier.")
        _clean_objects()

        # Call the Runtime barrier (appId 0 -- not needed for the signature,
        # and no_more_tasks == True)
        app_id, no_more_tasks = 0, True
        COMPSs.barrier(app_id, no_more_tasks)
def register_ce(core_element):  # noqa
    # type: (CE) -> None
    """ Register a core element.

    Calls the external python library (that calls the bindings-common) in
    order to notify the runtime about a core element that needs to be
    registered.

    Java side examples of rt.registerCoreElement(coreElementSignature,
    impl_signature, impl_constraints, impl_type, impl_type_args):

        // METHOD
        core_elementSignature = 'methodClass.methodName';
        impl_signature = 'methodClass.methodName';
        impl_constraints = 'ComputingUnits:2';
        impl_type = 'METHOD';
        impl_type_args = { 'methodClass', 'methodName' };

        // MPI
        core_elementSignature = 'methodClass1.methodName1';
        impl_signature = 'mpi.MPI';
        impl_constraints = 'StorageType:SSD';
        impl_type = 'MPI';
        impl_type_args = { 'mpiBinary', 'mpiWorkingDir', 'mpiRunner' };

        // PYTHON MPI
        core_elementSignature = 'methodClass1.methodName1';
        impl_signature = 'MPI.methodClass1.methodName';
        impl_constraints = 'ComputingUnits:2';
        impl_type = 'PYTHON_MPI';
        impl_type_args = { 'methodClass', 'methodName',
                           'mpiWorkingDir', 'mpiRunner' };

        // BINARY
        core_elementSignature = 'methodClass2.methodName2';
        impl_signature = 'binary.BINARY';
        impl_constraints = 'MemoryType:RAM';
        impl_type = 'BINARY';
        impl_type_args = { 'binary', 'binaryWorkingDir' };

        // OMPSS
        core_elementSignature = 'methodClass3.methodName3';
        impl_signature = 'ompss.OMPSS';
        impl_constraints = 'ComputingUnits:3';
        impl_type = 'OMPSS';
        impl_type_args = { 'ompssBinary', 'ompssWorkingDir' };

        // OPENCL
        core_elementSignature = 'methodClass4.methodName4';
        impl_signature = 'opencl.OPENCL';
        impl_constraints = 'ComputingUnits:4';
        impl_type = 'OPENCL';
        impl_type_args = { 'openclKernel', 'openclWorkingDir' };

        // VERSIONING (another implementation of an existing core element)
        core_elementSignature = 'methodClass.methodName';
        impl_signature = 'anotherClass.anotherMethodName';
        impl_constraints = 'ComputingUnits:1';
        impl_type = 'METHOD';
        impl_type_args = { 'anotherClass', 'anotherMethodName' };

    ---------------------

    Core Element fields:

    ce_signature: <String> Core Element signature
                  (e.g.- "methodClass.methodName")
    impl_signature: <String> Implementation signature
                    (e.g.- "methodClass.methodName")
    impl_constraints: <Dict> Implementation constraints
                      (e.g.- "{ComputingUnits:2}")
    impl_type: <String> Implementation type
               ("METHOD" | "MPI" | "BINARY" | "OMPSS" | "OPENCL")
    impl_io: <String> IO Implementation
    impl_type_args: <List(Strings)> Implementation arguments
                    (e.g.- ["methodClass", "methodName"])

    :param core_element: <CE> Core Element to register.
    :return: None
    :raises PyCOMPSsException: If a constraint value is not str, int or list.
    """
    with event_master(REGISTER_CORE_ELEMENT_EVENT):
        # Retrieve Core element fields. Empty strings are normalized: None
        # for the implementation signature and type, an empty dictionary for
        # the constraints.
        ce_signature = core_element.get_ce_signature()

        raw_signature = core_element.get_impl_signature()
        if raw_signature == "":
            impl_signature = None
        else:
            impl_signature = raw_signature

        raw_constraints = core_element.get_impl_constraints()
        if raw_constraints == "":
            impl_constraints = dict()  # type: typing.Any
        else:
            impl_constraints = raw_constraints

        raw_type = core_element.get_impl_type()
        if raw_type == "":
            impl_type = None
        else:
            impl_type = str(raw_type)

        impl_io = str(core_element.get_impl_io())
        impl_type_args = core_element.get_impl_type_args()

        if __debug__:
            logger.debug("Registering CE with signature: %s" % ce_signature)
            logger.debug("\t - Implementation signature: %s" % impl_signature)

        # Build constraints string from constraints dictionary, with the
        # shape "key:value;" per constraint.
        constraint_chunks = []
        for name, value in impl_constraints.items():
            if isinstance(value, (int, str)):
                rendered = str(value)
            elif isinstance(value, list):
                # Lists are rendered without the quotes of their elements
                rendered = str(value).replace("'", "")
            else:
                raise PyCOMPSsException(
                    "Implementation constraints items must be str, int or list."
                )
            constraint_chunks.append("".join((name, ':', rendered, ';')))
        impl_constraints_str = "".join(constraint_chunks)

        if __debug__:
            logger.debug("\t - Implementation constraints: %s" %
                         impl_constraints_str)
            logger.debug("\t - Implementation type: %s" %
                         impl_type)
            logger.debug("\t - Implementation type arguments: %s" %
                         " ".join(impl_type_args))

        # Call runtime with the appropriate parameters
        COMPSs.register_core_element(ce_signature,
                                     impl_signature,
                                     impl_constraints_str,
                                     impl_type,
                                     impl_io,
                                     impl_type_args)
        if __debug__:
            logger.debug("CE with signature %s registered." % ce_signature)
def __decorator_body__(self, user_function, args, kwargs):
    # type: (typing.Callable, tuple, dict) -> typing.Any
    """ Decide how the decorated function is run depending on the context.

    - In the master: the core element is checked and the call is submitted
      through a TaskMaster.
    - In the worker with "compss_key" in kwargs: the function is executed
      through a TaskWorker (waiting for the nested tasks if nesting is
      enabled).
    - In the worker without "compss_key": the call is submitted through a
      TaskMaster if nesting is enabled, or executed sequentially (with a
      warning in stderr) otherwise.
    - Neither in master nor in worker: the function is executed
      sequentially.

    :param user_function: Decorated function.
    :param args: Positional arguments of the call.
    :param kwargs: Keyword arguments of the call.
    :return: The TaskMaster first result element, the TaskWorker result or
             the sequential call result, depending on the chosen path.
    """

    def _submit_through_master():
        # type: () -> typing.Any
        # Each task will have a TaskMaster, so its content will
        # not be shared.
        with event_master(TASK_INSTANTIATION):
            task_master = TaskMaster(self.decorator_arguments,
                                     self.user_function,
                                     self.core_element,
                                     self.registered,
                                     self.signature,
                                     self.interactive,
                                     self.module,
                                     self.function_arguments,
                                     self.function_name,
                                     self.module_name,
                                     self.function_type,
                                     self.class_name,
                                     self.hints,
                                     self.on_failure,
                                     self.defaults)
        outcome = task_master.call(args, kwargs)
        # The call returns the value for the caller followed by the
        # (possibly updated) task attributes, which are stored back.
        (future_object,
         self.core_element,
         self.registered,
         self.signature,
         self.interactive,
         self.module,
         self.function_arguments,
         self.function_name,
         self.module_name,
         self.function_type,
         self.class_name,
         self.hints) = outcome
        del task_master
        return future_object

    # Determine the context and decide what to do
    if context.in_master():
        # @task being executed in the master
        self.__check_core_element__(kwargs, user_function)
        return _submit_through_master()

    if context.in_worker():
        if "compss_key" in kwargs.keys():
            if context.is_nesting_enabled():
                # Update the whole logger since it will be in job out/err
                log_files = kwargs["compss_log_files"]
                update_logger_handlers(kwargs["compss_log_cfg"],
                                       log_files[0],
                                       log_files[1])
            # @task being executed in the worker
            with event_inside_worker(WORKER_TASK_INSTANTIATION):
                task_worker = TaskWorker(self.decorator_arguments,
                                         self.user_function,
                                         self.on_failure,
                                         self.defaults)
            worker_result = task_worker.call(*args, **kwargs)
            # Force flush stdout and stderr
            sys.stdout.flush()
            sys.stderr.flush()
            # Remove worker
            del task_worker
            if context.is_nesting_enabled():
                # Wait for all nested tasks to finish
                from pycompss.runtime.binding import nested_barrier
                nested_barrier()
                # Reestablish logger handlers
                update_logger_handlers(kwargs["compss_log_cfg"])
            return worker_result

        if context.is_nesting_enabled():
            # Nested task submission from within the worker
            return _submit_through_master()

        # Called from another task within the worker
        # Ignore the @task decorator and run it sequentially
        message = "".join(("WARNING: Calling task: ",
                           str(user_function.__name__),
                           " from this task.\n",
                           "         It will be executed ",
                           "sequentially within the caller task."))
        print(message, file=sys.stderr)
        return self._sequential_call(*args, **kwargs)

    # We are neither in master nor in the worker, or the user has
    # stopped the interactive session.
    # Therefore, the user code is being executed with no
    # launch_compss/enqueue_compss/runcompss/interactive session
    return self._sequential_call(*args, **kwargs)
def launch_pycompss_application(
        app,                                 # type: str
        func,                                # type: typing.Optional[str]
        log_level="off",                     # type: str
        o_c=False,                           # type: bool
        debug=False,                         # type: bool
        graph=False,                         # type: bool
        trace=False,                         # type: bool
        monitor=-1,                          # type: int
        project_xml="",                      # type: str
        resources_xml="",                    # type: str
        summary=False,                       # type: bool
        task_execution="compss",             # type: str
        storage_impl="",                     # type: str
        storage_conf="",                     # type: str
        streaming_backend="",                # type: str
        streaming_master_name="",            # type: str
        streaming_master_port="",            # type: str
        task_count=50,                       # type: int
        app_name="",                         # type: str
        uuid="",                             # type: str
        base_log_dir="",                     # type: str
        specific_log_dir="",                 # type: str
        extrae_cfg="",                       # type: str
        comm="NIO",                          # type: str
        conn=DEFAULT_CONN,                   # type: str
        master_name="",                      # type: str
        master_port="",                      # type: str
        scheduler=DEFAULT_SCHED,             # type: str
        jvm_workers=DEFAULT_JVM_WORKERS,     # type: str
        cpu_affinity="automatic",            # type: str
        gpu_affinity="automatic",            # type: str
        fpga_affinity="automatic",           # type: str
        fpga_reprogram="",                   # type: str
        profile_input="",                    # type: str
        profile_output="",                   # type: str
        scheduler_config="",                 # type: str
        external_adaptation=False,           # type: bool
        propagate_virtual_environment=True,  # type: bool
        mpi_worker=False,                    # type: bool
        worker_cache=False,                  # type: typing.Union[bool, str]
        shutdown_in_node_failure=False,      # type: bool
        io_executors=0,                      # type: int
        env_script="",                       # type: str
        reuse_on_block=True,                 # type: bool
        nested_enabled=False,                # type: bool
        tracing_task_dependencies=False,     # type: bool
        trace_label="",                      # type: str
        extrae_cfg_python="",                # type: str
        wcl=0,                               # type: int
        cache_profiler=False,                # type: bool
        *args, **kwargs):  # NOSONAR
    # type: (...) -> typing.Any
    """ Launch PyCOMPSs application from function.

    Prepares the environment and the configuration file, starts the
    runtime, the logging, the storage (only when both storage_impl and
    storage_conf are given) and the streaming, runs the application, and
    finally stops streaming, storage and runtime.

    While the application runs, sys.argv is replaced by the positional
    arguments; the original sys.argv is always restored afterwards, even if
    the application raises an exception.

    :param app: Application path
    :param func: Function to call within the application. If None or
                 "__main__", the whole application file is executed.
    :param log_level: Logging level [ "trace"|"debug"|"info"|"api"|"off" ]
                      (default: "off")
    :param o_c: Objects to string conversion [ True | False ] (default: False)
    :param debug: Debug mode [ True | False ] (default: False)
                  (overrides log_level)
    :param graph: Generate graph [ True | False ] (default: False)
    :param trace: Generate trace
                  [ True | False | "scorep" | "arm-map" | "arm-ddt"]
                  (default: False)
    :param monitor: Monitor refresh rate (default: -1)
    :param project_xml: Project xml file path
    :param resources_xml: Resources xml file path
    :param summary: Execution summary [ True | False ] (default: False)
    :param task_execution: Task execution (default: "compss")
    :param storage_impl: Storage implementation path
    :param storage_conf: Storage configuration file path
    :param streaming_backend: Streaming backend (default: "")
    :param streaming_master_name: Streaming master name (default: "")
    :param streaming_master_port: Streaming master port (default: "")
    :param task_count: Task count (default: 50)
    :param app_name: Application name (default: "")
    :param uuid: UUId
    :param base_log_dir: Base logging directory
    :param specific_log_dir: Specific logging directory
    :param extrae_cfg: Extrae configuration file path
    :param comm: Communication library (default: NIO)
    :param conn: Connector (default: DEFAULT_CONN)
    :param master_name: Master Name (default: "")
    :param master_port: Master port (default: "")
    :param scheduler: Scheduler (default: DEFAULT_SCHED)
    :param jvm_workers: Java VM parameters (default: DEFAULT_JVM_WORKERS)
    :param cpu_affinity: CPU Core affinity (default: "automatic")
    :param gpu_affinity: GPU Core affinity (default: "automatic")
    :param fpga_affinity: FPGA Core affinity (default: "automatic")
    :param fpga_reprogram: FPGA reprogram command (default: "")
    :param profile_input: Input profile  (default: "")
    :param profile_output: Output profile  (default: "")
    :param scheduler_config: Scheduler configuration  (default: "")
    :param external_adaptation: External adaptation [ True | False ]
                                (default: False)
    :param propagate_virtual_environment: Propagate virtual environment
                                          [ True | False ] (default: True)
    :param mpi_worker: Use the MPI worker [ True | False ] (default: False)
    :param worker_cache: Use the worker cache [ True | int(size) | False]
                         (default: False)
    :param shutdown_in_node_failure: Shutdown in node failure
                                     [ True | False] (default: False)
    :param io_executors: <Integer> Number of IO executors
    :param env_script: <String> Environment script to be sourced in workers
    :param reuse_on_block: Reuse on block [ True | False] (default: True)
    :param nested_enabled: Nested enabled [ True | False] (default: False)
    :param tracing_task_dependencies: Include task dependencies in trace
                                      [ True | False] (default: False)
    :param trace_label: <String> Add trace label
    :param extrae_cfg_python: <String> Extrae configuration file for the
                              workers
    :param wcl: <Integer> Wallclock limit. Stops the runtime if reached.
                0 means forever.
    :param cache_profiler: Use the cache profiler [ True | False]
                           (default: False)
    :param args: Positional arguments
    :param kwargs: Named arguments
    :return: Execution result. None when the whole application file is
             executed (func is None or "__main__") or when the flags check
             fails.
    :raises PyCOMPSsException: If COMPSS_HOME is not defined in the
                               environment.
    """
    # Check that COMPSs is available
    if "COMPSS_HOME" not in os.environ:
        # Do not allow to continue if COMPSS_HOME is not defined
        raise PyCOMPSsException(
            "ERROR: COMPSS_HOME is not defined in the environment")

    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start, compss_stop

    ##############################################################
    # INITIALIZATION
    ##############################################################

    if debug:
        log_level = "debug"

    # Initial dictionary with the user defined parameters
    all_vars = parameters_to_dict(log_level,
                                  debug,
                                  o_c,
                                  graph,
                                  trace,
                                  monitor,
                                  project_xml,
                                  resources_xml,
                                  summary,
                                  task_execution,
                                  storage_impl,
                                  storage_conf,
                                  streaming_backend,
                                  streaming_master_name,
                                  streaming_master_port,
                                  task_count,
                                  app_name,
                                  uuid,
                                  base_log_dir,
                                  specific_log_dir,
                                  extrae_cfg,
                                  comm,
                                  conn,
                                  master_name,
                                  master_port,
                                  scheduler,
                                  jvm_workers,
                                  cpu_affinity,
                                  gpu_affinity,
                                  fpga_affinity,
                                  fpga_reprogram,
                                  profile_input,
                                  profile_output,
                                  scheduler_config,
                                  external_adaptation,
                                  propagate_virtual_environment,
                                  mpi_worker,
                                  worker_cache,
                                  shutdown_in_node_failure,
                                  io_executors,
                                  env_script,
                                  reuse_on_block,
                                  nested_enabled,
                                  tracing_task_dependencies,
                                  trace_label,
                                  extrae_cfg_python,
                                  wcl,
                                  cache_profiler)
    # Save all vars in global current flags so that events.py can restart
    # the notebook with the same flags
    export_current_flags(all_vars)

    # Check the provided flags
    flags, issues = check_flags(all_vars)
    if not flags:
        print_flag_issues(issues)
        return None

    # Prepare the environment
    env_vars = prepare_environment(False, o_c, storage_impl, app,
                                   debug, trace, mpi_worker)
    all_vars.update(env_vars)

    monitoring_vars = prepare_loglevel_graph_for_monitoring(monitor,
                                                            graph,
                                                            debug,
                                                            log_level)
    all_vars.update(monitoring_vars)

    if RUNNING_IN_SUPERCOMPUTER:
        updated_vars = updated_variables_in_sc()
        all_vars.update(updated_vars)

    to_update = prepare_tracing_environment(all_vars["trace"],
                                            all_vars["extrae_lib"],
                                            all_vars["ld_library_path"])
    all_vars["trace"], all_vars["ld_library_path"] = to_update

    inf_vars = check_infrastructure_variables(all_vars["project_xml"],
                                              all_vars["resources_xml"],
                                              all_vars["compss_home"],
                                              all_vars["app_name"],
                                              all_vars["file_name"],
                                              all_vars["external_adaptation"])
    all_vars.update(inf_vars)

    create_init_config_file(**all_vars)

    ##############################################################
    # RUNTIME START
    ##############################################################

    # Runtime start
    compss_start(log_level, all_vars["trace"], True)

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(all_vars["compss_home"],
                            "Bindings",
                            "python",
                            str(all_vars["major_version"]),
                            "log")
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    logger = logging.getLogger("pycompss.runtime.launch")

    logger.debug("--- START ---")
    logger.debug("PyCOMPSs Log path: %s" % log_path)

    if storage_impl and storage_conf:
        logger.debug("Starting storage")
        persistent_storage = master_init_storage(all_vars["storage_conf"],
                                                 logger)
    else:
        persistent_storage = False

    logger.debug("Starting streaming")
    streaming = init_streaming(all_vars["streaming_backend"],
                               all_vars["streaming_master_name"],
                               all_vars["streaming_master_port"])

    saved_argv = sys.argv
    sys.argv = list(args)
    try:
        # Execution:
        with event_master(APPLICATION_RUNNING_EVENT):
            if func is None or func == "__main__":
                if IS_PYTHON3:
                    # Read the application with a context manager so that
                    # the file handle is always closed, and compile it with
                    # its path so that tracebacks point to the real file.
                    with open(app) as app_file:
                        app_code = compile(app_file.read(), app, "exec")
                    exec(app_code)
                else:
                    execfile(app)  # type: ignore
                result = None
            else:
                if IS_PYTHON3:
                    from importlib.machinery import SourceFileLoader  # noqa
                    imported_module = SourceFileLoader(
                        all_vars["file_name"], app).load_module()  # type: ignore
                else:
                    import imp  # noqa
                    imported_module = imp.load_source(all_vars["file_name"], app)  # noqa
                method_to_call = getattr(imported_module, func)
                try:
                    result = method_to_call(*args, **kwargs)
                except TypeError:
                    # NOTE(review): this also retries without arguments when
                    # the TypeError comes from inside the function - confirm
                    # that this is the intended behaviour.
                    result = method_to_call()
    finally:
        # Recover the system arguments (also if the application failed)
        sys.argv = saved_argv

    # Stop streaming
    if streaming:
        stop_streaming()

    # Stop persistent storage
    if persistent_storage:
        master_stop_storage(logger)

    logger.debug("--- END ---")

    ##############################################################
    # RUNTIME STOP
    ##############################################################

    # Stop runtime
    compss_stop()
    clean_log_configs()

    return result
def compss_main():
    # type: () -> None
    """ PyCOMPSs main function.

    General call:
    python $PYCOMPSS_HOME/pycompss/runtime/launch.py $wall_clock $log_level
           $PyObject_serialize $storage_conf $streaming_backend
           $streaming_master_name $streaming_master_port
           $fullAppPath $application_args

    The first ten entries of sys.argv are handed to parse_arguments; whatever
    follows is treated as the user application arguments.

    Steps performed:
        1. Mark the context as master and parse the launcher arguments.
        2. Optionally preload the user module (skipped when storage is used).
        3. Start the runtime, register @implements core elements and set
           the wall clock limit if it is greater than 0.
        4. Configure logging, storage and streaming.
        5. Execute the user application within this module globals.
        6. Always stop everything (stop_all) and clean the log
           configuration, forwarding the resulting exit code.

    Exit code forwarded to stop_all:
        - 0 on success, on SystemExit with code 0 or None, and on
          COMPSsException (not considered an error).
        - The SystemExit code when the application exits with any other code.
        - 1 on SerializerException or any other exception.

    :return: None
    """
    global APP_PATH
    global STREAMING
    global PERSISTENT_STORAGE
    global LOGGER

    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start           # noqa
    from pycompss.api.api import compss_stop            # noqa
    from pycompss.api.api import compss_set_wall_clock  # noqa

    # See parse_arguments, defined above
    # In order to avoid parsing user arguments, we are going to remove user
    # args from sys.argv
    user_sys_argv = sys.argv[10:]
    sys.argv = sys.argv[:10]
    args = parse_arguments()
    # We are done, now sys.argv must contain user args only
    sys.argv = [args.app_path] + user_sys_argv

    # Get log_level
    log_level = args.log_level

    # Setup tracing
    tracing = int(args.tracing)

    # Get storage configuration at master
    storage_conf = args.storage_configuration

    # Load user imports before starting the runtime (can be avoided if
    # ENVIRONMENT_VARIABLE_LOAD -- defined in configuration.py --
    # is set to false).
    # Reason: some cases like autoparallel can require to avoid loading.
    # It is disabled if using storage (with dataClay this can not be done)
    if preload_user_code() and not use_storage(storage_conf):
        with context.loading_context():
            __load_user_module__(args.app_path, log_level)

    # Start the runtime
    compss_start(log_level, tracing, False)

    # Register @implements core elements (they can not be registered in
    # __load_user__module__).
    __register_implementation_core_elements__()

    # Get application wall clock limit
    wall_clock = int(args.wall_clock)
    if wall_clock > 0:
        compss_set_wall_clock(wall_clock)

    # Get object_conversion boolean
    set_object_conversion(args.object_conversion == "true")

    # Get application execution path
    APP_PATH = args.app_path

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(str(os.getenv("COMPSS_HOME")),
                            "Bindings",
                            "python",
                            str(PYTHON_VERSION),
                            "log")
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    LOGGER = logging.getLogger("pycompss.runtime.launch")

    # Get JVM options
    # jvm_opts = os.environ["JVM_OPTIONS_FILE"]
    # from pycompss.util.jvm.parser import convert_to_dict
    # opts = convert_to_dict(jvm_opts)
    # storage_conf = opts.get("-Dcompss.storage.conf")

    exit_code = 0
    try:
        if __debug__:
            LOGGER.debug("--- START ---")
            LOGGER.debug("PyCOMPSs Log path: %s", binding_log_path)

        # Start persistent storage
        PERSISTENT_STORAGE = master_init_storage(storage_conf, LOGGER)

        # Start STREAMING
        STREAMING = init_streaming(args.streaming_backend,
                                   args.streaming_master_name,
                                   args.streaming_master_port)

        # Show module warnings
        if __debug__:
            show_optional_module_warnings()

        # MAIN EXECUTION
        with event_master(APPLICATION_RUNNING_EVENT):
            # The application is executed with this module globals.
            if IS_PYTHON3:
                with open(APP_PATH) as f:
                    exec(compile(f.read(), APP_PATH, "exec"), globals())
            else:
                execfile(APP_PATH, globals())  # type: ignore

        # End
        if __debug__:
            LOGGER.debug("--- END ---")
    except SystemExit as e:  # NOSONAR - reraising would not allow to stop the runtime gracefully.
        # A bare sys.exit() carries code None, which means successful
        # termination, so only codes other than None and 0 are errors.
        if e.code is not None and e.code != 0:
            print("[ ERROR ]: User program ended with exitcode %s." % e.code)
            print("\t\tShutting down runtime...")
            exit_code = e.code
    except SerializerException:
        # If an object that can not be serialized has been used as a parameter.
        # The exit code is set first so that it is already in place if the
        # reporting below fails.
        exit_code = 1
        print("[ ERROR ]: Serialization exception")
        # Only show the traceback entries that point to the user application.
        for line in traceback.format_exception(*sys.exc_info()):
            if APP_PATH in line:
                print("[ ERROR ]: In: %s" % line)
    except COMPSsException as e:
        # A COMPSs exception occurred
        print("[ ERROR ]: A COMPSs exception occurred: " + str(e))
        traceback.print_exc()
        exit_code = 0  # COMPSs exception is not considered an error
    except Exception as e:
        # Any other exception occurred
        print("[ ERROR ]: An exception occurred: " + str(e))
        traceback.print_exc()
        exit_code = 1
    finally:
        # Stop runtime
        stop_all(exit_code)
        clean_log_configs()
def process_task(
    signature,          # type: str
    has_target,         # type: bool
    names,              # type: list
    values,             # type: list
    num_returns,        # type: int
    compss_types,       # type: list
    compss_directions,  # type: list
    compss_streams,     # type: list
    compss_prefixes,    # type: list
    content_types,      # type: list
    weights,            # type: list
    keep_renames,       # type: list
    has_priority,       # type: bool
    num_nodes,          # type: int
    reduction,          # type: bool
    chunk_size,         # type: int
    replicated,         # type: bool
    distributed,        # type: bool
    on_failure,         # type: str
    time_out,           # type: int
    is_http=False       # type: bool
):
    # type: (...) -> None
    """ Submit a task to the runtime.

    Logs the submission details (only when __debug__ is enabled), checks that
    all the per-parameter lists have the same length and forwards the task to
    the C extension: COMPSs.process_http_task if is_http, otherwise
    COMPSs.process_task.

    :param signature: Task signature
    :param has_target: Boolean if the task has self
    :param names: Task parameter names
    :param values: Task parameter values
    :param num_returns: Number of returns
    :param compss_types: List of parameter types
    :param compss_directions: List of parameter directions
    :param compss_streams: List of parameter streams
    :param compss_prefixes: List of parameter prefixes
    :param content_types: Content types
    :param weights: List of parameter weights
    :param keep_renames: Boolean keep renaming
    :param has_priority: Boolean has priority
    :param num_nodes: Number of nodes that the task must use
    :param reduction: Boolean indicating if the task is of type reduce
    :param chunk_size: Size of chunks for executing the reduce operation
    :param replicated: Boolean indicating if the task must be replicated
    :param distributed: Boolean indicating if the task must be distributed
    :param on_failure: Action on failure
    :param time_out: Time for a task time out
    :param is_http: If it is an http task (service)
    :return: None
    :raises AssertionError: If values, compss_types, compss_directions,
                            compss_streams, compss_prefixes, content_types,
                            weights and keep_renames do not have the same
                            length (only when assertions are enabled).
    """
    with event_master(PROCESS_TASK_EVENT):
        # Application id: always 0 (not currently needed for the signature).
        app_id = 0

        if __debug__:
            # Log the task submission values for debugging purposes.
            def _join(items):
                # Every element goes through str() so that a non-string entry
                # can not break the debug report.
                return " ".join(str(item) for item in items)

            report = (
                ("App id", app_id),
                ("Signature", signature),
                ("Has target", has_target),
                ("Names", _join(names)),
                ("Values", _join(values)),
                ("COMPSs types", _join(compss_types)),
                ("COMPSs directions", _join(compss_directions)),
                ("COMPSs streams", _join(compss_streams)),
                ("COMPSs prefixes", _join(compss_prefixes)),
                ("Content Types", _join(content_types)),
                ("Weights", _join(weights)),
                ("Keep_renames", _join(keep_renames)),
                ("Priority", has_priority),
                ("Num nodes", num_nodes),
                ("Reduce", reduction),
                ("Chunk Size", chunk_size),
                ("Replicated", replicated),
                ("Distributed", distributed),
                ("On failure behavior", on_failure),
                ("Task time out", time_out),
                ("Is http", is_http),
            )
            logger.debug("Processing task:")
            for label, value in report:
                logger.debug("\t- %s: %s", label, value)

        # Check that there is the same amount of values as their types, as well
        # as their directions, streams, prefixes, content types, weights and
        # keep renames flags.
        assert (len(values) == len(compss_types) == len(compss_directions) ==
                len(compss_streams) == len(compss_prefixes) ==
                len(content_types) == len(weights) == len(keep_renames))

        # Submit task to the runtime (call to the C extension).
        # Both entry points receive the same positional arguments:
        #     0 - app_id             - application id (always 0)
        #     1 - signature          - task signature
        #     2 - on_failure         - behavior if the task fails
        #     3 - time_out           - task time out
        #     4 - has_priority       - priority flag
        #     5 - num_nodes          - number of nodes
        #     6 - reduction          - reduce flag
        #     7 - chunk_size         - reduce chunk size
        #     8 - replicated         - replicated flag
        #     9 - distributed        - distributed flag
        #    10 - has_target         - if the task is within an object or not
        #    11 - num_returns        - number of returns
        #    12 - values             - parameters values
        #    13 - names              - parameters names
        #    14 - compss_types       - parameters types
        #    15 - compss_directions  - parameters directions
        #    16 - compss_streams     - parameters streams
        #    17 - compss_prefixes    - parameters prefixes
        #    18 - content_types      - parameters content types
        #    19 - weights            - parameters weights
        #    20 - keep_renames       - parameters keep renames flags
        if is_http:
            submit = COMPSs.process_http_task
        else:
            submit = COMPSs.process_task
        submit(app_id,
               signature,
               on_failure,
               time_out,
               has_priority,
               num_nodes,
               reduction,
               chunk_size,
               replicated,
               distributed,
               has_target,
               num_returns,
               values,
               names,
               compss_types,
               compss_directions,
               compss_streams,
               compss_prefixes,
               content_types,
               weights,
               keep_renames)