def __hard_stop__(debug, sync, logger, ipython):
    # type: (bool, bool, typing.Any, typing.Any) -> None
    """
    The runtime has been stopped due to an error, so this method stops the
    remaining components of the binding.

    :param debug: If debugging.
    :param sync: Scope variables synchronization [ True | False ].
    :param logger: Logger where to put the logging messages.
    :param ipython: Ipython instance.
    :return: None
    """
    print("The runtime is not running.")
    # Check that everything is stopped as well:

    # Stop streaming
    if STREAMING:
        stop_streaming()

    # Stop persistent storage
    if PERSISTENT_STORAGE:
        master_stop_storage(logger)

    # Clean any left object in the object tracker
    OT.clean_object_tracker()

    # Cleanup events and files
    release_event_manager(ipython)
    __clean_temp_files__()

    # Stop watching stdout and stderr
    STDW.stop_watching(clean=not debug)
    # Retrieve the remaining messages that could have been captured.
    last_messages = STDW.get_messages()
    if last_messages:
        for message in last_messages:
            print(message)

    if sync:
        print("* Can not synchronize any future object.")
    return None

def start(
        log_level="off",                    # type: str
        debug=False,                        # type: bool
        o_c=False,                          # type: bool
        graph=False,                        # type: bool
        trace=False,                        # type: bool
        monitor=-1,                         # type: int
        project_xml="",                     # type: str
        resources_xml="",                   # type: str
        summary=False,                      # type: bool
        task_execution="compss",            # type: str
        storage_impl="",                    # type: str
        storage_conf="",                    # type: str
        streaming_backend="",               # type: str
        streaming_master_name="",           # type: str
        streaming_master_port="",           # type: str
        task_count=50,                      # type: int
        app_name=INTERACTIVE_FILE_NAME,     # type: str
        uuid="",                            # type: str
        base_log_dir="",                    # type: str
        specific_log_dir="",                # type: str
        extrae_cfg="",                      # type: str
        comm="NIO",                         # type: str
        conn=DEFAULT_CONN,                  # type: str
        master_name="",                     # type: str
        master_port="",                     # type: str
        scheduler=DEFAULT_SCHED,            # type: str
        jvm_workers=DEFAULT_JVM_WORKERS,    # type: str
        cpu_affinity="automatic",           # type: str
        gpu_affinity="automatic",           # type: str
        fpga_affinity="automatic",          # type: str
        fpga_reprogram="",                  # type: str
        profile_input="",                   # type: str
        profile_output="",                  # type: str
        scheduler_config="",                # type: str
        external_adaptation=False,          # type: bool
        propagate_virtual_environment=True,  # type: bool
        mpi_worker=False,                   # type: bool
        worker_cache=False,                 # type: typing.Union[bool, str]
        shutdown_in_node_failure=False,     # type: bool
        io_executors=0,                     # type: int
        env_script="",                      # type: str
        reuse_on_block=True,                # type: bool
        nested_enabled=False,               # type: bool
        tracing_task_dependencies=False,    # type: bool
        trace_label="",                     # type: str
        extrae_cfg_python="",               # type: str
        wcl=0,                              # type: int
        cache_profiler=False,               # type: bool
        verbose=False                       # type: bool
):  # type: (...) -> None
    """
    Start the runtime in interactive mode.

    :param log_level: Logging level [ "trace"|"debug"|"info"|"api"|"off" ]
                      (default: "off")
    :param debug: Debug mode [ True | False ] (default: False)
                  (overrides log_level)
    :param o_c: Objects to string conversion [ True | False ] (default: False)
    :param graph: Generate graph [ True | False ] (default: False)
    :param trace: Generate trace
                  [ True | False | "scorep" | "arm-map" | "arm-ddt" ]
                  (default: False)
    :param monitor: Monitor refresh rate (default: -1)
    :param project_xml: Project xml file path (default: "")
    :param resources_xml: Resources xml file path (default: "")
    :param summary: Execution summary [ True | False ] (default: False)
    :param task_execution: Task execution (default: "compss")
    :param storage_impl: Storage implementation path (default: "")
    :param storage_conf: Storage configuration file path (default: "")
    :param streaming_backend: Streaming backend (default: "")
    :param streaming_master_name: Streaming master name (default: "")
    :param streaming_master_port: Streaming master port (default: "")
    :param task_count: Task count (default: 50)
    :param app_name: Application name (default: INTERACTIVE_FILE_NAME)
    :param uuid: UUID (default: "")
    :param base_log_dir: Base logging directory (default: "")
    :param specific_log_dir: Specific logging directory (default: "")
    :param extrae_cfg: Extrae configuration file path (default: "")
    :param comm: Communication library (default: NIO)
    :param conn: Connector (default: DefaultSSHConnector)
    :param master_name: Master name (default: "")
    :param master_port: Master port (default: "")
    :param scheduler: Scheduler (see runcompss)
                      (default: es.bsc.compss.scheduler.loadbalancing.LoadBalancingScheduler)  # noqa: E501
    :param jvm_workers: Java VM parameters
                        (default: "-Xms1024m,-Xmx1024m,-Xmn400m")
    :param cpu_affinity: CPU core affinity (default: "automatic")
    :param gpu_affinity: GPU affinity (default: "automatic")
    :param fpga_affinity: FPGA affinity (default: "automatic")
    :param fpga_reprogram: FPGA reprogram command (default: "")
    :param profile_input: Input profile (default: "")
    :param profile_output: Output profile (default: "")
    :param scheduler_config: Scheduler configuration (default: "")
    :param external_adaptation: External adaptation [ True | False ]
                                (default: False)
    :param propagate_virtual_environment: Propagate virtual environment
                                          [ True | False ] (default: True)
    :param mpi_worker: Use the MPI worker [ True | False ] (default: False)
    :param worker_cache: Use the worker cache [ True | int(size) | False ]
                         (default: False)
    :param shutdown_in_node_failure: Shutdown in node failure [ True | False ]
                                     (default: False)
    :param io_executors: <Integer> Number of IO executors (default: 0)
    :param env_script: <String> Environment script to be sourced in workers
                       (default: "")
    :param reuse_on_block: Reuse on block [ True | False ] (default: True)
    :param nested_enabled: Nested enabled [ True | False ] (default: False)
    :param tracing_task_dependencies: Include task dependencies in trace
                                      [ True | False ] (default: False)
    :param trace_label: <String> Add trace label (default: "")
    :param extrae_cfg_python: <String> Extrae configuration file for the
                              workers (default: "")
    :param wcl: <Integer> Wall clock limit. Stops the runtime if reached.
                0 means forever. (default: 0)
    :param cache_profiler: Use the cache profiler [ True | False ]
                           (default: False)
    :param verbose: Verbose mode [ True | False ] (default: False)
    :return: None
    """
    # Export global variables
    global GRAPHING

    if context.in_pycompss():
        print("The runtime is already running")
        return None

    GRAPHING = graph
    __export_globals__()

    interactive_helpers.DEBUG = debug
    if debug:
        log_level = "debug"

    __show_flower__()

    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start

    ##############################################################
    # INITIALIZATION
    ##############################################################

    # Initial dictionary with the user defined parameters
    all_vars = parameters_to_dict(
        log_level, debug, o_c, graph, trace, monitor,
        project_xml, resources_xml, summary, task_execution,
        storage_impl, storage_conf,
        streaming_backend, streaming_master_name, streaming_master_port,
        task_count, app_name, uuid, base_log_dir, specific_log_dir,
        extrae_cfg, comm, conn, master_name, master_port,
        scheduler, jvm_workers,
        cpu_affinity, gpu_affinity, fpga_affinity, fpga_reprogram,
        profile_input, profile_output, scheduler_config,
        external_adaptation, propagate_virtual_environment,
        mpi_worker, worker_cache, shutdown_in_node_failure,
        io_executors, env_script, reuse_on_block, nested_enabled,
        tracing_task_dependencies, trace_label, extrae_cfg_python,
        wcl, cache_profiler)
    # Save all vars in global current flags so that events.py can restart
    # the notebook with the same flags
    export_current_flags(all_vars)

    # Check the provided flags
    flags, issues = check_flags(all_vars)
    if not flags:
        print_flag_issues(issues)
        return None

    # Prepare the environment
    env_vars = prepare_environment(True, o_c, storage_impl, "undefined",
                                   debug, trace, mpi_worker)
    all_vars.update(env_vars)

    # Update the log level and graph values if monitoring is enabled
    monitoring_vars = prepare_loglevel_graph_for_monitoring(monitor,
                                                            graph,
                                                            debug,
                                                            log_level)
    all_vars.update(monitoring_vars)

    # Check if running in a supercomputer and update the variables accordingly
    # with the ones defined in the launcher and exported as environment
    # variables.
    if RUNNING_IN_SUPERCOMPUTER:
        updated_vars = updated_variables_in_sc()
        if verbose:
            print("- Overridden project xml with: %s" %
                  updated_vars["project_xml"])
            print("- Overridden resources xml with: %s" %
                  updated_vars["resources_xml"])
            print("- Overridden master name with: %s" %
                  updated_vars["master_name"])
            print("- Overridden master port with: %s" %
                  updated_vars["master_port"])
            print("- Overridden uuid with: %s" %
                  updated_vars["uuid"])
            print("- Overridden base log dir with: %s" %
                  updated_vars["base_log_dir"])
            print("- Overridden specific log dir with: %s" %
                  updated_vars["specific_log_dir"])
            print("- Overridden storage conf with: %s" %
                  updated_vars["storage_conf"])
            print("- Overridden log level with: %s" %
                  str(updated_vars["log_level"]))
            print("- Overridden debug with: %s" %
                  str(updated_vars["debug"]))
            print("- Overridden trace with: %s" %
                  str(updated_vars["trace"]))
        all_vars.update(updated_vars)

    # Update the tracing environment if set and set the appropriate trace
    # integer value
    tracing_vars = prepare_tracing_environment(all_vars["trace"],
                                               all_vars["extrae_lib"],
                                               all_vars["ld_library_path"])
    all_vars["trace"], all_vars["ld_library_path"] = tracing_vars

    # Update the infrastructure variables if necessary
    inf_vars = check_infrastructure_variables(all_vars["project_xml"],
                                              all_vars["resources_xml"],
                                              all_vars["compss_home"],
                                              all_vars["app_name"],
                                              all_vars["file_name"],
                                              all_vars["external_adaptation"])
    all_vars.update(inf_vars)

    # With all this information, create the configuration file for the
    # runtime start
    create_init_config_file(**all_vars)

    # Start the event manager (ipython hooks)
    ipython = globals()["__builtins__"]["get_ipython"]()
    setup_event_manager(ipython)

    ##############################################################
    # RUNTIME START
    ##############################################################

    print("* - Starting COMPSs runtime... *")
    sys.stdout.flush()  # Force flush
    compss_start(log_level, all_vars["trace"], True)

    global LOG_PATH
    LOG_PATH = get_log_path()
    set_temporary_directory(LOG_PATH)
    print("* - Log path : " + LOG_PATH)

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(all_vars["compss_home"],
                            "Bindings",
                            "python",
                            str(all_vars["major_version"]),
                            "log")
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    logger = logging.getLogger("pycompss.runtime.launch")

    __print_setup__(verbose, all_vars)

    logger.debug("--- START ---")
    logger.debug("PyCOMPSs Log path: %s" % LOG_PATH)

    logger.debug("Starting storage")
    global PERSISTENT_STORAGE
    PERSISTENT_STORAGE = master_init_storage(all_vars["storage_conf"], logger)

    logger.debug("Starting streaming")
    global STREAMING
    STREAMING = init_streaming(all_vars["streaming_backend"],
                               all_vars["streaming_master_name"],
                               all_vars["streaming_master_port"])

    # Start monitoring the stdout and stderr
    STDW.start_watching()

    # MAIN EXECUTION
    # let the user write an interactive application
    print("* - PyCOMPSs Runtime started... Have fun! *")
    print(LINE_SEPARATOR)

    # Emit the application start event (the 0 is in the stop function)
    emit_manual_event(APPLICATION_RUNNING_EVENT)
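
# NOTE: A minimal usage sketch of start() from a notebook (hedged: the import
# alias and the flag values below are illustrative assumptions, not mandated
# by this module). start() writes the runtime configuration file, boots the
# COMPSs runtime and leaves the session ready to submit tasks:
#
#     >>> import pycompss.interactive as ipycompss
#     >>> ipycompss.start(graph=True, monitor=1000, debug=False)
#     >>> # ... define @task-decorated functions and invoke them here ...
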
def stop(sync=False, _hard_stop=False):
    # type: (bool, bool) -> None
    """
    Runtime stop.

    :param sync: Scope variables synchronization [ True | False ]
                 (default: False)
    :param _hard_stop: Stop compss when runtime has died [ True | False ].
                       (default: False)
    :return: None
    """
    logger = logging.getLogger(__name__)
    ipython = globals()["__builtins__"]["get_ipython"]()

    if not context.in_pycompss():
        return __hard_stop__(interactive_helpers.DEBUG, sync, logger, ipython)

    from pycompss.api.api import compss_stop

    print(LINE_SEPARATOR)
    print("*************** STOPPING PyCOMPSs ******************")
    print(LINE_SEPARATOR)
    # Wait 5 seconds to give some time to process the remaining messages
    # of the STDW and check if there is some error that could have stopped
    # the runtime before continuing.
    print("Checking if any issue happened.")
    time.sleep(5)
    messages = STDW.get_messages()
    if messages:
        for message in messages:
            sys.stderr.write("".join((message, "\n")))

    # Uncomment the following lines to see the ipython dictionary
    # in a structured way:
    #   import pprint
    #   pprint.pprint(ipython.__dict__, width=1)
    if sync and not _hard_stop:
        sync_msg = "Synchronizing all future objects left on the user scope."
        print(sync_msg)
        logger.debug(sync_msg)
        from pycompss.api.api import compss_wait_on

        reserved_names = ("quit", "exit", "get_ipython",
                          "APP_PATH", "ipycompss", "In", "Out")
        raw_code = ipython.__dict__["user_ns"]
        for k in raw_code:
            obj_k = raw_code[k]
            if not k.startswith('_'):   # not internal objects
                if type(obj_k) == Future:
                    print("Found a future object: %s" % str(k))
                    logger.debug("Found a future object: %s" % str(k))
                    new_obj_k = compss_wait_on(obj_k)
                    if new_obj_k == obj_k:
                        print("\t - Could not retrieve object: %s" % str(k))
                        logger.debug("\t - Could not retrieve object: %s" %
                                     str(k))
                    else:
                        ipython.__dict__["user_ns"][k] = new_obj_k
                elif k not in reserved_names:
                    try:
                        if OT.is_pending_to_synchronize(obj_k):
                            print("Found an object to synchronize: %s" % str(k))              # noqa: E501
                            logger.debug("Found an object to synchronize: %s" % (k,))         # noqa: E501
                            ipython.__dict__["user_ns"][k] = compss_wait_on(obj_k)            # noqa: E501
                    except TypeError:
                        # Unhashable type: List - could be a collection
                        if isinstance(obj_k, list):
                            print("Found a list to synchronize: %s" % str(k))
                            logger.debug("Found a list to synchronize: %s" % (k,))            # noqa: E501
                            ipython.__dict__["user_ns"][k] = compss_wait_on(obj_k)            # noqa: E501
    else:
        print("Warning: some of the variables used with PyCOMPSs may")
        print("         have not been brought to the master.")

    # Stop streaming
    if STREAMING:
        stop_streaming()

    # Stop persistent storage
    if PERSISTENT_STORAGE:
        master_stop_storage(logger)

    # Emit the 0 for the APPLICATION_RUNNING_EVENT emitted on start function.
    emit_manual_event(0)

    # Stop runtime
    compss_stop(_hard_stop=_hard_stop)

    # Cleanup events and files
    release_event_manager(ipython)
    __clean_temp_files__()

    # Stop watching stdout and stderr
    STDW.stop_watching(clean=True)
    # Retrieve the remaining messages that could have been captured.
    last_messages = STDW.get_messages()
    if last_messages:
        for message in last_messages:
            print(message)

    # Let the Python binding know we are not at master anymore
    context.set_pycompss_context(context.OUT_OF_SCOPE)

    print(LINE_SEPARATOR)
    logger.debug("--- END ---")
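
# NOTE: A hedged sketch of the synchronization that stop(sync=True) performs
# on behalf of the user: every Future (or object pending synchronization)
# left in the notebook namespace is resolved with compss_wait_on before the
# runtime shuts down. The roughly equivalent manual pattern, assuming the
# ipycompss alias from the sketch above, would be:
#
#     >>> from pycompss.api.api import compss_wait_on
#     >>> result = compss_wait_on(result)   # resolve one future explicitly
#     >>> ipycompss.stop(sync=False)        # then stop without scope-wide sync
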
def __pre_run_cell__():
    # type: () -> None
    """
    Like pre_run_cell, but triggered prior to any execution.
    Sometimes code can be executed by libraries, etc., which skip the
    history/display mechanisms; in those cases pre_run_cell will not fire.

    :return: None
    """
    global POST_MESSAGE
    messages = STDW.get_messages()
    found_errors = False
    runtime_crashed = False
    if messages:
        for message in messages:
            if message.startswith("[ERRMGR]"):
                found_errors = True
                # Errors found, but maybe not critical, like for example
                # tasks that failed but recovered.
                if message == "[ERRMGR] - Shutting down COMPSs...":
                    # A critical error occurred --> notify that COMPSs runtime
                    # stopped working to avoid issues when running any
                    # PyCOMPSs function.
                    runtime_crashed = True

    if runtime_crashed:
        # Display popup with the error messages
        current_flags = str(os.environ["PYCOMPSS_CURRENT_FLAGS"])
        header = []
        footer = []
        popup_body = header + messages + footer
        error_messages_html = "<p>" + "<br>".join(popup_body) + "</p>"
        error_messages_html = error_messages_html.replace("'", "")
        popup_title_html = "COMPSs RUNTIME STOPPED"
        popup_code = """require(["base/js/dialog"],
            function(dialog) OPENBRACKET
                function restartCOMPSs()OPENBRACKET
                    var kernel = IPython.notebook.kernel;
                    kernel.execute("import base64; import json; from pycompss.interactive import stop, start; stop(_hard_stop=True); _COMPSS_START_FLAGS=json.loads(base64.b64decode('" + '{2}' + "'.encode())); start(**_COMPSS_START_FLAGS)");
                CLOSEBRACKET
                function continueWithoutCOMPSs()OPENBRACKET
                    var kernel = IPython.notebook.kernel;
                    kernel.execute("from pycompss.interactive import stop; stop(_hard_stop=True)");
                CLOSEBRACKET
                dialog.modal(OPENBRACKET
                    title: '{0}',
                    body: $('{1}'),
                    buttons: OPENBRACKET
                        'Continue without COMPSs': OPENBRACKET
                            click: function() OPENBRACKET
                                continueWithoutCOMPSs();
                            CLOSEBRACKET
                        CLOSEBRACKET,
                        'Restart COMPSs': OPENBRACKET
                            class: 'btn-primary',
                            click: function() OPENBRACKET
                                restartCOMPSs();
                            CLOSEBRACKET
                        CLOSEBRACKET
                    CLOSEBRACKET
                CLOSEBRACKET);
            CLOSEBRACKET
        );""".format(popup_title_html, error_messages_html, current_flags)  # noqa: E501
        popup_js = popup_code.replace("OPENBRACKET", '{').replace("CLOSEBRACKET", '}')  # noqa: E501
        popup = Javascript(popup_js)
        display(popup)  # noqa

        warn_msg = "WARNING: Some objects may have not been synchronized and need to be recomputed."  # noqa: E501
        POST_MESSAGE = "\x1b[40;43m" + warn_msg + "\x1b[0m"
    elif found_errors:
        # Display popup with the warning messages
        header = []
        footer = []
        popup_body = header + messages + footer
        error_messages_html = "<p>" + "<br>".join(popup_body) + "</p>"
        error_messages_html = error_messages_html.replace("'", "")
        popup_title_html = "WARNING: Some tasks may have failed"
        popup_code = """require(["base/js/dialog"],
            function(dialog) OPENBRACKET
                dialog.modal(OPENBRACKET
                    title: '{0}',
                    body: $('{1}'),
                    buttons: OPENBRACKET
                        'Continue': OPENBRACKET CLOSEBRACKET,
                    CLOSEBRACKET
                CLOSEBRACKET);
            CLOSEBRACKET
        );""".format(popup_title_html, error_messages_html)
        popup_js = popup_code.replace("OPENBRACKET", '{').replace("CLOSEBRACKET", '}')  # noqa: E501
        popup = Javascript(popup_js)
        display(popup)  # noqa

        info_msg = "INFO: The runtime has recovered the failed tasks."
        POST_MESSAGE = "\x1b[40;46m" + info_msg + "\x1b[0m"
    else:
        # No issue
        pass
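
# NOTE: A hedged sketch (an assumption: the actual wiring lives in the event
# manager helpers used by setup_event_manager, not in this function) of how a
# zero-argument hook such as __pre_run_cell__ can be attached to IPython
# through its standard events API:
#
#     >>> ip = get_ipython()
#     >>> ip.events.register("pre_execute", __pre_run_cell__)
#     >>> # ... and later, to detach it:
#     >>> ip.events.unregister("pre_execute", __pre_run_cell__)
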