def run(self):
    """
    Get the required data from the operation queue and launch the operation.

    Spawns the operation in a separate Python process (so a crash cannot take
    down the server), records its PID for stop-support, waits for it to finish,
    and marks the operation as failed if the child exits abnormally.
    """
    operation_id = self.operation_id
    # Launch the operation through a fresh interpreter running the async launcher module.
    run_params = [TvbProfile.current.PYTHON_INTERPRETER_PATH, '-m', 'tvb.core.operation_async_launcher',
                  str(operation_id), TvbProfile.CURRENT_PROFILE_NAME]

    current_operation = dao.get_operation_by_id(operation_id)
    storage_interface = StorageInterface()
    project_folder = storage_interface.get_project_folder(current_operation.project.name)
    # Check usage BEFORE incrementing our own running-op count, otherwise we would
    # always see the folder as in use and never sync it.
    in_usage = storage_interface.is_in_usage(project_folder)
    storage_interface.inc_running_op_count(project_folder)
    if not in_usage:
        storage_interface.sync_folders(project_folder)

    # In the exceptional case where the user pressed stop while the Thread startup is done,
    # we should no longer launch the operation.
    if not self.stopped():
        env = os.environ.copy()
        # Anything that was already in $PYTHONPATH should have been reproduced in sys.path.
        env['PYTHONPATH'] = os.pathsep.join(sys.path)

        launched_process = Popen(run_params, stdout=PIPE, stderr=PIPE, env=env)

        LOGGER.debug("Storing pid=%s for operation id=%s launched on local machine." % (operation_id,
                                                                                        launched_process.pid))
        op_ident = OperationProcessIdentifier(operation_id, pid=launched_process.pid)
        dao.store_entity(op_ident)

        if self.stopped():
            # In the exceptional case where the user pressed stop while the Thread startup is done
            # and stop_operation is concurrently asking about OperationProcessIdentifier.
            self.stop_pid(launched_process.pid)

        # communicate() waits for the child to terminate and returns (stdout, stderr);
        # after it returns, the exit status is available on .returncode (an extra
        # wait() call would be redundant).
        subprocess_result = launched_process.communicate()
        LOGGER.info("Finished with launch of operation %s" % operation_id)
        returned = launched_process.returncode
        LOGGER.info("Return code: {}. Stopped: {}".format(returned, self.stopped()))
        LOGGER.info("Thread: {}".format(self))

        if returned != 0 and not self.stopped():
            # Process did not end as expected (e.g. segmentation fault); record the failure.
            burst_service = BurstService()
            operation = dao.get_operation_by_id(self.operation_id)
            LOGGER.error("Operation suffered fatal failure! Exit code: %s Exit message: %s"
                         % (returned, subprocess_result))
            burst_service.persist_operation_state(operation, STATUS_ERROR,
                                                  "Operation failed unexpectedly! Please check the log files.")
        del launched_process

    storage_interface.check_and_delete(project_folder)

    # Give back empty spot now that you finished your operation.
    CURRENT_ACTIVE_THREADS.remove(self)
    LOCKS_QUEUE.put(1)
def _run_hpc_job(operation_identifier):
    # type: (int) -> None
    """
    Submit the operation identified by `operation_identifier` to the HPC backend.

    Bumps the running-operation count on the project folder before submission and
    releases it afterwards. A failed submission is recorded on the operation
    (status error + message) instead of being propagated to the caller.
    """
    op_entity = dao.get_operation_by_id(operation_identifier)
    group_launch = op_entity.fk_operation_group is not None
    view_model_gid = op_entity.view_model_gid

    folder = HPCSchedulerClient.storage_interface.get_project_folder(op_entity.project.name)
    local_storage = StorageInterface()
    local_storage.inc_running_op_count(folder)

    try:
        HPCSchedulerClient._launch_job_with_pyunicore(op_entity, view_model_gid, group_launch)
    except Exception as exception:
        LOGGER.error("Failed to submit job HPC", exc_info=True)
        # Prefer the server-provided message when the failure came over HTTP.
        if isinstance(exception, HTTPError):
            failure_message = exception.response.text
        else:
            failure_message = repr(exception)
        op_entity.mark_complete(STATUS_ERROR, failure_message)
        dao.store_entity(op_entity)

    local_storage.check_and_delete(folder)