def main():
    parser = argparse.ArgumentParser(description='Run LOST cronjobs')
    parser.add_argument('--debug',
                        action='store_true',
                        help='start cronjobs just once for debugging')
    args = parser.parse_args()
    lostconfig = config.LOSTConfig()
    fm = AppFileMan(lostconfig)
    log_name = 'cron_jobs'
    logger = get_file_logger(log_name, fm.get_app_log_path('cron_jobs.log'))
    logger.info('Starting cron jobs!')
    if args.debug:
        t = threading.Thread(target=worker_lifesign_loop,
                             args=(log_name, ),
                             daemon=True)
        t.start()
        client = Client('{}:{}'.format(lostconfig.scheduler_ip,
                                       lostconfig.scheduler_port))
        process_pipes(log_name, client)
    else:
        jobs = [
            process_pipes_loop, worker_lifesign_loop, release_annos_loop,
            remove_empty_annos_loop
        ]
        if lostconfig.worker_management == 'dynamic':
            jobs.append(dask_session.release_client_by_timeout_loop)
        jobs += lostconfig.extra_cron_jobs
        threads = []
        for j in jobs:
            t = threading.Thread(target=j, args=(log_name, ), daemon=True)
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
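# A minimal sketch of a custom cron job that could be registered via
# `lostconfig.extra_cron_jobs`. The signature (a single `log_name` argument)
# and the loop-with-sleep pattern are inferred from how the jobs above are
# started in daemon threads; the job body and the 60-second interval are
# illustrative assumptions, not part of the original code.
import logging
import time

def my_cleanup_loop(log_name):
    logger = logging.getLogger('{}.my_cleanup_loop'.format(log_name))
    while True:
        logger.info('Running hypothetical cleanup step')
        time.sleep(60)  # interval chosen for illustration only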
def exec_script_in_subprocess(pipe_element_id):
    try:
        lostconfig = LOSTConfig()
        dbm = DBMan(lostconfig)
        pipe_e = dbm.get_pipe_element(pipe_e_id=pipe_element_id)
        logger = logging
        if lostconfig.worker_management == 'static':
            worker = CurrentWorker(dbm, lostconfig)
            if not worker.enough_resources(pipe_e.script):
                raise Exception('Not enough resources')
        pipe_e.state = state.PipeElement.IN_PROGRESS
        dbm.save_obj(pipe_e)
        file_man = AppFileMan(lostconfig)
        pipe = pipe_e.pipe

        # Write a debug script that runs the pipeline element under pudb3.
        cmd = gen_run_cmd("pudb3", pipe_e, lostconfig)
        debug_script_path = file_man.get_debug_path(pipe_e)
        debug_script_path = os.path.join(debug_script_path, 'debug.sh')
        with open(debug_script_path, 'w') as sfile:
            sfile.write(cmd)

        # Write the start script that actually executes the element.
        cmd = gen_run_cmd("python3", pipe_e, lostconfig)
        start_script_path = file_man.get_debug_path(pipe_e)
        start_script_path = os.path.join(start_script_path, 'start.sh')
        with open(start_script_path, 'w') as sfile:
            sfile.write(cmd)

        p = subprocess.Popen('bash {}'.format(start_script_path),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=True)
        logger.info("{} ({}): Started script\n{}".format(
            pipe.name, pipe.idx, cmd))
        if lostconfig.worker_management == 'static':
            worker.add_script(pipe_e, pipe_e.script)
        out, err = p.communicate()
        if lostconfig.worker_management == 'static':
            worker.remove_script(pipe_e, pipe_e.script)
        if p.returncode != 0:
            raise Exception(err.decode('utf-8'))
        logger.info('{} ({}): Executed script successfully: {}'.format(
            pipe.name, pipe.idx, pipe_e.script.path))
        dbm.close_session()
    except:
        pipe = pipe_e.pipe
        logger.info('{} ({}): Exception occurred in script: {}'.format(
            pipe.name, pipe.idx, pipe_e.script.path))
        msg = traceback.format_exc()
        logger.error(msg)
        script_api.report_script_err(pipe_e, pipe, dbm, msg)
        dbm.close_session()
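# How this function is typically dispatched (compare `start_script` in the
# PipeEngine below): the pipe element id is submitted to a Dask client so the
# subprocess runs on a worker. A minimal sketch, assuming a reachable Dask
# scheduler; the address and the element id 42 are placeholders.
from dask.distributed import Client

client = Client('scheduler-host:8786')  # hypothetical scheduler address
fut = client.submit(exec_script_in_subprocess, 42)
fut.result()  # block until the script subprocess has finished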
class PipeProjectPackageMan(object):
    '''Remember which pipe project versions are present for which client'''

    def __init__(self):
        self.mem = dict()
        self.fm = AppFileMan(config)

    def _get_import_and_update(self, client, pp_path, script_import_name,
                               pp_hash=None):
        client_id = id(client)
        if pp_hash is None:
            pp_hash = exec_utils.get_module_hash(pp_path)
        if client_id in self.mem:
            if pp_path in self.mem[client_id]:
                if pp_hash == self.mem[client_id][pp_path]['hash']:
                    return False, self.mem[client_id][pp_path]['import']
                else:
                    self.mem[client_id][pp_path]['hash'] = pp_hash
                    self.mem[client_id][pp_path]['import'] = script_import_name
                    return True, script_import_name
            else:
                self.mem[client_id][pp_path] = dict()
                self.mem[client_id][pp_path]['hash'] = pp_hash
                self.mem[client_id][pp_path]['import'] = script_import_name
                return True, script_import_name
        else:
            self.mem[client_id] = {pp_path: dict()}
            self.mem[client_id][pp_path]['hash'] = pp_hash
            self.mem[client_id][pp_path]['import'] = script_import_name
            return True, script_import_name

    def prepare_import(self, client, pp_path, script_name, logger=None):
        if logger is not None:
            logger.info('pp_path: {}'.format(pp_path))
        timestamp = datetime.now().strftime("%m%d%Y%H%M%S")
        packed_pp_path = self.fm.get_packed_pipe_path(
            f'{os.path.basename(pp_path)}.zip', timestamp)
        if logger is not None:
            logger.info('packed_pp_path: {}'.format(packed_pp_path))
        new_import_name = exec_utils.get_import_name_by_script(
            script_name, timestamp)
        update, import_name = self._get_import_and_update(
            client, pp_path, new_import_name)
        if update:
            # Pack the pipe project and ship it to the workers of this client.
            exec_utils.zipdir(pp_path, packed_pp_path, timestamp)
            upload_ret = client.upload_file(packed_pp_path)
            if logger is not None:
                logger.info(f'Upload file: {upload_ret}')
        if logger is not None:
            logger.info(f'import_name: {import_name}')
        return import_name
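# A minimal usage sketch, mirroring how `exec_dask_direct` in the PipeEngine
# below calls this manager: the pipe project is only re-zipped and re-uploaded
# to the client's workers when its hash changed. The module-level instance
# name `ppp_man` matches its use later in this file; the scheduler address and
# paths are illustrative placeholders.
from dask.distributed import Client

client = Client('scheduler-host:8786')  # hypothetical scheduler address
ppp_man = PipeProjectPackageMan()
import_name = ppp_man.prepare_import(client,
                                     '/path/to/pipe_project',  # hypothetical
                                     'my_script.py',
                                     logger=logging)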
def update_version_log():
    fm = AppFileMan(LOSTConfig())
    path = fm.get_version_log_path()
    if not os.path.exists(path):
        print('Patchsystem: Created version log file: {}'.format(path))
        versions = [lost.__version__]
        with open(path, 'w') as json_file:
            json.dump(versions, json_file)
    else:
        with open(path) as json_file:
            versions = json.load(json_file)
        print("Versions: ", versions)
        if versions[-1] == lost.__version__:
            print('Patchsystem: No version change!')
        else:
            print('Patchsystem: We may need to patch!')
            dbp = DBPatcher()
            dbp.patch()
            versions.append(lost.__version__)
            with open(path, 'w') as json_file:
                json.dump(versions, json_file)
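# The version log is a plain JSON list of version strings, appended on every
# upgrade. A sketch of what the file contains and how to inspect it; the path
# and version values are illustrative assumptions.
import json

with open('/path/to/version_log.json') as f:  # hypothetical path
    versions = json.load(f)
# e.g. versions == ["1.3.0", "1.4.0", "1.4.1"]; versions[-1] is the version
# that the patch check above compares against lost.__version__.
print(versions[-1])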
class PipeImporter(object):

    def __init__(self, pipe_template_dir, dbm, forTest=False):
        '''Load all pipeline definition files (json) of a pipe project.

        Args:
            pipe_template_dir: Path to pipeline directory
        '''
        self.forTest = forTest
        self.dbm = dbm
        self.file_man = AppFileMan(self.dbm.lostconfig)
        if pipe_template_dir.endswith('/'):
            pipe_template_dir = pipe_template_dir[:-1]
        self.src_pipe_template_path = pipe_template_dir
        self.dst_pipe_template_path = os.path.join(
            self.file_man.pipe_path,
            os.path.basename(self.src_pipe_template_path))
        self.json_files = glob(os.path.join(pipe_template_dir, '*.json'))
        self.pipes = []
        self.namespace = os.path.basename(
            self.src_pipe_template_path).strip('/')
        for json_path in self.json_files:
            with open(json_path) as jfile:
                pipe = json.load(jfile)
            pipe['namespace'] = self.namespace
            pipe['name'] = self._namespaced_name(
                os.path.splitext(os.path.basename(json_path))[0])
            self.pipes.append(pipe)
            # Set name to the name of the script file.
            for pe in pipe['elements']:
                if 'script' in pe:
                    pe['script']['name'] = self._namespaced_name(
                        pe['script']['path'])
        self.checker = PipeDefChecker(logging)

    def _namespaced_name(self, name):
        return '{}.{}'.format(self.namespace, name)

    def _get_script_name(self, script):
        return self._namespaced_name(os.path.basename(script.path))

    def start_import(self):
        logging.info('\n\n++++++++++++++++++++++ \n\n')
        logging.info('Start pipe project import for: {}'.format(
            self.src_pipe_template_path))
        for pipe in self.pipes:
            if not self.checker.check(pipe):
                logging.error(
                    'Wrong pipeline definition! Did not import pipe project!')
                return False
        for pipe in self.pipes:
            self.import_pipe(pipe)

    def import_pipe(self, pipe):
        try:
            logging.info('\n---\n')
            # Do everything relative to the pipeline definition file path.
            oldwd = os.getcwd()
            os.chdir(self.src_pipe_template_path)
            for pe_j in pipe['elements']:
                if 'script' in pe_j:
                    element_j = pe_j['script']
                    script = parse_script(element_j)
                    db_script = self.dbm.get_script(
                        name=self._get_script_name(script))
                    script_arguments = get_default_script_arguments(
                        script.path)
                    script_envs = get_default_script_envs(script.path)
                    script_resources = get_default_script_resources(
                        script.path)
                    # Guard against empty/missing EXTRA_PIP / EXTRA_CONDA
                    # declarations before joining them into one string.
                    extra_pip = get_script_args(script.path,
                                                'EXTRA_PIP',
                                                to_lower=False)
                    extra_pip = ' '.join(extra_pip) if extra_pip else ''
                    extra_conda = get_script_args(script.path,
                                                  'EXTRA_CONDA',
                                                  to_lower=False)
                    extra_conda = ' '.join(extra_conda) if extra_conda else ''
                    if 'arguments' in element_j:
                        for arg in element_j['arguments']:
                            if arg not in script_arguments:
                                logging.error(
                                    'Invalid argument >> {} << in pipeline '
                                    'definition json'.format(arg))
                                valid_args = ''
                                for v_arg in script_arguments:
                                    valid_args += '>> {} <<\n'.format(v_arg)
                                logging.error(
                                    'Valid arguments are: \n{}'.format(
                                        valid_args[:-1]))
                                raise Exception(
                                    'Invalid arguments. Start cleanup.')
                    if db_script is None:
                        self.dbm.add(script)
                        self.dbm.commit()
                        script_out_path = os.path.join(
                            self.src_pipe_template_path, script.path)
                        script.path = self.file_man.make_path_relative(
                            script_out_path)
                        script.arguments = json.dumps(script_arguments)
                        script.envs = json.dumps(script_envs)
                        script.resources = json.dumps(script_resources)
                        script.extra_packages = _dump_extra_packages(
                            extra_pip, extra_conda)
                        self.dbm.save_obj(script)
                        logging.info('Added script to database\n')
                    else:
                        # Script is already present in database -> update it.
                        script_out_path = os.path.join(
                            self.src_pipe_template_path, script.path)
                        db_script.path = self.file_man.make_path_relative(
                            script_out_path)
                        db_script.arguments = json.dumps(script_arguments)
                        db_script.envs = json.dumps(script_envs)
                        db_script.description = script.description
                        db_script.resources = json.dumps(script_resources)
                        db_script.extra_packages = _dump_extra_packages(
                            extra_pip, extra_conda)
                        self.dbm.save_obj(db_script)
                        logging.warning((str(db_script.idx), db_script.name,
                                         db_script.path))
            os.chdir(oldwd)  # Change dir back to old working directory.

            for db_pipe in self.dbm.get_all_pipeline_templates():
                db_json = json.loads(db_pipe.json_template)
                if db_json['name'].lower() == pipe['name'].lower():
                    logging.warning(
                        f"PipeTemplate already in database: {db_json['name'].lower()}"
                    )
                    return db_pipe.idx
            pipe_temp = model.PipeTemplate(json_template=json.dumps(pipe),
                                           timestamp=datetime.now())
            self.dbm.save_obj(pipe_temp)
            logging.info('Added Pipeline: *** %s ***' % (pipe['name'], ))
            return pipe_temp.idx
        except Exception as e:
            logging.error(e, exc_info=True)
            if not self.forTest:
                self.remove_pipe_project()
                logging.error('Cleanup successful. Removed buggy pipeline.')

    def remove_pipe_project(self):
        '''Remove an imported pipeline project from the LOST system.

        Note:
            All related database entries will be deleted.
        '''
        for pipe in self.pipes:
            if not self.remove_pipeline(pipe):
                logging.info(
                    'Pipeline project {} was not completely removed '
                    'since some pipes are still in use'.format(
                        self.namespace))

    def remove_pipeline(self, pipe):
        '''Remove all related database entries of a pipeline.'''
        for db_pipe in self.dbm.get_all_pipeline_templates():
            db_json = json.loads(db_pipe.json_template)
            if db_json['name'].lower() == pipe['name'].lower():
                t = self.dbm.get_pipe(pipe_template_id=db_pipe.idx)
                if t is None:
                    for pe_j in db_json['elements']:
                        if 'script' in pe_j:
                            script_man.remove_script(
                                self.dbm,
                                os.path.join(
                                    self.namespace,
                                    os.path.basename(
                                        pe_j['script']['path'])))
                    self.dbm.delete(db_pipe)
                else:
                    logging.warning(
                        'Cannot remove pipeline. It is already in use by '
                        'task with ID: %s' % (t.idx, ))
                    return False
                self.dbm.commit()
                logging.info('Removed pipeline successfully: {}'.format(
                    pipe['name']))
                return True
        return True
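# `_dump_extra_packages` is referenced above but not shown in this snippet.
# A minimal sketch of what it plausibly does: the commented-out reader in
# `exec_dask_direct` below accesses `json.loads(scr.extra_packages)['pip']`
# and `['conda']`, which suggests a single JSON object per script row. Treat
# this implementation as an assumption, not the original code.
import json

def _dump_extra_packages(extra_pip, extra_conda):
    # Serialize both package strings into one JSON string for the DB column.
    return json.dumps({'pip': extra_pip, 'conda': extra_conda})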
import shutil

logging.basicConfig(level=logging.INFO,
                    format='(%(levelname)s): %(message)s')

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Import a pipeline into the portal')
    parser.add_argument(
        'pipe_dir',
        help='Path to a directory with pipeline definition files.')
    parser.add_argument('--copy',
                        default=False,
                        action='store_true',
                        help='Copy to default pipe location before import')
    args = parser.parse_args()
    lostconfig = config.LOSTConfig()
    if args.copy:
        fm = AppFileMan(lostconfig)
        pp_path = fm.get_pipe_project_path()
        pipe_dir = args.pipe_dir
        if pipe_dir.endswith('/'):
            pipe_dir = pipe_dir[:-1]
        dst_path = os.path.join(pp_path, os.path.basename(pipe_dir))
        shutil.copytree(args.pipe_dir, dst_path, dirs_exist_ok=True)
    else:
        dst_path = args.pipe_dir
    dbm = access.DBMan(lostconfig)
    importer = template_import.PipeImporter(dst_path, dbm)
    importer.start_import()
    dbm.close_session()
from flask import Flask
from lost import settings
from lost.logic.file_man import AppFileMan
from flask_mail import Mail
import os
import traceback
import logging
from logging import FileHandler

app = Flask(__name__)

file_man = AppFileMan(settings.LOST_CONFIG)
logfile_path = file_man.get_app_log_path('flask.log')
file_handler = FileHandler(logfile_path)
if settings.LOST_CONFIG.debug:
    logging.basicConfig(level=logging.INFO)
    file_handler.setLevel(logging.INFO)
else:
    file_handler.setLevel(logging.WARNING)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)
app.logger.addHandler(file_handler)
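# A minimal sketch of how the configured logger is used from a request
# handler: `app.logger` now writes to flask.log at the level chosen above.
# The route itself is hypothetical and only serves as an illustration.
@app.route('/health')  # hypothetical endpoint
def health():
    app.logger.info('health check requested')
    return 'ok'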
class PipeEngine(pipe_model.PipeEngine):

    def __init__(self, dbm, pipe, lostconfig, client, logger_name=''):
        '''
        :type dbm: lost.db.access.DBMan
        :type pipe: lost.db.model.Pipe
        '''
        super().__init__(dbm=dbm, pipe=pipe)
        self.lostconfig = lostconfig  #type: lost.logic.config.LOSTConfig
        self.file_man = AppFileMan(self.lostconfig)
        self.logger = logging.getLogger('{}.{}'.format(
            logger_name, self.__class__.__name__))
        self.client = client

    def process_annotask(self, pipe_e):
        anno_task = self.dbm.get_anno_task(pipe_element_id=pipe_e.idx)
        if anno_task.state == state.AnnoTask.IN_PROGRESS or \
           anno_task.state == state.AnnoTask.PAUSED:
            if not at_man.has_annotation_in_iteration(
                    self.dbm, anno_task.idx, pipe_e.iteration):
                at_man.set_finished(self.dbm, anno_task.idx)
                self.logger.warning(
                    'No annotations have been requested for AnnoTask {}'.format(
                        anno_task.idx))
                self.logger.warning(
                    '%d: AnnoTask has been finished (ID: %d, Name: %s)' %
                    (self.pipe.idx, anno_task.idx, anno_task.name))
            # state = FINISHED will be set in the annotation tool.
        if anno_task.state == state.AnnoTask.PENDING:
            anno_task.state = state.AnnoTask.IN_PROGRESS
            self.dbm.save_obj(anno_task)
            self.logger.info('%d: AnnoTask IN_PROGRESS (ID: %d, Name: %s)' %
                             (self.pipe.idx, anno_task.idx, anno_task.name))

    def __gen_run_cmd(self, program, pipe_e):
        script_path = os.path.join(self.lostconfig.app_path,
                                   pipe_e.script.path)
        cmd = self.lostconfig.py3_init + ' && '
        cmd += program + ' ' + script_path + ' --idx ' + str(pipe_e.idx)
        return cmd

    def make_debug_session(self, pipe_e):
        debug_path = self.file_man.create_debug_path(pipe_element=pipe_e)
        debug_file_path = os.path.join(debug_path, 'debug.sh')
        script_content = self.__gen_run_cmd('pudb3', pipe_e)
        with open(debug_file_path, 'w') as dfile:
            dfile.write(script_content)
        script_path = os.path.join(self.lostconfig.app_path,
                                   pipe_e.script.path)
        dsession_str = 'For DEBUG start: bash ' + debug_file_path
        dsession_str += '<br>If you want to EDIT go to: ' + script_path
        pipe_e.debug_session = dsession_str
        self.dbm.save_obj(pipe_e)
        self.logger.info('Created debug script: {}'.format(debug_file_path))
        self.logger.info(pipe_e.debug_session)

    def __release_loop_iteration(self, pipe_e):
        pipe_e.loop.iteration += 1
        self.logger.info('{}: Run loop with id {} in iteration {}'.format(
            self.pipe.idx, pipe_e.loop.idx, pipe_e.loop.iteration))
        loop_pes = self.get_loop_pes(pipe_e.loop.pe_jump, pipe_e)
        for pe in loop_pes:
            pe.iteration += 1
            pe.state = state.PipeElement.PENDING
            if pe.dtype == dtype.PipeElement.ANNO_TASK:
                pe.anno_task.state = state.AnnoTask.PENDING
            elif pe.dtype == dtype.PipeElement.SCRIPT:
                pe.progress = 0.0
            elif pe.dtype == dtype.PipeElement.LOOP:
                # Loop-in-loop case: set the iteration of all inner loops
                # back to zero.
                if pe is not pipe_e:
                    pe.loop.iteration = 0
            self.set_to_visit(pe)
            self.dbm.add(pe)

    def process_loop(self, pipe_e):
        if pipe_e.loop.break_loop:
            pipe_e.state = state.PipeElement.FINISHED
            self.dbm.add(pipe_e)
            self.logger.info('{}: Break loop with id {}'.format(
                self.pipe.idx, pipe_e.loop.idx))
            return
        if pipe_e.loop.max_iteration is not None:
            if pipe_e.loop.iteration is None:
                pipe_e.loop.iteration = 0
            if pipe_e.loop.iteration < pipe_e.loop.max_iteration - 1:
                self.__release_loop_iteration(pipe_e)
            else:
                pipe_e.state = state.PipeElement.FINISHED
                self.logger.info(
                    '{}: Loop ({}) terminated. Max iterations = {}'.format(
                        self.pipe.idx, pipe_e.loop.idx,
                        pipe_e.loop.max_iteration))
        else:
            self.__release_loop_iteration(pipe_e)
        self.dbm.add(pipe_e)

    def select_env_for_script(self, pipe_e):
        '''Select an environment where the script should be executed.'''
        w_man = WorkerMan(self.dbm, self.lostconfig)
        if pipe_e.script.envs is not None:
            script_envs = json.loads(pipe_e.script.envs)
            if len(script_envs) == 0:
                return 'celery'
        else:
            script_envs = list()
            return 'celery'  # Return default queue
        worker_envs = w_man.get_worker_envs()
        for script_env in script_envs:
            if script_env in worker_envs:
                return script_env
        self.logger.warning('No suitable env to execute script: {}'.format(
            pipe_e.script.path))
        return None

    def dask_done_callback(self, fut):
        self.logger.info(f'fut.done: {fut.done()}')
        self.logger.info(f'fut.cancelled: {fut.cancelled()}')
        exc = fut.exception()
        if exc is None:
            self.logger.info(fut.result())
        else:
            self.logger.info(f'exception:\n{exc}')
            self.logger.error('traceback:\n{}'.format(''.join(
                [f'{x}' for x in traceback.format_tb(fut.traceback())])))

    def _install_extra_packages(self, client, packages):
        def install(cmd):
            import subprocess
            output = subprocess.check_output(f'{cmd}',
                                             stderr=subprocess.STDOUT,
                                             shell=True)
            return output

        pip_cmd, conda_cmd = gen_extra_install_cmd(packages, self.lostconfig)
        if pip_cmd is not None:
            self.logger.info(f'Start install cmd: {pip_cmd}')
            self.logger.info(client.run(install, pip_cmd))
            self.logger.info(f'Install finished: {pip_cmd}')
        if conda_cmd is not None:
            self.logger.info(f'Start install cmd: {conda_cmd}')
            self.logger.info(client.run(install, conda_cmd))
            self.logger.info(f'Install finished: {conda_cmd}')

    def exec_dask_direct(self, client, pipe_e, worker=None):
        scr = pipe_e.script
        self._install_extra_packages(client, scr.extra_packages)
        # extra_packages = json.loads(scr.extra_packages)
        # if self.lostconfig.allow_extra_pip:
        #     self._install_extra_packages(client, 'pip install', extra_packages['pip'])
        # if self.lostconfig.allow_extra_conda:
        #     self._install_extra_packages(client, 'conda install', extra_packages['conda'])
        pp_path = self.file_man.get_pipe_project_path(pipe_e.script)
        import_name = ppp_man.prepare_import(client, pp_path,
                                             pipe_e.script.name, self.logger)
        fut = client.submit(exec_utils.exec_dyn_class,
                            pipe_e.idx,
                            import_name,
                            workers=worker)
        fut.add_done_callback(self.dask_done_callback)

    def start_script(self, pipe_e):
        if self.client is not None:
            # Worker management == static
            env = self.select_env_for_script(pipe_e)
            if env is None:
                return
            worker = env
            client = self.client
        else:
            # If client is None, try to get a client from the dask_session.
            user = self.dbm.get_user_by_id(self.pipe.manager_id)
            client = ds_man.get_dask_client(user)
            ds_man.refresh_user_session(user)
            self.logger.info(
                'Process script with dask client: {}'.format(client))
            self.logger.info('dask_session: {}'.format(ds_man.session))
            worker = None
        if self.lostconfig.script_execution == 'subprocess':
            fut = client.submit(exec_script_in_subprocess,
                                pipe_e.idx,
                                workers=worker)
            fut.add_done_callback(self.dask_done_callback)
        else:
            self.exec_dask_direct(client, pipe_e, worker)

    def process_pipe_element(self):
        pipe_e = self.get_next_element()
        while pipe_e is not None:
            if pipe_e.dtype == dtype.PipeElement.SCRIPT:
                if pipe_e.state != state.PipeElement.SCRIPT_ERROR:
                    if pipe_e.state == state.PipeElement.PENDING:
                        self.start_script(pipe_e)
                        self.logger.info(
                            'PipeElementID: {} Executing script: {}'.format(
                                pipe_e.idx, pipe_e.script.name))
            elif pipe_e.dtype == dtype.PipeElement.ANNO_TASK:
                if pipe_e.state == state.PipeElement.PENDING:
                    update_anno_task(self.dbm, pipe_e.anno_task.idx)
                    try:
                        email.send_annotask_available(self.dbm,
                                                      pipe_e.anno_task)
                    except:
                        msg = 'Could not send email. \n'
                        msg += traceback.format_exc()
                        self.logger.error(msg)
                    pipe_e.state = state.PipeElement.IN_PROGRESS
                    self.dbm.save_obj(pipe_e)
                self.process_annotask(pipe_e)
            elif pipe_e.dtype == dtype.PipeElement.DATASOURCE:
                pipe_e.state = state.PipeElement.FINISHED
                self.dbm.save_obj(pipe_e)
            elif pipe_e.dtype == dtype.PipeElement.VISUALIZATION:
                pipe_e.state = state.PipeElement.FINISHED
                self.dbm.save_obj(pipe_e)
            elif pipe_e.dtype == dtype.PipeElement.DATA_EXPORT:
                pipe_e.state = state.PipeElement.FINISHED
                self.dbm.save_obj(pipe_e)
            elif pipe_e.dtype == dtype.PipeElement.LOOP:
                self.process_loop(pipe_e)
            self.dbm.commit()
            pipe_e = self.get_next_element()

    def refresh_dask_user_session(self):
        if self.client is None:
            user = self.dbm.get_user_by_id(self.pipe.manager_id)
            ds_man.refresh_user_session(user)

    def process_pipeline(self):
        try:
            p = self.pipe
            if p.is_locked is None:
                p.is_locked = False
            if not p.is_locked:
                p.is_locked = True
                self.dbm.save_obj(p)
            else:
                return
            if p.state == state.Pipe.PENDING:
                self.refresh_dask_user_session()
                p.state = state.Pipe.IN_PROGRESS
                self.dbm.save_obj(p)
                self.process_pipe_element()
            elif p.state == state.Pipe.IN_PROGRESS:
                self.refresh_dask_user_session()
                self.process_pipe_element()
            elif p.state == state.Pipe.FINISHED:
                return
            elif p.state == state.Pipe.ERROR:
                self.__report_error(p)
            else:
                raise Exception('Unknown PipeState!')
            p.is_locked = False
            self.dbm.save_obj(p)
        except:
            p.is_locked = False
            self.dbm.save_obj(p)
            raise

    def get_next_element(self):
        pe_wait = None
        for candidate in self.get_to_visit():
            if candidate is None:
                if self.pipe_finished():
                    self.pipe.state = state.Pipe.FINISHED
                    self.pipe.timestamp_finished = datetime.now()
                    self.dbm.save_obj(self.pipe)
                    self.logger.info('%d: Task is finished (Name: %s)' %
                                     (self.pipe.idx, self.pipe.name))
                    try:
                        email.send_pipeline_finished(self.pipe)
                    except:
                        msg = 'Could not send email. \n'
                        msg += traceback.format_exc()
                        self.logger.error(msg)
                    return None
                else:
                    continue
            else:
                pe = self.check_candidate(candidate)
                if pe is None:
                    continue
                elif pe.dtype == dtype.PipeElement.LOOP:
                    # If there is a loop among the candidates, it should be
                    # executed as the last possible element, since a loop
                    # sets all other elements within the loop to PENDING
                    # when processed. If the last element before the loop
                    # had subsequent elements, these would never be executed
                    # because the loop would set the last element in the
                    # loop back to PENDING.
                    pe_wait = pe
                    continue
                else:
                    self.set_visited(pe)
                    return pe
        return pe_wait

    def pipe_finished(self):
        for pe in self.get_final_pes():
            if pe.state != state.PipeElement.FINISHED:
                return False
        return True

    def check_candidate(self, candidate):
        # A candidate may only run if all of its predecessors are finished.
        for pe_prev in self.get_prev_pes(candidate):
            if pe_prev is not None:
                if pe_prev.state != state.PipeElement.FINISHED:
                    return None
            else:
                # pe_prev is None -> candidate is a start element.
                if candidate.state == state.PipeElement.PENDING:
                    return candidate
        return candidate

    def __report_error(self, pipe):
        for pipe_element in self.dbm.get_script_errors(pipe.idx):
            # Send mail to inform the user about the script error.
            try:
                email.send_script_error(pipe, pipe_element)
                pipe_element.error_reported = True
                self.dbm.add(pipe_element)
                self.dbm.commit()
            except:
                pipe_element.error_reported = True
                pipe_element.error_msg += traceback.format_exc()
                self.dbm.add(pipe_element)
                self.dbm.commit()
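# A minimal driver sketch showing how the engine is typically invoked from
# the cron-job side. Obtaining `pipe` and `client` is simplified: the
# scheduler address is a placeholder, and `get_pipe(pipe_id=...)` is an
# assumed accessor used here only for illustration.
from dask.distributed import Client
from lost.db.access import DBMan
from lost.logic.config import LOSTConfig

lostconfig = LOSTConfig()
dbm = DBMan(lostconfig)
client = Client('scheduler-host:8786')  # hypothetical scheduler address
pipe = dbm.get_pipe(pipe_id=1)          # hypothetical accessor
engine = PipeEngine(dbm=dbm, pipe=pipe, lostconfig=lostconfig,
                    client=client, logger_name='cron_jobs')
engine.process_pipeline()
dbm.close_session()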